diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 3ca6affd3..303917c86 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -365,6 +365,10 @@ static int my_log(PyObject *dict, Log *log) { assign_to_dict(dict, "goals_reached_this_episode", log->goals_reached_this_episode); assign_to_dict(dict, "speed_at_goal", log->speed_at_goal); assign_to_dict(dict, "distance_without_collision", log->distance_without_collision); + assign_to_dict(dict, "lane_center_rate", log->lane_center_rate); + assign_to_dict(dict, "comfort_violation_count", log->comfort_violation_count); + assign_to_dict(dict, "velocity_progress_sum", log->velocity_progress_sum); + assign_to_dict(dict, "avg_speed_per_agent", log->avg_speed_per_agent); // assign_to_dict(dict, "avg_displacement_error", log->avg_displacement_error); return 0; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 107bd5c0c..7f7152ab7 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -140,7 +140,10 @@ // Offsets #define COLLISION_RANGE 5 #define Z_RANGE 3 -#define Z_BUFFER 4.0f // 4.0m buffer for different z level checking +#define Z_BUFFER 4.0f // 4.0m buffer for different z level checking +#define SPEED_LIMIT 20.0f // Hardcoded speed limit value +#define COMFORT_JERK_THRESHOLD 5.0f // For JERK model comfort +#define COMFORT_ACCEL_THRESHOLD 3.0f // For JERK and CLASSIC model comfort // Jerk action space (for JERK dynamics model) static const float JERK_LONG[4] = {-15.0f, -4.0f, 0.0f, 4.0f}; @@ -203,10 +206,17 @@ struct Log { float goals_sampled_this_episode; float offroad_rate; float collision_rate; + float red_light_violation_rate; // placeholder for later + float num_goals_reached; + float comfort_violation_count; + float velocity_progress_sum; + float lane_center_rate; + float lane_heading_aligned_rate; float completion_rate; float offroad_per_agent; float collisions_per_agent; float dnf_rate; + float 
avg_displacement_error; float n; float lane_alignment_rate; float speed_at_goal; @@ -214,6 +224,7 @@ struct Log { float active_agent_count; float expert_static_agent_count; float static_agent_count; + float avg_speed_per_agent; }; typedef struct GridMapEntity GridMapEntity; @@ -1278,6 +1289,7 @@ bool check_line_intersection(float p1[2], float p2[2], float q1[2], float q2[2]) } void add_log(Drive *env) { + int safe_timestep = (env->timestep > 0) ? env->timestep : 1; for (int i = 0; i < env->active_agent_count; i++) { int agent_idx = env->active_agent_indices[i]; Agent *agent = &env->agents[agent_idx]; @@ -1293,7 +1305,8 @@ void add_log(Drive *env) { env->log.offroad_per_agent += offroad_per_agent; float collisions_per_agent = env->logs[i].collisions_per_agent; env->log.collisions_per_agent += collisions_per_agent; - + float avg_speed_per_agent = env->logs[i].avg_speed_per_agent; + env->log.avg_speed_per_agent += avg_speed_per_agent / safe_timestep; float frac_goal_reached = agent->goals_reached_this_episode / agent->goals_sampled_this_episode; // Update score, which is an aggregate measure whether the agent fully solved its task @@ -1321,10 +1334,13 @@ void add_log(Drive *env) { env->log.episode_length += env->logs[i].episode_length; env->log.episode_return += env->logs[i].episode_return; env->log.distance_without_collision += env->logs[i].distance_without_collision; + env->log.comfort_violation_count += env->logs[i].comfort_violation_count / safe_timestep; + env->log.velocity_progress_sum += env->logs[i].velocity_progress_sum / safe_timestep; // Log composition counts per agent so vec_log averaging recovers the per-env value env->log.active_agent_count += env->active_agent_count; env->log.expert_static_agent_count += env->expert_static_agent_count; env->log.static_agent_count += env->static_agent_count; + env->log.lane_center_rate += env->logs[i].lane_center_rate / safe_timestep; env->log.n += 1; } } @@ -1913,6 +1929,33 @@ void compute_agent_metrics(Drive *env, int 
agent_idx) { int lane_aligned = (fabs(agent->metrics_array[LANE_ANGLE_IDX]) > 0.965) ? 1 : 0; agent->metrics_array[LANE_ALIGNED_IDX] = lane_aligned; } + float speed_magnitude = sqrtf(agent->sim_vx * agent->sim_vx + agent->sim_vy * agent->sim_vy); + // Velocity metric (GIGAFLOW) - forward progress aligned with lane + const float VELOCITY_MIN_SPEED = 2.5f; // m/s + if (speed_magnitude > VELOCITY_MIN_SPEED && best_candidate_entity_idx != -1) { + float cos_theta = agent->metrics_array[LANE_ANGLE_IDX]; + agent->metrics_array[VELOCITY_PROGRESS_IDX] = fmaxf(cos_theta, 0.0f); + } else { + agent->metrics_array[VELOCITY_PROGRESS_IDX] = 0.0f; + } + + // Comfort metric (GIGAFLOW) - only for JERK dynamics, modified for CLASSIC (Custom) + int accel_violation = + (fabsf(agent->a_long) > COMFORT_ACCEL_THRESHOLD) + (fabsf(agent->a_lat) > COMFORT_ACCEL_THRESHOLD); + + if (env->dynamics_model == JERK) { + + int jerk_violation = + (fabsf(agent->jerk_long) > COMFORT_JERK_THRESHOLD || fabsf(agent->jerk_lat) > COMFORT_JERK_THRESHOLD) ? 1 + : 0; + agent->metrics_array[COMFORT_VIOLATION_IDX] = (float)(accel_violation + jerk_violation); + } else { + agent->metrics_array[COMFORT_VIOLATION_IDX] = (float)(accel_violation); + } + + // Overspeed penalization - Hardcoded target speed value of 20m/s right now - WILL HAVE TO BE UPDATED ONCE NEW DATA + // FORMAT COMES IN + agent->metrics_array[SPEED_LIMIT_IDX] = (speed_magnitude > SPEED_LIMIT + 2.0f) ? 
1.0f : 0.0f; // Check for vehicle collisions int car_collided_with_index = collision_check(env, agent_idx); @@ -1969,10 +2012,6 @@ void compute_observations(Drive *env) { float v_dot_heading = ego_entity->sim_vx * cos_heading + ego_entity->sim_vy * sin_heading; float signed_speed = copysignf(speed_magnitude, v_dot_heading); - // Adding speed limit calculation - float speed_limit = 20.0f; - // We need to add speed limit calculation - // Adding lane angle and center information float lane_center_dist = ego_entity->metrics_array[LANE_DIST_IDX] / LANE_DISTANCE_NORMALIZATION; lane_center_dist = fmaxf(-1.0f, fminf(1.0f, lane_center_dist)); @@ -2005,14 +2044,14 @@ void compute_observations(Drive *env) { obs[10] = (ego_entity->respawn_timestep != -1) ? 1 : 0; obs[11] = normalized_goal_speed_min; obs[12] = normalized_goal_speed_max; - obs[13] = fminf(speed_limit / MAX_SPEED, 1.0f); + obs[13] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[14] = lane_center_dist; obs[15] = ego_entity->metrics_array[LANE_ANGLE_IDX]; } else { obs[7] = (ego_entity->respawn_timestep != -1) ? 
1 : 0; obs[8] = normalized_goal_speed_min; obs[9] = normalized_goal_speed_max; - obs[10] = fminf(speed_limit / MAX_SPEED, 1.0f); + obs[10] = fminf(SPEED_LIMIT / MAX_SPEED, 1.0f); obs[11] = lane_center_dist; obs[12] = ego_entity->metrics_array[LANE_ANGLE_IDX]; } @@ -2510,7 +2549,7 @@ void c_step(Drive *env) { } } - // Compute rewards + // COMPUTE REWARDS for (int i = 0; i < env->active_agent_count; i++) { int agent_idx = env->active_agent_indices[i]; Agent *agent = &env->agents[agent_idx]; @@ -2521,16 +2560,32 @@ if (collision_state == NO_COLLISION) { env->logs[i].distance_without_collision = agent->cumulative_displacement; } + float cos_heading = cosf(agent->sim_heading); + float sin_heading = sinf(agent->sim_heading); + float speed_magnitude = sqrtf(agent->sim_vx * agent->sim_vx + agent->sim_vy * agent->sim_vy); + float v_dot_heading = agent->sim_vx * cos_heading + agent->sim_vy * sin_heading; + float signed_speed = copysignf(speed_magnitude, v_dot_heading); if (collision_state > 0) { if (collision_state == VEHICLE_COLLISION) { - env->rewards[i] += env->reward_vehicle_collision; - env->logs[i].episode_return += env->reward_vehicle_collision; + float reward_collision; + if (env->reward_conditioning) { + reward_collision = agent->reward_coefs[REWARD_COEF_COLLISION] - 0.1f * speed_magnitude; + } else { + reward_collision = env->reward_vehicle_collision; + } + env->rewards[i] += reward_collision; + env->logs[i].episode_return += reward_collision; env->logs[i].collision_rate = 1.0f; env->logs[i].collisions_per_agent += 1.0f; } else if (collision_state == OFFROAD) { - env->rewards[i] += env->reward_offroad_collision; - env->logs[i].episode_return += env->reward_offroad_collision; + if (env->reward_conditioning) { + env->rewards[i] += agent->reward_coefs[REWARD_COEF_OFFROAD]; + env->logs[i].episode_return += agent->reward_coefs[REWARD_COEF_OFFROAD]; + } else { + env->rewards[i] += env->reward_offroad_collision; + env->logs[i].episode_return += 
env->reward_offroad_collision; + } env->logs[i].offroad_rate = 1.0f; env->logs[i].offroad_per_agent += 1.0f; } @@ -2571,43 +2626,99 @@ void c_step(Drive *env) { agent->metrics_array[REACHED_GOAL_IDX] = 1.0f; env->logs[i].speed_at_goal = current_speed; } - int lane_aligned = agent->metrics_array[LANE_ALIGNED_IDX]; + + // Add values to logs first + + // Lane alignment + int lane_aligned = env->agents[agent_idx].metrics_array[LANE_ALIGNED_IDX]; env->logs[i].lane_alignment_rate = lane_aligned; - // Add lane rewards - // Get lane angle metric: cos(θ_f) where θ_f = heading diff from lane - float cos_theta = agent->metrics_array[LANE_ANGLE_IDX]; - float theta_f = acosf(fminf(fmaxf(cos_theta, -1.0f), 1.0f)); // Get |θ_f| from cos - // env->logs[i].lane_heading_aligned_rate += (cos_theta >= LANE_ALIGN_COS_THRESHOLD) ? 1.0f : 0.0f; <- Commented - // becaue this is already catered by us + // Lane center distance + float lane_center_distance = agent->metrics_array[LANE_DIST_IDX]; + env->logs[i].lane_center_rate += fabsf(lane_center_distance) < 0.5f ? 
1.0f : 0.0f; - // Rl-align (GIGAFLOW): min(cos,0) + vel_align*min(cos*v,0) + 0.0025*(1-|θ|/(π/2)) - float against_lane_penalty = fminf(cos_theta, 0.0f); // negative when >90 degrees off - float vel_aligned_penalty = - agent->reward_coefs[REWARD_COEF_VEL_ALIGN] * fminf(cos_theta * agent->sim_speed, 0.0f); - float alignment_bonus = 0.0025f * (1.0f - theta_f / (M_PI / 2.0f)); + // Comfort violation Count + float comfort_violations = agent->metrics_array[COMFORT_VIOLATION_IDX]; + env->logs[i].comfort_violation_count += comfort_violations; - float lane_align_reward = agent->reward_coefs[REWARD_COEF_LANE_ALIGN] * env->dt * - (against_lane_penalty + vel_aligned_penalty + alignment_bonus); + // Velocity progress sum + float velocity_progress = agent->metrics_array[VELOCITY_PROGRESS_IDX]; + env->logs[i].velocity_progress_sum += velocity_progress; - env->rewards[i] += lane_align_reward; - env->logs[i].episode_return += lane_align_reward; + // Average speed per agent + env->logs[i].avg_speed_per_agent += speed_magnitude; - // Rl-center (GIGAFLOW): -α * dt * (|x_f - bias| - 0.05/(exp(|x_f - bias| - 0.5)) - float lane_center_distance = agent->metrics_array[LANE_DIST_IDX]; - float adjusted_dist = fabsf(lane_center_distance - agent->reward_coefs[REWARD_COEF_CENTER_BIAS]); - float exp_decay = 0.05f / expf(adjusted_dist - 0.5f); + // We only add lane level rewards when conditioning is turned on + if (env->reward_conditioning) { - float lane_center_reward = - agent->reward_coefs[REWARD_COEF_LANE_CENTER] * env->dt * ((cos_theta > 0.5f) * adjusted_dist - exp_decay); + // Add lane rewards + // Get lane angle metric: cos(θ_f) where θ_f = heading diff from lane + float cos_theta = agent->metrics_array[LANE_ANGLE_IDX]; + float theta_f = acosf(fminf(fmaxf(cos_theta, -1.0f), 1.0f)); // Get |θ_f| from cos + // env->logs[i].lane_heading_aligned_rate += (cos_theta >= LANE_ALIGN_COS_THRESHOLD) ? 
1.0f : 0.0f; <- + // Commented becaue this is already catered by us - env->rewards[i] += lane_center_reward; - // env->logs[i].lane_center_rate += fabsf(lane_center_distance) < 0.5f ? 1.0f : 0.0f; <- Commented for now ( - // need to add this to add_log) - env->logs[i].episode_return += lane_center_reward; - // + // Rl-align (GIGAFLOW): min(cos,0) + vel_align*min(cos*v,0) + 0.0025*(1-|θ|/(π/2)) + float against_lane_penalty = fminf(cos_theta, 0.0f); // negative when >90 degrees off + float vel_aligned_penalty = + agent->reward_coefs[REWARD_COEF_VEL_ALIGN] * fminf(cos_theta * speed_magnitude, 0.0f); + float alignment_bonus = 0.0025f * (1.0f - theta_f / (M_PI / 2.0f)); + + float lane_align_reward = agent->reward_coefs[REWARD_COEF_LANE_ALIGN] * env->dt * + (against_lane_penalty + vel_aligned_penalty + alignment_bonus); + + env->rewards[i] += lane_align_reward; + env->logs[i].episode_return += lane_align_reward; + + // Rl-center (GIGAFLOW): -α * dt * (|x_f - bias| - 0.05/(exp(|x_f - bias| - 0.5)) + float adjusted_dist = fabsf(lane_center_distance - agent->reward_coefs[REWARD_COEF_CENTER_BIAS]); + float exp_decay = 0.05f / expf(adjusted_dist - 0.5f); + float lane_center_reward = agent->reward_coefs[REWARD_COEF_LANE_CENTER] * env->dt * + ((cos_theta > 0.5f) * adjusted_dist - exp_decay); + + env->rewards[i] += lane_center_reward; + env->logs[i].episode_return += lane_center_reward; + // OTHER REWARDS (From Gigaflow) + + // Red light violation reward (GIGAFLOW) - OMITTED + + // Comfort reward (GIGAFLOW) + + float comfort_penalty = agent->reward_coefs[REWARD_COEF_COMFORT] * comfort_violations; + + env->rewards[i] += comfort_penalty; + env->logs[i].episode_return += comfort_penalty; - // Further support TO BE ADDED for all other types of reward conditioning + // Velocity reward (GIGAFLOW) + float velocity_reward = agent->reward_coefs[REWARD_COEF_VELOCITY] * env->dt * velocity_progress; + + env->rewards[i] += velocity_reward; + env->logs[i].episode_return += velocity_reward; + 
+ // Timestep reward (GIGAFLOW) + float accel = sqrtf(agent->a_long * agent->a_long + agent->a_lat * agent->a_lat); + // Only penalize when moving (v > 0) or accelerating (a > 0) + if (speed_magnitude > 0.01f || accel > 0.01f) { + float timestep_penalty = agent->reward_coefs[REWARD_COEF_TIMESTEP] * env->dt; + + env->rewards[i] += timestep_penalty; + env->logs[i].episode_return += timestep_penalty; + } + + // Reverse reward (GIGAFLOW) + if (signed_speed < -0.01f) { + float reverse_penalty = agent->reward_coefs[REWARD_COEF_REVERSE] * env->dt; + + env->rewards[i] += reverse_penalty; + env->logs[i].episode_return += reverse_penalty; + } + + // Over speed reward (GIGAFLOW++) + float speed_reward = agent->reward_coefs[REWARD_COEF_OVERSPEED] * agent->metrics_array[SPEED_LIMIT_IDX]; + + env->rewards[i] += speed_reward; + env->logs[i].episode_return += speed_reward; + } } if (env->goal_behavior == GOAL_RESPAWN) {