👋 Quick Start#

This guide will help you create your first robotic RL environment with Genesis Forge and train an agent in just a few minutes.

Installation#

First, install Genesis Forge and its dependencies:

pip install genesis-forge

For training, you’ll also need an RL library. We recommend RSL-RL:

pip install tensorboard rsl-rl-lib>=2.2.4

Genesis Simulator#

This framework is built around the Genesis Simulator. If you’re new to Genesis, we recommend starting with the Hello, Genesis guide to get familiar with the fundamentals.

Your First Environment#

Let’s create a simple locomotion environment where a quadruped robot learns to walk forward. Here’s a complete environment:

environment.py#
""" A simple Go2 robot locomotion environment"""

import torch
import genesis as gs
from genesis_forge import ManagedEnvironment
from genesis_forge.managers import (
    RewardManager,
    TerminationManager,
    EntityManager,
    ObservationManager,
    PositionActionManager,
)
from genesis_forge.mdp import reset, rewards, terminations


class MyFirstEnv(ManagedEnvironment):
    def __init__(
        self,
        num_envs: int = 1,
        dt: float = 1/50,  # 50Hz control frequency
        max_episode_length_s: int = 20,
        headless: bool = True,
    ):
        super().__init__(
            num_envs=num_envs,
            dt=dt,
            max_episode_length_sec=max_episode_length_s,
        )

        # Build the simulation scene
        self.scene = gs.Scene(
            show_viewer=not headless,
            sim_options=gs.options.SimOptions(dt=self.dt, substeps=2),
        )
        self.scene.add_entity(gs.morphs.Plane())
        self.robot = self.scene.add_entity(
            gs.morphs.URDF(
                file="urdf/go2/urdf/go2.urdf", # this is loaded from the genesis lib
                pos=[0.0, 0.0, 0.4],
                quat=[1.0, 0.0, 0.0, 0.0],
            ),
        )

    def config(self):
        """Configure managers"""

        # Set target velocity (forward at 0.5 m/s)
        self.target_vel = torch.zeros((self.num_envs, 3), device=gs.device)
        self.target_vel[:, 0] = 0.5  # X-axis velocity

        # Entity Manager - Handles robot resets
        self.robot_manager = EntityManager(
            self,
            entity_attr="robot",
            on_reset={
                "position": {
                    "fn": reset.position,
                    "params": {
                        "position": [0.0, 0.0, 0.4],
                        "quat": [1.0, 0.0, 0.0, 0.0],
                    },
                },
            },
        )

        # Action Manager - Maps step actions to joint positions
        self.action_manager = PositionActionManager(
            self,
            joint_names=[".*"],  # Control all joints
            default_pos={
                ".*_hip_joint": 0.0,
                "FL_thigh_joint": 0.8,
                "FR_thigh_joint": 0.8,
                "RL_thigh_joint": 1.0,
                "RR_thigh_joint": 1.0,
                ".*_calf_joint": -1.5,
            },
            scale=0.25,  # Scale actions
            use_default_offset=True, # DOF actions are relative to the default_pos positions

            # PD controller gains
            pd_kp=20,
            pd_kv=0.5,
        )

        # Reward Manager - Defines what behaviors to encourage
        RewardManager(
            self,
            logging_enabled=True,
            cfg={
                # Maintain target height
                "base_height": {
                    "weight": -50.0,
                    "fn": rewards.base_height,
                    "params": {"target_height": 0.3},
                },
                # Track forward velocity
                "velocity_tracking": {
                    "weight": 1.0,
                    "fn": rewards.command_tracking_lin_vel,
                    "params": {"command": self.target_vel[:, :2]},
                },
                # Minimize vertical motion
                "vertical_velocity": {
                    "weight": -1.0,
                    "fn": rewards.lin_vel_z_l2,
                },
                # Encourage smooth actions
                "action_smoothness": {
                    "weight": -0.005,
                    "fn": rewards.action_rate_l2,
                },
            },
        )

        # Termination Manager - Defines when episodes end
        self.termination_manager = TerminationManager(
            self,
            logging_enabled=True,
            term_cfg={
                # Episode should end (timeout) when it hits the max episode length
                "timeout": {
                    "fn": terminations.timeout,
                    "time_out": True,
                },
                # Terminate if the robot is falling over
                "fall_over": {
                    "fn": terminations.bad_orientation,
                    "params": {"limit_angle": 10}, # degrees
                },
            },
        )

        # Observation Manager - Defines what the agent observes
        ObservationManager(
            self,
            cfg={
                "angular_velocity": {
                    "fn": lambda env: self.robot_manager.get_angular_velocity(),
                    "scale": 0.25,
                },
                "linear_velocity": {
                    "fn": lambda env: self.robot_manager.get_linear_velocity(),
                    "scale": 2.0,
                },
                "projected_gravity": {
                    "fn": lambda env: self.robot_manager.get_projected_gravity(),
                },
                "joint_positions": {
                    "fn": lambda env: self.action_manager.get_dofs_position(),
                },
                "joint_velocities": {
                    "fn": lambda env: self.action_manager.get_dofs_velocity(),
                    "scale": 0.05,
                },
                "actions": {
                    "fn": lambda env: self.action_manager.get_actions(),
                },
            },
        )

Training Your Agent#

Now let’s train a policy using RSL-RL:

train.py#
import genesis as gs
from genesis_forge.wrappers import RslRlWrapper, VideoWrapper
from rsl_rl.runners import OnPolicyRunner
from my_first_env import MyFirstEnv

# Initialize Genesis
gs.init(backend=gs.gpu)  # Use gs.cpu if no GPU available

# Create environment with 4096 parallel simulations
env = MyFirstEnv(num_envs=4096, headless=True)

# Add video recording during training (optional)
env = VideoWrapper(
    env,
    video_length_sec=10,
    out_dir="./videos",
    episode_trigger=lambda ep: ep % 2 == 0,  # Record every 2nd episode
)

# Wrap for RSL-RL compatibility
env = RslRlWrapper(env)
env.build()
env.reset()

# PPO training configuration
train_cfg = {
    "algorithm": {
        "class_name": "PPO",
        "learning_rate": 0.001,
        "num_learning_epochs": 5,
        "num_mini_batches": 4,
        "gamma": 0.99,
        "clip_param": 0.2,
    },
    "policy": {
        "class_name": "ActorCritic",
        "activation": "elu",
          "actor_hidden_dims": [512, 256, 128],
          "critic_hidden_dims": [512, 256, 128],
    },
    "runner": {
        "max_iterations": 300,
        "save_interval": 100,
        "experiment_name": "my_first_robot",
    },
    "seed": 42,
    "num_steps_per_env": 24,
}

# Train the agent
runner = OnPolicyRunner(env, train_cfg, "./logs", device=gs.device)
runner.learn(num_learning_iterations=300)

print("Training complete! Check ./logs for results and ./videos for recordings.")

Run the training with:

python train.py

Evaluating Your Trained Policy#

After training, you can visualize your trained policy:

import torch
import genesis as gs
from genesis_forge.wrappers import RslRlWrapper
from rsl_rl.runners import OnPolicyRunner
from my_first_env import MyFirstEnv

# Initialize Genesis with visualization
gs.init(backend=gs.gpu)

# Create a single environment with visualization
env = MyFirstEnv(num_envs=1, headless=False)
env = RslRlWrapper(env)
env.build()
env.reset()

# Load the trained policy
runner = OnPolicyRunner(env, train_cfg, "./logs", device=gs.device)
runner.load("./logs/my_first_robot/model_300.pt")
policy = runner.get_inference_policy(device=gs.device)

# Run the policy
obs, _ = env.reset()
with torch.no_grad():
    while True:
        actions = policy(obs)
        obs, _, _, _ = env.step(actions)

Other examples#

Explore the complete examples in the repository:

Troubleshooting#

GPU Memory Issues#

If you run out of GPU memory, reduce the number of parallel environments:

env = MyFirstEnv(num_envs=1024)  # Instead of 4096

Poor Policy Performance#

  • Increase training iterations

  • Tune reward weights

  • Add curriculum learning for complex tasks

  • Check that observations are properly scaled

Next Steps#

Happy training! 🚀