28 changes: 28 additions & 0 deletions examples/isaac/README.md
@@ -0,0 +1,28 @@
## Installation

### 1. Simulator

Download [Nvidia Isaac Sim](https://developer.nvidia.com/isaac-sim) and install it.

Note 1: If you use the container version of Nvidia Isaac Sim (e.g. running in the cloud), the simulation interface cannot be visualized.

Note 2: The latest version, Isaac Sim 2022.2.1, provides a built-in Python 3.7 environment that packages can use, similar to a system-level Python installation. We recommend using this Python environment when running the Python scripts below.
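
The commands below use `PYTHON_PATH` to refer to this bundled interpreter. A minimal sketch of defining it as a shell alias, assuming the default Omniverse launcher install location (adjust the path to your own installation):

``` shell
# Assumed default install path for Isaac Sim 2022.2.1; change it to match your setup.
alias PYTHON_PATH=~/.local/share/ov/pkg/isaac_sim-2022.2.1/python.sh
```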

### 2. RL tasks
Install [Omniverse Isaac Gym Reinforcement Learning Environments for Isaac Sim](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs). This repository contains Reinforcement Learning tasks that can be run with the latest release of Isaac Sim.
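
A minimal sketch of that installation, assuming you follow the steps in the OmniIsaacGymEnvs README (clone the repository, then install it into Isaac Sim's bundled Python):

``` shell
git clone https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs.git
cd OmniIsaacGymEnvs
PYTHON_PATH -m pip install -e .
```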

Please make sure you have followed the [repo](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs) above and can successfully run its training script:

``` shell
PYTHON_PATH scripts/rlgames_train.py task=Ant headless=True
```

## Usage

The `cfg` folder provides the Cartpole task configs for Isaac Sim, following [Omniverse Isaac Gym Reinforcement Learning Environments for Isaac Sim](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs).

You can use `OpenRL` to train the Isaac Sim Cartpole task via:

``` shell
PYTHON_PATH train_ppo.py
```
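
Because `train_ppo.py` is wrapped with `@hydra.main(config_name="config", config_path="cfg")`, fields in `cfg/config.yaml` can also be overridden on the command line using Hydra's standard `key=value` syntax. The values below are only illustrative; note that `num_envs` and `headless` are currently set inside the script and ignore command-line overrides:

``` shell
PYTHON_PATH train_ppo.py task=Cartpole seed=123 device_id=0
```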
64 changes: 64 additions & 0 deletions examples/isaac/cfg/config.yaml
@@ -0,0 +1,64 @@

# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to name of training config
experiment: ''

# if set to positive integer, overrides the default number of environments
num_envs: ''

# seed - set to -1 to choose random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False

# set the maximum number of learning iterations to train for. overrides default per-environment setting
max_iterations: ''

## Device config
physics_engine: 'physx'
# whether to use cpu or gpu pipeline
pipeline: 'gpu'
# whether to use cpu or gpu physx
sim_device: 'gpu'
# used for gpu simulation only - device id for running sim and task if pipeline=gpu
device_id: 0
# device to run RL
rl_device: 'cuda:0'
# multi-GPU training
multi_gpu: False

## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs

# RLGames Arguments
# test - if set, run policy in inference mode (requires setting checkpoint to load)
test: False
# used to set checkpoint path
checkpoint: ''

# disables rendering
headless: False
# enables native livestream
enable_livestream: False
# timeout for MT script
mt_timeout: 30

wandb_activate: False
wandb_group: ''
wandb_name: Cartpole # default name
wandb_entity: ''
wandb_project: 'omniisaacgymenvs'

# set default task and default training config based on task
defaults:
- task: Cartpole
- hydra/job_logging: disabled

# set the directory where the output files get saved
hydra:
  output_subdir: null
  run:
    dir: .

78 changes: 78 additions & 0 deletions examples/isaac/cfg/task/Cartpole.yaml
@@ -0,0 +1,78 @@
# used to create the object
name: Cartpole

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  numEnvs: ${resolve_default:512,${...num_envs}}
  envSpacing: 4.0
  resetDist: 3.0
  maxEffort: 400.0

  clipObservations: 5.0
  clipActions: 1.0
  controlFrequencyInv: 2 # 60 Hz

sim:
  dt: 0.0083 # 1/120 s
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  add_ground_plane: True
  add_distant_light: True
  use_flatcache: True
  enable_scene_query_support: False
  disable_contact_processing: False

  # set to True if you use camera sensors in the environment
  enable_cameras: False

  default_physics_material:
    static_friction: 1.0
    dynamic_friction: 1.0
    restitution: 0.0

  physx:
    worker_thread_count: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${eq:${....sim_device},"gpu"} # set to False to run on CPU
    solver_position_iteration_count: 4
    solver_velocity_iteration_count: 0
    contact_offset: 0.02
    rest_offset: 0.001
    bounce_threshold_velocity: 0.2
    friction_offset_threshold: 0.04
    friction_correlation_distance: 0.025
    enable_sleeping: True
    enable_stabilization: True
    max_depenetration_velocity: 100.0

    # GPU buffers
    gpu_max_rigid_contact_count: 524288
    gpu_max_rigid_patch_count: 81920
    gpu_found_lost_pairs_capacity: 1024
    gpu_found_lost_aggregate_pairs_capacity: 262144
    gpu_total_aggregate_pairs_capacity: 1024
    gpu_max_soft_body_contacts: 1048576
    gpu_max_particle_contacts: 1048576
    gpu_heap_capacity: 67108864
    gpu_temp_buffer_capacity: 16777216
    gpu_max_num_partitions: 8

  Cartpole:
    # -1 to use default values
    override_usd_defaults: False
    enable_self_collisions: False
    enable_gyroscopic_forces: True
    # also in stage params
    # per-actor
    solver_position_iteration_count: 4
    solver_velocity_iteration_count: 0
    sleep_threshold: 0.005
    stabilization_threshold: 0.001
    # per-body
    density: -1
    max_depenetration_velocity: 100.0
    # per-shape
    contact_offset: 0.02
    rest_offset: 0.001
166 changes: 166 additions & 0 deletions examples/isaac/train_ppo.py
@@ -0,0 +1,166 @@
""""""
import numpy as np

from openrl.configs.config import create_config_parser
from openrl.envs.common import make
from openrl.envs.vec_env import BaseVecEnv
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent

from omniisaacgymenvs.utils.hydra_cfg.hydra_utils import *
from omniisaacgymenvs.utils.hydra_cfg.reformat import omegaconf_to_dict, print_dict
# from omniisaacgymenvs.utils.rlgames.rlgames_utils import RLGPUAlgoObserver, RLGPUEnv
from omniisaacgymenvs.utils.task_util import initialize_task
# from omniisaacgymenvs.utils.config_utils.path_utils import retrieve_checkpoint_path

from omniisaacgymenvs.envs.vec_env_rlgames import VecEnvRLGames

import hydra
from omegaconf import DictConfig

# from rl_games.common import env_configurations, vecenv
# from rl_games.torch_runner import Runner

import datetime
import os
import torch
import pdb

from typing import (
Any,
Dict,
List,
Optional,
Sequence,
SupportsFloat,
Tuple,
Type,
TypeVar,
Union,
)

from gymnasium import spaces
from gymnasium.core import ActType, ObsType, WrapperActType, WrapperObsType
from gymnasium.utils import seeding

class Isaac2OpenRLWrapper:
    """Wrap an Isaac Sim ``VecEnvRLGames`` environment behind an OpenRL ``BaseVecEnv``-style interface."""

    def __init__(self, env: VecEnvRLGames) -> None:
        self.env = env

    @property
    def parallel_env_num(self) -> int:
        return self.env.num_envs

    @property
    def action_space(
        self,
    ) -> Union[spaces.Space[ActType], spaces.Space[WrapperActType]]:
        """Return the :attr:`Env` :attr:`action_space` unless overwritten then the wrapper :attr:`action_space` is used."""
        return self.env.action_space

    @property
    def observation_space(
        self,
    ) -> Union[spaces.Space[ObsType], spaces.Space[WrapperObsType]]:
        """Return the :attr:`Env` :attr:`observation_space` unless overwritten then the wrapper :attr:`observation_space` is used."""
        return self.env.observation_space

    def reset(self, **kwargs):
        """Reset all environments."""
        obs_dict = self.env.reset()
        # Isaac Sim returns a dict of torch tensors; add an agent axis and move to numpy for OpenRL.
        return obs_dict['obs'].unsqueeze(1).cpu().numpy()

    def step(self, actions, extra_data: Optional[Dict[str, Any]] = None):
        """Step all environments."""
        # pdb.set_trace()
        actions = torch.from_numpy(actions).squeeze(-1)

        obs_dict, self._rew, self._resets, self._extras = self.env.step(actions)

        # Reshape to the (num_envs, agent_num, ...) layout that OpenRL expects.
        obs = obs_dict['obs'].unsqueeze(1).cpu().numpy()
        rewards = self._rew.unsqueeze(-1).unsqueeze(-1).cpu().numpy()
        dones = self._resets.unsqueeze(-1).cpu().numpy().astype(bool)

        infos = []
        for i in range(dones.shape[0]):
            infos.append({})

        return obs, rewards, dones, infos

    def close(self, **kwargs):
        return self.env.close()

    @property
    def agent_num(self):
        return 1

    @property
    def use_monitor(self):
        return False

    @property
    def env_name(self):
        return 'Isaac-' + self.env._task.name

    def batch_rewards(self, buffer):
        return {}


@hydra.main(config_name="config", config_path="cfg")
def train_and_evaluate(cfg_isaac: DictConfig):
    """
    cfg_isaac:
        Defined in cfg/config.yaml and loaded through the Hydra framework; used to build the Isaac Sim environment.
        Default task: Cartpole.
    cfg:
        Defined by the OpenRL framework; used to build the algorithm.
    """

    cfg_parser = create_config_parser()
    cfg = cfg_parser.parse_args()

    # create the environment
    num_envs = 9  # set the environment parallelism to 9
    cfg_isaac.num_envs = num_envs
    print(cfg_isaac)
    cfg_dict = omegaconf_to_dict(cfg_isaac)
    print_dict(cfg_dict)
    headless = True  # headless must be True when running Isaac Sim in a Docker container
    enable_viewport = "enable_cameras" in cfg_isaac.task.sim and cfg_isaac.task.sim.enable_cameras
    isaac_env = VecEnvRLGames(
        headless=headless,
        sim_device=cfg_isaac.device_id,
        enable_livestream=cfg_isaac.enable_livestream,
        enable_viewport=enable_viewport,
    )
    task = initialize_task(cfg_dict, isaac_env)
    env = Isaac2OpenRLWrapper(isaac_env)

    net = Net(
        env,
        cfg=cfg,
    )
    # initialize the trainer
    agent = Agent(net)
    # start training; set the total number of training steps to 40000
    agent.train(total_time_steps=40000)


    # begin to test
    # Give the trained agent the environment it will interact with.
    agent.set_env(env)
    # Reset the environment and get the initial observations.
    obs = env.reset()
    done = False
    step = 0
    total_re = 0.0
    while not np.any(done):
        # Predict the next action based on the current observation.
        action, _ = agent.act(obs, deterministic=True)
        obs, r, done, info = env.step(action)
        step += 1
        if step % 50 == 0:
            print(f"{step}: reward: {np.mean(r)}")
        total_re += np.mean(r)
    print(f"Total reward: {total_re}")
    env.close()


if __name__ == "__main__":
    train_and_evaluate()