28 changes: 28 additions & 0 deletions examples/isaac/README.md
@@ -0,0 +1,28 @@
## Installation

### 1. Simulator

Download [Nvidia Isaac Sim](https://developer.nvidia.com/isaac-sim) and install it.

Note 1: If you use the container version of Nvidia Isaac Sim (e.g. running in the cloud), the simulation interface cannot be visualized.

Note 2: The latest version, Isaac Sim 2022.2.1, provides a built-in Python 3.7 environment that packages can use, similar to a system-level Python installation. We recommend using this Python environment when running the Python scripts below.
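
The commands below use `PYTHON_PATH` to refer to this bundled interpreter. A minimal sketch of defining it as a shell alias, assuming the default Omniverse launcher install location (adjust the path to your own installation):

``` shell
# Assumed default install path for Isaac Sim 2022.2.1; change it to match your setup.
alias PYTHON_PATH=~/.local/share/ov/pkg/isaac_sim-2022.2.1/python.sh
```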

### 2. RL tasks
Install [Omniverse Isaac Gym Reinforcement Learning Environments for Isaac Sim](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs). This repository contains Reinforcement Learning tasks that can be run with the latest release of Isaac Sim.
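
A minimal sketch of that installation, assuming you follow the steps in the OmniIsaacGymEnvs README (clone the repository, then install it into Isaac Sim's bundled Python):

``` shell
git clone https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs.git
cd OmniIsaacGymEnvs
PYTHON_PATH -m pip install -e .
```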

Please make sure you have followed the [repo](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs) above and can successfully run its training script:

``` shell
PYTHON_PATH scripts/rlgames_train.py task=Ant headless=True
```

## Usage

The `cfg` folder provides the Cartpole task configs for Isaac Sim, following [Omniverse Isaac Gym Reinforcement Learning Environments for Isaac Sim](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs).

You can use `OpenRL` to train the Isaac Sim Cartpole task via:

``` shell
PYTHON_PATH train_ppo.py
```
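
Because `train_ppo.py` is wrapped with `@hydra.main(config_name="config", config_path="cfg")`, fields in `cfg/config.yaml` can also be overridden on the command line using Hydra's standard `key=value` syntax. The values below are only illustrative; note that `num_envs` and `headless` are currently set inside the script and ignore command-line overrides:

``` shell
PYTHON_PATH train_ppo.py task=Cartpole seed=123 device_id=0
```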
64 changes: 64 additions & 0 deletions examples/isaac/cfg/config.yaml
@@ -0,0 +1,64 @@

# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to name of training config
experiment: ''

# if set to positive integer, overrides the default number of environments
num_envs: ''

# seed - set to -1 to choose random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False

# set the maximum number of learning iterations to train for. overrides default per-environment setting
max_iterations: ''

## Device config
physics_engine: 'physx'
# whether to use cpu or gpu pipeline
pipeline: 'gpu'
# whether to use cpu or gpu physx
sim_device: 'gpu'
# used for gpu simulation only - device id for running sim and task if pipeline=gpu
device_id: 0
# device to run RL
rl_device: 'cuda:0'
# multi-GPU training
multi_gpu: False

## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs

# RLGames Arguments
# test - if set, run policy in inference mode (requires setting checkpoint to load)
test: False
# used to set checkpoint path
checkpoint: ''

# disables rendering
headless: False
# enables native livestream
enable_livestream: False
# timeout for MT script
mt_timeout: 30

wandb_activate: False
wandb_group: ''
wandb_name: Cartpole # default name
wandb_entity: ''
wandb_project: 'omniisaacgymenvs'

# set default task and default training config based on task
defaults:
- task: Cartpole
- hydra/job_logging: disabled

# set the directory where the output files get saved
hydra:
  output_subdir: null
  run:
    dir: .

78 changes: 78 additions & 0 deletions examples/isaac/cfg/task/Cartpole.yaml
@@ -0,0 +1,78 @@
# used to create the object
name: Cartpole

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  numEnvs: ${resolve_default:512,${...num_envs}}
  envSpacing: 4.0
  resetDist: 3.0
  maxEffort: 400.0

  clipObservations: 5.0
  clipActions: 1.0
  controlFrequencyInv: 2 # 60 Hz

sim:
  dt: 0.0083 # 1/120 s
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  add_ground_plane: True
  add_distant_light: True
  use_flatcache: True
  enable_scene_query_support: False
  disable_contact_processing: False

  # set to True if you use camera sensors in the environment
  enable_cameras: False

  default_physics_material:
    static_friction: 1.0
    dynamic_friction: 1.0
    restitution: 0.0

  physx:
    worker_thread_count: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${eq:${....sim_device},"gpu"} # set to False to run on CPU
    solver_position_iteration_count: 4
    solver_velocity_iteration_count: 0
    contact_offset: 0.02
    rest_offset: 0.001
    bounce_threshold_velocity: 0.2
    friction_offset_threshold: 0.04
    friction_correlation_distance: 0.025
    enable_sleeping: True
    enable_stabilization: True
    max_depenetration_velocity: 100.0

    # GPU buffers
    gpu_max_rigid_contact_count: 524288
    gpu_max_rigid_patch_count: 81920
    gpu_found_lost_pairs_capacity: 1024
    gpu_found_lost_aggregate_pairs_capacity: 262144
    gpu_total_aggregate_pairs_capacity: 1024
    gpu_max_soft_body_contacts: 1048576
    gpu_max_particle_contacts: 1048576
    gpu_heap_capacity: 67108864
    gpu_temp_buffer_capacity: 16777216
    gpu_max_num_partitions: 8

  Cartpole:
    # -1 to use default values
    override_usd_defaults: False
    enable_self_collisions: False
    enable_gyroscopic_forces: True
    # also in stage params
    # per-actor
    solver_position_iteration_count: 4
    solver_velocity_iteration_count: 0
    sleep_threshold: 0.005
    stabilization_threshold: 0.001
    # per-body
    density: -1
    max_depenetration_velocity: 100.0
    # per-shape
    contact_offset: 0.02
    rest_offset: 0.001
166 changes: 166 additions & 0 deletions examples/isaac/train_ppo.py
@@ -0,0 +1,166 @@
""""""
import numpy as np

from openrl.configs.config import create_config_parser
from openrl.envs.common import make
from openrl.envs.vec_env import BaseVecEnv
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent

from omniisaacgymenvs.utils.hydra_cfg.hydra_utils import *
from omniisaacgymenvs.utils.hydra_cfg.reformat import omegaconf_to_dict, print_dict
# from omniisaacgymenvs.utils.rlgames.rlgames_utils import RLGPUAlgoObserver, RLGPUEnv
from omniisaacgymenvs.utils.task_util import initialize_task
# from omniisaacgymenvs.utils.config_utils.path_utils import retrieve_checkpoint_path

from omniisaacgymenvs.envs.vec_env_rlgames import VecEnvRLGames

import hydra
from omegaconf import DictConfig

# from rl_games.common import env_configurations, vecenv
# from rl_games.torch_runner import Runner

import datetime
import os
import torch
import pdb

from typing import (
Any,
Dict,
List,
Optional,
Sequence,
SupportsFloat,
Tuple,
Type,
TypeVar,
Union,
)

from gymnasium import spaces
from gymnasium.core import ActType, ObsType, WrapperActType, WrapperObsType
from gymnasium.utils import seeding

class Isaac2OpenRLWrapper:
    """Wrap an Isaac Sim ``VecEnvRLGames`` environment behind an OpenRL ``BaseVecEnv``-style interface."""

    def __init__(self, env: VecEnvRLGames) -> None:
        self.env = env

    @property
    def parallel_env_num(self) -> int:
        return self.env.num_envs

    @property
    def action_space(
        self,
    ) -> Union[spaces.Space[ActType], spaces.Space[WrapperActType]]:
        """Return the :attr:`Env` :attr:`action_space` unless overwritten then the wrapper :attr:`action_space` is used."""
        return self.env.action_space

    @property
    def observation_space(
        self,
    ) -> Union[spaces.Space[ObsType], spaces.Space[WrapperObsType]]:
        """Return the :attr:`Env` :attr:`observation_space` unless overwritten then the wrapper :attr:`observation_space` is used."""
        return self.env.observation_space

    def reset(self, **kwargs):
        """Reset all environments."""
        obs_dict = self.env.reset()
        # Isaac Sim returns a dict of torch tensors; add an agent axis and move to numpy for OpenRL.
        return obs_dict['obs'].unsqueeze(1).cpu().numpy()

    def step(self, actions, extra_data: Optional[Dict[str, Any]] = None):
        """Step all environments."""
        # pdb.set_trace()
        actions = torch.from_numpy(actions).squeeze(-1)

        obs_dict, self._rew, self._resets, self._extras = self.env.step(actions)

        # Reshape to the (num_envs, agent_num, ...) layout that OpenRL expects.
        obs = obs_dict['obs'].unsqueeze(1).cpu().numpy()
        rewards = self._rew.unsqueeze(-1).unsqueeze(-1).cpu().numpy()
        dones = self._resets.unsqueeze(-1).cpu().numpy().astype(bool)

        infos = []
        for i in range(dones.shape[0]):
            infos.append({})

        return obs, rewards, dones, infos

    def close(self, **kwargs):
        return self.env.close()

    @property
    def agent_num(self):
        return 1

    @property
    def use_monitor(self):
        return False

    @property
    def env_name(self):
        return 'Isaac-' + self.env._task.name

    def batch_rewards(self, buffer):
        return {}


@hydra.main(config_name="config", config_path="cfg")
def train_and_evaluate(cfg_isaac: DictConfig):
    """
    cfg_isaac:
        Defined in cfg/config.yaml and loaded through the Hydra framework; used to build the Isaac Sim environment.
        Default task: Cartpole.
    cfg:
        Defined by the OpenRL framework; used to build the algorithm.
    """

    cfg_parser = create_config_parser()
    cfg = cfg_parser.parse_args()

    # create the environment
    num_envs = 9  # set the environment parallelism to 9
    cfg_isaac.num_envs = num_envs
    print(cfg_isaac)
    cfg_dict = omegaconf_to_dict(cfg_isaac)
    print_dict(cfg_dict)
    headless = True  # headless must be True when running Isaac Sim in a Docker container
    enable_viewport = "enable_cameras" in cfg_isaac.task.sim and cfg_isaac.task.sim.enable_cameras
    isaac_env = VecEnvRLGames(
        headless=headless,
        sim_device=cfg_isaac.device_id,
        enable_livestream=cfg_isaac.enable_livestream,
        enable_viewport=enable_viewport,
    )
    task = initialize_task(cfg_dict, isaac_env)
    env = Isaac2OpenRLWrapper(isaac_env)

    net = Net(
        env,
        cfg=cfg,
    )
    # initialize the trainer
    agent = Agent(net)
    # start training; set the total number of training steps to 40000
    agent.train(total_time_steps=40000)


    # begin to test
    # Give the trained agent the environment it will interact with.
    agent.set_env(env)
    # Reset the environment and get the initial observations.
    obs = env.reset()
    done = False
    step = 0
    total_re = 0.0
    while not np.any(done):
        # Predict the next action based on the current observation.
        action, _ = agent.act(obs, deterministic=True)
        obs, r, done, info = env.step(action)
        step += 1
        if step % 50 == 0:
            print(f"{step}: reward: {np.mean(r)}")
        total_re += np.mean(r)
    print(f"Total reward: {total_re}")
    env.close()


if __name__ == "__main__":
    train_and_evaluate()