From fdb51042e088b9d9a17f4bf24f5facd6dc89e26b Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Mon, 8 May 2023 17:38:06 +0800 Subject: [PATCH 1/5] add notebooks --- notebooks/openrl_demo.ipynb | 957 ++++++++++++++++++++++++++++++++++++ 1 file changed, 957 insertions(+) create mode 100644 notebooks/openrl_demo.ipynb diff --git a/notebooks/openrl_demo.ipynb b/notebooks/openrl_demo.ipynb new file mode 100644 index 00000000..3d258d0e --- /dev/null +++ b/notebooks/openrl_demo.ipynb @@ -0,0 +1,957 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# OpenRL Demo\n", + "\n", + "OpenRL is a unified reinforcement learning framework. In this Colab, we will present several basic usages of OpenRL." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation\n", + "\n", + "First of all, we should install `openrl` via pip:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found existing installation: openrl 0.0.9\n", + "Uninstalling openrl-0.0.9:\n", + " Successfully uninstalled openrl-0.0.9\n", + "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n", + "Collecting git+https://github.com/huangshiyu13/openrl.git@dev\n", + " Cloning https://github.com/huangshiyu13/openrl.git (to revision dev) to /private/var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/pip-req-build-eumcq89n\n", + " Running command git clone --filter=blob:none --quiet https://github.com/huangshiyu13/openrl.git /private/var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/pip-req-build-eumcq89n\n", + " Running command git checkout -b dev --track origin/dev\n", + " Switched to a new branch 'dev'\n", + " branch 'dev' set up to track 'origin/dev'.\n", + " Resolved https://github.com/huangshiyu13/openrl.git to commit 38d196c14589820009087527068ad71b7a4aa334\n", + " Installing build dependencies ... 
\u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: setuptools>=67.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (67.7.2)\n", + "Requirement already satisfied: gymnasium in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.28.1)\n", + "Requirement already satisfied: click in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (8.0.3)\n", + "Requirement already satisfied: termcolor in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.2.0)\n", + "Requirement already satisfied: gym in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.21.0)\n", + "Requirement already satisfied: torch in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.0.0)\n", + "Requirement already satisfied: treevalue in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (1.4.10)\n", + "Requirement already satisfied: rich in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (13.3.2)\n", + "Requirement already satisfied: wandb in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.14.0)\n", + "Requirement already satisfied: seaborn in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.12.2)\n", + "Requirement already satisfied: jsonargparse in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (4.20.1)\n", + "Requirement already satisfied: imageio in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.27.0)\n", + "Requirement already satisfied: opencv-python in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (4.7.0.72)\n", + "Requirement already satisfied: pygame in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.1.3.dev8)\n", + "Requirement already satisfied: 
numpy>=1.18.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gym->openrl==0.0.9) (1.21.4)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gym->openrl==0.0.9) (2.2.1)\n", + "Requirement already satisfied: jax-jumpy>=1.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (1.0.0)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (4.5.0)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (0.0.4)\n", + "Requirement already satisfied: importlib-metadata>=4.8.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (6.0.1)\n", + "Requirement already satisfied: pillow>=8.3.2 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from imageio->openrl==0.0.9) (9.5.0)\n", + "Requirement already satisfied: PyYAML>=3.13 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from jsonargparse->openrl==0.0.9) (6.0)\n", + "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from rich->openrl==0.0.9) (2.2.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from rich->openrl==0.0.9) (2.14.0)\n", + "Requirement already satisfied: pandas>=0.25 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from seaborn->openrl==0.0.9) (1.3.4)\n", + "Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from seaborn->openrl==0.0.9) (3.7.1)\n", + "Requirement already satisfied: filelock in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (3.10.0)\n", + "Requirement already satisfied: sympy in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (1.11.1)\n", + 
"Requirement already satisfied: networkx in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (2.8.3)\n", + "Requirement already satisfied: jinja2 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (3.0.3)\n", + "Requirement already satisfied: enum-tools in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.9.0.post1)\n", + "Requirement already satisfied: graphviz~=0.17 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.20.1)\n", + "Requirement already satisfied: dill~=0.3.4 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.3.6)\n", + "Requirement already satisfied: hbutils>=0.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.8.2)\n", + "Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (3.1.31)\n", + "Requirement already satisfied: requests<3,>=2.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (2.28.2)\n", + "Requirement already satisfied: psutil>=5.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (5.9.4)\n", + "Requirement already satisfied: sentry-sdk>=1.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (1.17.0)\n", + "Requirement already satisfied: docker-pycreds>=0.4.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (0.4.0)\n", + "Requirement already satisfied: pathtools in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (0.1.2)\n", + "Requirement already satisfied: setproctitle in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (1.3.2)\n", + "Requirement already satisfied: appdirs>=1.4.3 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (1.4.4)\n", + "Requirement already satisfied: 
protobuf!=4.21.0,<5,>=3.19.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (3.20.3)\n", + "Requirement already satisfied: six>=1.4.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from docker-pycreds>=0.4.0->wandb->openrl==0.0.9) (1.16.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from GitPython!=3.1.29,>=1.0.0->wandb->openrl==0.0.9) (4.0.10)\n", + "Requirement already satisfied: packaging>=21.3 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (23.0)\n", + "Requirement already satisfied: pytimeparse>=1.1.8 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (1.1.8)\n", + "Requirement already satisfied: bitmath>=1.3.3.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (1.3.3.1)\n", + "Requirement already satisfied: chardet<5,>=3.0.4 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (3.0.4)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from importlib-metadata>=4.8.0->gymnasium->openrl==0.0.9) (3.6.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->openrl==0.0.9) (0.1.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (1.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (4.39.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in 
/Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (1.4.4)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (2.8.2)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (5.12.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from pandas>=0.25->seaborn->openrl==0.0.9) (2021.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (1.26.15)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (2022.12.7)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from jinja2->torch->openrl==0.0.9) (2.0.1)\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from sympy->torch->openrl==0.0.9) (1.3.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->openrl==0.0.9) (5.0.0)\n", + 
"Building wheels for collected packages: openrl\n", + " Building wheel for openrl (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for openrl: filename=openrl-0.0.9-py3-none-any.whl size=187884 sha256=449bf905783ad09a41bc885950ec52f027961bb2f03a4c64627fd32106fc6f6b\n", + " Stored in directory: /private/var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/pip-ephem-wheel-cache-oejmsslv/wheels/55/a5/38/eda450adaafeb499c5245882add17afd27d04548c38e5f8736\n", + "Successfully built openrl\n", + "Installing collected packages: openrl\n", + "Successfully installed openrl-0.0.9\n" + ] + } + ], + "source": [ + "!pip uninstall openrl -y\n", + "# !pip install git+https://github.com/OpenRL-Lab/openrl.git\n", + "!pip install git+https://github.com/huangshiyu13/openrl.git@dev" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the installation, we can check the version of `openrl` via:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OPENRL version: \u001b[31mv0.0.9\u001b[0m\u001b[0m\r\n", + "Developed by OpenRL Contributors, Email: \u001b[31mhuangshiyu@4paradigm.com\u001b[0m\u001b[0m\r\n" + ] + } + ], + "source": [ + "!openrl --version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Users can also get the system information via:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "- OS: \u001b[31mmacOS-13.3.1-arm64-arm-64bit Darwin Kernel Version 22.4.0: Mon Mar 6 21:00:41 PST 2023; root:xnu-8796.101.5~3/RELEASE_ARM64_T8103\u001b[0m\u001b[0m\r\n", + "- Python: \u001b[31m3.9.16\u001b[0m\u001b[0m\r\n", + "- OpenRL: \u001b[31mv0.0.9\u001b[0m\u001b[0m\r\n", + "- PyTorch: \u001b[31m2.0.0\u001b[0m\u001b[0m\r\n", + "- GPU Enabled: \u001b[31mFalse\u001b[0m\u001b[0m\r\n", + "- Numpy: 
\u001b[31m1.21.4\u001b[0m\u001b[0m\r\n", + "- Gymnasium: \u001b[31m0.28.1\u001b[0m\u001b[0m\r\n" + ] + } + ], + "source": [ + "!openrl --system_info" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training\n", + "\n", + "Users can train CartPole agent with `openrl` with a few lines:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n" + ] + }, + { + "data": { + "text/html": [ + "
[05/08/23 17:31:01] INFO     2023-05-08 17:31:01,752 [INFO] Episode: 0/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[05/08/23 17:31:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:01\u001b[0m,\u001b[1;36m752\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m0\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=271493;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=536110;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[05/08/23 17:31:02] INFO     2023-05-08 17:31:02,055 [INFO] Episode: 1/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[05/08/23 17:31:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m055\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m1\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=870163;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=318046;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:02,246 [INFO] Episode: 2/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m246\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m2\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=952225;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=229053;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:02,437 [INFO] Episode: 3/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m437\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m3\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=792518;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=99437;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:02,636 [INFO] Episode: 4/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m636\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m4\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=558433;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=739426;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:02,832 [INFO] Episode: 5/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m832\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m5\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=325213;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=103560;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[05/08/23 17:31:03] INFO     2023-05-08 17:31:03,024 [INFO] Episode: 6/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[05/08/23 17:31:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m024\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m6\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=717209;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=346236;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:03,214 [INFO] Episode: 7/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m214\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m7\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=455262;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=331556;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:03,412 [INFO] Episode: 8/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m412\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m8\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=579363;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=500181;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:03,686 [INFO] Episode: 9/11                             logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m686\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m9\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=65304;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844132;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                    INFO     2023-05-08 17:31:03,878 [INFO] Episode: 10/11                            logger.py:150\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m878\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m10\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=97802;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=754665;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "from openrl.envs.common import make\n", + "from openrl.modules.common import PPONet as Net\n", + "from openrl.runners.common import PPOAgent as Agent\n", + "# create environment\n", + "env = make(\"CartPole-v1\", env_num=9)\n", + "# create network\n", + "net = Net(env)\n", + "# create training agent\n", + "agent = Agent(net)\n", + "# begin to train!\n", + "agent.train(total_time_steps=20000)\n", + "env.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test\n", + "\n", + "After the training, users can test the agent with:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + 
"/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", + " deprecation(\n", + "2023-05-08 17:31:34.240 Python[2621:4087937] ApplePersistenceIgnoreState: Existing state will not be touched. 
New state will be written to /var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/org.python.python.savedState\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1: reward:1.0\n", + "2: reward:1.0\n", + "3: reward:1.0\n", + "4: reward:1.0\n", + "5: reward:1.0\n", + "6: reward:1.0\n", + "7: reward:1.0\n", + "8: reward:1.0\n", + "9: reward:1.0\n", + "10: reward:1.0\n", + "11: reward:1.0\n", + "12: reward:1.0\n", + "13: reward:1.0\n", + "14: reward:1.0\n", + "15: reward:1.0\n", + "16: reward:1.0\n", + "17: reward:1.0\n", + "18: reward:1.0\n", + "19: reward:1.0\n", + "20: reward:1.0\n", + "21: reward:1.0\n", + "22: reward:1.0\n", + "23: reward:1.0\n", + "24: reward:1.0\n", + "25: reward:1.0\n", + "26: reward:1.0\n", + "27: reward:1.0\n", + "28: reward:1.0\n", + "29: reward:1.0\n", + "30: reward:1.0\n", + "31: reward:1.0\n", + "32: reward:1.0\n", + "33: reward:1.0\n", + "34: reward:1.0\n", + "35: reward:1.0\n", + "36: reward:1.0\n", + "37: reward:1.0\n", + "38: reward:1.0\n", + "39: reward:1.0\n", + "40: reward:1.0\n", + "41: reward:1.0\n", + "42: reward:1.0\n", + "43: reward:1.0\n", + "44: reward:1.0\n", + "45: reward:1.0\n", + "46: reward:1.0\n", + "47: reward:1.0\n", + "48: reward:1.0\n", + "49: reward:1.0\n", + "50: reward:1.0\n", + "51: reward:1.0\n", + "52: reward:1.0\n", + "53: reward:1.0\n", + "54: reward:1.0\n", + "55: reward:1.0\n", + "56: reward:1.0\n", + "57: reward:1.0\n", + "58: reward:1.0\n", + "59: reward:1.0\n", + "60: reward:1.0\n", + "61: reward:1.0\n", + "62: reward:1.0\n", + "63: reward:1.0\n", + "64: reward:1.0\n", + "65: reward:1.0\n", + "66: reward:1.0\n", + "67: reward:1.0\n", + "68: reward:1.0\n", + "69: reward:1.0\n", + "70: reward:1.0\n", + "71: reward:1.0\n", + "72: reward:1.0\n", + "73: reward:1.0\n", + "74: reward:1.0\n", + "75: reward:1.0\n", + "76: reward:1.0\n", + "77: reward:1.0\n", + "78: reward:1.0\n", + "79: reward:1.0\n", + "80: reward:1.0\n", + "81: reward:1.0\n", + "82: reward:1.0\n", + "83: reward:1.0\n", 
+ "84: reward:1.0\n", + "85: reward:1.0\n", + "86: reward:1.0\n", + "87: reward:1.0\n", + "88: reward:1.0\n", + "89: reward:1.0\n", + "90: reward:1.0\n", + "91: reward:1.0\n", + "92: reward:1.0\n", + "93: reward:1.0\n", + "94: reward:1.0\n", + "95: reward:1.0\n", + "96: reward:1.0\n", + "97: reward:1.0\n", + "98: reward:1.0\n", + "99: reward:1.0\n", + "100: reward:1.0\n", + "101: reward:1.0\n", + "102: reward:1.0\n", + "103: reward:1.0\n", + "104: reward:1.0\n", + "105: reward:1.0\n", + "106: reward:1.0\n", + "107: reward:1.0\n", + "108: reward:1.0\n", + "109: reward:1.0\n", + "110: reward:1.0\n", + "111: reward:1.0\n", + "112: reward:1.0\n", + "113: reward:1.0\n", + "114: reward:1.0\n", + "115: reward:1.0\n", + "116: reward:1.0\n", + "117: reward:1.0\n", + "118: reward:1.0\n", + "119: reward:1.0\n", + "120: reward:1.0\n", + "121: reward:1.0\n", + "122: reward:1.0\n", + "123: reward:1.0\n", + "124: reward:1.0\n", + "125: reward:1.0\n", + "126: reward:1.0\n", + "127: reward:1.0\n", + "128: reward:1.0\n", + "129: reward:1.0\n", + "130: reward:1.0\n", + "131: reward:1.0\n", + "132: reward:1.0\n", + "133: reward:1.0\n", + "134: reward:1.0\n", + "135: reward:1.0\n", + "136: reward:1.0\n", + "137: reward:1.0\n", + "138: reward:1.0\n", + "139: reward:1.0\n", + "140: reward:1.0\n", + "141: reward:1.0\n", + "142: reward:1.0\n", + "143: reward:1.0\n", + "144: reward:1.0\n", + "145: reward:1.0\n", + "146: reward:1.0\n", + "147: reward:1.0\n", + "148: reward:1.0\n", + "149: reward:1.0\n", + "150: reward:1.0\n", + "151: reward:1.0\n", + "152: reward:1.0\n", + "153: reward:1.0\n", + "154: reward:1.0\n", + "155: reward:1.0\n", + "156: reward:1.0\n", + "157: reward:1.0\n", + "158: reward:1.0\n", + "159: reward:1.0\n", + "160: reward:1.0\n", + "161: reward:1.0\n", + "162: reward:1.0\n", + "163: reward:1.0\n", + "164: reward:1.0\n", + "165: reward:1.0\n", + "166: reward:1.0\n", + "167: reward:1.0\n", + "168: reward:1.0\n", + "169: reward:1.0\n", + "170: reward:1.0\n", + "171: 
reward:1.0\n", + "172: reward:1.0\n", + "173: reward:1.0\n", + "174: reward:1.0\n", + "175: reward:1.0\n", + "176: reward:1.0\n", + "177: reward:1.0\n", + "178: reward:1.0\n", + "179: reward:1.0\n", + "180: reward:1.0\n", + "181: reward:1.0\n", + "182: reward:1.0\n", + "183: reward:1.0\n", + "184: reward:1.0\n", + "185: reward:1.0\n", + "186: reward:1.0\n", + "187: reward:1.0\n", + "188: reward:1.0\n", + "189: reward:1.0\n", + "190: reward:1.0\n", + "191: reward:1.0\n", + "192: reward:1.0\n", + "193: reward:1.0\n", + "194: reward:1.0\n", + "195: reward:1.0\n", + "196: reward:1.0\n", + "197: reward:1.0\n", + "198: reward:1.0\n", + "199: reward:1.0\n", + "200: reward:1.0\n", + "201: reward:1.0\n", + "202: reward:1.0\n", + "203: reward:1.0\n", + "204: reward:1.0\n", + "205: reward:1.0\n", + "206: reward:1.0\n", + "207: reward:1.0\n", + "208: reward:1.0\n", + "209: reward:1.0\n", + "210: reward:1.0\n", + "211: reward:1.0\n", + "212: reward:1.0\n", + "213: reward:1.0\n", + "214: reward:1.0\n", + "215: reward:1.0\n", + "216: reward:1.0\n", + "217: reward:1.0\n", + "218: reward:1.0\n", + "219: reward:1.0\n", + "220: reward:1.0\n", + "221: reward:1.0\n", + "222: reward:1.0\n", + "223: reward:1.0\n", + "224: reward:1.0\n", + "225: reward:1.0\n", + "226: reward:1.0\n", + "227: reward:1.0\n", + "228: reward:1.0\n", + "229: reward:1.0\n", + "230: reward:1.0\n", + "231: reward:1.0\n", + "232: reward:1.0\n", + "233: reward:1.0\n", + "234: reward:1.0\n", + "235: reward:1.0\n", + "236: reward:1.0\n", + "237: reward:1.0\n", + "238: reward:1.0\n", + "239: reward:1.0\n", + "240: reward:1.0\n", + "241: reward:1.0\n", + "242: reward:1.0\n", + "243: reward:1.0\n", + "244: reward:1.0\n", + "245: reward:1.0\n", + "246: reward:1.0\n", + "247: reward:1.0\n", + "248: reward:1.0\n", + "249: reward:1.0\n", + "250: reward:1.0\n", + "251: reward:1.0\n", + "252: reward:1.0\n", + "253: reward:1.0\n", + "254: reward:1.0\n", + "255: reward:1.0\n", + "256: reward:1.0\n", + "257: reward:1.0\n", + 
"258: reward:1.0\n", + "259: reward:1.0\n", + "260: reward:1.0\n", + "261: reward:1.0\n", + "262: reward:1.0\n", + "263: reward:1.0\n", + "264: reward:1.0\n", + "265: reward:1.0\n", + "266: reward:1.0\n", + "267: reward:1.0\n", + "268: reward:1.0\n", + "269: reward:1.0\n", + "270: reward:1.0\n", + "271: reward:1.0\n", + "272: reward:1.0\n", + "273: reward:1.0\n", + "274: reward:1.0\n", + "275: reward:1.0\n", + "276: reward:1.0\n", + "277: reward:1.0\n", + "278: reward:1.0\n", + "279: reward:1.0\n", + "280: reward:1.0\n", + "281: reward:1.0\n", + "282: reward:1.0\n", + "283: reward:1.0\n", + "284: reward:1.0\n", + "285: reward:1.0\n", + "286: reward:1.0\n", + "287: reward:1.0\n", + "288: reward:1.0\n", + "289: reward:1.0\n", + "290: reward:1.0\n", + "291: reward:1.0\n", + "292: reward:1.0\n", + "293: reward:1.0\n", + "294: reward:1.0\n", + "295: reward:1.0\n", + "296: reward:1.0\n", + "297: reward:1.0\n", + "298: reward:1.0\n", + "299: reward:1.0\n", + "300: reward:1.0\n", + "301: reward:1.0\n", + "302: reward:1.0\n", + "303: reward:1.0\n", + "304: reward:1.0\n", + "305: reward:1.0\n", + "306: reward:1.0\n", + "307: reward:1.0\n", + "308: reward:1.0\n", + "309: reward:1.0\n", + "310: reward:1.0\n", + "311: reward:1.0\n", + "312: reward:1.0\n", + "313: reward:1.0\n", + "314: reward:1.0\n", + "315: reward:1.0\n", + "316: reward:1.0\n", + "317: reward:1.0\n", + "318: reward:1.0\n", + "319: reward:1.0\n", + "320: reward:1.0\n", + "321: reward:1.0\n", + "322: reward:1.0\n", + "323: reward:1.0\n", + "324: reward:1.0\n", + "325: reward:1.0\n", + "326: reward:1.0\n", + "327: reward:1.0\n", + "328: reward:1.0\n", + "329: reward:1.0\n", + "330: reward:1.0\n", + "331: reward:1.0\n", + "332: reward:1.0\n", + "333: reward:1.0\n", + "334: reward:1.0\n", + "335: reward:1.0\n", + "336: reward:1.0\n", + "337: reward:1.0\n", + "338: reward:1.0\n", + "339: reward:1.0\n", + "340: reward:1.0\n", + "341: reward:1.0\n", + "342: reward:1.0\n", + "343: reward:1.0\n", + "344: reward:1.0\n", 
+ "345: reward:1.0\n", + "346: reward:1.0\n", + "347: reward:1.0\n", + "348: reward:1.0\n", + "349: reward:1.0\n", + "350: reward:1.0\n", + "351: reward:1.0\n", + "352: reward:1.0\n", + "353: reward:1.0\n", + "354: reward:1.0\n", + "355: reward:1.0\n", + "356: reward:1.0\n", + "357: reward:1.0\n", + "358: reward:1.0\n", + "359: reward:1.0\n", + "360: reward:1.0\n", + "361: reward:1.0\n", + "362: reward:1.0\n", + "363: reward:1.0\n", + "364: reward:1.0\n", + "365: reward:1.0\n", + "366: reward:1.0\n", + "367: reward:1.0\n", + "368: reward:1.0\n", + "369: reward:1.0\n", + "370: reward:1.0\n", + "371: reward:1.0\n", + "372: reward:1.0\n", + "373: reward:1.0\n", + "374: reward:1.0\n", + "375: reward:1.0\n", + "376: reward:1.0\n", + "377: reward:1.0\n", + "378: reward:1.0\n", + "379: reward:1.0\n", + "380: reward:1.0\n", + "381: reward:1.0\n", + "382: reward:1.0\n", + "383: reward:1.0\n", + "384: reward:1.0\n", + "385: reward:1.0\n", + "386: reward:1.0\n", + "387: reward:1.0\n", + "388: reward:1.0\n", + "389: reward:1.0\n", + "390: reward:1.0\n", + "391: reward:1.0\n", + "392: reward:1.0\n", + "393: reward:1.0\n", + "394: reward:1.0\n", + "395: reward:1.0\n", + "396: reward:1.0\n", + "397: reward:1.0\n", + "398: reward:1.0\n", + "399: reward:1.0\n", + "400: reward:1.0\n", + "401: reward:1.0\n", + "402: reward:1.0\n", + "403: reward:1.0\n", + "404: reward:1.0\n", + "405: reward:1.0\n", + "406: reward:1.0\n", + "407: reward:1.0\n", + "408: reward:1.0\n", + "409: reward:1.0\n", + "410: reward:1.0\n", + "411: reward:1.0\n", + "412: reward:1.0\n", + "413: reward:1.0\n", + "414: reward:1.0\n", + "415: reward:1.0\n", + "416: reward:1.0\n", + "417: reward:1.0\n", + "418: reward:1.0\n", + "419: reward:1.0\n", + "420: reward:1.0\n", + "421: reward:1.0\n", + "422: reward:1.0\n", + "423: reward:1.0\n", + "424: reward:1.0\n", + "425: reward:1.0\n", + "426: reward:1.0\n", + "427: reward:1.0\n", + "428: reward:1.0\n", + "429: reward:1.0\n", + "430: reward:1.0\n", + "431: 
reward:1.0\n", + "432: reward:1.0\n", + "433: reward:1.0\n", + "434: reward:1.0\n", + "435: reward:1.0\n", + "436: reward:1.0\n", + "437: reward:1.0\n", + "438: reward:1.0\n", + "439: reward:1.0\n", + "440: reward:1.0\n", + "441: reward:1.0\n", + "442: reward:1.0\n", + "443: reward:1.0\n", + "444: reward:1.0\n", + "445: reward:1.0\n", + "446: reward:1.0\n", + "447: reward:1.0\n", + "448: reward:1.0\n", + "449: reward:1.0\n", + "450: reward:1.0\n", + "451: reward:1.0\n", + "452: reward:1.0\n", + "453: reward:1.0\n", + "454: reward:1.0\n", + "455: reward:1.0\n", + "456: reward:1.0\n", + "457: reward:1.0\n", + "458: reward:1.0\n", + "459: reward:1.0\n", + "460: reward:1.0\n", + "461: reward:1.0\n", + "462: reward:1.0\n", + "463: reward:1.0\n", + "464: reward:1.0\n", + "465: reward:1.0\n", + "466: reward:1.0\n", + "467: reward:1.0\n", + "468: reward:1.0\n", + "469: reward:1.0\n", + "470: reward:1.0\n", + "471: reward:1.0\n", + "472: reward:1.0\n", + "473: reward:1.0\n", + "474: reward:1.0\n", + "475: reward:1.0\n", + "476: reward:1.0\n", + "477: reward:1.0\n", + "478: reward:1.0\n", + "479: reward:1.0\n", + "480: reward:1.0\n", + "481: reward:1.0\n", + "482: reward:1.0\n", + "483: reward:1.0\n", + "484: reward:1.0\n", + "485: reward:1.0\n", + "486: reward:1.0\n", + "487: reward:1.0\n", + "488: reward:1.0\n", + "489: reward:1.0\n", + "490: reward:1.0\n", + "491: reward:1.0\n", + "492: reward:1.0\n", + "493: reward:1.0\n", + "494: reward:1.0\n", + "495: reward:1.0\n", + "496: reward:1.0\n", + "497: reward:1.0\n", + "498: reward:1.0\n", + "499: reward:1.0\n", + "500: reward:1.0\n" + ] + } + ], + "source": [ + " # Begin to test\n", + "env = make(\"CartPole-v1\", render_mode=\"group_human\", env_num=9, asynchronous=True)\n", + "agent.set_env(env)\n", + "obs, info = env.reset()\n", + "done = False\n", + "step = 0\n", + "while not np.any(done):\n", + " # The agent predicts the next action based on environmental observations.\n", + " action, _ = agent.act(obs, 
deterministic=True)\n", + " obs, r, done, info = env.step(action)\n", + " step += 1\n", + " print(f\"{step}: reward:{np.mean(r)}\")\n", + "env.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 01ab3003c9aaf7a3b2b7a8f7e32d75efb8ebb87a Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Mon, 8 May 2023 17:43:53 +0800 Subject: [PATCH 2/5] update --- openrl/envs/vec_env/base_venv.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/openrl/envs/vec_env/base_venv.py b/openrl/envs/vec_env/base_venv.py index b52f4ea1..33fd068c 100644 --- a/openrl/envs/vec_env/base_venv.py +++ b/openrl/envs/vec_env/base_venv.py @@ -15,6 +15,7 @@ # limitations under the License. 
"""""" +import sys import warnings from abc import ABC, abstractmethod from typing import Any, List, Optional, Sequence, Union @@ -24,6 +25,8 @@ from openrl.envs.vec_env.utils.util import tile_images +IN_COLAB = "google.colab" in sys.modules + class BaseVecEnv( ABC, @@ -160,7 +163,12 @@ def render(self, mode: Optional[str] = None) -> Optional[np.ndarray]: if mode == "human": import cv2 # pytype:disable=import-error - cv2.imshow("Vec_Env:{}".format(self.env_name), bigimg[:, :, ::-1]) + if IN_COLAB: + from google.colab.patches import cv2_imshow + + cv2_imshow(bigimg[:, :, ::-1]) + else: + cv2.imshow("Vec_Env:{}".format(self.env_name), bigimg[:, :, ::-1]) cv2.waitKey(1) elif mode in [None, "rgb_array"]: return bigimg From 5238e3a720a467d4c5f09831b7bd84aa771c9a79 Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Mon, 8 May 2023 17:48:33 +0800 Subject: [PATCH 3/5] update --- openrl/envs/vec_env/base_venv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openrl/envs/vec_env/base_venv.py b/openrl/envs/vec_env/base_venv.py index 33fd068c..1511d350 100644 --- a/openrl/envs/vec_env/base_venv.py +++ b/openrl/envs/vec_env/base_venv.py @@ -161,15 +161,15 @@ def render(self, mode: Optional[str] = None) -> Optional[np.ndarray]: # Create a big image by tiling images from subprocesses bigimg = tile_images(imgs) if mode == "human": - import cv2 # pytype:disable=import-error - if IN_COLAB: from google.colab.patches import cv2_imshow cv2_imshow(bigimg[:, :, ::-1]) else: + import cv2 # pytype:disable=import-error + cv2.imshow("Vec_Env:{}".format(self.env_name), bigimg[:, :, ::-1]) - cv2.waitKey(1) + cv2.waitKey(1) elif mode in [None, "rgb_array"]: return bigimg else: From d9add9897397e0a38eff5ccf6c9a816dd757d2dc Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Mon, 8 May 2023 19:07:17 +0800 Subject: [PATCH 4/5] - update --- README.md | 2 + README_en.md | 2 + notebooks/openrl_demo.ipynb | 957 ------------------------------------ 3 files changed, 4 insertions(+), 
957 deletions(-) delete mode 100644 notebooks/openrl_demo.ipynb diff --git a/README.md b/README.md index efd5eedf..89833da2 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,8 @@ pip install -e . openrl --version ``` +**Tips**:无需安装,通过Colab在线试用OpenRL: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/15VBA-B7AJF8dBazzRcWAxJxZI7Pl9m-g?usp=sharing) + ## 使用Docker OpenRL目前也提供了包含显卡支持和非显卡支持的Docker镜像。 diff --git a/README_en.md b/README_en.md index f4b18264..7909295a 100644 --- a/README_en.md +++ b/README_en.md @@ -120,6 +120,8 @@ After installation, users can check the version of OpenRL through command line: openrl --version ``` +**Tips**: No installation required, try OpenRL online through Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/15VBA-B7AJF8dBazzRcWAxJxZI7Pl9m-g?usp=sharing) + ## Use Docker OpenRL currently provides Docker images with and without GPU support. diff --git a/notebooks/openrl_demo.ipynb b/notebooks/openrl_demo.ipynb deleted file mode 100644 index 3d258d0e..00000000 --- a/notebooks/openrl_demo.ipynb +++ /dev/null @@ -1,957 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# OpenRL Demo\n", - "\n", - "OpenRL is a unified reinforcement learnig framework. In this Colab, we will present serval basic usages of OpenRL." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation\n", - "\n", - "First of all, we should install `openrl` via pip:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found existing installation: openrl 0.0.9\n", - "Uninstalling openrl-0.0.9:\n", - " Successfully uninstalled openrl-0.0.9\n", - "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n", - "Collecting git+https://github.com/huangshiyu13/openrl.git@dev\n", - " Cloning https://github.com/huangshiyu13/openrl.git (to revision dev) to /private/var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/pip-req-build-eumcq89n\n", - " Running command git clone --filter=blob:none --quiet https://github.com/huangshiyu13/openrl.git /private/var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/pip-req-build-eumcq89n\n", - " Running command git checkout -b dev --track origin/dev\n", - " Switched to a new branch 'dev'\n", - " branch 'dev' set up to track 'origin/dev'.\n", - " Resolved https://github.com/huangshiyu13/openrl.git to commit 38d196c14589820009087527068ad71b7a4aa334\n", - " Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", - "\u001b[?25hRequirement already satisfied: setuptools>=67.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (67.7.2)\n", - "Requirement already satisfied: gymnasium in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.28.1)\n", - "Requirement already satisfied: click in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (8.0.3)\n", - "Requirement already satisfied: termcolor in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.2.0)\n", - "Requirement already satisfied: gym in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.21.0)\n", - "Requirement already satisfied: torch in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.0.0)\n", - "Requirement already satisfied: treevalue in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (1.4.10)\n", - "Requirement already satisfied: rich in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (13.3.2)\n", - "Requirement already satisfied: wandb in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.14.0)\n", - "Requirement already satisfied: seaborn in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (0.12.2)\n", - "Requirement already satisfied: jsonargparse in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (4.20.1)\n", - "Requirement already satisfied: imageio in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.27.0)\n", - "Requirement already satisfied: opencv-python in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (4.7.0.72)\n", - "Requirement already satisfied: pygame in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from openrl==0.0.9) (2.1.3.dev8)\n", - "Requirement already satisfied: numpy>=1.18.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gym->openrl==0.0.9) (1.21.4)\n", - "Requirement already satisfied: cloudpickle>=1.2.0 
in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gym->openrl==0.0.9) (2.2.1)\n", - "Requirement already satisfied: jax-jumpy>=1.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (1.0.0)\n", - "Requirement already satisfied: typing-extensions>=4.3.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (4.5.0)\n", - "Requirement already satisfied: farama-notifications>=0.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (0.0.4)\n", - "Requirement already satisfied: importlib-metadata>=4.8.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gymnasium->openrl==0.0.9) (6.0.1)\n", - "Requirement already satisfied: pillow>=8.3.2 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from imageio->openrl==0.0.9) (9.5.0)\n", - "Requirement already satisfied: PyYAML>=3.13 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from jsonargparse->openrl==0.0.9) (6.0)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from rich->openrl==0.0.9) (2.2.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from rich->openrl==0.0.9) (2.14.0)\n", - "Requirement already satisfied: pandas>=0.25 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from seaborn->openrl==0.0.9) (1.3.4)\n", - "Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from seaborn->openrl==0.0.9) (3.7.1)\n", - "Requirement already satisfied: filelock in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (3.10.0)\n", - "Requirement already satisfied: sympy in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (1.11.1)\n", - "Requirement already satisfied: networkx in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (2.8.3)\n", - "Requirement already satisfied: 
jinja2 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from torch->openrl==0.0.9) (3.0.3)\n", - "Requirement already satisfied: enum-tools in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.9.0.post1)\n", - "Requirement already satisfied: graphviz~=0.17 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.20.1)\n", - "Requirement already satisfied: dill~=0.3.4 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.3.6)\n", - "Requirement already satisfied: hbutils>=0.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from treevalue->openrl==0.0.9) (0.8.2)\n", - "Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (3.1.31)\n", - "Requirement already satisfied: requests<3,>=2.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (2.28.2)\n", - "Requirement already satisfied: psutil>=5.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (5.9.4)\n", - "Requirement already satisfied: sentry-sdk>=1.0.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (1.17.0)\n", - "Requirement already satisfied: docker-pycreds>=0.4.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (0.4.0)\n", - "Requirement already satisfied: pathtools in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (0.1.2)\n", - "Requirement already satisfied: setproctitle in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (1.3.2)\n", - "Requirement already satisfied: appdirs>=1.4.3 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (1.4.4)\n", - "Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from wandb->openrl==0.0.9) (3.20.3)\n", - "Requirement already satisfied: six>=1.4.0 
in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from docker-pycreds>=0.4.0->wandb->openrl==0.0.9) (1.16.0)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from GitPython!=3.1.29,>=1.0.0->wandb->openrl==0.0.9) (4.0.10)\n", - "Requirement already satisfied: packaging>=21.3 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (23.0)\n", - "Requirement already satisfied: pytimeparse>=1.1.8 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (1.1.8)\n", - "Requirement already satisfied: bitmath>=1.3.3.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (1.3.3.1)\n", - "Requirement already satisfied: chardet<5,>=3.0.4 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from hbutils>=0.0.1->treevalue->openrl==0.0.9) (3.0.4)\n", - "Requirement already satisfied: zipp>=0.5 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from importlib-metadata>=4.8.0->gymnasium->openrl==0.0.9) (3.6.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich->openrl==0.0.9) (0.1.2)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (4.39.2)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (1.4.4)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in 
/Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (2.8.2)\n", - "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn->openrl==0.0.9) (5.12.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from pandas>=0.25->seaborn->openrl==0.0.9) (2021.3)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (3.4)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from requests<3,>=2.0.0->wandb->openrl==0.0.9) (2022.12.7)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from jinja2->torch->openrl==0.0.9) (2.0.1)\n", - "Requirement already satisfied: mpmath>=0.19 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from sympy->torch->openrl==0.0.9) (1.3.0)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/hsy17/ai3.9/lib/python3.9/site-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb->openrl==0.0.9) (5.0.0)\n", - "Building wheels for collected packages: openrl\n", - " Building wheel for openrl (pyproject.toml) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for openrl: filename=openrl-0.0.9-py3-none-any.whl size=187884 sha256=449bf905783ad09a41bc885950ec52f027961bb2f03a4c64627fd32106fc6f6b\n", - " Stored in directory: /private/var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/pip-ephem-wheel-cache-oejmsslv/wheels/55/a5/38/eda450adaafeb499c5245882add17afd27d04548c38e5f8736\n", - "Successfully built openrl\n", - "Installing collected packages: openrl\n", - "Successfully installed openrl-0.0.9\n" - ] - } - ], - "source": [ - "!pip uninstall openrl -y\n", - "# !pip install git+https://github.com/OpenRL-Lab/openrl.git\n", - "!pip install git+https://github.com/huangshiyu13/openrl.git@dev" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After the installation, we can check the version of `openrl` via:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OPENRL version: \u001b[31mv0.0.9\u001b[0m\u001b[0m\r\n", - "Developed by OpenRL Contributors, Email: \u001b[31mhuangshiyu@4paradigm.com\u001b[0m\u001b[0m\r\n" - ] - } - ], - "source": [ - "!openrl --version" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Users can also get the system infomation via:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "- OS: \u001b[31mmacOS-13.3.1-arm64-arm-64bit Darwin Kernel Version 22.4.0: Mon Mar 6 21:00:41 PST 2023; root:xnu-8796.101.5~3/RELEASE_ARM64_T8103\u001b[0m\u001b[0m\r\n", - "- Python: \u001b[31m3.9.16\u001b[0m\u001b[0m\r\n", - "- OpenRL: \u001b[31mv0.0.9\u001b[0m\u001b[0m\r\n", - "- PyTorch: \u001b[31m2.0.0\u001b[0m\u001b[0m\r\n", - "- GPU Enabled: \u001b[31mFalse\u001b[0m\u001b[0m\r\n", - "- Numpy: \u001b[31m1.21.4\u001b[0m\u001b[0m\r\n", - "- Gymnasium: \u001b[31m0.28.1\u001b[0m\u001b[0m\r\n" - ] - } - ], - "source": 
[ - "!openrl --system_info" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training\n", - "\n", - "Users can train CartPole agent with `openrl` with a few lines:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n" - ] - }, - { - "data": { - "text/html": [ - "
[05/08/23 17:31:01] INFO     2023-05-08 17:31:01,752 [INFO] Episode: 0/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[05/08/23 17:31:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:01\u001b[0m,\u001b[1;36m752\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m0\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=271493;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=536110;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[05/08/23 17:31:02] INFO     2023-05-08 17:31:02,055 [INFO] Episode: 1/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[05/08/23 17:31:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m055\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m1\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=870163;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=318046;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:02,246 [INFO] Episode: 2/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m246\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m2\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=952225;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=229053;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:02,437 [INFO] Episode: 3/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m437\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m3\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=792518;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=99437;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:02,636 [INFO] Episode: 4/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m636\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m4\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=558433;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=739426;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:02,832 [INFO] Episode: 5/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:02\u001b[0m,\u001b[1;36m832\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m5\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=325213;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=103560;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[05/08/23 17:31:03] INFO     2023-05-08 17:31:03,024 [INFO] Episode: 6/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[05/08/23 17:31:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m024\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m6\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=717209;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=346236;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:03,214 [INFO] Episode: 7/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m214\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m7\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=455262;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=331556;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:03,412 [INFO] Episode: 8/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m412\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m8\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=579363;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=500181;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:03,686 [INFO] Episode: 9/11                             logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m686\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m9\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=65304;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844132;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     2023-05-08 17:31:03,878 [INFO] Episode: 10/11                            logger.py:150\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m \u001b[1;36m2023\u001b[0m-\u001b[1;36m05\u001b[0m-\u001b[1;36m08\u001b[0m \u001b[1;92m17:31:03\u001b[0m,\u001b[1;36m878\u001b[0m \u001b[1m[\u001b[0mINFO\u001b[1m]\u001b[0m Episode: \u001b[1;36m10\u001b[0m/\u001b[1;36m11\u001b[0m \u001b]8;id=97802;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py\u001b\\\u001b[2mlogger.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=754665;file:///Users/hsy17/ai3.9/lib/python3.9/site-packages/openrl/utils/logger.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import numpy as np\n", - "from openrl.envs.common import make\n", - "from openrl.modules.common import PPONet as Net\n", - "from openrl.runners.common import PPOAgent as Agent\n", - "# create environment\n", - "env = make(\"CartPole-v1\", env_num=9)\n", - "# create network\n", - "net = Net(env)\n", - "# create training agent\n", - "agent = Agent(net)\n", - "# begin to train!\n", - "agent.train(total_time_steps=20000)\n", - "env.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test\n", - "\n", - "After the training, users can test the agent with:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - 
"/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "/Users/hsy17/ai3.9/lib/python3.9/site-packages/gymnasium/wrappers/step_api_compatibility.py:40: DeprecationWarning: \u001b[33mWARN: Initializing environment in (old) done step API which returns one bool instead of two.\u001b[0m\n", - " deprecation(\n", - "2023-05-08 17:31:34.240 Python[2621:4087937] ApplePersistenceIgnoreState: Existing state will not be touched. 
New state will be written to /var/folders/1_/frxjhmqs1s71_h9pflvwgvk40000gn/T/org.python.python.savedState\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1: reward:1.0\n", - "2: reward:1.0\n", - "3: reward:1.0\n", - "4: reward:1.0\n", - "5: reward:1.0\n", - "6: reward:1.0\n", - "7: reward:1.0\n", - "8: reward:1.0\n", - "9: reward:1.0\n", - "10: reward:1.0\n", - "11: reward:1.0\n", - "12: reward:1.0\n", - "13: reward:1.0\n", - "14: reward:1.0\n", - "15: reward:1.0\n", - "16: reward:1.0\n", - "17: reward:1.0\n", - "18: reward:1.0\n", - "19: reward:1.0\n", - "20: reward:1.0\n", - "21: reward:1.0\n", - "22: reward:1.0\n", - "23: reward:1.0\n", - "24: reward:1.0\n", - "25: reward:1.0\n", - "26: reward:1.0\n", - "27: reward:1.0\n", - "28: reward:1.0\n", - "29: reward:1.0\n", - "30: reward:1.0\n", - "31: reward:1.0\n", - "32: reward:1.0\n", - "33: reward:1.0\n", - "34: reward:1.0\n", - "35: reward:1.0\n", - "36: reward:1.0\n", - "37: reward:1.0\n", - "38: reward:1.0\n", - "39: reward:1.0\n", - "40: reward:1.0\n", - "41: reward:1.0\n", - "42: reward:1.0\n", - "43: reward:1.0\n", - "44: reward:1.0\n", - "45: reward:1.0\n", - "46: reward:1.0\n", - "47: reward:1.0\n", - "48: reward:1.0\n", - "49: reward:1.0\n", - "50: reward:1.0\n", - "51: reward:1.0\n", - "52: reward:1.0\n", - "53: reward:1.0\n", - "54: reward:1.0\n", - "55: reward:1.0\n", - "56: reward:1.0\n", - "57: reward:1.0\n", - "58: reward:1.0\n", - "59: reward:1.0\n", - "60: reward:1.0\n", - "61: reward:1.0\n", - "62: reward:1.0\n", - "63: reward:1.0\n", - "64: reward:1.0\n", - "65: reward:1.0\n", - "66: reward:1.0\n", - "67: reward:1.0\n", - "68: reward:1.0\n", - "69: reward:1.0\n", - "70: reward:1.0\n", - "71: reward:1.0\n", - "72: reward:1.0\n", - "73: reward:1.0\n", - "74: reward:1.0\n", - "75: reward:1.0\n", - "76: reward:1.0\n", - "77: reward:1.0\n", - "78: reward:1.0\n", - "79: reward:1.0\n", - "80: reward:1.0\n", - "81: reward:1.0\n", - "82: reward:1.0\n", - "83: reward:1.0\n", 
- "84: reward:1.0\n", - "85: reward:1.0\n", - "86: reward:1.0\n", - "87: reward:1.0\n", - "88: reward:1.0\n", - "89: reward:1.0\n", - "90: reward:1.0\n", - "91: reward:1.0\n", - "92: reward:1.0\n", - "93: reward:1.0\n", - "94: reward:1.0\n", - "95: reward:1.0\n", - "96: reward:1.0\n", - "97: reward:1.0\n", - "98: reward:1.0\n", - "99: reward:1.0\n", - "100: reward:1.0\n", - "101: reward:1.0\n", - "102: reward:1.0\n", - "103: reward:1.0\n", - "104: reward:1.0\n", - "105: reward:1.0\n", - "106: reward:1.0\n", - "107: reward:1.0\n", - "108: reward:1.0\n", - "109: reward:1.0\n", - "110: reward:1.0\n", - "111: reward:1.0\n", - "112: reward:1.0\n", - "113: reward:1.0\n", - "114: reward:1.0\n", - "115: reward:1.0\n", - "116: reward:1.0\n", - "117: reward:1.0\n", - "118: reward:1.0\n", - "119: reward:1.0\n", - "120: reward:1.0\n", - "121: reward:1.0\n", - "122: reward:1.0\n", - "123: reward:1.0\n", - "124: reward:1.0\n", - "125: reward:1.0\n", - "126: reward:1.0\n", - "127: reward:1.0\n", - "128: reward:1.0\n", - "129: reward:1.0\n", - "130: reward:1.0\n", - "131: reward:1.0\n", - "132: reward:1.0\n", - "133: reward:1.0\n", - "134: reward:1.0\n", - "135: reward:1.0\n", - "136: reward:1.0\n", - "137: reward:1.0\n", - "138: reward:1.0\n", - "139: reward:1.0\n", - "140: reward:1.0\n", - "141: reward:1.0\n", - "142: reward:1.0\n", - "143: reward:1.0\n", - "144: reward:1.0\n", - "145: reward:1.0\n", - "146: reward:1.0\n", - "147: reward:1.0\n", - "148: reward:1.0\n", - "149: reward:1.0\n", - "150: reward:1.0\n", - "151: reward:1.0\n", - "152: reward:1.0\n", - "153: reward:1.0\n", - "154: reward:1.0\n", - "155: reward:1.0\n", - "156: reward:1.0\n", - "157: reward:1.0\n", - "158: reward:1.0\n", - "159: reward:1.0\n", - "160: reward:1.0\n", - "161: reward:1.0\n", - "162: reward:1.0\n", - "163: reward:1.0\n", - "164: reward:1.0\n", - "165: reward:1.0\n", - "166: reward:1.0\n", - "167: reward:1.0\n", - "168: reward:1.0\n", - "169: reward:1.0\n", - "170: reward:1.0\n", - "171: 
reward:1.0\n", - "172: reward:1.0\n", - "173: reward:1.0\n", - "174: reward:1.0\n", - "175: reward:1.0\n", - "176: reward:1.0\n", - "177: reward:1.0\n", - "178: reward:1.0\n", - "179: reward:1.0\n", - "180: reward:1.0\n", - "181: reward:1.0\n", - "182: reward:1.0\n", - "183: reward:1.0\n", - "184: reward:1.0\n", - "185: reward:1.0\n", - "186: reward:1.0\n", - "187: reward:1.0\n", - "188: reward:1.0\n", - "189: reward:1.0\n", - "190: reward:1.0\n", - "191: reward:1.0\n", - "192: reward:1.0\n", - "193: reward:1.0\n", - "194: reward:1.0\n", - "195: reward:1.0\n", - "196: reward:1.0\n", - "197: reward:1.0\n", - "198: reward:1.0\n", - "199: reward:1.0\n", - "200: reward:1.0\n", - "201: reward:1.0\n", - "202: reward:1.0\n", - "203: reward:1.0\n", - "204: reward:1.0\n", - "205: reward:1.0\n", - "206: reward:1.0\n", - "207: reward:1.0\n", - "208: reward:1.0\n", - "209: reward:1.0\n", - "210: reward:1.0\n", - "211: reward:1.0\n", - "212: reward:1.0\n", - "213: reward:1.0\n", - "214: reward:1.0\n", - "215: reward:1.0\n", - "216: reward:1.0\n", - "217: reward:1.0\n", - "218: reward:1.0\n", - "219: reward:1.0\n", - "220: reward:1.0\n", - "221: reward:1.0\n", - "222: reward:1.0\n", - "223: reward:1.0\n", - "224: reward:1.0\n", - "225: reward:1.0\n", - "226: reward:1.0\n", - "227: reward:1.0\n", - "228: reward:1.0\n", - "229: reward:1.0\n", - "230: reward:1.0\n", - "231: reward:1.0\n", - "232: reward:1.0\n", - "233: reward:1.0\n", - "234: reward:1.0\n", - "235: reward:1.0\n", - "236: reward:1.0\n", - "237: reward:1.0\n", - "238: reward:1.0\n", - "239: reward:1.0\n", - "240: reward:1.0\n", - "241: reward:1.0\n", - "242: reward:1.0\n", - "243: reward:1.0\n", - "244: reward:1.0\n", - "245: reward:1.0\n", - "246: reward:1.0\n", - "247: reward:1.0\n", - "248: reward:1.0\n", - "249: reward:1.0\n", - "250: reward:1.0\n", - "251: reward:1.0\n", - "252: reward:1.0\n", - "253: reward:1.0\n", - "254: reward:1.0\n", - "255: reward:1.0\n", - "256: reward:1.0\n", - "257: reward:1.0\n", - 
"258: reward:1.0\n", - "259: reward:1.0\n", - "260: reward:1.0\n", - "261: reward:1.0\n", - "262: reward:1.0\n", - "263: reward:1.0\n", - "264: reward:1.0\n", - "265: reward:1.0\n", - "266: reward:1.0\n", - "267: reward:1.0\n", - "268: reward:1.0\n", - "269: reward:1.0\n", - "270: reward:1.0\n", - "271: reward:1.0\n", - "272: reward:1.0\n", - "273: reward:1.0\n", - "274: reward:1.0\n", - "275: reward:1.0\n", - "276: reward:1.0\n", - "277: reward:1.0\n", - "278: reward:1.0\n", - "279: reward:1.0\n", - "280: reward:1.0\n", - "281: reward:1.0\n", - "282: reward:1.0\n", - "283: reward:1.0\n", - "284: reward:1.0\n", - "285: reward:1.0\n", - "286: reward:1.0\n", - "287: reward:1.0\n", - "288: reward:1.0\n", - "289: reward:1.0\n", - "290: reward:1.0\n", - "291: reward:1.0\n", - "292: reward:1.0\n", - "293: reward:1.0\n", - "294: reward:1.0\n", - "295: reward:1.0\n", - "296: reward:1.0\n", - "297: reward:1.0\n", - "298: reward:1.0\n", - "299: reward:1.0\n", - "300: reward:1.0\n", - "301: reward:1.0\n", - "302: reward:1.0\n", - "303: reward:1.0\n", - "304: reward:1.0\n", - "305: reward:1.0\n", - "306: reward:1.0\n", - "307: reward:1.0\n", - "308: reward:1.0\n", - "309: reward:1.0\n", - "310: reward:1.0\n", - "311: reward:1.0\n", - "312: reward:1.0\n", - "313: reward:1.0\n", - "314: reward:1.0\n", - "315: reward:1.0\n", - "316: reward:1.0\n", - "317: reward:1.0\n", - "318: reward:1.0\n", - "319: reward:1.0\n", - "320: reward:1.0\n", - "321: reward:1.0\n", - "322: reward:1.0\n", - "323: reward:1.0\n", - "324: reward:1.0\n", - "325: reward:1.0\n", - "326: reward:1.0\n", - "327: reward:1.0\n", - "328: reward:1.0\n", - "329: reward:1.0\n", - "330: reward:1.0\n", - "331: reward:1.0\n", - "332: reward:1.0\n", - "333: reward:1.0\n", - "334: reward:1.0\n", - "335: reward:1.0\n", - "336: reward:1.0\n", - "337: reward:1.0\n", - "338: reward:1.0\n", - "339: reward:1.0\n", - "340: reward:1.0\n", - "341: reward:1.0\n", - "342: reward:1.0\n", - "343: reward:1.0\n", - "344: reward:1.0\n", 
- "345: reward:1.0\n", - "346: reward:1.0\n", - "347: reward:1.0\n", - "348: reward:1.0\n", - "349: reward:1.0\n", - "350: reward:1.0\n", - "351: reward:1.0\n", - "352: reward:1.0\n", - "353: reward:1.0\n", - "354: reward:1.0\n", - "355: reward:1.0\n", - "356: reward:1.0\n", - "357: reward:1.0\n", - "358: reward:1.0\n", - "359: reward:1.0\n", - "360: reward:1.0\n", - "361: reward:1.0\n", - "362: reward:1.0\n", - "363: reward:1.0\n", - "364: reward:1.0\n", - "365: reward:1.0\n", - "366: reward:1.0\n", - "367: reward:1.0\n", - "368: reward:1.0\n", - "369: reward:1.0\n", - "370: reward:1.0\n", - "371: reward:1.0\n", - "372: reward:1.0\n", - "373: reward:1.0\n", - "374: reward:1.0\n", - "375: reward:1.0\n", - "376: reward:1.0\n", - "377: reward:1.0\n", - "378: reward:1.0\n", - "379: reward:1.0\n", - "380: reward:1.0\n", - "381: reward:1.0\n", - "382: reward:1.0\n", - "383: reward:1.0\n", - "384: reward:1.0\n", - "385: reward:1.0\n", - "386: reward:1.0\n", - "387: reward:1.0\n", - "388: reward:1.0\n", - "389: reward:1.0\n", - "390: reward:1.0\n", - "391: reward:1.0\n", - "392: reward:1.0\n", - "393: reward:1.0\n", - "394: reward:1.0\n", - "395: reward:1.0\n", - "396: reward:1.0\n", - "397: reward:1.0\n", - "398: reward:1.0\n", - "399: reward:1.0\n", - "400: reward:1.0\n", - "401: reward:1.0\n", - "402: reward:1.0\n", - "403: reward:1.0\n", - "404: reward:1.0\n", - "405: reward:1.0\n", - "406: reward:1.0\n", - "407: reward:1.0\n", - "408: reward:1.0\n", - "409: reward:1.0\n", - "410: reward:1.0\n", - "411: reward:1.0\n", - "412: reward:1.0\n", - "413: reward:1.0\n", - "414: reward:1.0\n", - "415: reward:1.0\n", - "416: reward:1.0\n", - "417: reward:1.0\n", - "418: reward:1.0\n", - "419: reward:1.0\n", - "420: reward:1.0\n", - "421: reward:1.0\n", - "422: reward:1.0\n", - "423: reward:1.0\n", - "424: reward:1.0\n", - "425: reward:1.0\n", - "426: reward:1.0\n", - "427: reward:1.0\n", - "428: reward:1.0\n", - "429: reward:1.0\n", - "430: reward:1.0\n", - "431: 
reward:1.0\n", - "432: reward:1.0\n", - "433: reward:1.0\n", - "434: reward:1.0\n", - "435: reward:1.0\n", - "436: reward:1.0\n", - "437: reward:1.0\n", - "438: reward:1.0\n", - "439: reward:1.0\n", - "440: reward:1.0\n", - "441: reward:1.0\n", - "442: reward:1.0\n", - "443: reward:1.0\n", - "444: reward:1.0\n", - "445: reward:1.0\n", - "446: reward:1.0\n", - "447: reward:1.0\n", - "448: reward:1.0\n", - "449: reward:1.0\n", - "450: reward:1.0\n", - "451: reward:1.0\n", - "452: reward:1.0\n", - "453: reward:1.0\n", - "454: reward:1.0\n", - "455: reward:1.0\n", - "456: reward:1.0\n", - "457: reward:1.0\n", - "458: reward:1.0\n", - "459: reward:1.0\n", - "460: reward:1.0\n", - "461: reward:1.0\n", - "462: reward:1.0\n", - "463: reward:1.0\n", - "464: reward:1.0\n", - "465: reward:1.0\n", - "466: reward:1.0\n", - "467: reward:1.0\n", - "468: reward:1.0\n", - "469: reward:1.0\n", - "470: reward:1.0\n", - "471: reward:1.0\n", - "472: reward:1.0\n", - "473: reward:1.0\n", - "474: reward:1.0\n", - "475: reward:1.0\n", - "476: reward:1.0\n", - "477: reward:1.0\n", - "478: reward:1.0\n", - "479: reward:1.0\n", - "480: reward:1.0\n", - "481: reward:1.0\n", - "482: reward:1.0\n", - "483: reward:1.0\n", - "484: reward:1.0\n", - "485: reward:1.0\n", - "486: reward:1.0\n", - "487: reward:1.0\n", - "488: reward:1.0\n", - "489: reward:1.0\n", - "490: reward:1.0\n", - "491: reward:1.0\n", - "492: reward:1.0\n", - "493: reward:1.0\n", - "494: reward:1.0\n", - "495: reward:1.0\n", - "496: reward:1.0\n", - "497: reward:1.0\n", - "498: reward:1.0\n", - "499: reward:1.0\n", - "500: reward:1.0\n" - ] - } - ], - "source": [ - " # Begin to test\n", - "env = make(\"CartPole-v1\", render_mode=\"group_human\", env_num=9, asynchronous=True)\n", - "agent.set_env(env)\n", - "obs, info = env.reset()\n", - "done = False\n", - "step = 0\n", - "while not np.any(done):\n", - " # The agent predicts the next action based on environmental observations.\n", - " action, _ = agent.act(obs, 
deterministic=True)\n", - " obs, r, done, info = env.step(action)\n", - " step += 1\n", - " print(f\"{step}: reward:{np.mean(r)}\")\n", - "env.close()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} From 0082dc534f73ca2743b1e6b570e0265cac77e53f Mon Sep 17 00:00:00 2001 From: huangshiyu Date: Mon, 8 May 2023 19:07:44 +0800 Subject: [PATCH 5/5] - add colab --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 89833da2..601a66d9 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ openrl --version OpenRL目前也提供了包含显卡支持和非显卡支持的Docker镜像。 如果用户的电脑上没有英伟达显卡,则可以通过以下命令获取不包含显卡插件的镜像: + ```bash sudo docker pull openrllab/openrl-cpu ```