Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lerobot/common/datasets/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def make_offline_buffer(cfg, sampler=None):
sampler=sampler,
batch_size=batch_size,
pin_memory=pin_memory,
prefetch=prefetch,
prefetch=prefetch if isinstance(prefetch, int) else None,
)
elif cfg.env.name == "pusht":
offline_buffer = PushtExperienceReplay(
Expand All @@ -79,7 +79,7 @@ def make_offline_buffer(cfg, sampler=None):
sampler=sampler,
batch_size=batch_size,
pin_memory=pin_memory,
prefetch=prefetch,
prefetch=prefetch if isinstance(prefetch, int) else None,
)
else:
raise ValueError(cfg.env.name)
Expand Down
127 changes: 95 additions & 32 deletions lerobot/common/datasets/pusht.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import math
import os
from pathlib import Path
from typing import Callable
Expand Down Expand Up @@ -134,20 +135,32 @@ def __init__(
else:
storage = TensorStorage(TensorDict.load_memmap(self.root / dataset_id))

mean_std = self._compute_or_load_mean_std(storage)
mean_std["next", "observation", "image"] = mean_std["observation", "image"]
mean_std["next", "observation", "state"] = mean_std["observation", "state"]
stats = self._compute_or_load_stats(storage)
transform = NormalizeTransform(
mean_std,
stats,
in_keys=[
("observation", "image"),
# TODO(rcadene): imagenet normalization is applied inside diffusion policy
# We need to automate this for tdmpc and others
# ("observation", "image"),
("observation", "state"),
("next", "observation", "image"),
("next", "observation", "state"),
# TODO(rcadene): for tdmpc, we might want next image and state
# ("next", "observation", "image"),
# ("next", "observation", "state"),
("action"),
],
mode="min_max",
)

# TODO(rcadene): make normalization strategy configurable between mean_std, min_max, manual_min_max, min_max_from_spec
transform.stats["observation", "state", "min"] = torch.tensor(
[13.456424, 32.938293], dtype=torch.float32
)
transform.stats["observation", "state", "max"] = torch.tensor(
[496.14618, 510.9579], dtype=torch.float32
)
transform.stats["action", "min"] = torch.tensor([12.0, 25.0], dtype=torch.float32)
transform.stats["action", "max"] = torch.tensor([511.0, 511.0], dtype=torch.float32)

if writer is None:
writer = ImmutableDatasetWriter()
if collate_fn is None:
Expand Down Expand Up @@ -282,61 +295,111 @@ def _download_and_preproc(self):

return TensorStorage(td_data.lock_())

def _compute_mean_std(self, storage, num_batch=10, batch_size=32):
def _compute_stats(self, storage, num_batch=100, batch_size=32):
    """Estimate per-channel mean, std, min and max of images, states and actions.

    Batches are drawn from a replay buffer built on ``storage``. A first pass
    over ``num_batch`` batches accumulates the mean of the per-batch means. A
    second pass over ``num_batch`` batches accumulates the squared deviation of
    each per-batch mean from that overall mean, so the reported "std" is the
    spread of batch means, not a per-sample standard deviation. Min and max
    are tracked across both passes.

    Args:
        storage: storage handed to ``TensorDictReplayBuffer``.
        num_batch: number of batches sampled in each of the two passes.
        batch_size: number of samples per batch.

    Returns:
        A ``TensorDict`` holding "mean", "std", "max" and "min" under
        ("observation", "image"), ("observation", "state") and "action", each
        with a leading singleton dimension (image stats also get two trailing
        singleton dimensions). The ("next", "observation", ...) entries reuse
        the current-observation statistics.
    """
    rb = TensorDictReplayBuffer(
        storage=storage,
        batch_size=batch_size,
        prefetch=True,
    )
    batch = rb.sample()

    image_key = ("observation", "image")
    state_key = ("observation", "state")
    action_key = "action"

    # einops pattern that reduces every axis except the channel axis
    patterns = {
        image_key: "b c h w -> c",
        state_key: "b c -> c",
        action_key: "b c -> c",
    }

    mean, std, vmax, vmin = {}, {}, {}, {}
    for key in patterns:
        channels = batch[key].shape[1]
        mean[key] = torch.zeros(channels)
        std[key] = torch.zeros(channels)
        # start from the identity elements of max / min
        vmax[key] = torch.full((channels,), -math.inf)
        vmin[key] = torch.full((channels,), math.inf)

    def update_min_max(current):
        for key, pattern in patterns.items():
            vmax[key] = torch.maximum(vmax[key], einops.reduce(current[key], pattern, "max"))
            # the running minimum must use torch.minimum: with torch.maximum the
            # +inf initial value would never be replaced
            vmin[key] = torch.minimum(vmin[key], einops.reduce(current[key], pattern, "min"))

    # first pass: mean of per-batch means
    for _ in tqdm.tqdm(range(num_batch)):
        for key, pattern in patterns.items():
            mean[key] += einops.reduce(batch[key], pattern, "mean")
        update_min_max(batch)
        batch = rb.sample()

    for key in patterns:
        mean[key] /= num_batch

    # second pass: squared deviation of per-batch means from the overall mean
    for i in tqdm.tqdm(range(num_batch)):
        for key, pattern in patterns.items():
            std[key] += (einops.reduce(batch[key], pattern, "mean") - mean[key]) ** 2
        update_min_max(batch)
        # the batch for the next iteration; nothing left to sample after the last one
        if i < num_batch - 1:
            batch = rb.sample()

    for key in patterns:
        std[key] = torch.sqrt(std[key] / num_batch)

    stats = TensorDict(
        {
            ("observation", "image", "mean"): mean[image_key][None, :, None, None],
            ("observation", "image", "std"): std[image_key][None, :, None, None],
            ("observation", "image", "max"): vmax[image_key][None, :, None, None],
            ("observation", "image", "min"): vmin[image_key][None, :, None, None],
            ("observation", "state", "mean"): mean[state_key][None, :],
            ("observation", "state", "std"): std[state_key][None, :],
            ("observation", "state", "max"): vmax[state_key][None, :],
            ("observation", "state", "min"): vmin[state_key][None, :],
            ("action", "mean"): mean[action_key][None, :],
            ("action", "std"): std[action_key][None, :],
            ("action", "max"): vmax[action_key][None, :],
            ("action", "min"): vmin[action_key][None, :],
        },
        batch_size=[],
    )
    # next-step observations share the statistics of current observations
    stats["next", "observation", "image"] = stats["observation", "image"]
    stats["next", "observation", "state"] = stats["observation", "state"]
    return stats

def _compute_or_load_stats(self, storage) -> TensorDict:
stats_path = self.root / self.dataset_id / "stats.pth"
if stats_path.exists():
stats = torch.load(stats_path)
else:
logging.info(f"compute_mean_std and save to {mean_std_path}")
mean_std = self._compute_mean_std(storage)
torch.save(mean_std, mean_std_path)
return mean_std
logging.info(f"compute_stats and save to {stats_path}")
stats = self._compute_stats(storage)
torch.save(stats, stats_path)
return stats
12 changes: 5 additions & 7 deletions lerobot/common/envs/factory.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from torchrl.envs.transforms import StepCounter, TransformedEnv

from lerobot.common.envs.transforms import Prod


def make_env(cfg, transform=None):
kwargs = {
"frame_skip": cfg.env.action_repeat,
"from_pixels": cfg.env.from_pixels,
"pixels_only": cfg.env.pixels_only,
"image_size": cfg.env.image_size,
# TODO(rcadene): do we want a specific eval_env_seed?
"seed": cfg.seed,
}

if cfg.env.name == "simxarm":
Expand All @@ -19,6 +19,8 @@ def make_env(cfg, transform=None):
elif cfg.env.name == "pusht":
from lerobot.common.envs.pusht import PushtEnv

# assert kwargs["seed"] > 200, "Seed 0-200 are used for the demonstration dataset, so we don't want to seed the eval env with this range."

clsfunc = PushtEnv
else:
raise ValueError(cfg.env.name)
Expand All @@ -28,12 +30,8 @@ def make_env(cfg, transform=None):
# limit rollout to max_steps
env = TransformedEnv(env, StepCounter(max_steps=cfg.env.episode_length))

if cfg.env.name == "pusht":
# to ensure pusht is in [0,255] like simxarm
env.append_transform(Prod(in_keys=[("observation", "image")], prod=255.0))

if transform is not None:
# useful to add mean and std normalization
# useful to add normalization
env.append_transform(transform)

return env
Expand Down
76 changes: 67 additions & 9 deletions lerobot/common/envs/pusht.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import importlib
from collections import deque
from typing import Optional

import torch
Expand Down Expand Up @@ -27,12 +28,16 @@ def __init__(
image_size=None,
seed=1337,
device="cpu",
num_prev_obs=1,
num_prev_action=0,
):
super().__init__(device=device, batch_size=[])
self.frame_skip = frame_skip
self.from_pixels = from_pixels
self.pixels_only = pixels_only
self.image_size = image_size
self.num_prev_obs = num_prev_obs
self.num_prev_action = num_prev_action

if pixels_only:
assert from_pixels
Expand All @@ -56,6 +61,12 @@ def __init__(
self._make_spec()
self._current_seed = self.set_seed(seed)

if self.num_prev_obs > 0:
self._prev_obs_image_queue = deque(maxlen=self.num_prev_obs)
self._prev_obs_state_queue = deque(maxlen=self.num_prev_obs)
if self.num_prev_action > 0:
self._prev_action_queue = deque(maxlen=self.num_prev_action)

def render(self, mode="rgb_array", width=384, height=384):
if width != height:
raise NotImplementedError()
Expand All @@ -67,15 +78,15 @@ def render(self, mode="rgb_array", width=384, height=384):

def _format_raw_obs(self, raw_obs):
if self.from_pixels:
obs = {"image": torch.from_numpy(raw_obs["image"])}
image = torch.from_numpy(raw_obs["image"])
obs = {"image": image}

if not self.pixels_only:
obs["state"] = torch.from_numpy(raw_obs["agent_pos"]).type(torch.float32)
else:
# TODO:
obs = {"state": torch.from_numpy(raw_obs["observation"]).type(torch.float32)}

obs = TensorDict(obs, batch_size=[])
return obs

def _reset(self, tensordict: Optional[TensorDict] = None):
Expand All @@ -87,9 +98,25 @@ def _reset(self, tensordict: Optional[TensorDict] = None):
raw_obs = self._env.reset()
assert self._current_seed == self._env._seed

obs = self._format_raw_obs(raw_obs)

if self.num_prev_obs > 0:
stacked_obs = {}
if "image" in obs:
self._prev_obs_image_queue = deque(
[obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
)
stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
if "state" in obs:
self._prev_obs_state_queue = deque(
[obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
)
stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
obs = stacked_obs

td = TensorDict(
{
"observation": self._format_raw_obs(raw_obs),
"observation": TensorDict(obs, batch_size=[]),
"done": torch.tensor([False], dtype=torch.bool),
},
batch_size=[],
Expand All @@ -100,18 +127,37 @@ def _reset(self, tensordict: Optional[TensorDict] = None):

def _step(self, tensordict: TensorDict):
td = tensordict
# remove batch dim
action = td["action"].squeeze(0).numpy()
action = td["action"].numpy()
# step expects shape=(4,) so we pad if necessary
# TODO(rcadene): add info["is_success"] and info["success"] ?
sum_reward = 0
for _ in range(self.frame_skip):
raw_obs, reward, done, info = self._env.step(action)

if action.ndim == 1:
action = action.repeat(self.frame_skip, 1)
else:
if self.frame_skip > 1:
raise NotImplementedError()

num_action_steps = action.shape[0]
for i in range(num_action_steps):
raw_obs, reward, done, info = self._env.step(action[i])
sum_reward += reward

obs = self._format_raw_obs(raw_obs)

if self.num_prev_obs > 0:
stacked_obs = {}
if "image" in obs:
self._prev_obs_image_queue.append(obs["image"])
stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
if "state" in obs:
self._prev_obs_state_queue.append(obs["state"])
stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
obs = stacked_obs

td = TensorDict(
{
"observation": self._format_raw_obs(raw_obs),
"observation": TensorDict(obs, batch_size=[]),
"reward": torch.tensor([sum_reward], dtype=torch.float32),
# success and done are true when coverage > self.success_threshold in env
"done": torch.tensor([done], dtype=torch.bool),
Expand All @@ -124,14 +170,22 @@ def _step(self, tensordict: TensorDict):
def _make_spec(self):
obs = {}
if self.from_pixels:
image_shape = (3, self.image_size, self.image_size)
if self.num_prev_obs > 0:
image_shape = (self.num_prev_obs, *image_shape)

obs["image"] = BoundedTensorSpec(
low=0,
high=1,
shape=(3, self.image_size, self.image_size),
shape=image_shape,
dtype=torch.float32,
device=self.device,
)
if not self.pixels_only:
state_shape = self._env.observation_space["agent_pos"].shape
if self.num_prev_obs > 0:
state_shape = (self.num_prev_obs, *state_shape)

obs["state"] = BoundedTensorSpec(
low=0,
high=512,
Expand All @@ -141,6 +195,10 @@ def _make_spec(self):
)
else:
# TODO(rcadene): add observation_space achieved_goal and desired_goal?
state_shape = self._env.observation_space["observation"].shape
if self.num_prev_obs > 0:
state_shape = (self.num_prev_obs, *state_shape)

obs["state"] = UnboundedContinuousTensorSpec(
# TODO:
shape=self._env.observation_space["observation"].shape,
Expand Down
Loading