Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ea9fda8
passes all but 1 test case
TyTodd Jun 12, 2025
7be0dcf
Migrated Audio feature to use torchcodec as a backend. Fixed how form…
TyTodd Jun 13, 2025
c0d3fce
fixed audio and video features so they now pass the test_dataset_with…
TyTodd Jun 13, 2025
12511a3
added load dataset test case to test_video.py
TyTodd Jun 13, 2025
72f3ade
Modified documentation to document new torchcodec implementation of V…
TyTodd Jun 13, 2025
c1843c3
code formatting for torchcodec changes
TyTodd Jun 14, 2025
8b29d61
Merge branch 'main' into torchcodec-decoding
TyTodd Jun 14, 2025
c4a1ac0
Merge branch 'main' into torchcodec-decoding
TyTodd Jun 17, 2025
4dfff64
Merge branch 'main' into torchcodec-decoding
lhoestq Jun 17, 2025
e8b68e5
Update src/datasets/features/audio.py
TyTodd Jun 17, 2025
e9a4a14
added backwards compatibility support and _hf_encoded for Audio feature.
TyTodd Jun 17, 2025
6c0e425
move AudioDecoder to its own file
lhoestq Jun 18, 2025
e74a9ee
naming
lhoestq Jun 18, 2025
28e0173
docs
lhoestq Jun 18, 2025
c50c505
style
lhoestq Jun 18, 2025
806a4ba
update tests
lhoestq Jun 19, 2025
f5a53c4
Merge branch 'main' into torchcodec-decoding
lhoestq Jun 19, 2025
3ee5f90
no torchcodec for windows
lhoestq Jun 19, 2025
eb6324c
further cleaning
lhoestq Jun 19, 2025
8a1e0bc
fix
lhoestq Jun 19, 2025
661b574
install ffmpeg in ci
lhoestq Jun 19, 2025
8036265
fix ffmpeg installation
lhoestq Jun 19, 2025
b582c5b
fix mono backward compatibility
lhoestq Jun 19, 2025
4e265db
fix ffmpeg
lhoestq Jun 19, 2025
f043c0c
again
lhoestq Jun 19, 2025
37763db
fix mono backward compat
lhoestq Jun 19, 2025
5198748
fix tests
lhoestq Jun 19, 2025
f06ef21
fix tests
lhoestq Jun 19, 2025
4a637bd
again
lhoestq Jun 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added load dataset test case to test_video.py
  • Loading branch information
TyTodd committed Jun 13, 2025
commit 12511a342ad9590b697c9e7b462180531d10d2bc
31 changes: 28 additions & 3 deletions tests/features/test_video.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from datasets import Dataset, Features, Video, Value
from datasets import Dataset, Features, Video, Value, Audio, load_dataset

from ..utils import require_torchcodec

Expand Down Expand Up @@ -91,7 +91,7 @@ def test_dataset_with_video_map_and_formatted(shared_datadir):
# assert isinstance(example["video"][0], np.ndarray)


# Added Test Case
# Dataset casting and mapping
def test_dataset_with_video_feature_map_is_decoded(shared_datadir):
video_path = str(shared_datadir / "test_video_66x50.mov")
data = {"video": [video_path], "text": ["Hello"]}
Expand All @@ -118,4 +118,29 @@ def process_audio_sampling_rate_by_batch(batch):
decoded_dset = dset.map(process_audio_sampling_rate_by_batch, batched=True)
for item in decoded_dset.cast_column("video", Video(decode=False)):
assert item.keys() == {"video", "text", "double_begin_stream_seconds"}
assert item["double_begin_stream_seconds"] == 0.0
assert item["double_begin_stream_seconds"] == 0.0

@pytest.fixture
def jsonl_video_dataset_path(shared_datadir, tmp_path_factory):
    """Create a one-row JSON Lines file that points at the sample video.

    The single row holds the path of ``test_video_66x50.mov`` (resolved
    against ``shared_datadir``) under ``"video"`` and a short caption under
    ``"text"``. The file is written into a fresh directory obtained from
    ``tmp_path_factory``.

    Returns:
        The path of the written ``video_dataset.jsonl`` file, as a string.
    """
    import json

    rows = [{"video": str(shared_datadir / "test_video_66x50.mov"), "text": "Hello world!"}]
    jsonl_path = str(tmp_path_factory.mktemp("data") / "video_dataset.jsonl")
    with open(jsonl_path, "w") as out:
        # One JSON document per line, each terminated by a newline.
        out.writelines(json.dumps(row) + "\n" for row in rows)
    return jsonl_path

@require_torchcodec
@pytest.mark.parametrize("streaming", [False, True])
def test_load_dataset_with_video_feature(streaming, jsonl_video_dataset_path, shared_datadir):
    """Check that ``load_dataset`` with a ``Video()`` feature yields decoded videos.

    The test loads the JSON Lines file produced by ``jsonl_video_dataset_path``
    with the ``"json"`` builder, once with ``streaming=False`` and once with
    ``streaming=True``, and inspects the first example.

    Args:
        streaming: Forwarded to ``load_dataset``; also selects how the first
            example is fetched (indexing vs. iteration).
        jsonl_video_dataset_path: Path of the JSON Lines file to load.
        shared_datadir: Not read by the test body; kept so the test signature
            stays unchanged.
    """
    # Local import — presumably so that collecting this module does not need
    # torchcodec installed (the test itself is gated by @require_torchcodec).
    from torchcodec.decoders import VideoDecoder

    features = Features({"video": Video(), "text": Value("string")})
    dset = load_dataset(
        "json",
        split="train",
        data_files=jsonl_video_dataset_path,
        features=features,
        streaming=streaming,
    )
    # In streaming mode the first example is pulled through iteration;
    # otherwise it is fetched by index.
    item = next(iter(dset)) if streaming else dset[0]
    assert item.keys() == {"video", "text"}
    assert isinstance(item["video"], VideoDecoder)
    # NOTE(review): presumably (channels, height, width) for the 66x50 sample
    # clip — confirm against the torchcodec frame layout.
    assert item["video"].get_frame_at(0).data.shape == (3, 50, 66)