-
Notifications
You must be signed in to change notification settings - Fork 501
Expand file tree
/
Copy pathdataset.py
More file actions
240 lines (195 loc) · 9.59 KB
/
Copy pathdataset.py
File metadata and controls
240 lines (195 loc) · 9.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#
# PySceneDetect: Python-Based Video Scene Detector
# -------------------------------------------------------------------
# [ Site: https://scenedetect.com ]
# [ Docs: https://scenedetect.com/docs/ ]
# [ Github: https://github.com/Breakthrough/PySceneDetect/ ]
#
# Copyright (C) 2026 Brandon Castellano <http://www.bcastell.com>.
# PySceneDetect is licensed under the BSD 3-Clause License; see the
# included LICENSE file, or visit one of the above pages for details.
#
"""Benchmark dataset definitions and registry.
Each :class:`Dataset` is a corpus of :class:`Sample` records (video file + typed ground truth)
loaded eagerly at construction. Ground-truth files for the supported corpora are at most a few
hundred kilobytes total, so eager loading avoids re-reading the same files for every sweep cell.
Add a new dataset by:
1. Subclassing :class:`Dataset` and populating ``self._samples`` in ``__init__``.
2. Registering it in :data:`DATASETS` under the name used by ``--dataset``.
"""
from __future__ import annotations

import glob
import json
import logging
import os
from collections.abc import Callable, Iterator
from dataclasses import dataclass
from pathlib import Path

from benchmark.evaluator import EventInterval, Frames, GroundTruth
logger = logging.getLogger("pyscenedetect")
@dataclass(frozen=True)
class Sample:
    """A single benchmark item: one video paired with the ground truth it is scored against.

    Instances are immutable (the dataclass is frozen).
    """

    # Location of the video file on disk.
    video_file: Path
    # Typed ground-truth annotations for that video.
    ground_truth: GroundTruth
class Dataset:
    """Base class for a benchmark corpus: a sized, iterable collection of :class:`Sample`.

    A subclass fills ``self._samples`` inside its own constructor; iteration and ``len()``
    are implemented here on top of that list.

    ``event_types`` names the TRECVID-SBD event categories present in the dataset's ground
    truth, which lets consumers omit columns/tables for categories that have no events
    (e.g. fade transitions on BBC/AutoShot). The default covers hard cuts only.
    """

    # Default for corpora that only annotate hard cuts; subclasses may override.
    event_types: frozenset[str] = frozenset(("hard_cut",))
    # Declared here, assigned by each subclass constructor.
    _samples: list[Sample]

    def __len__(self) -> int:
        """Return the number of loaded samples."""
        return len(self._samples)

    def __iter__(self) -> Iterator[Sample]:
        """Iterate over the loaded samples in their stored order."""
        return iter(self._samples)
def _read_tab_separated_cuts(scene_file: str) -> list[Frames]:
    """Parse a BBC/AutoShot-style annotation file.

    Each non-blank line is tab-separated; the second column is the 0-based frame index of a
    hard cut. Blank or whitespace-only lines (e.g. stray trailing newlines) are skipped
    instead of aborting the load.

    Args:
        scene_file: Path of the annotation file to read.

    Returns:
        The cuts in file order as 1-based frame indices (second column + 1), matching the
        convention used by :class:`scenedetect.FrameTimecode`.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has no second tab-separated column.
        ValueError: If the second column is not an integer.
    """
    cuts: list[Frames] = []
    # Explicit encoding so parsing does not depend on the platform's locale default.
    with open(scene_file, encoding="utf-8") as f:
        for line in f:
            record = line.strip()
            if not record:
                # Nothing to parse on an empty line; indexing column 1 would raise.
                continue
            cuts.append(int(record.split("\t")[1]) + 1)
    return cuts
class BBCDataset(Dataset):
    """The BBC Planet Earth dataset.

    Baraldi et al., "A Deep Siamese Network for Scene Detection in Broadcast Videos",
    ACM Multimedia 2015. https://arxiv.org/abs/1510.08893

    11 long-form videos (``BBC/videos/bbc_<id>.mp4``) with hard-cut annotations in
    ``BBC/fixed/<id>-scenes.txt``.

    Videos and annotation files are paired by position after sorting both directory
    listings; the constructor raises ``ValueError`` when the two listings differ in length
    or when the ids of a pair do not agree.
    """

    def __init__(self, dataset_dir: str):
        videos = sorted(glob.glob(os.path.join(dataset_dir, "videos", "*.mp4")))
        annotations = sorted(glob.glob(os.path.join(dataset_dir, "fixed", "*.txt")))
        n_videos, n_annotations = len(videos), len(annotations)
        if n_videos != n_annotations:
            raise ValueError(
                f"BBC dataset at {dataset_dir!r}: {n_videos} videos but "
                f"{n_annotations} annotation files."
            )

        self._samples: list[Sample] = []
        for video_file, scene_file in zip(videos, annotations, strict=True):
            # "bbc_<id>.mp4" -> "<id>" and "<id>-scenes.txt" -> "<id>".
            video_name = os.path.basename(video_file)
            scene_name = os.path.basename(scene_file)
            id_from_video = video_name.replace("bbc_", "").split(".")[0]
            id_from_scene = scene_name.split("-")[0]
            if id_from_video != id_from_scene:
                raise ValueError(f"BBC id mismatch: {video_file} vs {scene_file}")

            cuts = _read_tab_separated_cuts(scene_file)
            sample = Sample(
                video_file=Path(video_file),
                ground_truth=GroundTruth(hard_cuts=cuts),
            )
            self._samples.append(sample)
class AutoShotDataset(Dataset):
    """The AutoShot dataset (test splits).

    Zhu et al., "AutoShot: A Short Video Dataset and State-of-the-Art Shot Boundary
    Detection", CVPRW 2023. The original test set has 200 videos; 36 are no longer
    publicly available, so the corpus iterates over whatever is present on disk.

    Videos at ``AutoShot/videos/<id>.mp4``, hard-cut annotations at
    ``AutoShot/annotations/<id>.txt``. Only ids that have both a video and an annotation
    file become samples, ordered by sorted id.
    """

    def __init__(self, dataset_dir: str):
        def index_by_id(subdir: str, pattern: str) -> dict[str, str]:
            # Key: file name up to its first "."; value: the path as returned by glob.
            found = glob.glob(os.path.join(dataset_dir, subdir, pattern))
            return {os.path.basename(path).split(".")[0]: path for path in found}

        # Part of the original corpus is missing, so match the two listings by id
        # instead of zipping them strictly.
        videos_by_id = index_by_id("videos", "*.mp4")
        scenes_by_id = index_by_id("annotations", "*.txt")

        samples: list[Sample] = []
        for sample_id in sorted(set(videos_by_id) & set(scenes_by_id)):
            cuts = _read_tab_separated_cuts(scenes_by_id[sample_id])
            samples.append(
                Sample(
                    video_file=Path(videos_by_id[sample_id]),
                    ground_truth=GroundTruth(hard_cuts=cuts),
                )
            )
        self._samples = samples
class ClipShotsDataset(Dataset):
    """The ClipShots dataset (test split by default).

    Tang et al., "Fast Video Shot Transition Localization with Deep Structured Models",
    ACCV 2018. https://github.com/Tangshitao/ClipShots

    The only in-tree dataset with typed gradual-transition (fade/dissolve) ground truth in
    addition to hard cuts. Layout under ``ClipShots/``::

        annotations/{train,test,only_gradual}.json
        video_lists/{train,test,only_gradual}.txt   (optional split filter)
        videos/*.mp4

    Each annotation entry is ``{"transitions": [[start, end], ...], "frame_num": float}``.
    Hard cuts are single-frame spans (``end == start + 1``); wider spans are gradual
    transitions. Unlike the BBC/AutoShot annotations, ClipShots frame indices already match
    PySceneDetect's boundary-frame convention (the prediction's ``frame_num`` lines up with
    ``transition[1]`` directly), so no offset is applied here.

    Loading rules:

    - The annotations JSON and the split list are both read as UTF-8, independent of the
      platform locale.
    - Videos listed in ``video_lists/<split>.txt`` but absent from the annotations JSON are
      silently ignored (the filter runs against the JSON, not the other way).
    - Annotations whose ``.mp4`` is not on disk are skipped (so partial corpora work).
    - Malformed transitions (fewer than 2 entries, entries that cannot be converted to
      ``int``, negative span, zero-width span) are skipped with a warning rather than
      crashing the load.

    Only the ``ClipShotsDataset(dir, split=...)`` constructor honors a non-default split;
    the registry entry in :data:`DATASETS` always loads the ``test`` split.
    """

    event_types = frozenset({"hard_cut", "fade"})

    def __init__(self, dataset_dir: str, split: str = "test"):
        """Load the ``split`` annotations found under ``dataset_dir``.

        Args:
            dataset_dir: Root of the ClipShots layout described on the class.
            split: Basename of the annotation JSON (and of the optional video list).

        Raises:
            OSError: If ``annotations/<split>.json`` cannot be opened.
            json.JSONDecodeError: If the annotations file is not valid JSON.
        """
        ann_path = os.path.join(dataset_dir, "annotations", f"{split}.json")
        videos_dir = os.path.join(dataset_dir, "videos")
        # JSON text is UTF-8; do not let the locale's default encoding decide.
        with open(ann_path, encoding="utf-8") as f:
            annotations: dict = json.load(f)

        split_list_path = os.path.join(dataset_dir, "video_lists", f"{split}.txt")
        if os.path.exists(split_list_path):
            with open(split_list_path, encoding="utf-8") as allow_f:
                allowed = {line.strip() for line in allow_f if line.strip()}
            annotations = {k: v for k, v in annotations.items() if k in allowed}

        total = len(annotations)
        skipped_missing = 0
        self._samples: list[Sample] = []
        for video_name in sorted(annotations):
            video_path = os.path.join(videos_dir, video_name)
            if not os.path.exists(video_path):
                skipped_missing += 1
                continue
            # `... or []` (not `.get(k, [])`) so an explicit JSON `null` is treated as empty.
            transitions = annotations[video_name].get("transitions") or []
            hard_cuts, fades = self._split_transitions(video_name, transitions)
            self._samples.append(
                Sample(
                    video_file=Path(video_path),
                    ground_truth=GroundTruth(hard_cuts=hard_cuts, fades=fades),
                )
            )

        logger.info(
            "ClipShots %s: loaded %d/%d samples (%d videos missing on disk)",
            split,
            len(self._samples),
            total,
            skipped_missing,
        )

    @staticmethod
    def _split_transitions(
        video_name: str, transitions: list
    ) -> tuple[list[Frames], list[EventInterval]]:
        """Sort one video's raw ``[start, end]`` pairs into hard cuts and fades.

        A span of exactly 1 is a hard cut recorded at ``end``; a span greater than 1 is a
        fade covering ``start``..``end``. Anything else is logged as a warning and dropped.
        ``video_name`` is only used in those log messages.
        """
        hard_cuts: list[Frames] = []
        fades: list[EventInterval] = []
        for transition in transitions:
            try:
                start, end = int(transition[0]), int(transition[1])
            except (IndexError, KeyError, TypeError, ValueError, OverflowError):
                # Too short, not indexable, or not convertible to int: one bad entry
                # must not abort loading the whole corpus.
                logger.warning("ClipShots %s: malformed transition %r", video_name, transition)
                continue
            span = end - start
            if span == 1:
                hard_cuts.append(end)
            elif span > 1:
                fades.append(EventInterval(start=start, end=end))
            else:
                logger.warning(
                    "ClipShots %s: skipping degenerate transition %r", video_name, transition
                )
        return hard_cuts, fades
# Mapping of --dataset names to constructors. Typed as a plain callable taking the dataset
# directory (rather than ``type[Dataset]``) so subclass-specific positional signatures
# (each takes ``dataset_dir: str``) aren't widened away by the base ``Dataset`` class's
# empty ``__init__``. Insertion order is the order names are listed to consumers that
# iterate the mapping.
DATASETS: dict[str, Callable[[str], Dataset]] = {
    "BBC": BBCDataset,
    "AutoShot": AutoShotDataset,
    "ClipShots": ClipShotsDataset,
}
def resolve_dataset(name: str, root: str | None) -> Dataset:
    """Build the dataset registered under ``name`` in :data:`DATASETS`.

    The dataset directory is ``<root>/<name>``. A falsy ``root`` (``None`` or the empty
    string) selects the default repo-relative base, i.e. ``benchmark/<name>/``.

    Raises ``KeyError`` when ``name`` is not a registered dataset.
    """
    # Look the constructor up first so an unknown name fails before any path handling.
    factory = DATASETS[name]
    dataset_dir = os.path.join(root or "benchmark", name)
    return factory(dataset_dir)