forked from nvpro-samples/vk_gaussian_splatting
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdepth_video_loader.h
More file actions
459 lines (391 loc) · 12.7 KB
/
depth_video_loader.h
File metadata and controls
459 lines (391 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
/*
* Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <filesystem>
#include <string>
#include <vector>
#include <cstdint>
#include <mutex>
#include <atomic>
#include <memory>
#include <thread>
#include <chrono>
#include <volk.h>
#ifdef WITH_VIDEO_DECODER
#include "video_decoder.h"
#endif
#ifdef WITH_VIDEO_DECODER
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
}
#endif
namespace vk_viewer {
/**
 * @brief Description of one preprocessed video + depth-video pair
 *
 * Populated from metadata.json. Holds the file locations and the
 * configuration needed for synchronized video + depth video playback.
 * Every field has a zero/empty default.
 */
struct DepthVideoMetadata {
  // --- Input files ---
  std::string videoPath;       // Path to source video
  std::string depthVideoPath;  // Path to depth video (H.265/HEVC lossless)

  // --- Source video properties ---
  int32_t frameCount{0};
  double  fps{0.0};
  int32_t sourceWidth{0};
  int32_t sourceHeight{0};

  // --- Provenance of the depth data ---
  std::string modelId;
  std::string deviceSpec;
  int32_t     processRes{0};
  double      processingTimeS{0.0};
  double      avgFps{0.0};
  std::string format;  // e.g., "hevc_lossless"

  // --- Depth range ---
  float zMin{0.0f};
  float zMax{0.0f};

  // --- Side-by-side depth + normals support ---
  bool    hasNormals{false};  // Whether normals are available
  bool    sideBySide{false};  // Whether video is side-by-side (depth | normals)
  int32_t normalsWidth{0};    // Width of normals portion (same as depthWidth if side-by-side)

  // --- Computed values ---
  int32_t depthWidth{0};
  int32_t depthHeight{0};
};
/**
 * @brief Single depth frame with metric depth values
 *
 * All scalar members carry default initializers so that a
 * default-constructed frame (e.g. an output argument declared as
 * `DepthVideoFrame f;`) never exposes indeterminate values. The struct
 * remains an aggregate, so brace-initialization keeps working.
 */
struct DepthVideoFrame {
  uint32_t           timestampMs = 0;  // Frame timestamp in milliseconds
  uint32_t           width       = 0;  // Frame width
  uint32_t           height      = 0;  // Frame height
  std::vector<float> data;             // Depth values in meters
  float              zMin        = 0.0f;  // Depth range for this frame; 0 until filled in
  float              zMax        = 0.0f;
};
/**
 * @brief One video frame as produced by the FFmpeg decode path
 *
 * Starts out empty: no pixels, zero dimensions, timestamp 0.
 */
struct DecodedVideoFrame {
  std::vector<uint8_t> data;            // RGBA or grayscale pixel data
  int                  width{0};        // Frame width
  int                  height{0};       // Frame height
  double               timestamp{0.0};  // In seconds
};
/**
 * @brief Load metadata.json file
 *
 * Declared here and defined out of line; the parsing itself is not
 * visible in this header.
 *
 * @param metadataPath Path to metadata.json file or directory containing it
 * @param outMetadata Output metadata structure, filled in by the call
 *                    (NOTE(review): state of outMetadata on failure is not
 *                    specified here — confirm in the implementation)
 * @return true if successfully loaded
 */
bool loadDepthVideoMetadata(const std::filesystem::path& metadataPath, DepthVideoMetadata& outMetadata);
/**
 * @brief One synchronized RGB + depth frame used by buffered playback
 *
 * Bundles the RGB video data and the metric depth data that belong to the
 * same frame, so that a buffered frame can be fetched directly when seeking.
 */
struct PlaybackFrame
{
  // --- Position in the stream ---
  double   timestampSec{0.0};
  uint32_t frameIndex{0};

  // --- Frame dimensions ---
  uint32_t width{0};
  uint32_t height{0};

  // --- CPU-side payload ---
  std::vector<uint8_t> rgbRGBA;      // RGBA8 video data (CPU fallback)
  std::vector<float>   depthMeters;  // Per-pixel depth in meters

  // --- HW Decoding Support ---
  // All handles default to "none"/undefined.
  VkImage               rgbImage{VK_NULL_HANDLE};
  VkFormat              rgbFormat{VK_FORMAT_UNDEFINED};
  VkImageLayout         rgbLayout{VK_IMAGE_LAYOUT_UNDEFINED};
  VkSemaphore           rgbSemaphore{VK_NULL_HANDLE};
  std::shared_ptr<void> hwFrameRef;  // Keep alive the AVFrame/VkImage

  // --- Depth range (defaults to [0, 1]) ---
  float zMin{0.0f};
  float zMax{1.0f};
};
/**
 * @brief Thread-safe buffer for synchronized video+depth frames
 *
 * VideoDepthPlaybackManager uses this container to allow random-access
 * seeking inside the region that has already been buffered. A background
 * thread appends frames; the render thread reads them.
 *
 * Only the trivial getters are defined inline here; everything else is
 * implemented out of line.
 */
class SynchronizedFrameBuffer
{
public:
  /// Configure the buffer for a stream with the given duration, frame count and frame rate.
  void init(double totalDurationSec, uint32_t frameCount, double fps);

  /// Reset the buffer (defined out of line).
  void clear();

  /// Hand one frame over to the buffer; the frame is taken by rvalue reference.
  void pushFrame(PlaybackFrame&& frame);

  /// Number of frames buffered so far (atomic read).
  size_t getBufferedFrameCount() const
  {
    return m_bufferedCount.load();
  }

  /// Buffered duration in seconds (defined out of line).
  double getBufferedDurationSec() const;

  /// Total stream duration in seconds.
  double getTotalDurationSec() const
  {
    return m_totalDurationSec;
  }

  /// Total number of frames in the stream.
  uint32_t getTotalFrameCount() const
  {
    return m_totalFrameCount;
  }

  /// Stream frame rate.
  double getFps() const
  {
    return m_fps;
  }

  /// Buffer fill ratio (defined out of line).
  float getBufferedRatio() const;

  /// Whether the whole stream has been buffered (defined out of line).
  bool isFullyBuffered() const;

  /// Fetch the frame for time tSec into out; the bool result reports availability.
  bool getFrameAtTime(double tSec, PlaybackFrame& out) const;

  /// Fetch the frame with the given index into out; the bool result reports availability.
  bool getFrameByIndex(uint32_t index, PlaybackFrame& out) const;

private:
  std::vector<PlaybackFrame> m_frames;             // Stored frames
  std::atomic<size_t>        m_bufferedCount{0};   // Count reported by getBufferedFrameCount()
  double                     m_totalDurationSec = 0.0;
  uint32_t                   m_totalFrameCount  = 0;
  double                     m_fps              = 30.0;  // Defaults to 30 fps
  mutable std::mutex         m_mutex;  // mutable so the const accessors are able to lock it
};
/**
 * @brief FFmpeg-based depth video loader for offline preprocessed sequences
 *
 * Loads depth video (H.265 lossless or similar) and decodes frames on demand.
 * Depth values are stored as normalized 8-bit grayscale in the video,
 * then converted back to metric depth using zMin/zMax from metadata.
 *
 * This class is designed to work alongside the existing VideoDecoder class
 * for synchronized video + depth playback.
 *
 * Ownership: when WITH_VIDEO_DECODER is defined the loader holds raw FFmpeg
 * context pointers and it declares its own destructor, so copy and move are
 * explicitly deleted (Rule of Five). They were already unavailable
 * implicitly because of the std::mutex / std::atomic members; spelling it
 * out keeps that guarantee independent of the member list.
 *
 * NOTE(review): the FFmpeg members only exist when WITH_VIDEO_DECODER is
 * defined, so the class layout depends on that macro — every translation
 * unit including this header must agree on it.
 */
class DepthVideoLoader {
public:
  DepthVideoLoader();
  ~DepthVideoLoader();

  // Non-copyable, non-movable: see the ownership note on the class.
  DepthVideoLoader(const DepthVideoLoader&)            = delete;
  DepthVideoLoader& operator=(const DepthVideoLoader&) = delete;
  DepthVideoLoader(DepthVideoLoader&&)                 = delete;
  DepthVideoLoader& operator=(DepthVideoLoader&&)      = delete;

  /**
   * @brief Open depth video file and initialize FFmpeg
   * @param depthVideoPath Path to depth video file
   * @param metadata Associated metadata
   * @return true if successful
   */
  bool open(const std::filesystem::path& depthVideoPath, const DepthVideoMetadata& metadata);

  /**
   * @brief Close and cleanup resources
   */
  void close();

  /**
   * @brief Check if loader is open
   * @return Current value of the atomic open flag
   */
  bool isOpen() const { return m_isOpen.load(); }

  /**
   * @brief Get frame count
   */
  int64_t getFrameCount() const { return m_frameCount; }

  /**
   * @brief Get frame rate
   */
  double getFrameRate() const { return m_frameRate; }

  /**
   * @brief Get video dimensions
   * @param width Receives the frame width
   * @param height Receives the frame height
   */
  void getDimensions(int& width, int& height) const;

  /**
   * @brief Get duration in seconds
   */
  double getDuration() const { return m_duration; }

  /**
   * @brief Get current playback position in seconds
   * @return Current value of the atomic playback-time member
   */
  double getCurrentTime() const { return m_currentTime.load(); }

  /**
   * @brief Seek to specific timestamp
   * @param timestamp Timestamp in seconds
   * @return true if successful
   */
  bool seekToTime(double timestamp);

  /**
   * @brief Get next depth frame (thread-safe)
   * @param frame Output depth frame with metric values
   * @return true if frame available, false if end of stream or error
   */
  bool getNextFrame(DepthVideoFrame& frame);

  /**
   * @brief Get depth frame by index
   * @param index Frame index (0-based)
   * @param frame Output depth frame
   * @return true if successful
   */
  bool getFrame(int64_t index, DepthVideoFrame& frame);

  /**
   * @brief Get depth frame by timestamp (closest match)
   * @param timestampMs Timestamp in milliseconds
   * @param frame Output depth frame
   * @return true if successful
   */
  bool getFrameByTimestamp(uint32_t timestampMs, DepthVideoFrame& frame);

private:
  /**
   * @brief Initialize FFmpeg contexts
   * @return true if successful
   */
  bool initializeFFmpeg();

  /**
   * @brief Cleanup FFmpeg contexts
   */
  void cleanupFFmpeg();

  /**
   * @brief Decode next frame from video
   * @return true if frame decoded successfully
   */
  bool decodeNextFrame();

  /**
   * @brief Convert decoded grayscale frame to metric depth
   * @param grayscaleData 8-bit grayscale pixel data
   * @param width Frame width
   * @param height Frame height
   * @param outputFrame Output depth frame with metric values
   */
  void convertToMetricDepth(const uint8_t* grayscaleData, int width, int height, DepthVideoFrame& outputFrame);

#ifdef WITH_VIDEO_DECODER
  // FFmpeg contexts (raw pointers, null until set up)
  AVFormatContext* m_formatContext  = nullptr;
  AVCodecContext*  m_codecContext   = nullptr;
  SwsContext*      m_swsContext     = nullptr;
  AVFrame*         m_avFrame        = nullptr;
  AVFrame*         m_grayscaleFrame = nullptr;
  AVPacket*        m_packet         = nullptr;
#endif

  // Video stream information
  int     m_videoStreamIndex = -1;  // -1 until a stream index is assigned
  int     m_width            = 0;
  int     m_height           = 0;
  double  m_frameRate        = 0.0;
  double  m_duration         = 0.0;
  int64_t m_frameCount       = 0;

  // Playback state
  std::atomic<bool>   m_isOpen{false};
  std::atomic<bool>   m_eof{false};
  std::atomic<double> m_currentTime{0.0};

  // Frame buffer for current frame
  std::vector<uint8_t> m_currentFrameData;
  std::mutex           m_frameMutex;
  int64_t              m_currentFrameIndex = -1;  // -1 until a frame index is assigned
  double               m_currentFramePts   = 0.0;

  // Metadata
  DepthVideoMetadata m_metadata;

  // Error handling
  std::string m_errorMessage;
};
/**
 * @brief Combined video + depth playback manager with buffered frames
 *
 * Manages both video and depth video playback with synchronization.
 * Uses a buffered approach: frames are decoded into memory first,
 * then playback and seeking operate on the local buffer.
 *
 * Ownership: the manager holds a std::thread plus decoder/loader objects and
 * declares its own destructor, so copy and move are explicitly deleted
 * (Rule of Five). They were already unavailable implicitly because of the
 * std::thread / std::atomic / std::mutex members; spelling it out keeps that
 * guarantee independent of the member list.
 *
 * NOTE(review): m_videoDecoder only exists when WITH_VIDEO_DECODER is
 * defined, so the class layout depends on that macro — every translation
 * unit including this header must agree on it.
 */
class VideoDepthPlaybackManager {
public:
  VideoDepthPlaybackManager() = default;
  ~VideoDepthPlaybackManager();

  // Non-copyable, non-movable: see the ownership note on the class.
  VideoDepthPlaybackManager(const VideoDepthPlaybackManager&)            = delete;
  VideoDepthPlaybackManager& operator=(const VideoDepthPlaybackManager&) = delete;
  VideoDepthPlaybackManager(VideoDepthPlaybackManager&&)                 = delete;
  VideoDepthPlaybackManager& operator=(VideoDepthPlaybackManager&&)      = delete;

  /**
   * @brief Open video and depth video from metadata.json
   * @param metadataPath Path to metadata.json or directory containing it
   * @param instance Vulkan instance (optional, for HW decoding)
   * @param physicalDevice Physical device (optional, for HW decoding)
   * @param device Vulkan device (optional, for HW decoding)
   * @param queueFamilyIndex Queue family index (optional, for HW decoding)
   * @param queueIndex Queue index (optional, for HW decoding)
   * @return true if successfully opened both video and depth
   */
  bool openFromMetadata(const std::filesystem::path& metadataPath,
                        VkInstance                   instance         = VK_NULL_HANDLE,
                        VkPhysicalDevice             physicalDevice   = VK_NULL_HANDLE,
                        VkDevice                     device           = VK_NULL_HANDLE,
                        uint32_t                     queueFamilyIndex = 0,
                        uint32_t                     queueIndex       = 0);

  /**
   * @brief Close and cleanup
   */
  void close();

  /**
   * @brief Check if playback is active
   * @return Current value of the atomic playing flag
   */
  bool isPlaying() const { return m_isPlaying.load(); }

  /**
   * @brief Get metadata
   * @return Reference to the manager's stored metadata
   */
  const DepthVideoMetadata& getMetadata() const { return m_metadata; }

  /**
   * @brief Start playback
   */
  void play();

  /**
   * @brief Pause playback
   */
  void pause();

  /**
   * @brief Toggle play/pause
   */
  void togglePlayPause();

  /**
   * @brief Seek to timestamp (clamped to buffered region)
   * @param timestamp Timestamp in seconds
   */
  void seek(double timestamp);

  /**
   * @brief Get current playback time (based on clock, not decoder)
   */
  double getCurrentTime() const;

  /**
   * @brief Check if paused
   * @return Current value of the atomic paused flag (initially true)
   */
  bool isPaused() const { return m_paused.load(); }

  /**
   * @brief Check if playback reached end of buffered content
   */
  bool isAtEnd() const;

  /**
   * @brief Get total duration from metadata
   */
  double getDuration() const;

  /**
   * @brief Get buffered duration (how much is ready for playback)
   */
  double getBufferedDuration() const;

  /**
   * @brief Get buffer fill ratio (0.0 to 1.0)
   */
  float getBufferedRatio() const;

  /**
   * @brief Check if fully buffered
   */
  bool isFullyBuffered() const;

  /**
   * @brief Check if buffering is in progress
   * @return Current value of the atomic buffering flag
   */
  bool isBuffering() const { return m_bufferingActive.load(); }

  /**
   * @brief Get frame at specified time from buffer
   * @param tSec Time in seconds
   * @param out Output frame
   * @return true if frame available
   */
  bool getFrameAtTime(double tSec, PlaybackFrame& out) const;

private:
  // Defined out of line. NOTE(review): presumably the entry point run on
  // m_prebufferThread — confirm in the implementation.
  void prebufferThread();
  void updatePlaybackTime();

#ifdef WITH_VIDEO_DECODER
  std::unique_ptr<VideoDecoder> m_videoDecoder;
#endif
  std::unique_ptr<DepthVideoLoader> m_depthLoader;
  DepthVideoMetadata                m_metadata;

  // Buffered frames and the thread/flags associated with buffering
  SynchronizedFrameBuffer m_buffer;
  std::thread             m_prebufferThread;
  std::atomic<bool>       m_bufferingActive{false};
  std::atomic<bool>       m_stopBuffering{false};

  // Playback state; the manager starts out paused and not playing
  std::atomic<bool> m_isPlaying{false};
  std::atomic<bool> m_paused{true};

  // Playback clock
  std::chrono::steady_clock::time_point m_playbackStartTime;
  double                                m_seekOffsetSec = 0.0;
  mutable std::mutex                    m_timeMutex;  // mutable so const accessors are able to lock it
};
} // namespace vk_viewer