|
import torch |
|
import numpy as np |
|
import ffmpeg |
|
|
|
size = 224 |
|
|
|
def get_video_dim(video_path):
    """Probe *video_path* with ffprobe and return ``(height, width, frame_rate)``.

    Args:
        video_path: Path to a video file readable by ffprobe.

    Returns:
        Tuple of (height, width, frame_rate) where frame_rate is a float
        computed from the stream's average frame rate.

    Raises:
        ValueError: If the file contains no video stream, or ffprobe reports
            a degenerate (zero-denominator) average frame rate.
    """
    probe = ffmpeg.probe(video_path)
    video_stream = next(
        (stream for stream in probe["streams"] if stream["codec_type"] == "video"),
        None,
    )
    # Fail loudly here instead of letting the `None` default surface later
    # as a confusing TypeError on subscripting.
    if video_stream is None:
        raise ValueError(f"no video stream found in {video_path!r}")
    width = int(video_stream["width"])
    height = int(video_stream["height"])
    # avg_frame_rate is a rational string such as "30000/1001"; ffprobe
    # reports "0/0" when it cannot determine the rate.
    num, denum = video_stream["avg_frame_rate"].split("/")
    if int(denum) == 0:
        raise ValueError(f"invalid avg_frame_rate for {video_path!r}")
    frame_rate = int(num) / int(denum)
    return height, width, frame_rate
|
|
|
def get_output_dim(h, w, size=224):
    """Compute rescaled ``(height, width)`` with the short side set to *size*.

    Bug fix: the original signature was ``get_output_dim(self, h, w)`` — a
    stray ``self`` on a module-level function that made the callsite
    ``get_output_dim(h, w)`` below raise TypeError, and ``self.size`` would
    have raised AttributeError regardless. ``size`` is now an explicit
    keyword parameter defaulting to 224 (the module-level constant).

    Args:
        h: Source frame height in pixels.
        w: Source frame width in pixels.
        size: Target short-side length, or an explicit ``(height, width)``
            2-tuple that is returned as-is.

    Returns:
        ``(height, width)`` preserving the source aspect ratio, with the
        smaller dimension equal to *size* (unless *size* is a 2-tuple).
    """
    if isinstance(size, tuple) and len(size) == 2:
        return size
    elif h >= w:
        # Width is the short side: pin it to `size`, scale height to match.
        return int(h * size / w), size
    else:
        # Height is the short side: pin it to `size`, scale width to match.
        return size, int(w * size / h)
|
|
|
# NOTE(review): `video_path` is not defined anywhere in this chunk —
# presumably assigned earlier in the full file; confirm before running
# this section standalone.
h, w, fr = get_video_dim(video_path)

# NOTE(review): called with two arguments although get_output_dim above is
# declared as (self, h, w) — as written this raises TypeError; the
# signature and this callsite need to be reconciled.
height, width = get_output_dim(h, w)


# Build the ffmpeg filter graph: decode at 1 frame per second, then resize
# to the (height, width) computed above.
cmd = (

    ffmpeg.input(video_path)

    .filter("fps", fps=1)

    .filter("scale", width, height)

)


# Center-crop each resized frame to a `size` x `size` square
# (x, y is the top-left corner of the crop window).
x = int((width - size) / 2.0)

y = int((height - size) / 2.0)

cmd = cmd.crop(x, y, size, size)

# Run the pipeline, dumping raw RGB24 frames to stdout and capturing the
# byte stream in memory.
out, _ = cmd.output("pipe:", format="rawvideo", pix_fmt="rgb24").run(

    capture_stdout=True, quiet=True

)


# After the crop every frame is 224 x 224 x 3 bytes; reinterpret the flat
# byte buffer as a (num_frames, H, W, C) uint8 array.
height, width = 224, 224

video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])

# astype("float32") also copies — frombuffer alone gives a read-only view
# of `out`, which torch.from_numpy could not write to.
video = torch.from_numpy(video.astype("float32"))
|