Add decode_video option and change default fps_source to 'fps' (#1222)

Zulko · Oct 3, 2020 · 074efc7 · 074efc7
1 parent 8e305c2
commit 074efc7
Show file tree

Hide file tree

Showing 8 changed files with 81 additions and 36 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - New `pix_fmt` parameter in `VideoFileClip`, `VideoClip.write_videofile()`, `VideoClip.write_gif()` that allows passing a custom `pix_fmt` parameter such as `"bgr24"` to FFmpeg [#1237]
 
 ### Changed <!-- for changes in existing functionality -->
+- `ffmpeg_parse_infos()` and `VideoFileClip` now have optional `decode_file` parameter that ensures that the detected duration is correct, but may take a long time to run [#1063, #1222]
+- `ffmpeg_parse_infos()` and `VideoFileClip` now use `fps` metadata instead of `tbr` to detect a video's fps value [#1222]
 - `FFMPEG_AudioReader.close_proc()` -> `FFMPEG_AudioReader.close()` for consistency with `FFMPEG_VideoReader` [#1220]
 - `ffmpeg_parse_infos()` and `VideoFileClip` now detect the actual duration of the decoded video instead of the duration stored in its metadata [#1063]
 

diff --git a/media/bitmap.mp4 b/media/bitmap.mp4
diff --git a/moviepy/audio/io/AudioFileClip.py b/moviepy/audio/io/AudioFileClip.py
@@ -65,13 +65,19 @@ class AudioFileClip(AudioClip):
     """
 
     @convert_path_to_string("filename")
-    def __init__(self, filename, buffersize=200000, nbytes=2, fps=44100):
+    def __init__(
+        self, filename, decode_file=False, buffersize=200000, nbytes=2, fps=44100
+    ):
 
         AudioClip.__init__(self)
 
         self.filename = filename
         self.reader = FFMPEG_AudioReader(
-            filename, fps=fps, nbytes=nbytes, buffersize=buffersize
+            filename,
+            decode_file=decode_file,
+            fps=fps,
+            nbytes=nbytes,
+            buffersize=buffersize,
         )
         self.fps = fps
         self.duration = self.reader.duration

diff --git a/moviepy/audio/io/readers.py b/moviepy/audio/io/readers.py
@@ -39,16 +39,25 @@ class FFMPEG_AudioReader:
     """
 
     def __init__(
-        self, filename, buffersize, print_infos=False, fps=44100, nbytes=2, nchannels=2
+        self,
+        filename,
+        buffersize,
+        decode_file=False,
+        print_infos=False,
+        fps=44100,
+        nbytes=2,
+        nchannels=2,
     ):
-
+        # TODO bring FFMPEG_AudioReader more in line with FFMPEG_VideoReader
+        # E.g. here self.pos is still 1-indexed.
+        # (or have them inherit from a shared parent class)
         self.filename = filename
         self.nbytes = nbytes
         self.fps = fps
         self.f = "s%dle" % (8 * nbytes)
         self.acodec = "pcm_s%dle" % (8 * nbytes)
         self.nchannels = nchannels
-        infos = ffmpeg_parse_infos(filename)
+        infos = ffmpeg_parse_infos(filename, decode_file=decode_file)
         self.duration = infos["duration"]
         if "video_duration" in infos:
             self.duration = infos["video_duration"]

diff --git a/moviepy/video/io/VideoFileClip.py b/moviepy/video/io/VideoFileClip.py
@@ -83,14 +83,15 @@ class VideoFileClip(VideoClip):
     def __init__(
         self,
         filename,
+        decode_file=False,
         has_mask=False,
         audio=True,
         audio_buffersize=200000,
         target_resolution=None,
         resize_algorithm="bicubic",
         audio_fps=44100,
         audio_nbytes=2,
-        fps_source="tbr",
+        fps_source="fps",
         pix_fmt=None,
     ):
 
@@ -101,6 +102,7 @@ def __init__(
             pix_fmt = "rgba" if has_mask else "rgb24"
         self.reader = FFMPEG_VideoReader(
             filename,
+            decode_file=decode_file,
             pix_fmt=pix_fmt,
             target_resolution=target_resolution,
             resize_algo=resize_algorithm,

diff --git a/moviepy/video/io/ffmpeg_reader.py b/moviepy/video/io/ffmpeg_reader.py
@@ -20,18 +20,21 @@ class FFMPEG_VideoReader:
     def __init__(
         self,
         filename,
+        decode_file=True,
         print_infos=False,
         bufsize=None,
         pix_fmt="rgb24",
         check_duration=True,
         target_resolution=None,
         resize_algo="bicubic",
-        fps_source="tbr",
+        fps_source="fps",
     ):
 
         self.filename = filename
         self.proc = None
-        infos = ffmpeg_parse_infos(filename, print_infos, check_duration, fps_source)
+        infos = ffmpeg_parse_infos(
+            filename, decode_file, print_infos, check_duration, fps_source
+        )
         self.fps = infos["video_fps"]
         self.size = infos["video_size"]
         self.rotation = infos["video_rotation"]
@@ -70,7 +73,11 @@ def __init__(
         self.initialize()
 
     def initialize(self, starttime=0):
-        """Opens the file, creates the pipe. """
+        """
+        Opens the file, creates the pipe.
+        Sets self.pos to the appropriate value (1 if starttime == 0 because
+        it pre-reads the first frame)
+        """
 
         self.close(delete_lastread=False)  # if any
 
@@ -117,12 +124,14 @@ def initialize(self, starttime=0):
             popen_params["creationflags"] = 0x08000000
         self.proc = sp.Popen(cmd, **popen_params)
 
-        # This will be incremented by the subsequent `read_frame`
-        self.pos = self.get_frame_number(starttime) - 1
+        # self.pos represents the (0-indexed) index of the frame that is next in line
+        # to be read by self.read_frame().
+        # Eg when self.pos is 1, the 2nd frame will be read next.
+        self.pos = self.get_frame_number(starttime)
         self.lastread = self.read_frame()
 
     def skip_frames(self, n=1):
-        """Reads and throws away n frames """
+        """Reads and throws away n frames"""
         w, h = self.size
         for i in range(n):
             self.proc.stdout.read(self.depth * w * h)
@@ -131,18 +140,23 @@ def skip_frames(self, n=1):
         self.pos += n
 
     def read_frame(self):
+        """
+        Reads the next frame from the file.
+        Note that upon (re)initialization, the first frame will already have been read
+        and stored in ``self.lastread``.
+        """
         w, h = self.size
         nbytes = self.depth * w * h
 
         s = self.proc.stdout.read(nbytes)
-        self.pos += 1
 
         if len(s) != nbytes:
             warnings.warn(
-                "Warning: in file %s, " % (self.filename)
+                f"In file {self.filename}, "
                 + "%d bytes wanted but %d bytes read," % (nbytes, len(s))
-                + "at frame %d/%d, at time %.02f/%.02f sec. "
-                % (self.pos, self.nframes, 1.0 * self.pos / self.fps, self.duration)
+                + f"at frame index {self.pos} (out of a total {self.nframes} frames), "
+                + f"at time %.02f/%.02f sec. "
+                % (1.0 * self.pos / self.fps, self.duration)
                 + "Using the last valid frame instead.",
                 UserWarning,
             )
@@ -168,6 +182,9 @@ def read_frame(self):
             result.shape = (h, w, len(s) // (w * h))  # reshape((h, w, len(s)//(w*h)))
             self.lastread = result
 
+        # We have to do this down here because `self.pos` is used in the warning above
+        self.pos += 1
+
         return result
 
     def get_frame(self, t):
@@ -179,7 +196,10 @@ def get_frame(self, t):
         whenever possible, by moving between adjacent frames.
         """
 
-        pos = self.get_frame_number(t)
+        # + 1 so that it represents the frame position that it will be
+        # after the frame is read. This makes the later comparisions easier.
+        pos = self.get_frame_number(t) + 1
+
         # Initialize proc if it is not open
         if not self.proc:
             print(f"Proc not detected")
@@ -204,7 +224,7 @@ def get_frame_number(self, t):
         # imprecisions a 3.0 can become a 2.99999999... which makes the int()
         # go to the previous integer. This makes the fetching more robust when you
         # are getting the nth frame by writing get_frame(n/fps).
-        return int(self.fps * t + 0.00001) + 1
+        return int(self.fps * t + 0.00001)
 
     def close(self, delete_lastread=True):
         if self.proc:
@@ -254,7 +274,11 @@ def ffmpeg_read_image(filename, with_mask=True, pix_fmt=None):
 
 
 def ffmpeg_parse_infos(
-    filename, print_infos=False, check_duration=True, fps_source="tbr"
+    filename,
+    decode_file=False,
+    print_infos=False,
+    check_duration=True,
+    fps_source="fps",
 ):
     """Get file infos using ffmpeg.
 
@@ -266,12 +290,10 @@ def ffmpeg_parse_infos(
     fetching the uncomplete frames at the end, which raises an error.
 
     """
-
-    # open the file in a pipe, provoke an error, read output
-    is_GIF = filename.endswith(".gif")
-    cmd = [FFMPEG_BINARY, "-i", filename, "-acodec", "copy", "-f", "null", "-"]
-    if is_GIF:
-        cmd += ["-f", "null", "/dev/null"]
+    # Open the file in a pipe, read output
+    cmd = [FFMPEG_BINARY, "-i", filename]
+    if decode_file:
+        cmd.extend(["-f", "null", "-"])
 
     popen_params = {
         "bufsize": 10 ** 5,
@@ -312,19 +334,15 @@ def ffmpeg_parse_infos(
 
     if check_duration:
         try:
-            keyword = "frame=" if is_GIF else "Duration: "
-            # for large GIFS the "full" duration is presented as the last element in the list.
-            index = -1 if is_GIF else 0
-            line = [l for l in lines if keyword in l][index]
+            if decode_file:
+                line = [l for l in lines if "time=" in l][-1]
+            else:
+                line = [l for l in lines if "Duration:" in l][-1]
             match = re.findall("([0-9][0-9]:[0-9][0-9]:[0-9][0-9].[0-9][0-9])", line)[0]
             result["duration"] = cvsecs(match)
         except Exception:
             raise IOError(
-                (
-                    "MoviePy error: failed to read the duration of file %s.\n"
-                    "Here are the file infos returned by ffmpeg:\n\n%s"
-                )
-                % (filename, infos)
+                f"MoviePy error: failed to read the duration of file {filename}.\nHere are the file infos returned by ffmpeg:\n\n{infos}"
             )
 
     # get the output line that speaks about video
@@ -397,7 +415,7 @@ def get_fps():
                 result["video_fps"] = x * coef
 
         if check_duration:
-            result["video_nframes"] = int(result["duration"] * result["video_fps"]) + 1
+            result["video_nframes"] = int(result["duration"] * result["video_fps"])
             result["video_duration"] = result["duration"]
         else:
             result["video_nframes"] = 1

diff --git a/moviepy/video/io/html_tools.py b/moviepy/video/io/html_tools.py
@@ -154,7 +154,7 @@ def html_embed(
 
     if filetype in ["audio", "video"]:
 
-        duration = ffmpeg_parse_infos(filename)["duration"]
+        duration = ffmpeg_parse_infos(filename, decode_file=True)["duration"]
         if duration > maxduration:
             raise ValueError(
                 "The duration of video %s (%.1f) exceeds the 'maxduration' "

diff --git a/tests/test_ffmpeg_reader.py b/tests/test_ffmpeg_reader.py
@@ -28,6 +28,14 @@ def test_ffmpeg_parse_infos():
     assert d["video_bitrate"]
 
 
+def test_ffmpeg_parse_infos_duration():
+    infos = ffmpeg_parse_infos("media/big_buck_bunny_0_30.webm")
+    assert infos["video_nframes"] == 720
+
+    infos = ffmpeg_parse_infos("media/bitmap.mp4")
+    assert infos["video_nframes"] == 5
+
+
 def test_ffmpeg_parse_infos_for_i926():
     d = ffmpeg_parse_infos("tests/resource/sintel_with_15_chapters.mp4")
     assert d["audio_found"]