From 386208c9a3f0f111d7948e8602f5973f6005db2a Mon Sep 17 00:00:00 2001
From: goatzillax <13460445+goatzillax@users.noreply.github.com>
Date: Sun, 14 Jul 2024 20:50:46 -0500
Subject: [PATCH] Add basic thumbnail generation based on highest scoring frame
 (#169)

* Add basic thumbnail generation based on highest scoring frame in an event.

This is a very basic first cut of this functionality and does not yet properly
account for certain options such as comp_file, but it can still be quite useful as is.

fixes Breakthrough/DVR-Scan#159

* Default thumbnail option to None (disabled)

* Thumbnail code formatting cleanups

---------

Co-authored-by: goatzilla <goatzillax@users.noreply.github.com>
---
 dvr_scan/cli/__init__.py   |  8 +++++++
 dvr_scan/cli/config.py     |  2 ++
 dvr_scan/cli/controller.py |  2 ++
 dvr_scan/scanner.py        | 48 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+)

diff --git a/dvr_scan/cli/__init__.py b/dvr_scan/cli/__init__.py
index fc89bc4..cdcae4e 100644
--- a/dvr_scan/cli/__init__.py
+++ b/dvr_scan/cli/__init__.py
@@ -639,6 +639,14 @@ def get_cli_parser(user_config: ConfigRegistry):
               ' output will be appended to the existing contents.'),
     )
 
+    parser.add_argument(
+        '--thumbnails',
+        metavar='method',
+        type=str,
+        default=None,
+        help=('Produce event thumbnail(s).'),
+    )
+
     parser.add_argument(
         '-v',
         '--verbosity',
diff --git a/dvr_scan/cli/config.py b/dvr_scan/cli/config.py
index d4d6b6b..e740a8e 100644
--- a/dvr_scan/cli/config.py
+++ b/dvr_scan/cli/config.py
@@ -347,6 +347,7 @@ def from_config(config_value: str, default: 'RGBValue') -> 'RGBValue':
     'bounding-box-color': RGBValue(0xFF0000),
     'bounding-box-thickness': 0.0032,
     'bounding-box-min-size': 0.032,
+    'thumbnails': None,
 }
 """Mapping of valid configuration file parameters and their default values or placeholders.
 The types of these values are used when decoding the configuration file. Valid choices for
@@ -358,6 +359,7 @@ def from_config(config_value: str, default: 'RGBValue') -> 'RGBValue':
     'output-mode': ['scan_only', 'opencv', 'copy', 'ffmpeg'],
     'verbosity': ['debug', 'info', 'warning', 'error'],
     'bg-subtractor': ['MOG2', 'CNT', 'MOG2_CUDA'],
+    'thumbnails': ['highscore'],
 }
 """Mapping of string options which can only be of a particular set of values. We use a list instead
 of a set to preserve order when generating error contexts. Values are case-insensitive, and must be
diff --git a/dvr_scan/cli/controller.py b/dvr_scan/cli/controller.py
index 80aee22..9873413 100644
--- a/dvr_scan/cli/controller.py
+++ b/dvr_scan/cli/controller.py
@@ -284,6 +284,8 @@ def run_dvr_scan(settings: ProgramSettings) -> ty.List[ty.Tuple[FrameTimecode, F
         time_post_event=settings.get('time-post-event'),
     )
 
+    scanner.set_thumbnail_params(thumbnails=settings.get('thumbnails'),)
+
     scanner.set_video_time(
         start_time=settings.get_arg('start-time'),
         end_time=settings.get_arg('end-time'),
diff --git a/dvr_scan/scanner.py b/dvr_scan/scanner.py
index b8a236e..fd25896 100644
--- a/dvr_scan/scanner.py
+++ b/dvr_scan/scanner.py
@@ -270,9 +270,15 @@ def __init__(self,
         self._mask_writer: Optional[cv2.VideoWriter] = None
         self._num_events: int = 0
 
+        # Thumbnail production (set_thumbnail_params)
+        self._thumbnails = None
+        self._highscore = 0
+        self._highframe = None
+
         # Make sure we initialize defaults now that we loaded the input videos.
         self.set_detection_params()
         self.set_event_params()
+        self.set_thumbnail_params()
         self.set_video_time()
 
     @property
@@ -433,6 +439,9 @@ def set_event_params(self,
         self._pre_event_len = FrameTimecode(time_pre_event, self._input.framerate)
         self._post_event_len = FrameTimecode(time_post_event, self._input.framerate)
 
+    def set_thumbnail_params(self, thumbnails: Optional[str] = None):
+        self._thumbnails = thumbnails  # None (default) disables; scan() checks for "highscore".
+
     def set_video_time(self,
                        start_time: Optional[Union[int, float, str]] = None,
                        end_time: Optional[Union[int, float, str]] = None,
@@ -665,6 +674,11 @@ def scan(self) -> Optional[DetectionResult]:
             if frame_score >= self._max_threshold:
                 frame_score = 0
             above_threshold = frame_score >= self._threshold
+
+            if above_threshold and frame_score > self._highscore:
+                self._highscore = frame_score
+                self._highframe = frame.frame_bgr
+
             event_window.append(frame_score)
             # The first frame fed to the detector can sometimes produce unreliable results due
             # to it not having any previous information to compare against.
@@ -706,6 +720,24 @@ def scan(self) -> Optional[DetectionResult]:
                     num_frames_post_event += 1
                     if num_frames_post_event >= post_event_len:
                         in_motion_event = False
+
+                        logger.debug("event %d high score %f" %
+                                     (1 + self._num_events, self._highscore))
+                        if self._thumbnails == "highscore":
+                            video_name = get_filename(
+                                path=self._input.paths[0], include_extension=False)
+                            output_path = (
+                                self._comp_file if self._comp_file else OUTPUT_FILE_TEMPLATE.format(
+                                    VIDEO_NAME=video_name,
+                                    EVENT_NUMBER='%04d' % (1 + self._num_events),
+                                    EXTENSION='jpg',
+                                ))
+                            if self._output_dir:
+                                output_path = os.path.join(self._output_dir, output_path)
+                            cv2.imwrite(output_path, self._highframe)
+                            self._highscore = 0
+                            self._highframe = None
+
                         # Calculate event end based on the last frame we had with motion plus
                         # the post event length time. We also need to compensate for the number
                         # of frames that we skipped that could have had motion.
@@ -782,6 +814,22 @@ def scan(self) -> Optional[DetectionResult]:
             # curr_pos already includes the presentation duration of the frame.
             event_end = FrameTimecode(self._input.position.frame_num, self._input.framerate)
             event_list.append(MotionEvent(start=event_start, end=event_end))
+
+            logger.debug("event %d high score %f" % (1 + self._num_events, self._highscore))
+            if self._thumbnails == "highscore":
+                video_name = get_filename(path=self._input.paths[0], include_extension=False)
+                output_path = (
+                    self._comp_file if self._comp_file else OUTPUT_FILE_TEMPLATE.format(
+                        VIDEO_NAME=video_name,
+                        EVENT_NUMBER='%04d' % (1 + self._num_events),
+                        EXTENSION='jpg',
+                    ))
+                if self._output_dir:
+                    output_path = os.path.join(self._output_dir, output_path)
+                cv2.imwrite(output_path, self._highframe)
+                self._highscore = 0
+                self._highframe = None
+
             if self._output_mode != OutputMode.SCAN_ONLY:
                 encode_queue.put(MotionEvent(start=event_start, end=event_end))