Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
alex04072000 authored May 24, 2019
1 parent 95701e3 commit 144fcd6
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 0 deletions.
80 changes: 80 additions & 0 deletions 1_move_files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
After extracting the RAR, we run this to move all the files into
the appropriate train/test folders.
Should only run this file once!
"""
import os
import os.path

def get_train_test_lists(version='01'):
    """Read one UCF-101 train/test split and return the file groups.

    Args:
        version: which official split to use ('01', '02', or '03').

    Returns:
        dict mapping 'UCF101_train' / 'UCF101_test' to lists of
        'UCF101/<class>/<file>.avi' paths.
    """
    list_dir = 'ucfTrainTestlist'
    test_file = os.path.join(list_dir, 'testlist' + version + '.txt')
    train_file = os.path.join(list_dir, 'trainlist' + version + '.txt')

    # Test rows are just '<class>/<file>.avi'.
    with open(test_file) as handle:
        test_list = ['UCF101/' + line.strip() for line in handle]

    # Train rows carry a trailing class index ('<class>/<file>.avi <idx>'),
    # so keep only the first whitespace-separated token.
    with open(train_file) as handle:
        train_list = ['UCF101/' + line.split()[0] for line in handle]

    return {
        'UCF101_train': train_list,
        'UCF101_test': test_list,
    }

def move_files(file_groups):
    """Move each listed video from UCF101/<class>/<file> into
    <group>/<class>/<file>.

    Assumes the extracted dataset lives in ./UCF101 relative to the current
    working directory.  Safe to re-run: files already moved (or missing)
    are skipped with a message.

    Args:
        file_groups: dict mapping group folder name ('UCF101_train' /
            'UCF101_test') to lists of 'UCF101/<class>/<file>' paths.
    """
    # Do each of our groups.
    for group, videos in file_groups.items():

        # Do each of our videos.
        for video in videos:

            # BUGFIX: the lists were built with literal '/' separators
            # ('UCF101/<class>/<file>'), so split on '/', not os.path.sep —
            # os.path.sep is '\\' on Windows and would fail to split these.
            parts = video.split('/')
            classname = parts[1]
            filename = parts[2]

            # Create <group>/<class> on first use.
            dest_dir = os.path.join(group, classname)
            if not os.path.exists(dest_dir):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(dest_dir)

            # Check if we have already moved this file, or at least that it
            # exists to move.
            src = os.path.join('UCF101', classname, filename)
            if not os.path.exists(src):
                print("Can't find %s to move. Skipping." % (filename))
                continue

            # Move it.  (Log message fixed to show the real source path —
            # it previously omitted the class folder.)
            dest = os.path.join(dest_dir, filename)
            print("Moving %s to %s" % (src, dest))
            os.rename(src, dest)

    print("Done.")

def main():
    """Entry point: read the split lists, then move every video into its
    train/test class folder."""
    # Fetch the {group: [video paths]} mapping and relocate the files.
    move_files(get_train_test_lists())

if __name__ == '__main__':
    main()
70 changes: 70 additions & 0 deletions 2_filter_psnr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import glob
import os
import numpy as np
import cv2
import operator

data_path = 'UCF101_train'
# Target total number of frame triplets to keep across all classes.
NUM = 276918

# One entry per class folder under data_path.
# NOTE: without recursive=True, '**' here matches like '*' (immediate
# children only).
classes = glob.glob(os.path.join(data_path, '**'))
print(len(classes))

# Count the extracted frames in every class and the grand total.
total_counts = 0
class_counts = []
for i, class_dir in enumerate(classes):
    frames = glob.glob(os.path.join(class_dir, '*.png'))
    print('class ' + str(i) + ': ' + str(len(frames)))
    class_counts.append(len(frames))
    total_counts += len(frames)

# Per-class quota: NUM split proportionally to each class's frame count.
filtered_counts = []
for i, count in enumerate(class_counts):
    quota = int(float(NUM) * count / float(total_counts))
    filtered_counts.append(quota)
    print('filtered class ' + str(i) + ': ' + str(quota))

def psnr(x1, x2):
    """Peak signal-to-noise ratio, in dB, between two arrays scaled to [0, 1].

    The mean squared error is clamped to 1e-10 so identical inputs yield a
    finite ceiling of 100 dB instead of a division by zero.
    """
    diff = x1 - x2
    mse = np.maximum(np.mean(diff * diff), 1e-10)
    return 10 * np.log10(1 / mse)



"""for training set"""
# calculate PSNR and sort
f0 = open('frame1.txt', 'w')
f1 = open('frame2.txt', 'w')
f2 = open('frame3.txt', 'w')
for i in range(len(classes)):
print('filtering... ' + str(i))
triplets_dict = []
png_files = glob.glob(os.path.join(classes[i], '*.png'))
png_files = sorted(png_files)
for j in range(1, len(png_files)-1):
idx = int(png_files[j][-8:-4])
if png_files[j-1] == (png_files[j][:-8] + str(idx-1).zfill(4) + '.png') \
and png_files[j+1] == (png_files[j][:-8] + str(idx+1).zfill(4) + '.png'):
img0 = cv2.imread(png_files[j-1]).astype(np.float32) / 255.0
img1 = cv2.imread(png_files[j]).astype(np.float32) / 255.0
img2 = cv2.imread(png_files[j+1]).astype(np.float32) / 255.0

psnr0 = psnr(img0, img1)
psnr1 = psnr(img1, img2)

triplets_dict.append((png_files[j], (psnr0 + psnr1) / 2.0))
# triplets_dict[png_files[i]] = (psnr0 + psnr1) / 2.0

triplets_dict = sorted(triplets_dict, key=lambda tup: tup[1])

print('class ' + str(i) + ', psnr threshold = ' + str(triplets_dict[filtered_counts[i]][1]))

for j in range(filtered_counts[i]):
idx = int(triplets_dict[j][0][-8:-4])
f0.write('./'+triplets_dict[j][0][:-8] + str(idx-1).zfill(4) + '.png' + '\n')
f1.write('./'+triplets_dict[j][0][:-8] + str(idx).zfill(4) + '.png' + '\n')
f2.write('./'+triplets_dict[j][0][:-8] + str(idx+1).zfill(4) + '.png' + '\n')

f0.close()
f1.close()
f2.close()
29 changes: 29 additions & 0 deletions extract_only.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#! /bin/bash
set -e

# Extract frame triplets from the UCF-101 dataset
# (http://crcv.ucf.edu/data/UCF101.php). Run as follows:
#
#     ./extract-ucf101.sh dir/file.avi
#
# Or, with parallel from moreutils, you can do it for all videos
# over many cores:
#
#     parallel -j 12 ./extract-ucf101.sh ::: $( find -name \*.avi )
#     //parallel -j 12 ./extract-ucf101.sh -- $( find -name \*.avi )
#
# but do note that this will produce ~250 GB of PNGs, probably many
# more frames than you actually would get to use for training
# and likely straining the file system with ~5M files.
#
# The script will create a set of frame files that you can easily combine
# and use for training:
#
#     for N in 1 2 3; do echo $N; cat $( find -name \*_frame$N.txt | sort -u ) > ../frame$N.txt; done

# BUGFIX: all expansions are now double-quoted so paths containing spaces
# or glob characters do not break word splitting.
FILE="$1"

# Mangle the basename so every character outside [a-zA-Z0-9_-] becomes '_';
# frames land next to the video as <mangled-name>_NNNN.png.
PREFIX="$( dirname "$FILE" )/$( basename "$FILE" | tr -c 'a-zA-Z0-9_-' '_' )"

echo "$FILE"

# Scale every frame to 256x256 and dump 4-digit-numbered PNGs.
ffmpeg -loglevel error -i "$FILE" -vf scale=256:256 "${PREFIX}_%04d.png"

0 comments on commit 144fcd6

Please sign in to comment.