Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
alex04072000 authored May 24, 2019
1 parent 95701e3 commit 144fcd6
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 0 deletions.
80 changes: 80 additions & 0 deletions 1_move_files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
After extracting the RAR, we run this to move all the files into
the appropriate train/test folders.
Should only run this file once!
"""
import os
import os.path

def get_train_test_lists(version='01'):
    """Read one UCF-101 train/test split and return the file groups.

    Args:
        version: which official split to use ('01', '02', or '03').

    Returns:
        dict mapping 'UCF101_train' / 'UCF101_test' to lists of
        'UCF101/<class>/<file>.avi' paths.
    """
    list_dir = 'ucfTrainTestlist'
    test_file = os.path.join(list_dir, 'testlist' + version + '.txt')
    train_file = os.path.join(list_dir, 'trainlist' + version + '.txt')

    # Test rows are just '<class>/<file>.avi'.
    with open(test_file) as handle:
        test_list = ['UCF101/' + line.strip() for line in handle]

    # Train rows carry a trailing class index ('<class>/<file>.avi <idx>'),
    # so keep only the first whitespace-separated token.
    with open(train_file) as handle:
        train_list = ['UCF101/' + line.split()[0] for line in handle]

    return {
        'UCF101_train': train_list,
        'UCF101_test': test_list,
    }

def move_files(file_groups):
    """Move each listed video from UCF101/<class>/<file> into
    <group>/<class>/<file>.

    Assumes the extracted dataset lives in ./UCF101 relative to the current
    working directory.  Safe to re-run: files already moved (or missing)
    are skipped with a message.

    Args:
        file_groups: dict mapping group folder name ('UCF101_train' /
            'UCF101_test') to lists of 'UCF101/<class>/<file>' paths.
    """
    # Do each of our groups.
    for group, videos in file_groups.items():

        # Do each of our videos.
        for video in videos:

            # BUGFIX: the lists were built with literal '/' separators
            # ('UCF101/<class>/<file>'), so split on '/', not os.path.sep —
            # os.path.sep is '\\' on Windows and would fail to split these.
            parts = video.split('/')
            classname = parts[1]
            filename = parts[2]

            # Create <group>/<class> on first use.
            dest_dir = os.path.join(group, classname)
            if not os.path.exists(dest_dir):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(dest_dir)

            # Check if we have already moved this file, or at least that it
            # exists to move.
            src = os.path.join('UCF101', classname, filename)
            if not os.path.exists(src):
                print("Can't find %s to move. Skipping." % (filename))
                continue

            # Move it.  (Log message fixed to show the real source path —
            # it previously omitted the class folder.)
            dest = os.path.join(dest_dir, filename)
            print("Moving %s to %s" % (src, dest))
            os.rename(src, dest)

    print("Done.")

def main():
    """Entry point: read the split lists, then move every video into its
    train/test class folder."""
    # Fetch the {group: [video paths]} mapping and relocate the files.
    move_files(get_train_test_lists())

if __name__ == '__main__':
    main()
70 changes: 70 additions & 0 deletions 2_filter_psnr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import glob
import os
import numpy as np
import cv2
import operator

data_path = 'UCF101_train'
# Target total number of frame triplets to keep across all classes.
NUM = 276918

# One entry per class folder under data_path.
# NOTE: without recursive=True, '**' here matches like '*' (immediate
# children only).
classes = glob.glob(os.path.join(data_path, '**'))
print(len(classes))

# Count the extracted frames in every class and the grand total.
total_counts = 0
class_counts = []
for i, class_dir in enumerate(classes):
    frames = glob.glob(os.path.join(class_dir, '*.png'))
    print('class ' + str(i) + ': ' + str(len(frames)))
    class_counts.append(len(frames))
    total_counts += len(frames)

# Per-class quota: NUM split proportionally to each class's frame count.
filtered_counts = []
for i, count in enumerate(class_counts):
    quota = int(float(NUM) * count / float(total_counts))
    filtered_counts.append(quota)
    print('filtered class ' + str(i) + ': ' + str(quota))

def psnr(x1, x2):
    """Peak signal-to-noise ratio, in dB, between two arrays scaled to [0, 1].

    The mean squared error is clamped to 1e-10 so identical inputs yield a
    finite ceiling of 100 dB instead of a division by zero.
    """
    diff = x1 - x2
    mse = np.maximum(np.mean(diff * diff), 1e-10)
    return 10 * np.log10(1 / mse)



"""for training set"""
# calculate PSNR and sort
f0 = open('frame1.txt', 'w')
f1 = open('frame2.txt', 'w')
f2 = open('frame3.txt', 'w')
for i in range(len(classes)):
print('filtering... ' + str(i))
triplets_dict = []
png_files = glob.glob(os.path.join(classes[i], '*.png'))
png_files = sorted(png_files)
for j in range(1, len(png_files)-1):
idx = int(png_files[j][-8:-4])
if png_files[j-1] == (png_files[j][:-8] + str(idx-1).zfill(4) + '.png') \
and png_files[j+1] == (png_files[j][:-8] + str(idx+1).zfill(4) + '.png'):
img0 = cv2.imread(png_files[j-1]).astype(np.float32) / 255.0
img1 = cv2.imread(png_files[j]).astype(np.float32) / 255.0
img2 = cv2.imread(png_files[j+1]).astype(np.float32) / 255.0

psnr0 = psnr(img0, img1)
psnr1 = psnr(img1, img2)

triplets_dict.append((png_files[j], (psnr0 + psnr1) / 2.0))
# triplets_dict[png_files[i]] = (psnr0 + psnr1) / 2.0

triplets_dict = sorted(triplets_dict, key=lambda tup: tup[1])

print('class ' + str(i) + ', psnr threshold = ' + str(triplets_dict[filtered_counts[i]][1]))

for j in range(filtered_counts[i]):
idx = int(triplets_dict[j][0][-8:-4])
f0.write('./'+triplets_dict[j][0][:-8] + str(idx-1).zfill(4) + '.png' + '\n')
f1.write('./'+triplets_dict[j][0][:-8] + str(idx).zfill(4) + '.png' + '\n')
f2.write('./'+triplets_dict[j][0][:-8] + str(idx+1).zfill(4) + '.png' + '\n')

f0.close()
f1.close()
f2.close()
29 changes: 29 additions & 0 deletions extract_only.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#! /bin/bash
set -e

# Extract frame triplets from the UCF-101 dataset
# (http://crcv.ucf.edu/data/UCF101.php). Run as follows:
#
#     ./extract-ucf101.sh dir/file.avi
#
# Or, with parallel from moreutils, you can do it for all videos
# over many cores:
#
#     parallel -j 12 ./extract-ucf101.sh ::: $( find -name \*.avi )
#     //parallel -j 12 ./extract-ucf101.sh -- $( find -name \*.avi )
#
# but do note that this will produce ~250 GB of PNGs, probably many
# more frames than you actually would get to use for training
# and likely straining the file system with ~5M files.
#
# The script will create a set of frame files that you can easily combine
# and use for training:
#
#     for N in 1 2 3; do echo $N; cat $( find -name \*_frame$N.txt | sort -u ) > ../frame$N.txt; done

# BUGFIX: all expansions are now double-quoted so paths containing spaces
# or glob characters do not break word splitting.
FILE="$1"

# Mangle the basename so every character outside [a-zA-Z0-9_-] becomes '_';
# frames land next to the video as <mangled-name>_NNNN.png.
PREFIX="$( dirname "$FILE" )/$( basename "$FILE" | tr -c 'a-zA-Z0-9_-' '_' )"

echo "$FILE"

# Scale every frame to 256x256 and dump 4-digit-numbered PNGs.
ffmpeg -loglevel error -i "$FILE" -vf scale=256:256 "${PREFIX}_%04d.png"

0 comments on commit 144fcd6

Please sign in to comment.