-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
95701e3
commit 144fcd6
Showing
3 changed files
with
179 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
""" | ||
After extracting the RAR, we run this to move all the files into | ||
the appropriate train/test folders. | ||
Should only run this file once! | ||
""" | ||
import os | ||
import os.path | ||
|
||
def get_train_test_lists(version='01'):
    """
    Using one of the train/test files (01, 02, or 03), get the filename
    breakdowns we'll later use to move everything.

    Args:
        version: Suffix of the split files to read. '01' reads
            ucfTrainTestlist/testlist01.txt and
            ucfTrainTestlist/trainlist01.txt, relative to the current
            working directory.

    Returns:
        A dict with the keys 'UCF101_train' and 'UCF101_test'. Each value
        is a list of strings of the form 'UCF101/<entry>', in file order.
        For the test file <entry> is the stripped line; for the train file
        it is the first whitespace-separated token of the line.
    """
    # Get our files based on version.
    test_file = os.path.join('ucfTrainTestlist', 'testlist' + version + '.txt')
    train_file = os.path.join('ucfTrainTestlist', 'trainlist' + version + '.txt')

    # Build the test list. Blank lines (e.g. a trailing empty line) are
    # skipped so they do not become a bogus 'UCF101/' entry.
    with open(test_file) as fin:
        test_list = ['UCF101/' + row.strip() for row in fin if row.strip()]

    # Build the train list. Extra step to remove the class index. Blank
    # lines are skipped; row.split()[0] would raise IndexError on them.
    with open(train_file) as fin:
        train_list = ['UCF101/' + row.split()[0] for row in fin if row.strip()]

    # Set the groups in a dictionary.
    return {
        'UCF101_train': train_list,
        'UCF101_test': test_list,
    }
|
||
def move_files(file_groups):
    """Move each listed video from UCF101/<class>/ into <group>/<class>/.

    This assumes all of our files are currently in _this_ directory
    (under ./UCF101). So move them to the appropriate spot. Only needs to
    happen once: a video that is no longer under UCF101/ is reported and
    skipped.

    Args:
        file_groups: Dict mapping a destination folder name (for example
            'UCF101_train') to a list of 'UCF101/<class>/<file>' entries,
            the shape produced by get_train_test_lists().
    """
    # Do each of our groups.
    for group, videos in file_groups.items():

        # Do each of our videos.
        for video in videos:

            # Get the parts. Entries are always written with '/', so split
            # on that and not on os.path.sep, which is '\\' on Windows and
            # would leave the entry unsplit.
            parts = video.split('/')
            if len(parts) < 3:
                print("Can't parse %s. Skipping." % (video))
                continue
            classname = parts[1]
            filename = parts[2]

            # Check if this class exists in UCF101_train/test
            class_dir = os.path.join(group, classname)
            if not os.path.exists(class_dir):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(class_dir)

            # Check if we have already moved this file, or at least that it
            # exists to move.
            src = os.path.join('UCF101', classname, filename)
            if not os.path.exists(src):
                print("Can't find %s to move. Skipping." % (filename))
                continue

            # Move it. The message shows the real source path, including
            # the class folder.
            dest = os.path.join(class_dir, filename)
            print("Moving %s to %s" % (src, dest))
            os.rename(src, dest)

    print("Done.")
|
||
def main():
    """
    Read the train/test split lists and relocate every listed video
    into the folder of the group it belongs to.
    """
    # Build the groups and hand them straight to the mover.
    move_files(get_train_test_lists())


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import glob | ||
import os | ||
import numpy as np | ||
import cv2 | ||
import operator | ||
|
||
# Folder whose entries are treated as classes, and the target total that
# is split between the classes in proportion to their frame counts.
data_path = 'UCF101_train'
NUM = 276918

classes = glob.glob(os.path.join(data_path, '**'))
print(len(classes))

# Count the PNG frames found directly inside each class folder.
class_counts = []
for class_idx, class_dir in enumerate(classes):
    n_frames = len(glob.glob(os.path.join(class_dir, '*.png')))
    print('class ' + str(class_idx) + ': ' + str(n_frames))
    class_counts.append(n_frames)
total_counts = sum(class_counts)

# Give every class a share of NUM proportional to its frame count
# (truncated towards zero).
filtered_counts = []
for class_idx, n_frames in enumerate(class_counts):
    quota = int(float(NUM) * n_frames / float(total_counts))
    filtered_counts.append(quota)
    print('filtered class ' + str(class_idx) + ': ' + str(quota))
|
||
def psnr(x1, x2):
    """Return the PSNR in dB between two arrays, using a peak value of 1.

    The mean squared error is floored at 1e-10, so identical inputs give
    100 dB and never divide by zero.
    """
    squared_error = np.square(x1 - x2)
    mse = np.maximum(np.mean(squared_error), 1e-10)
    return 10 * np.log10(1 / mse)
|
||
|
||
|
||
# --- for training set ---
# For every class: score each run of three consecutive frames by the mean
# PSNR of its two neighbouring pairs, sort ascending, and write the
# lowest-PSNR triplets (up to the class quota) to frame1/2/3.txt.
# The files are opened in a with-block so they are closed even when a
# class fails part-way through.
with open('frame1.txt', 'w') as f0, \
        open('frame2.txt', 'w') as f1, \
        open('frame3.txt', 'w') as f2:
    for i, class_dir in enumerate(classes):
        print('filtering... ' + str(i))
        triplets = []
        png_files = sorted(glob.glob(os.path.join(class_dir, '*.png')))
        for j in range(1, len(png_files) - 1):
            # Frame names are expected to end in a 4-digit index + '.png';
            # int() raises ValueError for any other naming.
            stem = png_files[j][:-8]
            idx = int(png_files[j][-8:-4])
            prev_path = stem + str(idx - 1).zfill(4) + '.png'
            next_path = stem + str(idx + 1).zfill(4) + '.png'

            # Only keep triplets whose neighbours in the sorted listing
            # really are frames idx-1 and idx+1 with the same name stem.
            if png_files[j - 1] != prev_path or png_files[j + 1] != next_path:
                continue

            frames = [cv2.imread(path)
                      for path in (prev_path, png_files[j], next_path)]
            if any(frame is None for frame in frames):
                # cv2.imread returns None for a missing/unreadable file;
                # skip the triplet instead of crashing on .astype.
                print('skipping unreadable triplet at ' + png_files[j])
                continue
            img0, img1, img2 = [frame.astype(np.float32) / 255.0
                                for frame in frames]

            mean_psnr = (psnr(img0, img1) + psnr(img1, img2)) / 2.0
            triplets.append((prev_path, png_files[j], next_path, mean_psnr))

        # Stable ascending sort on the PSNR score only.
        triplets.sort(key=operator.itemgetter(3))

        # Never select more triplets than exist: the quota is derived from
        # the raw frame count, which can exceed the number of valid triplets.
        keep = min(filtered_counts[i], len(triplets))
        if keep < len(triplets):
            # Score of the first triplet that is NOT selected.
            print('class ' + str(i) + ', psnr threshold = ' + str(triplets[keep][3]))
        else:
            print('class ' + str(i) + ': keeping all ' + str(len(triplets))
                  + ' triplets (quota ' + str(filtered_counts[i]) + ')')

        for prev_path, cur_path, next_path, _ in triplets[:keep]:
            f0.write('./' + prev_path + '\n')
            f1.write('./' + cur_path + '\n')
            f2.write('./' + next_path + '\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#! /bin/bash
set -e

# Extract every frame of a video from the UCF-101 dataset
# (http://crcv.ucf.edu/data/UCF101.php) as 256x256 PNG files.
# Run as follows:
#
#   ./extract-ucf101.sh dir/file.avi
#
# Or you can do it for all videos over many cores. With GNU parallel:
#
#   parallel -j 12 ./extract-ucf101.sh ::: $( find -name \*.avi )
#
# or, with parallel from moreutils:
#
#   parallel -j 12 ./extract-ucf101.sh -- $( find -name \*.avi )
#
# but do note that this will produce ~250 GB of PNGs, probably many
# more frames than you actually would get to use for training
# and likely straining the file system with ~5M files.
#
# NOTE(review): the original header says the script creates a set of
# frame files that you can easily combine and use for training:
#
#   for N in 1 2 3; do echo $N; cat $( find -name \*_frame$N.txt | sort -u ) > ../frame$N.txt; done
#
# but the commands below only write PNGs; confirm where the
# *_frame$N.txt lists are supposed to come from.

if [ "$#" -lt 1 ]; then
    echo "usage: $0 dir/file.avi" >&2
    exit 1
fi

FILE=$1
# Output prefix: the video's directory plus its file name with every
# character outside [a-zA-Z0-9_-] replaced by "_". tr also replaces the
# newline printed by basename, so the prefix ends in an extra "_".
# Everything is quoted so paths containing spaces are not word-split.
PREFIX="$( dirname "$FILE" )/$( basename "$FILE" | tr -c "a-zA-Z0-9_-" "_" )"

echo "$FILE"

ffmpeg -loglevel error -i "$FILE" -vf scale=256:256 "${PREFIX}_%04d.png"