Skip to content

Commit

Permalink
data/labels preprocessing script
Browse files Browse the repository at this point in the history
  • Loading branch information
abhimanyudubey committed Aug 12, 2016
1 parent b16c8b2 commit 33052f4
Showing 1 changed file with 54 additions and 0 deletions.
54 changes: 54 additions & 0 deletions code/data_preprocess_convert_votes_to_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os,sys
import glob

def convert_votes_file(file_csv, dir_dst, prefix_filename=""):
    """Convert one votes CSV into a pair of label files.

    Each data row of ``file_csv`` is read as ``left,right,winner``. Two files
    are written into ``dir_dst``, named after the CSV with ``.left`` and
    ``.right`` appended. Every row contributes one line to each file: the
    image path (joined onto ``prefix_filename``) followed by a space and a
    0/1 label. The left image gets label 1 only when ``winner`` equals the
    left file name; any other winner value is counted as a win for the right
    image.

    Args:
        file_csv: path of the votes CSV to read.
        dir_dst: existing directory that receives the two label files.
        prefix_filename: optional path prefix joined onto every image name.
    """
    name_base = os.path.basename(file_csv)
    path_left = os.path.join(dir_dst, name_base + ".left")
    path_right = os.path.join(dir_dst, name_base + ".right")

    # All three handles are managed by one `with` so they are closed even if
    # a row fails half-way through the file.
    with open(file_csv, 'r') as f, \
            open(path_left, 'w') as fl, \
            open(path_right, 'w') as fr:
        # Skip the first line, which only ever holds the column headers. The
        # default keeps an empty CSV from raising StopIteration.
        next(f, None)
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no vote.
                continue
            fields = stripped.split(",")
            if len(fields) < 3:
                # Report and skip instead of aborting the batch on IndexError.
                sys.stderr.write(
                    "Skipping malformed line in %s: %r\n" % (file_csv, line))
                continue
            file_left = fields[0]
            file_right = fields[1]
            winner = fields[2]
            if winner == file_left:
                # left file wins
                label_left, label_right = " 1", " 0"
            else:
                label_left, label_right = " 0", " 1"
            fl.write(os.path.join(prefix_filename, file_left) + label_left + "\n")
            fr.write(os.path.join(prefix_filename, file_right) + label_right + "\n")


def main(argv=None):
    """Convert every votes CSV in a folder to the label format required by caffe.

    Args:
        argv: argument list laid out like ``sys.argv`` (program name, source
            folder, output folder, optional file-name prefix). Defaults to
            ``sys.argv`` when omitted.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        print("Usage python data_preprocess_convert_votes_to_labels.py <folder-name> <output-folder-name> <optional-file-name-prefix>")
        return

    dir_src = argv[1]
    dir_dst = argv[2]
    prefix_filename = argv[3] if len(argv) > 3 else ""

    if not os.path.exists(dir_src):
        print("Source directory not found, exiting")
        return

    # Single-string print calls keep the output identical on Python 2 and 3
    # (the doubled space mirrors the original comma-separated print).
    print("Source directory located at  {0}".format(dir_src))
    list_csv = glob.glob(os.path.join(dir_src, "*.csv"))
    print("{0} files found at source".format(len(list_csv)))

    if not os.path.exists(dir_dst):
        os.makedirs(dir_dst)

    for file_csv in list_csv:
        print("Processing file  {0}".format(file_csv))
        convert_votes_file(file_csv, dir_dst, prefix_filename)


if __name__ == "__main__":
    main()



0 comments on commit 33052f4

Please sign in to comment.