-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_assignment7_public.py
75 lines (69 loc) · 3.7 KB
/
check_assignment7_public.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/python
"""CS 489 Big Data Infrastructure (Winter 2016): Self-check script
This file may be shared with students.
Usage:
Run this file from the root of the 'bigdata2016w' repository, passing your GitHub username:
e.g. check_assignment7_public.py [github-username] [-r REDUCERS]
"""
import sys
import os
from subprocess import call
import re
import argparse
# Add the prefix 'a' if the GitHub username starts with a numeric character.
def convertusername(u):
    """Return *u* with an 'a' prepended when it begins with a digit.

    Strings that do not start with a digit (including the empty string)
    are returned unchanged.
    """
    starts_with_digit = re.match(r'\d', u) is not None
    if starts_with_digit:
        return 'a' + u
    return u
def check_a3(username,reducers):
    """Build the project with Maven and run the assignment 7 HBase jobs.

    Runs ``mvn clean package`` and then, in order, the Shakespeare index
    build, two Shakespeare queries, the Wiki index build and two Wiki
    queries, printing a banner before each Hadoop job.

    Args:
        username: Package-name component substituted into the Java class
            path ``ca.uwaterloo.cs.bigdata2016w.<username>.assignment7``.
        reducers: Value passed (as a string) to ``-reducers`` for the two
            index-building jobs.

    The exit status of every subprocess is ignored; each step runs
    regardless of whether the previous one succeeded.
    """
    jar = "target/bigdata2016w-0.1.0-SNAPSHOT.jar"
    package = "ca.uwaterloo.cs.bigdata2016w.{0}.assignment7.".format(username)
    builder = package + "BuildInvertedIndexHBase"
    retriever = package + "BooleanRetrievalHBase"

    hbase_conf = "/home/hbase-0.98.16-hadoop2/conf/hbase-site.xml"
    shakespeare_txt = "/shared/cs489/data/Shakespeare.txt"
    wiki_txt = "/shared/cs489/data/enwiki-20151201-pages-articles-0.1sample.txt"
    shakespeare_table = "cs489-2016w-erahman-a7-index-shakespeare"
    wiki_table = "cs489-2016w-erahman-a7-index-wiki"

    call(["mvn","clean","package"])

    # (banner, main class, job options). The option order differs between
    # jobs and is deliberately kept exactly as each job was originally invoked.
    steps = [
        ("Shakespeare:", builder,
         ["-config", hbase_conf,
          "-input", shakespeare_txt,
          "-table", shakespeare_table, "-reducers", str(reducers)]),
        ("Question 1.", retriever,
         ["-config", hbase_conf,
          "-table", shakespeare_table,
          "-collection", shakespeare_txt,
          "-query", "outrageous fortune AND"]),
        ("Question 2.", retriever,
         ["-config", hbase_conf,
          "-table", shakespeare_table,
          "-collection", shakespeare_txt,
          "-query", "white red OR rose AND pluck AND"]),
        ("Wiki", builder,
         ["-input", wiki_txt,
          "-config", hbase_conf,
          "-table", wiki_table, "-reducers", str(reducers)]),
        ("Question 3.", retriever,
         ["-table", wiki_table,
          "-config", hbase_conf,
          "-collection", wiki_txt,
          "-query", "waterloo stanford OR cheriton AND"]),
        ("Question 4.", retriever,
         ["-config", hbase_conf,
          "-table", wiki_table,
          "-collection", wiki_txt,
          "-query", "internet startup AND canada AND ontario AND"]),
    ]

    for banner, main_class, options in steps:
        print(banner)
        call(["hadoop", "jar", jar, main_class] + options)
if __name__ == "__main__":
    # Command-line entry point: parse the GitHub username and the optional
    # reducer count, normalise the username, then run the checks.
    parser = argparse.ArgumentParser(description="CS 489/689 W15 A7 Public Test Script for Altiscale")
    parser.add_argument('username',metavar='[Github Username]',
                        help="Github username to be used as package name",type=str)
    parser.add_argument('-r','--reducers',help="Number of reducers to use.",type=int,default=1)
    args=parser.parse_args()
    try:
        converted_userid = convertusername(args.username)
        check_a3(converted_userid,args.reducers)
    except Exception as e:
        # Top-level boundary: report the failure, but exit with a non-zero
        # status so a crashed run is not mistaken for a successful one.
        print(e)
        sys.exit(1)