-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsyntax.py
150 lines (112 loc) · 4.57 KB
/
syntax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 4 20:06:07 2017
@author: christopher
"""
from os.path import basename
from os.path import splitext
from re import compile as rcompile
def checkParenExpr(path):
    """Replace underscores inside parenthesised spans of *path* with dashes.

    Each span running from a ``(`` to the next ``)`` is rewritten in place;
    text outside such spans is returned untouched, even when it happens to
    contain the same characters as a parenthesised span.  An opening
    parenthesis without a closing one leaves the remainder unchanged.

    Parameters:
        path: the string to process (a file name without its extension in
            the caller visible in this file).

    Returns:
        A new string with ``_`` replaced by ``-`` inside every closed
        parenthesised span.
    """
    pieces = []
    pos = 0
    while True:
        opening = path.find('(', pos)
        # Only look for a closing parenthesis once an opening one exists.
        closing = path.find(')', opening + 1) if opening != -1 else -1
        if closing == -1:
            # No further complete "(...)" span: keep the tail verbatim.
            pieces.append(path[pos:])
            return ''.join(pieces)
        pieces.append(path[pos:opening])
        # Rewrite by position instead of by global text replacement so that
        # identical text outside the parentheses is not modified.
        pieces.append(path[opening:closing + 1].replace('_', '-'))
        pos = closing + 1
def extractStudentNamesFromPath (path):
    """Return the student names encoded in a submission file name.

    The base name of *path* is cut at its first dot and split on
    underscores.  The first two fields are skipped; every remaining field is
    truncated at its first ``(`` and its dashes are turned into spaces.
    """
    stem = basename(path).partition('.')[0]
    names = []
    for field in stem.split('_')[2:]:
        # Drop anything from the first opening parenthesis onwards.
        plain = field.partition('(')[0]
        names.append(plain.replace('-', ' '))
    return names
class SubmissionSyntaxCorrector(object):
    """Validate and repair the file names of submitted archives.

    A well-formed name has the shape
    ``<TUTOR>_Blatt<NN>_<names>.<ext>`` where ``<TUTOR>`` is the upper-cased
    tutor last name, ``<NN>`` the sheet number zero-padded to two digits,
    ``<names>`` text containing at least one dash with characters on both
    sides, and ``<ext>`` one of ``zip``, ``tar``, ``tar.gz`` or ``rar``.
    """

    # Name section: one or more "x-y" groups, each optionally followed by
    # parenthesised "x-y" groups.  Raw string so the backslashes reach the
    # regex engine unchanged.
    _NAME_REGEX = r'(.+-.+(\(.+-.+\))*)+'

    def __init__(self, tutLastname, sheetNr):
        """Build the matcher for one tutor and one exercise sheet.

        Parameters:
            tutLastname: last name of the tutor; it is upper-cased for the
                file name prefix.
            sheetNr: sheet number; converted with ``str`` and zero-padded to
                two digits.
        """
        # Local import: only ``compile`` is imported from ``re`` at file level.
        from re import escape

        self.__tutLastname = tutLastname
        self.__sheetNr = sheetNr
        self.__exts = ['zip', 'tar', 'tar.gz', 'rar']
        prefix = self.__tutLastname.upper() + '_Blatt' + str(sheetNr).zfill(2)
        # The prefix is escaped so regex metacharacters in a name (e.g. a
        # dot) are matched literally.
        self.__pattern = rcompile(
            escape(prefix) + '_' + self._NAME_REGEX
            + r'\.(zip|tar\.gz|tar|rar)'
        )
        # Compiled once here instead of on every autocorrect() call.
        self.__namepat = rcompile(self._NAME_REGEX)

    def _supportedExtension(self, path):
        """Return the supported suffix of *path* including its dot, else ''.

        Suffixes are compared against the end of the base name rather than
        taken from ``splitext``, because ``splitext`` only yields the last
        component (``.gz``) of a compound suffix such as ``.tar.gz``.
        """
        name = basename(path)
        # Longest first so a compound suffix wins over any shorter one.
        for ext in sorted(self.__exts, key=len, reverse=True):
            suffix = '.' + ext
            # Require a non-empty stem in front of the suffix.
            if len(name) > len(suffix) and name.endswith(suffix):
                return suffix
        return ''

    def isCorrect(self, path):
        """Return True if the base name of *path* is entirely well-formed."""
        # fullmatch: trailing text after the extension makes the name invalid.
        return self.__pattern.fullmatch(basename(path)) is not None

    def isSupportedArchive(self, path):
        """Return True if *path* ends in one of the supported extensions."""
        return bool(self._supportedExtension(path))

    def filterPaths(self, paths):
        """Split *paths* into ``(correct, incorrect)`` lists.

        Each path is checked once with :meth:`isCorrect`; the relative order
        of the paths is preserved in both lists.
        """
        correct = []
        incorrect = []
        for candidate in paths:
            if self.isCorrect(candidate):
                correct.append(candidate)
            else:
                incorrect.append(candidate)
        return (correct, incorrect)

    def autocorrect(self, path, foldManager):
        """Try to rewrite the base name of *path* into the well-formed shape.

        Parameters:
            path: path or file name of the submitted archive.
            foldManager: object providing
                ``findStudentByName(name, status=...)``, which is used here
                as returning a sequence of mappings with a ``'Name'`` key.

        Returns:
            ``(name, True)`` with the corrected (or already correct) file
            name, or ``(name, False)`` when no correction was possible.
        """
        path = basename(path)
        if self.isCorrect(path):
            return (path, True)

        # if extension is not supported there is no hope for auto-correct
        extension = self._supportedExtension(path)
        if not extension:
            return (path, False)

        # remove spaces
        # NOTE(review): commas are only replaced when the name also contains
        # a space; kept as in the original - confirm whether intended.
        if ' ' in path:
            path = path.replace(' ', '_').replace(',', '_')

        parts = checkParenExpr(path[:-len(extension)]).split('_')
        students = []
        for part in parts:
            # consecutive separators yield empty parts that carry no name
            if not part:
                continue
            # a part containing a number -> skip
            if any(char.isdigit() for char in part):
                continue
            # caps segment will be ignored
            if part.isupper():
                continue
            # correct name part
            if self.__namepat.match(part):
                students.append(part)
                continue

            upperIdx = [i for i, c in enumerate(part) if c.isupper()]
            if len(upperIdx) > 1:
                # part in CamelCase: split at every capital after the first
                # and collect the distinct left/right pieces in order
                pieces = []
                for idx in upperIdx[1:]:
                    for piece in (part[:idx], part[idx:]):
                        if piece not in pieces:
                            pieces.append(piece)
                name = ' '.join(pieces)
                res = foldManager.findStudentByName(name, status='Local;Imported')
                if len(res) == 1:
                    students.append(res[0]['Name'].replace(' ', '-'))
                elif len(res) == 0 and len(name) > 6:
                    # a long unknown CamelCase part cannot be resolved
                    return (path, False)
                # several hits, or a short unknown part: ignore this part
                continue

            # try to check if its a known name part
            res = foldManager.findStudentByName(part.capitalize(), status='Local;Imported')
            if len(res) == 1:
                found = res[0]['Name'].replace(' ', '-')
                if found not in students:
                    students.append(found)
            elif len(res) > 1:
                # name part that is not unambiguous
                return (path, False)

        # no students
        if not students:
            return (path, False)

        # order by the text in front of the first dash of each name
        students = sorted(students, key=lambda x: x.split('-')[0])
        prefix = self.__tutLastname.upper() + '_Blatt' + str(self.__sheetNr).zfill(2)
        return ('_'.join([prefix] + students) + extension, True)