forked from PenguinRage/Words_of_Similarity
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyautomata.py
39 lines (28 loc) · 768 Bytes
/
myautomata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import automata
import bisect
import random
import json
class Matcher(object):
    """Callable lookup over a sorted list.

    Calling an instance with a value returns the first entry of the list
    that is greater than or equal to that value, or ``None`` when every
    entry is smaller. The number of calls made so far is kept in
    ``probes``.
    """

    def __init__(self, l):
        """Remember the list to search and reset the probe counter.

        Args:
            l: the list to search. It is stored by reference (not copied
                or sorted here); ``bisect`` only gives meaningful answers
                when it is already in ascending order.
        """
        self.l = l
        self.probes = 0

    def __call__(self, w):
        """Return the smallest entry that is >= ``w``, or ``None``.

        Every call increments ``self.probes`` by one, whether or not an
        entry is found.
        """
        self.probes += 1
        # bisect_left gives the leftmost insertion point for w, i.e. the
        # index of the first entry that is not smaller than w.
        pos = bisect.bisect_left(self.l, w)
        if pos < len(self.l):
            return self.l[pos]
        # w sorts after every entry in the list.
        return None
# Load the query table. Each key/value pair in it is passed to
# automata.find_all_matches below, together with the matcher.
with open('known.json') as f:
    known = json.load(f)

# Candidate words, stripped of surrounding whitespace and lower-cased.
# NOTE(review): Matcher searches this list with bisect, so it must already
# be in ascending order -- presumably wordsEn.txt is sorted; confirm.
with open('wordsEn.txt') as word_file:
    words = [x.strip().lower() for x in word_file]

m = Matcher(words)

# One list of matches per entry of `known`.
results = [list(automata.find_all_matches(k, v, m)) for k, v in known.items()]
numresults = len(results)

# Keep only the words that appear in every result list. An empty
# known.json yields an empty set instead of an IndexError on results[0].
if results:
    common_set = set(results[0])
    for matches in results[1:]:
        common_set.intersection_update(matches)
else:
    common_set = set()

# Single-argument print(...) produces the same output on Python 2 and 3.
print(sorted(common_set))
print(len(common_set))