diff --git a/.gitignore b/.gitignore index 9a604e4..a5fa65a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ Configuration *.pyc +*.swp *~ diff --git a/extractedTextSplitter.py b/extractedTextSplitter.py index 1bab8bc..04612c1 100644 --- a/extractedTextSplitter.py +++ b/extractedTextSplitter.py @@ -553,7 +553,9 @@ def initMatchResults(self): """ Initialize matchesByType and allMatches to empty lists """ - self.matchesByType = { t: [] for t in self.regexTypes } + self.matchesByType = {} + for t in self.regexTypes: + self.matchesByType[t] = [] self.allMatches = [] # ---------------------------------- @@ -598,6 +600,8 @@ def getRegexTypes(self): return self.regexTypes # ----------------------- if __name__ == "__main__": # some ad hoc tests + print "Running ad hoc tests - modify these as needed" + if False: # TypedRegexMatcher tests PARA_BOUNDARY = '\n\n' regexDict = { @@ -654,7 +658,7 @@ def runSectionTest(sp, doc): "\n1234567890" + \ '\n' + SUPP_DATA_TAG + \ "\n1234567890" - doc = open('6114980.txt', 'r').read() + #doc = open('6114980.txt', 'r').read() sp = ExtTextSplitter(maxFraction=0.79, minFraction=.05) #print sp.getRegexMatcher().getRegexStr() runSectionTest(sp, doc)