Creating updates to code base to make this runnable

52inc · Aug 11, 2017 · fc17818 · fc17818
1 parent a3e5c33
commit fc17818
Show file tree

Hide file tree

Showing 14 changed files with 84 additions and 1,811 deletions.
diff --git a/.gitignore b/.gitignore
@@ -124,5 +124,4 @@ Icon
 .AppleDesktop
 Network Trash Folder
 Temporary Items
-.apdisk
-
+.apdisk
diff --git a/naughty_words/__init__.py b/naughty_words/__init__.py
@@ -50,80 +50,35 @@ def filter(self, text: str,
 class NaughtyWords(object):
     def __init__(self,
                  preprocessors: Iterable[Type[Preprocessor]]=None,
-                 filters: Iterable[Type[Filter]]=None):
+                 filters: Iterable[Type[Filter]]=None,
+                 profanities: Sequence[str] = None):
 
         self._preprocessors = []
         self._filters = []
-        self._context = {}
+        self._context = {'profanities': profanities}
 
         for p in preprocessors:
             assert isinstance(p, Preprocessor), "Attempted to register an invalid preprocessor"
             self._preprocessors.append(p)
 
         for f in filters:
             assert isinstance(f, Filter), "Attempted to register an invalid filter"
-            self._filters.append(p)
-
-    def _algorithm(self, match_type='first'):
-        if self.naughty_words is None or self.text is None:
-            return None
-        if match_type is not 'first':
-            words = []
-
-        for word in self.naughty_words:
-            if self.dumb_string_match(word):
-                if match_type is not 'first':
-                    words.append(word)
-                else:
-                    return word
-            elif self.regex_string_match(word):
-                if match_type is not 'first':
-                    words.append(word)
-                else:
-                    return word
-
-        if match_type is not 'first':
-            return words if words is not [] else None
-        else:
-            return None
+            self._filters.append(f)
 
     def run_filters(self, text, **kwargs):
         cur_text = text
-        cur_context = {}
+        cur_context = self._context
 
         for pre in self._preprocessors:
-            cur_text, cur_context = pre(cur_text, cur_context)
+            cur_text, cur_context = pre.process(cur_text, cur_context)
 
         # TODO: Loop through, run all filters
-        pass
-
-    def dumb_string_match(self, word):
-        return word in self.text
-
-    def regex_string_match(self, word):
-        alpha_num_word = re.sub('\W', '', word)
-        pattern = self.profanity_expression(alpha_num_word)
-        return re.search(pattern, self.text)
-
-    @classmethod
-    def escaped_expression(self, characters, escaped_characters, quantifier='*?'):
-        re_expressions = escaped_characters
-        for character in characters:
-            re_expressions.append(re.escape(character))
-        return f"[{''.join(re_expressions)}]{quantifier}"
-
-    def profanity_expression(self, word):
-        separating_expression = self.escaped_expression('a-zA-Z', ['^'])
-        return r''.join(self.escaped_expression(character, [], '+?') + separating_expression for character in word)
-
-    def first_match(self):
-        self._preprocess()
-        return self._algorithm()
-
-    def all_matches(self):
-        self._preprocess()
-        return self._algorithm(match_type='all')
-
-    def has_profanity(self):
-        self._preprocess()
-        return self._algorithm() is not None
+        matches = []
+        for filter in self._filters:
+            if kwargs['only_first']:
+                match = filter.filter(cur_text, cur_context, **kwargs)
+                if match is not []:
+                    return match
+            else:
+                matches.extend(filter.filter(cur_text, cur_context, **kwargs))
+        return matches if matches is not [] else None
diff --git a/naughty_words/defaults.py b/naughty_words/defaults.py
@@ -0,0 +1,21 @@
+import pkgutil
+from naughty_words import NaughtyWords
+from naughty_words.preprocessors import EmptyPreprocessor
+from naughty_words.filters import CommonSubstitutions
+
+data = pkgutil.get_data(__name__, 'wordlists/profanities.txt')
+profanity_list = data.decode('utf-8').split('\n')
+
+
+def has_profanity(text, additional=None, blacklist=None, profanities=None):
+    if not profanities:
+        global profanity_list
+        profanities = profanity_list
+    if additional:
+        profanities = list(set(profanities).update(set(additional)))
+    if blacklist:
+        profanities = list(set(profanities).difference(set(blacklist)))
+    naughty_words = NaughtyWords(preprocessors=[EmptyPreprocessor()],
+                                 filters=[CommonSubstitutions()],
+                                 profanities=profanities)
+    return naughty_words.run_filters(text, only_first=True) is not None
diff --git a/naughty_words/filters/__init__.py b/naughty_words/filters/__init__.py
@@ -1 +1,2 @@
-from naughty_words.filters.common_substitutions import *
+from naughty_words.filters.basic_string_match import *
+from naughty_words.filters.common_substitutions import *
diff --git a/naughty_words/filters/basic_string_match.py b/naughty_words/filters/basic_string_match.py
diff --git a/naughty_words/filters/common_substitutions.py b/naughty_words/filters/common_substitutions.py
@@ -1,13 +1,45 @@
-from naughty_words import NaughtyWords
+import re
+from naughty_words import Filter, ProfanityException
 from naughty_words.utils.confusables import standard_character_substitutions, separating_characters
 
 
-class CommonSubstitutions(NaughtyWords):
+class CommonSubstitutions(Filter):
+
+    @classmethod
+    def escaped_expression(cls, characters, escaped_characters, quantifier='*?'):
+        re_expressions = escaped_characters
+        for character in characters:
+            re_expressions.append(re.escape(character))
+        return f"[{''.join(re_expressions)}]{quantifier}"
 
     def profanity_expression(self, word):
         expression = ''
         separating_expression = self.escaped_expression(separating_characters, ['\s'])
         for character in word:
-            expression = expression + self.escaped_expression(standard_character_substitutions[character], [], '+?') + separating_expression
+            try:
+                expression = expression + self.escaped_expression(standard_character_substitutions[character], [], '+?') + separating_expression
+            except KeyError:
+                expression = expression + self.escaped_expression(character, [], '+?') + separating_expression
+
         return expression
 
+    def filter(self, text: str,
+               context: dict,
+               only_first: bool=True,
+               raise_on_match: bool= False):
+        profanities = context['profanities']
+        matches = []
+        for profanity in profanities:
+            # TODO add solidified case for stopping emoji
+            alpha_num_word = re.sub('\W', '', profanity)
+            pattern = self.profanity_expression(alpha_num_word)
+            if re.search(pattern, text):
+                if raise_on_match:
+                    raise ProfanityException()
+                elif only_first:
+                    return profanity
+                else:
+                    matches.append(profanity)
+        return matches
+
+
diff --git a/naughty_words/preprocessors/__init__.py b/naughty_words/preprocessors/__init__.py
@@ -0,0 +1 @@
+from naughty_words.preprocessors.empty_preprocessor import *
diff --git a/naughty_words/preprocessors/empty_preprocessor.py b/naughty_words/preprocessors/empty_preprocessor.py
@@ -0,0 +1,6 @@
+from naughty_words import Preprocessor
+
+
+class EmptyPreprocessor(Preprocessor):
+    def process(self, text: str, context: dict):
+        return text, context
diff --git a/naughty_words/profanity/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words b/naughty_words/profanity/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from naughty_words.preprocessors.empty_preprocessor import *