Skip to content

Commit

Permalink
Creating updates to code base to make this runnable
Browse files Browse the repository at this point in the history
  • Loading branch information
ulmentflam committed Aug 11, 2017
1 parent a3e5c33 commit fc17818
Show file tree
Hide file tree
Showing 14 changed files with 84 additions and 1,811 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -124,5 +124,4 @@ Icon
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

.apdisk
75 changes: 15 additions & 60 deletions naughty_words/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,80 +50,35 @@ def filter(self, text: str,
class NaughtyWords(object):
def __init__(self,
preprocessors: Iterable[Type[Preprocessor]]=None,
filters: Iterable[Type[Filter]]=None):
filters: Iterable[Type[Filter]]=None,
profanities: Sequence[str] = None):

self._preprocessors = []
self._filters = []
self._context = {}
self._context = {'profanities': profanities}

for p in preprocessors:
assert isinstance(p, Preprocessor), "Attempted to register an invalid preprocessor"
self._preprocessors.append(p)

for f in filters:
assert isinstance(f, Filter), "Attempted to register an invalid filter"
self._filters.append(p)

def _algorithm(self, match_type='first'):
if self.naughty_words is None or self.text is None:
return None
if match_type is not 'first':
words = []

for word in self.naughty_words:
if self.dumb_string_match(word):
if match_type is not 'first':
words.append(word)
else:
return word
elif self.regex_string_match(word):
if match_type is not 'first':
words.append(word)
else:
return word

if match_type is not 'first':
return words if words is not [] else None
else:
return None
self._filters.append(f)

def run_filters(self, text, **kwargs):
cur_text = text
cur_context = {}
cur_context = self._context

for pre in self._preprocessors:
cur_text, cur_context = pre(cur_text, cur_context)
cur_text, cur_context = pre.process(cur_text, cur_context)

# TODO: Loop through, run all filters
pass

def dumb_string_match(self, word):
return word in self.text

def regex_string_match(self, word):
alpha_num_word = re.sub('\W', '', word)
pattern = self.profanity_expression(alpha_num_word)
return re.search(pattern, self.text)

@classmethod
def escaped_expression(self, characters, escaped_characters, quantifier='*?'):
re_expressions = escaped_characters
for character in characters:
re_expressions.append(re.escape(character))
return f"[{''.join(re_expressions)}]{quantifier}"

def profanity_expression(self, word):
separating_expression = self.escaped_expression('a-zA-Z', ['^'])
return r''.join(self.escaped_expression(character, [], '+?') + separating_expression for character in word)

def first_match(self):
self._preprocess()
return self._algorithm()

def all_matches(self):
self._preprocess()
return self._algorithm(match_type='all')

def has_profanity(self):
self._preprocess()
return self._algorithm() is not None
matches = []
for filter in self._filters:
if kwargs['only_first']:
match = filter.filter(cur_text, cur_context, **kwargs)
if match is not []:
return match
else:
matches.extend(filter.filter(cur_text, cur_context, **kwargs))
return matches if matches is not [] else None
21 changes: 21 additions & 0 deletions naughty_words/defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pkgutil
from naughty_words import NaughtyWords
from naughty_words.preprocessors import EmptyPreprocessor
from naughty_words.filters import CommonSubstitutions

# Load the bundled word list once, at import time.
data = pkgutil.get_data(__name__, 'wordlists/profanities.txt')
# splitlines() handles both "\n" and "\r\n" endings, and blank lines are
# dropped: an empty entry would otherwise reach the filters as an empty
# pattern, which matches any text.
profanity_list = [
    line.strip()
    for line in data.decode('utf-8').splitlines()
    if line.strip()
]


def has_profanity(text, additional=None, blacklist=None, profanities=None):
    """Return True when ``text`` matches an entry of the profanity word list.

    Args:
        text: The string to scan.
        additional: Optional iterable of extra words to add to the word list.
        blacklist: Optional iterable of words to remove from the word list.
        profanities: Optional base word list. The module-level
            ``profanity_list`` is used when this is empty or ``None``.

    Returns:
        bool: True if the configured filters report a match, else False.
    """
    words = set(profanities or profanity_list)
    if additional:
        # set.update() mutates in place and returns None, so it cannot be
        # chained into list(); use an in-place union instead.
        words |= set(additional)
    if blacklist:
        words -= set(blacklist)
    # An empty entry would become an empty regex, which matches any text.
    words.discard('')

    naughty_words = NaughtyWords(preprocessors=[EmptyPreprocessor()],
                                 filters=[CommonSubstitutions()],
                                 profanities=list(words))
    # "No match" may come back as None or as an empty list, so test
    # truthiness rather than comparing against None.
    return bool(naughty_words.run_filters(text, only_first=True))
3 changes: 2 additions & 1 deletion naughty_words/filters/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from naughty_words.filters.common_substitutions import *
from naughty_words.filters.basic_string_match import *
from naughty_words.filters.common_substitutions import *
Empty file.
38 changes: 35 additions & 3 deletions naughty_words/filters/common_substitutions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,45 @@
from naughty_words import NaughtyWords
import re
from naughty_words import Filter, ProfanityException
from naughty_words.utils.confusables import standard_character_substitutions, separating_characters


class CommonSubstitutions(NaughtyWords):
class CommonSubstitutions(Filter):

@classmethod
def escaped_expression(cls, characters, escaped_characters, quantifier='*?'):
    """Build a regex character class followed by a quantifier.

    Args:
        characters: Iterable of strings; each one is passed through
            ``re.escape`` before being placed in the class.
        escaped_characters: Iterable of fragments that are already valid
            inside a character class; they are inserted verbatim, first.
        quantifier: Regex quantifier appended after the closing bracket.

    Returns:
        str: A pattern of the form ``[<members>]<quantifier>``.
    """
    # Copy first: appending to the argument itself would silently grow the
    # caller's list on every call.
    members = list(escaped_characters)
    members.extend(re.escape(character) for character in characters)
    return f"[{''.join(members)}]{quantifier}"

def profanity_expression(self, word):
expression = ''
separating_expression = self.escaped_expression(separating_characters, ['\s'])
for character in word:
expression = expression + self.escaped_expression(standard_character_substitutions[character], [], '+?') + separating_expression
try:
expression = expression + self.escaped_expression(standard_character_substitutions[character], [], '+?') + separating_expression
except KeyError:
expression = expression + self.escaped_expression(character, [], '+?') + separating_expression

return expression

def filter(self, text: str,
           context: dict,
           only_first: bool = True,
           raise_on_match: bool = False):
    """Search ``text`` for each profanity listed in ``context['profanities']``.

    Each profanity is reduced to its word characters, expanded into a regex
    by ``self.profanity_expression`` and searched for in ``text``.

    Args:
        text: The text to scan.
        context: Shared filter context; must contain a ``'profanities'`` key
            holding an iterable of words (``None`` is treated as empty).
        only_first: When true, return the first matching profanity at once.
        raise_on_match: When true, raise instead of returning on a match.

    Returns:
        The first matching profanity (a ``str``) when ``only_first`` is true
        and something matched; otherwise the list of all matching
        profanities, which is empty when nothing matched.

    Raises:
        KeyError: If ``context`` has no ``'profanities'`` key.
        ProfanityException: If ``raise_on_match`` is true and a word matches.
    """
    matches = []
    # The word list may be None when none was configured; treat as empty.
    for profanity in context['profanities'] or ():
        # TODO add solidified case for stopping emoji
        alpha_num_word = re.sub(r'\W', '', profanity)
        if not alpha_num_word:
            # Nothing left to match on: an empty pattern would match any
            # text and flag everything as profane.
            continue
        pattern = self.profanity_expression(alpha_num_word)
        if re.search(pattern, text):
            if raise_on_match:
                raise ProfanityException()
            if only_first:
                return profanity
            matches.append(profanity)
    return matches


1 change: 1 addition & 0 deletions naughty_words/preprocessors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from naughty_words.preprocessors.empty_preprocessor import *
6 changes: 6 additions & 0 deletions naughty_words/preprocessors/empty_preprocessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from naughty_words import Preprocessor


class EmptyPreprocessor(Preprocessor):
    """Preprocessor that passes its input through unchanged."""

    def process(self, text: str, context: dict):
        """Return ``text`` and ``context`` as a pair, unmodified."""
        unchanged = (text, context)
        return unchanged
Submodule List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words deleted from a48bfe
Loading

0 comments on commit fc17818

Please sign in to comment.