Skip to content

Commit

Permalink
Common word filters are a go
Browse files Browse the repository at this point in the history
  • Loading branch information
ulmentflam committed Aug 11, 2017
1 parent fc17818 commit 3dfbee0
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 12 deletions.
11 changes: 3 additions & 8 deletions naughty_words/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,7 @@ def run_filters(self, text, **kwargs):
cur_text, cur_context = pre.process(cur_text, cur_context)

# TODO: Loop through, run all filters
matches = []
matches = set()
for filter in self._filters:
if kwargs['only_first']:
match = filter.filter(cur_text, cur_context, **kwargs)
if match is not []:
return match
else:
matches.extend(filter.filter(cur_text, cur_context, **kwargs))
return matches if matches is not [] else None
matches = matches.union(set(filter.filter(cur_text, cur_context, **kwargs)))
return matches
16 changes: 15 additions & 1 deletion naughty_words/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,18 @@ def has_profanity(text, additional=None, blacklist=None, profanities=None):
naughty_words = NaughtyWords(preprocessors=[EmptyPreprocessor()],
filters=[CommonSubstitutions()],
profanities=profanities)
return naughty_words.run_filters(text, only_first=True) is not None
return len(naughty_words.run_filters(text, only_first=True)) != 0


def get_all_profanity(text, additional=None, blacklist=None, profanities=None):
    """Run the default filter pipeline over ``text`` with ``only_first=False``.

    Args:
        text: The string to scan.
        additional: Optional iterable of extra words merged into the word
            list before scanning.
        blacklist: Optional iterable of words removed from the word list
            before scanning.
        profanities: Optional word list to scan for. When empty or omitted,
            the module-level ``profanity_list`` is used.

    Returns:
        The result of ``NaughtyWords.run_filters(text, only_first=False)``.
    """
    if not profanities:
        # Only read here, so no ``global`` declaration is required.
        profanities = profanity_list
    if additional:
        # ``set.update`` mutates in place and returns None, which made the
        # previous ``list(set(...).update(...))`` raise TypeError. ``union``
        # returns the merged set instead.
        profanities = list(set(profanities).union(additional))
    if blacklist:
        profanities = list(set(profanities).difference(blacklist))
    naughty_words = NaughtyWords(preprocessors=[EmptyPreprocessor()],
                                 filters=[CommonSubstitutions()],
                                 profanities=profanities)
    return naughty_words.run_filters(text, only_first=False)
5 changes: 4 additions & 1 deletion naughty_words/filters/common_substitutions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,15 @@ def filter(self, text: str,
for profanity in profanities:
# TODO add solidified case for stopping emoji
alpha_num_word = re.sub('\W', '', profanity)
if alpha_num_word is '':
continue
pattern = self.profanity_expression(alpha_num_word)
if re.search(pattern, text):
if raise_on_match:
raise ProfanityException()
elif only_first:
return profanity
matches.append(profanity)
return matches
else:
matches.append(profanity)
return matches
Expand Down
3 changes: 2 additions & 1 deletion naughty_words/wordlists/profanities.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1602,4 +1602,5 @@ pindick
mutha fukkah
niggardliness
fister
beaner
beaner
🖕
8 changes: 7 additions & 1 deletion tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,11 @@

from naughty_words.defaults import has_profanity


def test_default_filter_no_match():
    """Texts that should not be flagged by the default profanity check."""
    clean_samples = (
        "No profanity here",
        "No ducking profanity here",
    )
    # Same assertions as before, in the same order, one per sample.
    for sample in clean_samples:
        assert has_profanity(sample) is False


def test_default_filter_has_match():
    """A text containing a listed word must be flagged by the default check."""
    flagged = has_profanity("You fucking know there's profanity here")
    assert flagged is True

0 comments on commit 3dfbee0

Please sign in to comment.