-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Creating updates to code base to make this runnable
- Loading branch information
1 parent
a3e5c33
commit fc17818
Showing
14 changed files
with
84 additions
and
1,811 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,5 +124,4 @@ Icon | |
.AppleDesktop | ||
Network Trash Folder | ||
Temporary Items | ||
.apdisk | ||
|
||
.apdisk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import pkgutil | ||
from naughty_words import NaughtyWords | ||
from naughty_words.preprocessors import EmptyPreprocessor | ||
from naughty_words.filters import CommonSubstitutions | ||
|
||
# Read the word list that ships inside this package as raw bytes.
data = pkgutil.get_data(__name__, 'wordlists/profanities.txt')
# splitlines() copes with both LF and CRLF line endings, and blank lines
# (including the one produced by a trailing newline) are dropped so that an
# empty word never reaches the filters.
profanity_list = [
    word for word in data.decode('utf-8').splitlines() if word.strip()
]
|
||
|
||
def has_profanity(text, additional=None, blacklist=None, profanities=None):
    """Report whether ``text`` triggers the profanity filters.

    Args:
        text: The text to check.
        additional: Optional iterable of extra words to add to the word list.
        blacklist: Optional iterable of words to remove from the word list
            before checking.
        profanities: Optional base word list. When omitted or empty, the
            module-level ``profanity_list`` is used.

    Returns:
        ``True`` when ``NaughtyWords.run_filters`` returns anything other
        than ``None`` for ``text``, ``False`` otherwise.
    """
    if not profanities:
        profanities = profanity_list
    if additional:
        # set.update() mutates in place and returns None, so wrapping it in
        # list() raised TypeError; union() returns the merged set instead.
        profanities = list(set(profanities).union(additional))
    if blacklist:
        profanities = list(set(profanities).difference(blacklist))
    naughty_words = NaughtyWords(preprocessors=[EmptyPreprocessor()],
                                 filters=[CommonSubstitutions()],
                                 profanities=profanities)
    return naughty_words.run_filters(text, only_first=True) is not None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from naughty_words.filters.common_substitutions import * | ||
from naughty_words.filters.basic_string_match import * | ||
from naughty_words.filters.common_substitutions import * |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,45 @@ | ||
from naughty_words import NaughtyWords | ||
import re | ||
from naughty_words import Filter, ProfanityException | ||
from naughty_words.utils.confusables import standard_character_substitutions, separating_characters | ||
|
||
|
||
class CommonSubstitutions(NaughtyWords): | ||
class CommonSubstitutions(Filter):
    """Filter that finds profanities written with substituted characters.

    Each word is turned into a regular expression in which every character
    may be replaced by any of its entries in
    ``standard_character_substitutions`` and may be followed by separating
    characters or whitespace.
    """

    @classmethod
    def escaped_expression(cls, characters, escaped_characters, quantifier='*?'):
        """Build a regex character class followed by ``quantifier``.

        Args:
            characters: Iterable of literal characters; each one is passed
                through ``re.escape`` before being added to the class.
            escaped_characters: Regex fragments that are already safe to
                place inside a character class; they are emitted first and
                are not escaped again.
            quantifier: Quantifier appended after the closing bracket.

        Returns:
            The character-class expression as a string.
        """
        # Work on a copy so the caller's list is not mutated.
        members = list(escaped_characters)
        members.extend(re.escape(character) for character in characters)
        return f"[{''.join(members)}]{quantifier}"

    def profanity_expression(self, word):
        """Return the regex pattern that matches obfuscated forms of ``word``.

        Args:
            word: The word to build a pattern for.

        Returns:
            A pattern string made of one character class per character of
            ``word``, each followed by an optional run of separators.
        """
        separating_expression = self.escaped_expression(separating_characters, [r'\s'])
        parts = []
        for character in word:
            try:
                alternatives = standard_character_substitutions[character]
            except KeyError:
                # No substitution entry: the character only matches itself.
                alternatives = character
            parts.append(self.escaped_expression(alternatives, [], '+?'))
            parts.append(separating_expression)
        return ''.join(parts)

    def filter(self, text: str,
               context: dict,
               only_first: bool = True,
               raise_on_match: bool = False):
        """Search ``text`` for every word in ``context['profanities']``.

        Args:
            text: The text to search.
            context: Mapping that must contain a ``'profanities'`` iterable
                of words.
            only_first: When true, return as soon as one word matches.
            raise_on_match: When true, raise instead of returning a match.

        Returns:
            The first matching word when ``only_first`` is true and a match
            is found; otherwise a list of all matching words (empty when
            nothing matched).

        Raises:
            ProfanityException: If ``raise_on_match`` is true and a word
                matches.
        """
        matches = []
        for profanity in context['profanities']:
            # TODO add solidified case for stopping emoji
            alpha_num_word = re.sub(r'\W', '', profanity)
            if not alpha_num_word:
                # An empty word yields an empty pattern, which would match
                # any text; skip blank or punctuation-only entries.
                continue
            pattern = self.profanity_expression(alpha_num_word)
            if re.search(pattern, text):
                if raise_on_match:
                    raise ProfanityException()
                if only_first:
                    return profanity
                matches.append(profanity)
        return matches
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from naughty_words.preprocessors.empty_preprocessor import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from naughty_words import Preprocessor | ||
|
||
|
||
class EmptyPreprocessor(Preprocessor):
    """Preprocessor that applies no transformation."""

    def process(self, text: str, context: dict):
        """Hand back ``text`` and ``context`` unchanged, as a 2-tuple."""
        unchanged = (text, context)
        return unchanged
1 change: 0 additions & 1 deletion
1
naughty_words/profanity/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words
Submodule List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words
deleted from
a48bfe
Oops, something went wrong.