-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path__init__.py
79 lines (58 loc) · 2.37 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import re
import collections
from typing import Optional, Iterable, Tuple, Type, Sequence
class ProfanityException(Exception):
    """
    Exception type used to signal that profanity was detected
    """
class Preprocessor:
    """
    Base class for text preprocessors; subclasses override ``process``
    """

    def process(self, text: str, context: dict) -> Tuple[str, dict]:
        """
        Transform the text before it is filtered (substitutions, duplicates, etc.)

        :param text: the text to transform
        :param context: extra context handed along to the processors in the chain
        :return: a ``(processed text, updated context)`` tuple
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError
class Filter:
    """
    Base class for naughty-word filters; subclasses override ``filter``
    """

    def filter(
        self,
        text: str,
        context: dict,
        only_first: bool = True,
        raise_on_match: bool = False,
    ) -> Optional[Sequence[str]]:
        """
        Scan the text and report the profanity found in it

        :param text: the text to scan
        :param context: context dictionary coming from preprocessors or other filters
        :param only_first: stop matching at the first detected profanity when true
        :param raise_on_match: raise a ProfanityException when profanity is detected
        :return: the profane words that were discovered
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError
class NaughtyWords:
    """
    Pipeline that runs text through preprocessors and then through filters

    Preprocessors are applied in registration order, each one receiving the
    text and context returned by the previous one. Every registered filter is
    then run on the resulting text and the union of their reports is returned.
    """

    def __init__(self,
                 preprocessors: "Optional[Iterable[Preprocessor]]" = None,
                 filters: "Optional[Iterable[Filter]]" = None,
                 profanities: "Optional[Sequence[str]]" = None):
        """
        Register the preprocessors and filters that make up the pipeline

        :param preprocessors: Preprocessor instances, applied in the given order;
                              ``None`` registers none
        :param filters: Filter instances run on the preprocessed text;
                        ``None`` registers none
        :param profanities: stored in the context under the ``'profanities'`` key
        :raises AssertionError: if an entry is not an instance of the expected class
        """
        self._preprocessors = []
        self._filters = []
        self._context = {'profanities': profanities}

        # Both arguments default to None, which cannot be iterated directly.
        for p in () if preprocessors is None else preprocessors:
            # Raised explicitly rather than via `assert` so the check is not
            # stripped under `python -O`; the exception type is unchanged.
            if not isinstance(p, Preprocessor):
                raise AssertionError("Attempted to register an invalid preprocessor")
            self._preprocessors.append(p)

        for f in () if filters is None else filters:
            if not isinstance(f, Filter):
                raise AssertionError("Attempted to register an invalid filter")
            self._filters.append(f)

    def run_filters(self, text, **kwargs) -> set:
        """
        Preprocess the text, run every registered filter and collect the matches

        :param text: text to check
        :param kwargs: forwarded unchanged to each filter's ``filter`` call
        :return: set containing everything reported by the filters; empty when
                 nothing was reported or no filters are registered
        """
        cur_text = text
        # NOTE: the instance's context dict itself is handed to the first
        # preprocessor, so a preprocessor that mutates it in place also
        # affects later runs.
        cur_context = self._context
        for pre in self._preprocessors:
            cur_text, cur_context = pre.process(cur_text, cur_context)

        matches = set()
        for word_filter in self._filters:
            found = word_filter.filter(cur_text, cur_context, **kwargs)
            # A filter's return type is Optional, so None means "no matches".
            if found is not None:
                matches.update(found)
        return matches