Skip to content

Commit

Permalink
nit
Browse files: browse the repository at this point in the history
  • Loading branch information
guipenedo committed Dec 26, 2024
1 parent cd18c59 commit aae7e33
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions src/datatrove/pipeline/extractors/resiliparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,15 @@ class Resiliparse(BaseExtractor):
def __init__(
self,
preserve_formatting: bool = True,
main_content: bool = True,
main_content: bool = True, # default is false
list_bullets: bool = True,
alt_texts: bool = False,
links: bool = False,
links: bool = False, # default is true
form_fields: bool = False,
noscript: bool = False,
comments: bool = True,
comments: bool = None,
skip_elements: list = None,
timeout: float = 0.1,
**kwargs,
):
super().__init__(timeout)
self.preserve_formatting = preserve_formatting
Expand Down

0 comments on commit aae7e33

Please sign in to comment.