diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 9f5a536f..987aa694 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -293,6 +293,21 @@ def test_formatting(): my_document = html.fromstring('

Title

This here is in bold font.

') my_result = extract(my_document, output_format='txt', include_formatting=True, config=ZERO_CONFIG) assert my_result == '### Title\n**This here is in bold font.**' + # code sections + my_document = html.fromstring('

Title

Here is a code sample:

import trafilatura

') + my_result = extract(my_document, output_format='txt', include_formatting=True, config=ZERO_CONFIG) + assert my_result == """### Title +Here is a code sample: +`import trafilatura`""" + my_document = html.fromstring('

Title

Here is a code sample:

import trafilatura\ntrafilatura.extract("")

') + my_result = extract(my_document, output_format='txt', include_formatting=True, config=ZERO_CONFIG) + assert my_result == """### Title +Here is a code sample: +``` +import trafilatura +trafilatura.extract("") +```""" + # nested my_document = html.fromstring('

This here is in bold and italic font.

') my_result = extract(my_document, output_format='xml', include_formatting=True, config=ZERO_CONFIG) diff --git a/trafilatura/xml.py b/trafilatura/xml.py index 5a1814e5..3b577bb0 100644 --- a/trafilatura/xml.py +++ b/trafilatura/xml.py @@ -230,6 +230,11 @@ def replace_element_text(element, include_formatting): rend = element.get('rend') if rend in HI_FORMATTING: element.text = f'{HI_FORMATTING[rend]}{element.text}{HI_FORMATTING[rend]}' + elif element.tag == 'code': + if '\n' in element.text: + element.text = f'```\n{element.text}\n```' + else: + element.text = f'`{element.text}`' # handle links if element.tag == 'ref': if element.text is not None: