From 74d16447bcb95dc44d8c5c07b12779c7a2b99622 Mon Sep 17 00:00:00 2001 From: p0n1 Date: Fri, 28 Jun 2024 17:51:37 +0800 Subject: [PATCH] fix: Ignore ncx and use lxml-xml option to suppress warning --- audiobook_generator/book_parsers/epub_book_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audiobook_generator/book_parsers/epub_book_parser.py b/audiobook_generator/book_parsers/epub_book_parser.py index 9e14072..abc6712 100644 --- a/audiobook_generator/book_parsers/epub_book_parser.py +++ b/audiobook_generator/book_parsers/epub_book_parser.py @@ -16,7 +16,7 @@ class EpubBookParser(BaseBookParser): def __init__(self, config: GeneralConfig): super().__init__(config) logger.setLevel(config.log) - self.book = epub.read_epub(self.config.input_file) + self.book = epub.read_epub(self.config.input_file, {"ignore_ncx": True}) def __str__(self) -> str: return super().__str__() @@ -44,7 +44,7 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]: chapters = [] for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT): content = item.get_content() - soup = BeautifulSoup(content, "lxml") + soup = BeautifulSoup(content, "lxml-xml") raw = soup.get_text(strip=False) logger.debug(f"Raw text: <{raw[:]}>")