Fix metadata recognition for fb2 files

AllieQpzm · Mar 19, 2021 · 2760a78 · 2760a78
1 parent 8f5c649
commit 2760a78
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 183 deletions.
diff --git a/cps/fb2.py b/cps/fb2.py
@@ -30,50 +30,50 @@ def get_fb2_info(tmp_file_path, original_file_extension):
     }
 
     fb2_file = open(tmp_file_path)
-    tree = etree.fromstring(fb2_file.read())
+    tree = etree.fromstring(fb2_file.read().encode())
 
     authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns)
 
     def get_author(element):
         last_name = element.xpath('fb:last-name/text()', namespaces=ns)
         if len(last_name):
-            last_name = last_name[0].encode('utf-8')
+            last_name = last_name[0]
         else:
             last_name = u''
         middle_name = element.xpath('fb:middle-name/text()', namespaces=ns)
         if len(middle_name):
-            middle_name = middle_name[0].encode('utf-8')
+            middle_name = middle_name[0]
         else:
             middle_name = u''
         first_name = element.xpath('fb:first-name/text()', namespaces=ns)
         if len(first_name):
-            first_name = first_name[0].encode('utf-8')
+            first_name = first_name[0]
         else:
             first_name = u''
-        return (first_name.decode('utf-8') + u' '
-                + middle_name.decode('utf-8') + u' '
-                + last_name.decode('utf-8')).encode('utf-8')
+        return (first_name + u' '
+                + middle_name + u' '
+                + last_name)
 
     author = str(", ".join(map(get_author, authors)))
 
     title = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns)
     if len(title):
-        title = str(title[0].encode('utf-8'))
+        title = str(title[0])
     else:
         title = u''
     description = tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns)
     if len(description):
-        description = str(description[0].encode('utf-8'))
+        description = str(description[0])
     else:
         description = u''
 
     return BookMeta(
         file_path=tmp_file_path,
         extension=original_file_extension,
-        title=title.decode('utf-8'),
-        author=author.decode('utf-8'),
+        title=title,
+        author=author,
         cover=None,
-        description=description.decode('utf-8'),
+        description=description,
         tags="",
         series="",
         series_id="",

diff --git a/cps/server.py b/cps/server.py
@@ -251,10 +251,11 @@ def start(self):
         finally:
             self.wsgiserver = None
 
+        # prevent irritating log of pending tasks message from asyncio
+        logger.get('asyncio').setLevel(logger.logging.CRITICAL)
+
         if not self.restart:
             log.info("Performing shutdown of Calibre-Web")
-            # prevent irritating log of pending tasks message from asyncio
-            logger.get('asyncio').setLevel(logger.logging.CRITICAL)
             return True
 
         log.info("Performing restart of Calibre-Web")

diff --git a/cps/uploader.py b/cps/uploader.py
@@ -214,7 +214,7 @@ def parse_xmp(pdf_file):
     if xmp_info:
         try:
             xmp_author = xmp_info.dc_creator # list
-        except:
+        except AttributeError:
             xmp_author = ['Unknown']
 
         if xmp_info.dc_title: 
@@ -228,20 +228,22 @@ def parse_xmp(pdf_file):
             xmp_description = ''
 
         languages = []
-        for i in xmp_info.dc_language:
-            #calibre-web currently only takes one language.
-            languages.append(isoLanguages.get_lang3(i))
+        try:
+            for i in xmp_info.dc_language:
+                languages.append(isoLanguages.get_lang3(i))
+        except AttributeError:
+            languages.append('')
 
         xmp_tags = ', '.join(xmp_info.dc_subject)
         xmp_publisher = ', '.join(xmp_info.dc_publisher)
-        xmp_languages = xmp_info.dc_language
 
         return {'author': xmp_author,
-                    'title': xmp_title,
-                    'subject': xmp_description,
-                    'tags': xmp_tags, 'languages': languages,
-                    'publisher': xmp_publisher
-                    }
+                'title': xmp_title,
+                'subject': xmp_description,
+                'tags': xmp_tags,
+                'languages': languages,
+                'publisher': xmp_publisher
+                }
 
 
 def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
@@ -250,8 +252,6 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
 
     if use_pdf_meta:
         with open(tmp_file_path, 'rb') as f:
-            languages = [""]
-            publisher = ""
             pdf_file = PdfFileReader(f)
             doc_info = pdf_file.getDocumentInfo()
             xmp_info = parse_xmp(pdf_file)
@@ -263,6 +263,13 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
         tags = xmp_info['tags']
         languages = xmp_info['languages']
         publisher = xmp_info['publisher']
+    else:
+        author = u'Unknown'
+        title = ''
+        languages = [""]
+        publisher = ""
+        subject = ""
+        tags = ""
 
     if doc_info:
         if author == '':
@@ -273,14 +280,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
             subject = doc_info.subject
         if tags == '' and '/Keywords' in doc_info:
             tags = doc_info['/Keywords']
-
     else:
-        author= u'Unknown'
         title = original_file_name
-        subject = ""
-        tags = ""
-        languages = [""]
-        publisher = ""
 
     return BookMeta(
         file_path=tmp_file_path,