Skip to content

Commit

Permalink
Fix metadata recognition for fb2 files
Browse files Browse the repository at this point in the history
  • Loading branch information
OzzieIsaacs committed Mar 19, 2021
1 parent 8f5c649 commit 2760a78
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 183 deletions.
24 changes: 12 additions & 12 deletions cps/fb2.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,50 +30,50 @@ def get_fb2_info(tmp_file_path, original_file_extension):
}

fb2_file = open(tmp_file_path)
tree = etree.fromstring(fb2_file.read())
tree = etree.fromstring(fb2_file.read().encode())

authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns)

def get_author(element):
last_name = element.xpath('fb:last-name/text()', namespaces=ns)
if len(last_name):
last_name = last_name[0].encode('utf-8')
last_name = last_name[0]
else:
last_name = u''
middle_name = element.xpath('fb:middle-name/text()', namespaces=ns)
if len(middle_name):
middle_name = middle_name[0].encode('utf-8')
middle_name = middle_name[0]
else:
middle_name = u''
first_name = element.xpath('fb:first-name/text()', namespaces=ns)
if len(first_name):
first_name = first_name[0].encode('utf-8')
first_name = first_name[0]
else:
first_name = u''
return (first_name.decode('utf-8') + u' '
+ middle_name.decode('utf-8') + u' '
+ last_name.decode('utf-8')).encode('utf-8')
return (first_name + u' '
+ middle_name + u' '
+ last_name)

author = str(", ".join(map(get_author, authors)))

title = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns)
if len(title):
title = str(title[0].encode('utf-8'))
title = str(title[0])
else:
title = u''
description = tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns)
if len(description):
description = str(description[0].encode('utf-8'))
description = str(description[0])
else:
description = u''

return BookMeta(
file_path=tmp_file_path,
extension=original_file_extension,
title=title.decode('utf-8'),
author=author.decode('utf-8'),
title=title,
author=author,
cover=None,
description=description.decode('utf-8'),
description=description,
tags="",
series="",
series_id="",
Expand Down
5 changes: 3 additions & 2 deletions cps/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,11 @@ def start(self):
finally:
self.wsgiserver = None

# prevent irritating log of pending tasks message from asyncio
logger.get('asyncio').setLevel(logger.logging.CRITICAL)

if not self.restart:
log.info("Performing shutdown of Calibre-Web")
# prevent irritating log of pending tasks message from asyncio
logger.get('asyncio').setLevel(logger.logging.CRITICAL)
return True

log.info("Performing restart of Calibre-Web")
Expand Down
37 changes: 19 additions & 18 deletions cps/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def parse_xmp(pdf_file):
if xmp_info:
try:
xmp_author = xmp_info.dc_creator # list
except:
except AttributeError:
xmp_author = ['Unknown']

if xmp_info.dc_title:
Expand All @@ -228,20 +228,22 @@ def parse_xmp(pdf_file):
xmp_description = ''

languages = []
for i in xmp_info.dc_language:
#calibre-web currently only takes one language.
languages.append(isoLanguages.get_lang3(i))
try:
for i in xmp_info.dc_language:
languages.append(isoLanguages.get_lang3(i))
except AttributeError:
languages.append('')

xmp_tags = ', '.join(xmp_info.dc_subject)
xmp_publisher = ', '.join(xmp_info.dc_publisher)
xmp_languages = xmp_info.dc_language

return {'author': xmp_author,
'title': xmp_title,
'subject': xmp_description,
'tags': xmp_tags, 'languages': languages,
'publisher': xmp_publisher
}
'title': xmp_title,
'subject': xmp_description,
'tags': xmp_tags,
'languages': languages,
'publisher': xmp_publisher
}


def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
Expand All @@ -250,8 +252,6 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):

if use_pdf_meta:
with open(tmp_file_path, 'rb') as f:
languages = [""]
publisher = ""
pdf_file = PdfFileReader(f)
doc_info = pdf_file.getDocumentInfo()
xmp_info = parse_xmp(pdf_file)
Expand All @@ -263,6 +263,13 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
tags = xmp_info['tags']
languages = xmp_info['languages']
publisher = xmp_info['publisher']
else:
author = u'Unknown'
title = ''
languages = [""]
publisher = ""
subject = ""
tags = ""

if doc_info:
if author == '':
Expand All @@ -273,14 +280,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
subject = doc_info.subject
if tags == '' and '/Keywords' in doc_info:
tags = doc_info['/Keywords']

else:
author= u'Unknown'
title = original_file_name
subject = ""
tags = ""
languages = [""]
publisher = ""

return BookMeta(
file_path=tmp_file_path,
Expand Down
Loading

0 comments on commit 2760a78

Please sign in to comment.