From 2760a7816de90e838d0122210b9c3195dd1c7a4d Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Fri, 19 Mar 2021 15:26:41 +0100 Subject: [PATCH] Fix metadata recognition fb2 files --- cps/fb2.py | 24 ++-- cps/server.py | 5 +- cps/uploader.py | 37 ++--- test/Calibre-Web TestSummary_Linux.html | 176 ++++-------------------- 4 files changed, 59 insertions(+), 183 deletions(-) diff --git a/cps/fb2.py b/cps/fb2.py index 59df19ba..d7b03d5b 100644 --- a/cps/fb2.py +++ b/cps/fb2.py @@ -30,50 +30,50 @@ def get_fb2_info(tmp_file_path, original_file_extension): } fb2_file = open(tmp_file_path) - tree = etree.fromstring(fb2_file.read()) + tree = etree.fromstring(fb2_file.read().encode()) authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns) def get_author(element): last_name = element.xpath('fb:last-name/text()', namespaces=ns) if len(last_name): - last_name = last_name[0].encode('utf-8') + last_name = last_name[0] else: last_name = u'' middle_name = element.xpath('fb:middle-name/text()', namespaces=ns) if len(middle_name): - middle_name = middle_name[0].encode('utf-8') + middle_name = middle_name[0] else: middle_name = u'' first_name = element.xpath('fb:first-name/text()', namespaces=ns) if len(first_name): - first_name = first_name[0].encode('utf-8') + first_name = first_name[0] else: first_name = u'' - return (first_name.decode('utf-8') + u' ' - + middle_name.decode('utf-8') + u' ' - + last_name.decode('utf-8')).encode('utf-8') + return (first_name + u' ' + + middle_name + u' ' + + last_name) author = str(", ".join(map(get_author, authors))) title = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns) if len(title): - title = str(title[0].encode('utf-8')) + title = str(title[0]) else: title = u'' description = tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns) if len(description): - description = str(description[0].encode('utf-8')) + description = str(description[0]) else: description = u'' return BookMeta( file_path=tmp_file_path, extension=original_file_extension, - title=title.decode('utf-8'), - author=author.decode('utf-8'), + title=title, + author=author, cover=None, - description=description.decode('utf-8'), + description=description, tags="", series="", series_id="", diff --git a/cps/server.py b/cps/server.py index a96858c9..9b79f77d 100644 --- a/cps/server.py +++ b/cps/server.py @@ -251,10 +251,11 @@ class WebServer(object): finally: self.wsgiserver = None + # prevent irritating log of pending tasks message from asyncio + logger.get('asyncio').setLevel(logger.logging.CRITICAL) + if not self.restart: log.info("Performing shutdown of Calibre-Web") - # prevent irritating log of pending tasks message from asyncio - logger.get('asyncio').setLevel(logger.logging.CRITICAL) return True log.info("Performing restart of Calibre-Web") diff --git a/cps/uploader.py b/cps/uploader.py index a4fe8453..9257c3a9 100644 --- a/cps/uploader.py +++ b/cps/uploader.py @@ -214,7 +214,7 @@ def parse_xmp(pdf_file): if xmp_info: try: xmp_author = xmp_info.dc_creator # list - except: + except AttributeError: xmp_author = ['Unknown'] if xmp_info.dc_title: @@ -228,20 +228,22 @@ def parse_xmp(pdf_file): xmp_description = '' languages = [] - for i in xmp_info.dc_language: - #calibre-web currently only takes one language. - languages.append(isoLanguages.get_lang3(i)) + try: + for i in xmp_info.dc_language: + languages.append(isoLanguages.get_lang3(i)) + except AttributeError: + languages.append('') xmp_tags = ', '.join(xmp_info.dc_subject) xmp_publisher = ', '.join(xmp_info.dc_publisher) - xmp_languages = xmp_info.dc_language return {'author': xmp_author, - 'title': xmp_title, - 'subject': xmp_description, - 'tags': xmp_tags, 'languages': languages, - 'publisher': xmp_publisher - } + 'title': xmp_title, + 'subject': xmp_description, + 'tags': xmp_tags, + 'languages': languages, + 'publisher': xmp_publisher + } def pdf_meta(tmp_file_path, original_file_name, original_file_extension): @@ -250,8 +252,6 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension): if use_pdf_meta: with open(tmp_file_path, 'rb') as f: - languages = [""] - publisher = "" pdf_file = PdfFileReader(f) doc_info = pdf_file.getDocumentInfo() xmp_info = parse_xmp(pdf_file) @@ -263,6 +263,13 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension): tags = xmp_info['tags'] languages = xmp_info['languages'] publisher = xmp_info['publisher'] + else: + author = u'Unknown' + title = '' + languages = [""] + publisher = "" + subject = "" + tags = "" if doc_info: if author == '': @@ -273,14 +280,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension): subject = doc_info.subject if tags == '' and '/Keywords' in doc_info: tags = doc_info['/Keywords'] - else: - author= u'Unknown' title = original_file_name - subject = "" - tags = "" - languages = [""] - publisher = "" return BookMeta( file_path=tmp_file_path, diff --git a/test/Calibre-Web TestSummary_Linux.html b/test/Calibre-Web TestSummary_Linux.html index c2467a1b..907b044b 100644 --- a/test/Calibre-Web TestSummary_Linux.html +++ b/test/Calibre-Web TestSummary_Linux.html @@ -37,20 +37,20 @@
-

Start Time: 2021-03-19 06:57:32

+

Start Time: 2021-03-19 16:22:43

-

Stop Time: 2021-03-19 09:30:30

+

Stop Time: 2021-03-19 18:56:39

-

Duration: 2h 4 min

+

Duration: 2h 5 min

@@ -802,11 +802,11 @@ - + TestEditBooks 33 - 26 - 5 + 31 + 0 0 2 @@ -870,151 +870,47 @@ - +
TestEditBooks - test_edit_custom_float
- -
- FAIL -
- - - - + PASS - +
TestEditBooks - test_edit_custom_int
- -
- FAIL -
- - - - + PASS - +
TestEditBooks - test_edit_custom_rating
- -
- FAIL -
- - - - + PASS - +
TestEditBooks - test_edit_custom_single_select
- -
- FAIL -
- - - - + PASS - +
TestEditBooks - test_edit_custom_text
- -
- FAIL -
- - - - + PASS @@ -1699,12 +1595,12 @@ AssertionError: 'Lulu 人 Ä' != '-2.5' - + TestKoboSync 9 - 8 + 9 + 0 0 - 1 0 Detail @@ -1713,33 +1609,11 @@ AssertionError: 'Lulu 人 Ä' != '-2.5' - +
TestKoboSync - test_book_download
- -
- ERROR -
- - - - + PASS @@ -3529,9 +3403,9 @@ IndexError: list index out of range Total 300 - 287 - 5 - 1 + 293 + 0 + 0 7   @@ -3902,7 +3776,7 @@ IndexError: list index out of range