diff --git a/cps/comic.py b/cps/comic.py index c2b30197..c1f1fd63 100644 --- a/cps/comic.py +++ b/cps/comic.py @@ -154,7 +154,8 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r tags="", series=loadedMetadata.series or "", series_id=loadedMetadata.issue or "", - languages=loadedMetadata.language) + languages=loadedMetadata.language, + publisher="") return BookMeta( file_path=tmp_file_path, @@ -166,4 +167,5 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r tags="", series="", series_id="", - languages="") + languages="", + publisher="") diff --git a/cps/constants.py b/cps/constants.py index bcb5d0c5..e9c26cb1 100644 --- a/cps/constants.py +++ b/cps/constants.py @@ -152,7 +152,7 @@ def selected_roles(dictionary): # :rtype: BookMeta BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, ' - 'series_id, languages') + 'series_id, languages, publisher') STABLE_VERSION = {'version': '0.6.12 Beta'} diff --git a/cps/editbooks.py b/cps/editbooks.py index 42bda734..28cad5c5 100644 --- a/cps/editbooks.py +++ b/cps/editbooks.py @@ -444,10 +444,10 @@ def edit_book_languages(languages, book, upload=False): return modify_database_object(input_l, book.languages, db.Languages, calibre_db.session, 'languages') -def edit_book_publisher(to_save, book): +def edit_book_publisher(publishers, book): changed = False - if to_save["publisher"]: - publisher = to_save["publisher"].rstrip().strip() + if publishers: + publisher = publishers.rstrip().strip() if len(book.publishers) == 0 or (len(book.publishers) > 0 and publisher != book.publishers[0].name): changed |= modify_database_object([publisher], book.publishers, db.Publishers, calibre_db.session, 'publisher') @@ -740,7 +740,7 @@ def edit_book(book_id): book.pubdate = db.Books.DEFAULT_PUBDATE # handle book publisher - modif_date |= edit_book_publisher(to_save, book) + modif_date |= edit_book_publisher(to_save['publisher'], book) # handle book languages modif_date |= edit_book_languages(to_save['languages'], book) @@ -867,6 +867,9 @@ def create_book_on_upload(modif_date, meta): # handle tags modif_date |= edit_book_tags(meta.tags, db_book) + # handle publisher + modif_date |= edit_book_publisher(meta.publisher, db_book) + # handle series modif_date |= edit_book_series(meta.series, db_book) diff --git a/cps/epub.py b/cps/epub.py index 583e4eda..5833c2aa 100644 --- a/cps/epub.py +++ b/cps/epub.py @@ -142,4 +142,5 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension): tags=epub_metadata['subject'].encode('utf-8').decode('utf-8'), series=epub_metadata['series'].encode('utf-8').decode('utf-8'), series_id=epub_metadata['series_id'].encode('utf-8').decode('utf-8'), - languages=epub_metadata['language']) + languages=epub_metadata['language'], + publisher="") diff --git a/cps/fb2.py b/cps/fb2.py index bdb3d1d5..d7b03d5b 100644 --- a/cps/fb2.py +++ b/cps/fb2.py @@ -30,51 +30,52 @@ def get_fb2_info(tmp_file_path, original_file_extension): } fb2_file = open(tmp_file_path) - tree = etree.fromstring(fb2_file.read()) + tree = etree.fromstring(fb2_file.read().encode()) authors = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns) def get_author(element): last_name = element.xpath('fb:last-name/text()', namespaces=ns) if len(last_name): - last_name = last_name[0].encode('utf-8') + last_name = last_name[0] else: last_name = u'' middle_name = element.xpath('fb:middle-name/text()', namespaces=ns) if len(middle_name): - middle_name = middle_name[0].encode('utf-8') + middle_name = middle_name[0] else: middle_name = u'' first_name = element.xpath('fb:first-name/text()', namespaces=ns) if len(first_name): - first_name = first_name[0].encode('utf-8') + first_name = first_name[0] else: first_name = u'' - return (first_name.decode('utf-8') + u' ' - + middle_name.decode('utf-8') + u' ' - + last_name.decode('utf-8')).encode('utf-8') + return (first_name + u' ' + + middle_name + u' ' + + last_name) author = str(", ".join(map(get_author, authors))) title = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns) if len(title): - title = str(title[0].encode('utf-8')) + title = str(title[0]) else: title = u'' description = tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns) if len(description): - description = str(description[0].encode('utf-8')) + description = str(description[0]) else: description = u'' return BookMeta( file_path=tmp_file_path, extension=original_file_extension, - title=title.decode('utf-8'), - author=author.decode('utf-8'), + title=title, + author=author, cover=None, - description=description.decode('utf-8'), + description=description, tags="", series="", series_id="", - languages="") + languages="", + publisher="") diff --git a/cps/isoLanguages.py b/cps/isoLanguages.py index d177b53e..e447a623 100644 --- a/cps/isoLanguages.py +++ b/cps/isoLanguages.py @@ -57,28 +57,30 @@ def get_language_name(locale, lang_code): def get_language_codes(locale, language_names, remainder=None): language_names = set(x.strip().lower() for x in language_names if x) - languages = list() + lang = list() for k, v in get_language_names(locale).items(): v = v.lower() if v in language_names: - languages.append(k) + lang.append(k) language_names.remove(v) if remainder is not None: remainder.extend(language_names) - return languages + return lang + def get_valid_language_codes(locale, language_names, remainder=None): - languages = list() + lang = list() if "" in language_names: language_names.remove("") for k, __ in get_language_names(locale).items(): if k in language_names: - languages.append(k) + lang.append(k) language_names.remove(k) if remainder is not None and len(language_names): remainder.extend(language_names) - return languages + return lang + def get_lang3(lang): try: diff --git a/cps/server.py b/cps/server.py index 8122e5a6..9b79f77d 100644 --- a/cps/server.py +++ b/cps/server.py @@ -251,7 +251,7 @@ class WebServer(object): finally: self.wsgiserver = None - # prevent irritiating log of pending tasks message from asyncio + # prevent irritating log of pending tasks message from asyncio logger.get('asyncio').setLevel(logger.logging.CRITICAL) if not self.restart: diff --git a/cps/updater.py b/cps/updater.py index 8ded18ee..278f437b 100644 --- a/cps/updater.py +++ b/cps/updater.py @@ -330,6 +330,7 @@ class Updater(threading.Thread): @staticmethod def _load_nightly_data(repository_url, commit, status): + update_data = dict() try: headers = {'Accept': 'application/vnd.github.v3+json'} r = requests.get(repository_url + '/git/commits/' + commit['object']['sha'], diff --git a/cps/uploader.py b/cps/uploader.py index 6ada4f5c..9257c3a9 100644 --- a/cps/uploader.py +++ b/cps/uploader.py @@ -22,11 +22,10 @@ import hashlib from tempfile import gettempdir from flask_babel import gettext as _ -from . import logger, comic +from . import logger, comic, isoLanguages, get_locale from .constants import BookMeta from .helper import split_authors - log = logger.create() @@ -45,12 +44,17 @@ except (ImportError, RuntimeError) as e: use_generic_pdf_cover = True try: - from PyPDF2 import PdfFileReader - from PyPDF2 import __version__ as PyPdfVersion + from PyPDF3 import PdfFileReader + from PyPDF3 import __version__ as PyPdfVersion use_pdf_meta = True -except ImportError as e: - log.debug('Cannot import PyPDF2, extracting pdf metadata will not work: %s', e) - use_pdf_meta = False +except ImportError as ex: + try: + from PyPDF2 import PdfFileReader + from PyPDF2 import __version__ as PyPdfVersion + use_pdf_meta = True + except ImportError as e: + log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', e) + use_pdf_meta = False try: from . import epub @@ -82,7 +86,7 @@ def process(tmp_file_path, original_file_name, original_file_extension, rarExecu original_file_name, original_file_extension, rarExecutable) - except Exception as ex: + except Exception as ex: log.warning('cannot parse metadata, using default: %s', ex) if meta and meta.title.strip() and meta.author.strip(): @@ -98,39 +102,199 @@ def default_meta(tmp_file_path, original_file_name, original_file_extension): extension=original_file_extension, title=original_file_name, author=_(u'Unknown'), - cover=None, + cover=None, #pdf_preview(tmp_file_path, original_file_name), description="", tags="", series="", series_id="", - languages="") + languages="", + publisher="") + + +def parse_xmp(pdf_file): + """ + Parse XMP Metadata and prepare for BookMeta object + """ + try: + xmp_info = pdf_file.getXmpMetadata() + except Exception as e: + log.debug('Can not read XMP metadata', e) + return None + + if xmp_info: + try: + xmp_author = xmp_info.dc_creator # list + except AttributeError: + xmp_author = [''] + + if xmp_info.dc_title: + xmp_title = xmp_info.dc_title['x-default'] + else: + xmp_title = '' + + if xmp_info.dc_description: + xmp_description = xmp_info.dc_description['x-default'] + else: + xmp_description = '' + + languages = [] + try: + for i in xmp_info.dc_language: + #calibre-web currently only takes one language. + languages.append(isoLanguages.get_lang3(i)) + except: + languages.append('') + + xmp_tags = ', '.join(xmp_info.dc_subject) + xmp_publisher = ', '.join(xmp_info.dc_publisher) + + return {'author': xmp_author, + 'title': xmp_title, + 'subject': xmp_description, + 'tags': xmp_tags, 'languages': languages, + 'publisher': xmp_publisher + } + + +def parse_xmp(pdf_file): + """ + Parse XMP Metadata and prepare for BookMeta object + """ + try: + xmp_info = pdf_file.getXmpMetadata() + except Exception as e: + log.debug('Can not read XMP metadata', e) + return None + + if xmp_info: + try: + xmp_author = xmp_info.dc_creator # list + except: + xmp_author = [''] + + if xmp_info.dc_title: + xmp_title = xmp_info.dc_title['x-default'] + else: + xmp_title = '' + + if xmp_info.dc_description: + xmp_description = xmp_info.dc_description['x-default'] + else: + xmp_description = '' + + languages = [] + try: + for i in xmp_info.dc_language: + languages.append(isoLanguages.get_lang3(i)) + except AttributeError: + languages= [""] + + xmp_tags = ', '.join(xmp_info.dc_subject) + xmp_publisher = ', '.join(xmp_info.dc_publisher) + + return {'author': xmp_author, + 'title': xmp_title, + 'subject': xmp_description, + 'tags': xmp_tags, + 'languages': languages, + 'publisher': xmp_publisher + } + + +def parse_xmp(pdf_file): + """ + Parse XMP Metadata and prepare for BookMeta object + """ + try: + xmp_info = pdf_file.getXmpMetadata() + except Exception as e: + log.debug('Can not read XMP metadata', e) + return None + + if xmp_info: + try: + xmp_author = xmp_info.dc_creator # list + except AttributeError: + xmp_author = ['Unknown'] + + if xmp_info.dc_title: + xmp_title = xmp_info.dc_title['x-default'] + else: + xmp_title = '' + + if xmp_info.dc_description: + xmp_description = xmp_info.dc_description['x-default'] + else: + xmp_description = '' + + languages = [] + try: + for i in xmp_info.dc_language: + languages.append(isoLanguages.get_lang3(i)) + except AttributeError: + languages.append('') + + xmp_tags = ', '.join(xmp_info.dc_subject) + xmp_publisher = ', '.join(xmp_info.dc_publisher) + + return {'author': xmp_author, + 'title': xmp_title, + 'subject': xmp_description, + 'tags': xmp_tags, + 'languages': languages, + 'publisher': xmp_publisher + } def pdf_meta(tmp_file_path, original_file_name, original_file_extension): doc_info = None + xmp_info = None + if use_pdf_meta: with open(tmp_file_path, 'rb') as f: - doc_info = PdfFileReader(f).getDocumentInfo() - if doc_info: - author = doc_info.author if doc_info.author else u'Unknown' - title = doc_info.title if doc_info.title else original_file_name - subject = doc_info.subject + pdf_file = PdfFileReader(f) + doc_info = pdf_file.getDocumentInfo() + xmp_info = parse_xmp(pdf_file) + + if xmp_info: + author = ' & '.join(split_authors(xmp_info['author'])) + title = xmp_info['title'] + subject = xmp_info['subject'] + tags = xmp_info['tags'] + languages = xmp_info['languages'] + publisher = xmp_info['publisher'] else: author = u'Unknown' - title = original_file_name + title = '' + languages = [""] + publisher = "" subject = "" + tags = "" + + if doc_info: + if author == '': + author = ' & '.join(split_authors([doc_info.author])) if doc_info.author else u'Unknown' + if title == '': + title = doc_info.title if doc_info.title else original_file_name + if subject == '': + subject = doc_info.subject + if tags == '' and '/Keywords' in doc_info: + tags = doc_info['/Keywords'] + else: + title = original_file_name return BookMeta( file_path=tmp_file_path, extension=original_file_extension, title=title, - author=' & '.join(split_authors([author])), + author=author, cover=pdf_preview(tmp_file_path, original_file_name), description=subject, - tags="", + tags=tags, series="", series_id="", - languages="") + languages=','.join(languages), + publisher=publisher) def pdf_preview(tmp_file_path, tmp_dir): diff --git a/optional-requirements.txt b/optional-requirements.txt index a94bb506..3283777b 100644 --- a/optional-requirements.txt +++ b/optional-requirements.txt @@ -30,7 +30,7 @@ rarfile>=2.7 # other natsort>=2.2.0,<7.1.0 -comicapi>= 2.1.3,<2.2.0 +comicapi>= 2.2.0,<2.3.0 #Kobo integration jsonschema>=3.2.0,<3.3.0 diff --git a/requirements.txt b/requirements.txt index 94dc7f3a..04aaa000 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ singledispatch>=3.4.0.0,<3.5.0.0 backports_abc>=0.4 Flask>=1.0.2,<1.2.0 iso-639>=0.4.5,<0.5.0 -PyPDF2>=1.26.0,<1.27.0 +PyPDF3>=1.0.0,<1.0.4 pytz>=2016.10 requests>=2.11.1,<2.25.0 SQLAlchemy>=1.3.0,<1.4.0 diff --git a/setup.cfg b/setup.cfg index 88624195..89e0f598 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,7 +42,7 @@ install_requires = backports_abc>=0.4 Flask>=1.0.2,<1.2.0 iso-639>=0.4.5,<0.5.0 - PyPDF2>=1.26.0,<1.27.0 + PyPDF3>=1.0.0,<1.0.4 pytz>=2016.10 requests>=2.11.1,<2.25.0 SQLAlchemy>=1.3.0,<1.4.0 @@ -52,9 +52,9 @@ install_requires = [options.extras_require] gdrive = - google-api-python-client>=1.7.11,<1.8.0 - gevent>=1.2.1,<20.6.0 - greenlet>=0.4.12,<0.4.17 + google-api-python-client>=1.7.11,<1.13.0 + gevent>20.6.0,<21.2.0 + greenlet>=0.4.17,<1.1.0 httplib2>=0.9.2,<0.18.0 oauth2client>=4.0.0,<4.1.4 uritemplate>=3.0.0,<3.1.0 @@ -68,16 +68,16 @@ goodreads = goodreads>=0.3.2,<0.4.0 python-Levenshtein>=0.12.0,<0.13.0 ldap = - python-ldap>=3.0.0,<3.3.0 + python-ldap>=3.0.0,<3.4.0 Flask-SimpleLDAP>=1.4.0,<1.5.0 oauth = Flask-Dance>=1.4.0,<3.1.0 SQLAlchemy-Utils>=0.33.5,<0.37.0 metadata = - lxml>=3.8.0,<4.6.0 + lxml>=3.8.0,<4.7.0 rarfile>=2.7 comics = - natsort>=2.2.0 + natsort>=2.2.0,<7.1.0 comicapi>= 2.1.3,<2.2.0 kobo = jsonschema>=3.2.0,<3.3.0