From 7f6bc28ecaf5f12802f0c11fd9c530238b379058 Mon Sep 17 00:00:00 2001 From: NiLuJe Date: Mon, 7 Dec 2020 00:09:47 +0100 Subject: [PATCH] BookInfoManager: Convert thumbnail compression to ZSTD * Avoid the Lua string copies when compressing/uncompressing the bb thumbs * Delay the statement reset so that the pointer returned by the BLOB query is actually usable ;). * Re-use a ZSTD decompression context * Store date & mtime for successfully extracted documents c.f., https://github.com/NiLuJe/koreader/commit/10477501f90471e84df68f561274f19a9a202dfb#r44914917 * Switch to user_version pragma to handle DB schema versioning * Update file size/date on *each* extraction attempt. @poirez-z raised a good point, this ought to be useful to deem a file re-parseable after an update. * Remove xutil Functionality has been moved to ffi/zlib & ffi/zstd * Preserve settings when migrating the BookInfo DB * Warn via InfoMessage that the DB was updated * Only store the series name in series, and move the index in series_index (Column names chosen to match Calibre's). * Handle the new series_index column in BookInfo consumers * This allows us to get rid of the code that stripped empty decimals from series index in *most* places, since it's now a real Lua number, and the string formatting library does that magic on its own. --- .../apps/filemanager/filemanagerbookinfo.lua | 11 +- frontend/util.lua | 4 +- .../coverbrowser.koplugin/bookinfomanager.lua | 184 ++++++++++++------ plugins/coverbrowser.koplugin/listmenu.lua | 8 +- plugins/coverbrowser.koplugin/mosaicmenu.lua | 8 +- plugins/coverbrowser.koplugin/xutil.lua | 35 ---- 6 files changed, 149 insertions(+), 101 deletions(-) delete mode 100644 plugins/coverbrowser.koplugin/xutil.lua diff --git a/frontend/apps/filemanager/filemanagerbookinfo.lua b/frontend/apps/filemanager/filemanagerbookinfo.lua index 746eec5df..17743e697 100644 --- a/frontend/apps/filemanager/filemanagerbookinfo.lua +++ b/frontend/apps/filemanager/filemanagerbookinfo.lua @@ -157,8 +157,15 @@ function BookInfo:show(file, book_props) local series = book_props.series if series == "" or series == nil then series = _("N/A") - else -- Shorten calibre series decimal number (#4.0 => #4) - series = series:gsub("(#%d+)%.0$", "%1") + else + -- If we were fed a BookInfo book_props (e.g., covermenu), series index is in a separate field + if book_props.series_index then + -- Here, we're assured that series_index is a Lua number, so round integers are automatically displayed without decimals + series = book_props.series .. " #" .. book_props.series_index + else + -- But here, if we have a plain doc_props series with an index, drop empty decimals from round integers. + series = book_props.series:gsub("(#%d+)%.0+$", "%1") + end end table.insert(kv_pairs, { _("Series:"), BD.auto(series) }) diff --git a/frontend/util.lua b/frontend/util.lua index 42e2e3934..863ce8495 100644 --- a/frontend/util.lua +++ b/frontend/util.lua @@ -814,7 +814,7 @@ end function util.splitFilePathName(file) if file == nil or file == "" then return "", "" end if string.find(file, "/") == nil then return "", file end - return file:match'(.*/)(.*)' + return file:match("(.*/)(.*)") end --- Splits a file name into its pure file name and suffix @@ -823,7 +823,7 @@ end function util.splitFileNameSuffix(file) if file == nil or file == "" then return "", "" end if string.find(file, "%.") == nil then return file, "" end - return file:match'(.*)%.(.*)' + return file:match("(.*)%.(.*)") end --- Gets file extension diff --git a/plugins/coverbrowser.koplugin/bookinfomanager.lua b/plugins/coverbrowser.koplugin/bookinfomanager.lua index ec4a377bb..d1d0d2772 100644 --- a/plugins/coverbrowser.koplugin/bookinfomanager.lua +++ b/plugins/coverbrowser.koplugin/bookinfomanager.lua @@ -4,36 +4,37 @@ local DataStorage = require("datastorage") local Device = require("device") local DocumentRegistry = require("document/documentregistry") local FFIUtil = require("ffi/util") +local InfoMessage = require("ui/widget/infomessage") local RenderImage = require("ui/renderimage") local SQ3 = require("lua-ljsqlite3/init") local UIManager = require("ui/uimanager") local lfs = require("libs/libkoreader-lfs") local logger = require("logger") local util = require("util") +local zstd = require("ffi/zstd") local _ = require("gettext") local N_ = _.ngettext local T = FFIUtil.template --- Util functions needed by this plugin, but that may be added to existing base/ffi/ files -local xutil = require("xutil") - -- Database definition -local BOOKINFO_DB_VERSION = "2-20170701" +local BOOKINFO_DB_VERSION = 20201210 local BOOKINFO_DB_SCHEMA = [[ - -- For caching book cover and metadata + -- To cache book cover and metadata CREATE TABLE IF NOT EXISTS bookinfo ( -- Internal book cache id - -- (not to be used to identify a book, it may change for a same book) + -- (not to be used to identify a book, it may change) bcid INTEGER PRIMARY KEY AUTOINCREMENT, -- File location and filename directory TEXT NOT NULL, -- split by dir/name so we can get all files in a directory - filename TEXT NOT NULL, -- and can implement pruning of no more existing files + filename TEXT NOT NULL, -- and can implement pruning of deleted files + filesize INTEGER, -- size in bytes at most recent extraction time + filemtime INTEGER, -- mtime at most recent extraction time -- Extraction status and result - in_progress INTEGER, -- 0 (done), >0 : nb of tries (to avoid re-doing extractions that crashed us) + in_progress INTEGER, -- 0 (done), >0 : nb of tries (to avoid retrying failed extractions forever) unsupported TEXT, -- NULL if supported / reason for being unsupported - cover_fetched TEXT, -- NULL / 'Y' = action of fetching cover was made (whether we got one or not) + cover_fetched TEXT, -- NULL / 'Y' = we tried to fetch a cover (but we may not have gotten one) has_meta TEXT, -- NULL / 'Y' = has metadata (title, authors...) has_cover TEXT, -- NULL / 'Y' = has cover image (cover_*) cover_sizetag TEXT, -- 'M' (Medium, MosaicMenuItem) / 's' (small, ListMenuItem) @@ -50,6 +51,7 @@ local BOOKINFO_DB_SCHEMA = [[ title TEXT, authors TEXT, series TEXT, + series_index REAL, language TEXT, keywords TEXT, description TEXT, @@ -57,26 +59,24 @@ local BOOKINFO_DB_SCHEMA = [[ -- Cover image cover_w INTEGER, -- blitbuffer width cover_h INTEGER, -- blitbuffer height - cover_btype INTEGER, -- blitbuffer type (internal) - cover_bpitch INTEGER, -- blitbuffer pitch (internal) - cover_datalen INTEGER, -- blitbuffer uncompressed data length - cover_dataz BLOB -- blitbuffer data compressed with zlib + cover_bb_type INTEGER, -- blitbuffer type (internal) + cover_bb_stride INTEGER, -- blitbuffer stride (internal) + cover_bb_data BLOB -- blitbuffer data compressed with zstd ); CREATE UNIQUE INDEX IF NOT EXISTS dir_filename ON bookinfo(directory, filename); - -- For keeping track of DB schema version + -- To keep track of CoverBrowser settings CREATE TABLE IF NOT EXISTS config ( key TEXT PRIMARY KEY, value TEXT ); - -- this will not override previous version value, so we'll get the old one if old schema - INSERT OR IGNORE INTO config VALUES ('version', ']] .. BOOKINFO_DB_VERSION .. [['); - ]] local BOOKINFO_COLS_SET = { "directory", "filename", + "filesize", + "filemtime", "in_progress", "unsupported", "cover_fetched", @@ -89,15 +89,15 @@ local BOOKINFO_COLS_SET = { "title", "authors", "series", + "series_index", "language", "keywords", "description", "cover_w", "cover_h", - "cover_btype", - "cover_bpitch", - "cover_datalen", - "cover_dataz", + "cover_bb_type", + "cover_bb_stride", + "cover_bb_data", } local bookinfo_values_sql = {} -- for "VALUES (?, ?, ?,...)" insert sql part @@ -150,16 +150,35 @@ function BookInfoManager:createDB() -- Less error cases to check if we do it that way -- Create it (noop if already there) db_conn:exec(BOOKINFO_DB_SCHEMA) - -- Check version (not updated by previous exec if already there) - local res = db_conn:exec("SELECT value FROM config where key='version';") - if res[1][1] ~= BOOKINFO_DB_VERSION then - logger.warn("BookInfo cache DB schema updated from version", res[1][1], "to version", BOOKINFO_DB_VERSION) + -- Check version + local db_version = tonumber(db_conn:rowexec("PRAGMA user_version;")) + if db_version < BOOKINFO_DB_VERSION then + logger.warn("BookInfo cache DB schema updated from version", db_version, "to version", BOOKINFO_DB_VERSION) logger.warn("Deleting existing", self.db_location, "to recreate it") + + -- We'll try to preserve settings, though + self:loadSettings(db_conn) + db_conn:close() os.remove(self.db_location) + -- Re-create it db_conn = SQ3.open(self.db_location) db_conn:exec(BOOKINFO_DB_SCHEMA) + + -- Restore non-deprecated settings + for k, v in pairs(self.settings) do + if k ~= "version" then + self:saveSetting(k, v, true) + end + end + self:loadSettings(db_conn) + + -- Update version + db_conn:exec(string.format("PRAGMA user_version=%d;", BOOKINFO_DB_VERSION)) + + -- Say hi! + UIManager:show(InfoMessage:new{text =_("BookInfo cache database schema updated."), timeout = 3 }) end db_conn:close() self.db_created = true @@ -216,19 +235,29 @@ function BookInfoManager:compactDb() end -- Settings management, stored in 'config' table -function BookInfoManager:loadSettings() +function BookInfoManager:loadSettings(db_conn) if lfs.attributes(self.db_location, "mode") ~= "file" then -- no db, empty config self.settings = {} return end self.settings = {} - self:openDbConnection() - local res = self.db_conn:exec("SELECT key, value FROM config;") - local keys = res[1] - local values = res[2] - for i, key in ipairs(keys) do - self.settings[key] = values[i] + + local my_db_conn + if db_conn then + my_db_conn = db_conn + else + self:openDbConnection() + my_db_conn = self.db_conn + end + + local res = my_db_conn:exec("SELECT key, value FROM config;") + if res then + local keys = res[1] + local values = res[2] + for i, key in ipairs(keys) do + self.settings[key] = values[i] + end end end @@ -239,7 +268,7 @@ function BookInfoManager:getSetting(key) return self.settings[key] end -function BookInfoManager:saveSetting(key, value) +function BookInfoManager:saveSetting(key, value, skip_reload) if not value or value == false or value == "" then if lfs.attributes(self.db_location, "mode") ~= "file" then -- If no db created, no need to save (and create db) an empty value @@ -257,8 +286,11 @@ function BookInfoManager:saveSetting(key, value) stmt:bind(key, value) stmt:step() -- commited stmt:clearbind():reset() -- cleanup - -- Reload settings, so we may get (or not if it failed) what we just saved - self:loadSettings() + + -- Optionally, reload settings, so we may get (or not if it failed) what we just saved + if not skip_reload then + self:loadSettings() + end end -- Bookinfo management @@ -274,6 +306,10 @@ function BookInfoManager:getBookInfo(filepath, get_cover) return { directory = directory, filename = filename, + --[[ + filesize = lfs.attributes(filepath, "size"), + filemtime = lfs.attributes(filepath, "modification"), + --]] in_progress = 0, cover_fetched = "Y", has_meta = nil, @@ -289,9 +325,11 @@ function BookInfoManager:getBookInfo(filepath, get_cover) self:openDbConnection() local row = self.get_stmt:bind(directory, filename):step() - self.get_stmt:clearbind():reset() -- get ready for next query + -- NOTE: We do not reset right now because we'll be querying a BLOB, + -- so we need the data it points to to still be there ;). if not row then -- filepath not in db + self.get_stmt:clearbind():reset() -- get ready for next query return nil end @@ -313,16 +351,27 @@ function BookInfoManager:getBookInfo(filepath, get_cover) if bookinfo["has_cover"] then bookinfo["cover_w"] = tonumber(row[num]) bookinfo["cover_h"] = tonumber(row[num+1]) - local cover_data = xutil.zlib_uncompress(row[num+5], row[num+4]) - row[num+5] = nil -- release memory used by cover_dataz - -- Blitbuffer.fromstring() expects : w, h, bb_type, bb_data, pitch - bookinfo["cover_bb"] = Blitbuffer.fromstring(row[num], row[num+1], row[num+2], cover_data, row[num+3]) - -- release memory used by uncompressed data: - cover_data = nil -- luacheck: no unused + local bbtype = tonumber(row[num+2]) + local bbstride = tonumber(row[num+3]) + -- This is a blob_mt table! Essentially, a (ptr, size) tuple. + local cover_blob = row[num+4] + -- The pointer returned by SQLite is only valid until the next step/reset/finalize! + -- (which means its memory management is entirely in the hands of SQLite) + local cover_data, cover_size = zstd.zstd_uncompress_ctx(cover_blob[1], cover_blob[2]) + -- Double-check that the size of the uncompressed BB is as expected... + local expected_cover_size = bbstride * bookinfo["cover_h"] + assert(cover_size == expected_cover_size, "Uncompressed a " .. tonumber(cover_size) .. "b BB instead of the expected " .. tonumber(expected_cover_size) .. "b") + -- That one, on the other hand, is on the heap, so we can use it without making a copy. + local cover_bb = Blitbuffer.new(bookinfo["cover_w"], bookinfo["cover_h"], bbtype, cover_data, bbstride, bookinfo["cover_w"]) + -- Mark its data pointer as safe to free() on GC + cover_bb:setAllocated(1) + bookinfo["cover_bb"] = cover_bb end break end end + + self.get_stmt:clearbind():reset() -- get ready for next query return bookinfo end @@ -395,6 +444,11 @@ function BookInfoManager:extractBookInfo(filepath, cover_specs) return -- Last insert done for this book, we're giving up end + -- Update this on each extraction attempt. Might be useful to reset the counter in case file gets updated. + local file_attr = lfs.attributes(filepath) + dbrow.filesize = file_attr.size + dbrow.filemtime = file_attr.modification + -- Proceed with extracting info local document = DocumentRegistry:openDocument(filepath) local loaded = true @@ -423,7 +477,28 @@ function BookInfoManager:extractBookInfo(filepath, cover_specs) end if props.title and props.title ~= "" then dbrow.title = props.title end if props.authors and props.authors ~= "" then dbrow.authors = props.authors end - if props.series and props.series ~= "" then dbrow.series = props.series end + if props.series and props.series ~= "" then + -- NOTE: If there's a series index in there, split it off to series_index, and only store the name in series. + -- This property is currently only set by: + -- * DjVu, for which I couldn't find a real standard for metadata fields + -- (we currently use Series for this field, c.f., https://exiftool.org/TagNames/DjVu.html). + -- * CRe, which could offer us a split getSeriesName & getSeriesNumber... + -- except getSeriesNumber does an atoi, so it'd murder decimal values. + -- So, instead, parse how it formats the whole thing as a string ;). + if string.find(props.series, "#") then + dbrow.series, dbrow.series_index = props.series:match("(.*) #(%d+%.?%d-)$") + if dbrow.series_index then + -- We're inserting via a bind method, so make sure we feed it a Lua number, because it's a REAL in the db. + dbrow.series_index = tonumber(dbrow.series_index) + else + -- If the index pattern didn't match (e.g., nothing after the octothorp, or a string), + -- restore the full thing as the series name. + dbrow.series = props.series + end + else + dbrow.series = props.series + end + end if props.language and props.language ~= "" then dbrow.language = props.language end if props.keywords and props.keywords ~= "" then dbrow.keywords = props.keywords end if props.description and props.description ~= "" then dbrow.description = props.description end @@ -447,18 +522,16 @@ function BookInfoManager:extractBookInfo(filepath, cover_specs) cbb_h = math.min(math.floor(cbb_h * scale_factor)+1, spec_max_cover_h) cover_bb = RenderImage:scaleBlitBuffer(cover_bb, cbb_w, cbb_h, true) end - dbrow.cover_w = cbb_w - dbrow.cover_h = cbb_h - dbrow.cover_btype = cover_bb:getType() - dbrow.cover_bpitch = tonumber(cover_bb.stride) - local cover_data = Blitbuffer.tostring(cover_bb) - cover_bb:free() -- free bb before compressing to save memory - dbrow.cover_datalen = cover_data:len() - local cover_dataz = xutil.zlib_compress(cover_data) - -- release memory used by uncompressed data: - cover_data = nil -- luacheck: no unused - dbrow.cover_dataz = SQ3.blob(cover_dataz) -- cast to blob for sqlite - logger.dbg("cover for", filename, "scaled by", scale_factor, "=>", cbb_w, "x", cbb_h, ", compressed from", dbrow.cover_datalen, "to", cover_dataz:len()) + dbrow.cover_w = cover_bb.w + dbrow.cover_h = cover_bb.h + dbrow.cover_bb_type = cover_bb:getType() + dbrow.cover_bb_stride = tonumber(cover_bb.stride) + local cover_size = cover_bb.stride * cover_bb.h + local cover_zst_ptr, cover_zst_size = zstd.zstd_compress(cover_bb.data, cover_size) + dbrow.cover_bb_data = SQ3.blob(cover_zst_ptr, cover_zst_size) -- cast to blob for sqlite + logger.dbg("cover for", filename, "scaled by", scale_factor, "=>", cover_bb.w, "x", cover_bb.h, ", compressed from", tonumber(cover_size), "to", tonumber(cover_zst_size)) + -- We're done with the uncompressed bb now, and the compressed one has been managed by SQLite ;) + cover_bb:free() end end end @@ -684,7 +757,6 @@ end -- Batch extraction function BookInfoManager:extractBooksInDirectory(path, cover_specs) local Geom = require("ui/geometry") - local InfoMessage = require("ui/widget/infomessage") local TopContainer = require("ui/widget/container/topcontainer") local Trapper = require("ui/trapper") local Screen = require("device").screen diff --git a/plugins/coverbrowser.koplugin/listmenu.lua b/plugins/coverbrowser.koplugin/listmenu.lua index 0778496fc..598d1b547 100644 --- a/plugins/coverbrowser.koplugin/listmenu.lua +++ b/plugins/coverbrowser.koplugin/listmenu.lua @@ -540,9 +540,11 @@ function ListMenuItem:update() end -- add Series metadata if requested if bookinfo.series then - -- Shorten calibre series decimal number (#4.0 => #4) - bookinfo.series = bookinfo.series:gsub("(#%d+)%.0$", "%1") - bookinfo.series = BD.auto(bookinfo.series) + if bookinfo.series_index then + bookinfo.series = BD.auto(bookinfo.series .. " #" .. bookinfo.series_index) + else + bookinfo.series = BD.auto(bookinfo.series) + end if series_mode == "append_series_to_title" then if title then title = title .. " - " .. bookinfo.series diff --git a/plugins/coverbrowser.koplugin/mosaicmenu.lua b/plugins/coverbrowser.koplugin/mosaicmenu.lua index c82f4cd30..3825625c6 100644 --- a/plugins/coverbrowser.koplugin/mosaicmenu.lua +++ b/plugins/coverbrowser.koplugin/mosaicmenu.lua @@ -595,9 +595,11 @@ function MosaicMenuItem:update() local series_mode = BookInfoManager:getSetting("series_mode") local title_add, authors_add if bookinfo.series then - -- Shorten calibre series decimal number (#4.0 => #4) - bookinfo.series = bookinfo.series:gsub("(#%d+)%.0$", "%1") - bookinfo.series = BD.auto(bookinfo.series) + if bookinfo.series_index then + bookinfo.series = BD.auto(bookinfo.series .. " #" .. bookinfo.series_index) + else + bookinfo.series = BD.auto(bookinfo.series) + end if series_mode == "append_series_to_title" then if bookinfo.title then title_add = " - " .. bookinfo.series diff --git a/plugins/coverbrowser.koplugin/xutil.lua b/plugins/coverbrowser.koplugin/xutil.lua deleted file mode 100644 index e5cc6d83a..000000000 --- a/plugins/coverbrowser.koplugin/xutil.lua +++ /dev/null @@ -1,35 +0,0 @@ -local ffi = require("ffi") - --- Utilities functions needed by this plugin, but that may be added to --- existing base/ffi/ files -local xutil = {} - --- Data compression/decompression of strings thru zlib (may be put in a new base/ffi/zlib.lua) --- from http://luajit.org/ext_ffi_tutorial.html -ffi.cdef[[ -unsigned long compressBound(unsigned long sourceLen); -int compress2(uint8_t *dest, unsigned long *destLen, - const uint8_t *source, unsigned long sourceLen, int level); -int uncompress(uint8_t *dest, unsigned long *destLen, - const uint8_t *source, unsigned long sourceLen); -]] -local zlib = ffi.load(ffi.os == "Windows" and "zlib1" or "z") - -function xutil.zlib_compress(data) - local n = zlib.compressBound(#data) - local buf = ffi.new("uint8_t[?]", n) - local buflen = ffi.new("unsigned long[1]", n) - local res = zlib.compress2(buf, buflen, data, #data, 9) - assert(res == 0) - return ffi.string(buf, buflen[0]) -end - -function xutil.zlib_uncompress(zdata, datalen) - local buf = ffi.new("uint8_t[?]", datalen) - local buflen = ffi.new("unsigned long[1]", datalen) - local res = zlib.uncompress(buf, buflen, zdata, #zdata) - assert(res == 0) - return ffi.string(buf, buflen[0]) -end - -return xutil