From 7863a7ad70f6226807c8ae860b50043c9398883a Mon Sep 17 00:00:00 2001 From: NiLuJe Date: Mon, 16 Jan 2023 19:36:22 +0100 Subject: [PATCH] Misc: Natural sorting refactor (#10023) * Move natural sorting algo to a dedicated sort module to avoid code duplication * Use a slightly more accurate algorithm, and speed it up by caching intermediary strings * Calibre: Use natural sorting in metadata search (fix #10009) --- .../filemanager/filemanagerfilesearcher.lua | 8 +- frontend/sort.lua | 133 ++++++++++++++++++ frontend/ui/widget/filechooser.lua | 21 +-- frontend/userpatch.lua | 12 +- plugins/calibre.koplugin/search.lua | 10 +- plugins/coverbrowser.koplugin/covermenu.lua | 2 +- 6 files changed, 163 insertions(+), 23 deletions(-) create mode 100644 frontend/sort.lua diff --git a/frontend/apps/filemanager/filemanagerfilesearcher.lua b/frontend/apps/filemanager/filemanagerfilesearcher.lua index 152e66a30..3c143e6b8 100644 --- a/frontend/apps/filemanager/filemanagerfilesearcher.lua +++ b/frontend/apps/filemanager/filemanagerfilesearcher.lua @@ -219,7 +219,13 @@ function FileSearcher:showSearchResults() local collate = G_reader_settings:readSetting("collate") or "strcoll" local reverse_collate = G_reader_settings:isTrue("reverse_collate") - local sorting = FileChooser:getSortingFunction(collate, reverse_collate) + -- If we have a FileChooser instance, use it, to be able to make use of its natsort cache + local sorting + if self.ui.file_chooser then + sorting = self.ui.file_chooser:getSortingFunction(collate, reverse_collate) + else + sorting = FileChooser:getSortingFunction(collate, reverse_collate) + end table.sort(self.results, sorting) self.search_menu:switchItemTable(T(_("Search results (%1)"), #self.results), self.results) diff --git a/frontend/sort.lua b/frontend/sort.lua new file mode 100644 index 000000000..70fca7dd7 --- /dev/null +++ b/frontend/sort.lua @@ -0,0 +1,133 @@ +--[[-- +This module contains a collection of comparison functions (or factories for comparison functions) for `table.sort`. +@module sort +]] + +local sort = {} + +--[[ +Natural sorting functions, for use with table.sort + +--]] +-- Original implementation by Paul Kulchenko +--[[ +local function addLeadingZeroes(d) + local dec, n = string.match(d, "(%.?)0*(.+)") + return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n) +end +function sort.natsort(a, b) + return tostring(a):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b) + < tostring(b):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#a) +end +--]] +-- Hardened (but more expensive) implementation by Egor Skriptunoff, with an UTF-8 tweak by Paul Kulchenko +--[[ +local function natsort_conv(s) + local res, dot = "", "" + for n, m, c in tostring(s):gmatch("(0*(%d*))(.?)") do + if n == "" then + dot, c = "", dot..c + else + res = res..(dot == "" and ("%03d%s"):format(#m, m) + or "."..n) + dot, c = c:match("(%.?)(.*)") + end + res = res..c:gsub("[%z\1-\127\192-\255]", "\0%0") + end + return res +end +--]] +-- The above conversion is *fairly* expensive, +-- and table.sort ensures that it'll be called on identical strings multiple times, +-- so keeping a cache of massaged strings makes sense. +-- +-- We can rely on LRU to avoid explicit cache maintenance concerns +-- (given the type of content we massage, the memory impact is fairly insignificant). +-- The extra persistence this affords us also happens to help with the FM use-case ;). + +-- Dumb persistent hash-map => cold, ~200 to 250ms; hot: ~150ms (which roughly matches sorting by numerical file attributes). +-- (Numbers are from the FM sorting 350 entries (mostly composed of author names) on an H2O; an uncached run takes ~650ms). +--[[ +local natsort_cache = {} + +function sort.natsort(a, b) + local ca, cb = natsort_cache[a], natsort_cache[b] + if not ca then + ca = natsort_conv(a) + natsort_cache[a] = ca + end + if not cb then + cb = natsort_conv(b) + natsort_cache[b] = cb + end + + return ca < cb or ca == cb and a < b +end +--]] + +-- LRU => cold, ~200 to 250ms; hot ~150 to 175ms (which is barely any slower than a dumb hash-map, yay, LRU and LuaJIT magic). +--[[ +local lru = require("ffi/lru") +local natsort_cache = lru.new(1024, nil, false) + +function sort.natsort(a, b) + local ca, cb = natsort_cache:get(a), natsort_cache:get(b) + if not ca then + ca = natsort_conv(a) + natsort_cache:set(a, ca) + end + if not cb then + cb = natsort_conv(b) + natsort_cache:set(b, cb) + end + + return ca < cb or ca == cb and a < b +end +--]] + +--[[-- +Generates a natural sorting comparison function for table.sort. + +@param cache Optional, hashmap used to cache the processed strings to speed up sorting +@return The cmp function to feed to `table.sort` +@return The cache used (same object as the passed one, if any; will be created if not) + +@usage + +-- t is an array of strings, we don't want to keep the cache around +table.sort(t, sort.natsort_cmp()) + +-- t is an array of arrays, we want to sort the strings in the "text" field of the inner arrays, and we want to keep the cache around. +local cmp, cache +cmp, cache = sort.natsort_cmp(cache) +table.sort(t, function(a, b) return cmp(a.text, b.text) end) +]] +function sort.natsort_cmp(cache) + if not cache then + cache = {} + end + + local function natsort_conv(s) + local res, dot = "", "" + for n, m, c in tostring(s):gmatch("(0*(%d*))(.?)") do + if n == "" then + dot, c = "", dot..c + else + res = res..(dot == "" and ("%03d%s"):format(#m, m) + or "."..n) + dot, c = c:match("(%.?)(.*)") + end + res = res..c:gsub("[%z\1-\127\192-\255]", "\0%0") + end + cache[s] = res + return res + end + + local function natsort(a, b) + local ca, cb = cache[a] or natsort_conv(a), cache[b] or natsort_conv(b) + return ca < cb or ca == cb and a < b + end + return natsort, cache +end + +return sort diff --git a/frontend/ui/widget/filechooser.lua b/frontend/ui/widget/filechooser.lua index fce4d72ba..a3a3b01ed 100644 --- a/frontend/ui/widget/filechooser.lua +++ b/frontend/ui/widget/filechooser.lua @@ -12,6 +12,7 @@ local ffiUtil = require("ffi/util") local T = ffiUtil.template local _ = require("gettext") local Screen = Device.screen +local sort = require("sort") local util = require("util") local getFileNameSuffix = util.getFileNameSuffix local getFriendlySize = util.getFriendlySize @@ -232,14 +233,18 @@ function FileChooser:getSortingFunction(collate, reverse_collate) return a.percent_finished < b.percent_finished end elseif collate == "natural" then - -- adapted from: http://notebook.kulchenko.com/algorithms/alphanumeric-natural-sorting-for-humans-in-lua - local function addLeadingZeroes(d) - local dec, n = string.match(d, "(%.?)0*(.+)") - return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n) - end - sorting = function(a, b) - return tostring(a.name):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b.name) - < tostring(b.name):gsub("%.?%d+",addLeadingZeroes)..("%3d"):format(#a.name) + local natsort + -- Only keep the cache if we're an *instance* of FileChooser + if self ~= FileChooser then + natsort, self.natsort_cache = sort.natsort_cmp(self.natsort_cache) + sorting = function(a, b) + return natsort(a.name, b.name) + end + else + natsort = sort.natsort_cmp() + sorting = function(a, b) + return natsort(a.name, b.name) + end end else sorting = function(a, b) diff --git a/frontend/userpatch.lua b/frontend/userpatch.lua index 996db8333..415036180 100644 --- a/frontend/userpatch.lua +++ b/frontend/userpatch.lua @@ -25,6 +25,7 @@ end local lfs = require("libs/libkoreader-lfs") local logger = require("logger") +local sort = require("sort") local DataStorage = require("datastorage") -- the directory KOReader is installed in (and runs from) @@ -55,16 +56,7 @@ local function runUserPatchTasks(dir, priority) return -- nothing to do end - local function addLeadingZeroes(d) - local dec, n = string.match(d, "(%.?)0*(.+)") - return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n) - end - local function sorting(a, b) - return tostring(a):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b) - < tostring(b):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#a) - end - - table.sort(patches, sorting) + table.sort(patches, sort.natsort_cmp()) for i, entry in ipairs(patches) do local fullpath = dir .. "/" .. entry diff --git a/plugins/calibre.koplugin/search.lua b/plugins/calibre.koplugin/search.lua index 614a55077..3b91809d0 100644 --- a/plugins/calibre.koplugin/search.lua +++ b/plugins/calibre.koplugin/search.lua @@ -17,8 +17,9 @@ local WidgetContainer = require("ui/widget/container/widgetcontainer") local lfs = require("libs/libkoreader-lfs") local logger = require("logger") local rapidjson = require("rapidjson") -local util = require("util") +local sort = require("sort") local time = require("ui/time") +local util = require("util") local _ = require("gettext") local T = require("ffi/util").template @@ -159,6 +160,7 @@ end local CalibreSearch = WidgetContainer:extend{ books = {}, libraries = {}, + natsort_cache = {}, last_scan = {}, search_options = { "cache_metadata", @@ -271,7 +273,7 @@ function CalibreSearch:bookCatalog(t, option) entry.info = getBookInfo(book) entry.path = book.rootpath .. "/" .. book.lpath if series and book.series_index then - local major, minor = string.format("%05.2f", book.series_index):match("([^.]+).([^.]+)") + local major, minor = string.format("%05.2f", book.series_index):match("([^.]+)%.([^.]+)") if minor ~= "00" then subseries = true end @@ -448,7 +450,8 @@ function CalibreSearch:switchResults(t, title, is_child, page) title = _("Search results") end - table.sort(t, function(v1,v2) return v1.text < v2.text end) + local natsort = sort.natsort_cmp(self.natsort_cache) + table.sort(t, function(a, b) return natsort(a.text, b.text) end) if is_child then local path_entry = {} @@ -549,6 +552,7 @@ end function CalibreSearch:invalidateCache() self.cache_books:delete() self.books = {} + self.natsort_cache = {} end -- get metadata from cache or calibre files diff --git a/plugins/coverbrowser.koplugin/covermenu.lua b/plugins/coverbrowser.koplugin/covermenu.lua index 55084d618..d95818c60 100644 --- a/plugins/coverbrowser.koplugin/covermenu.lua +++ b/plugins/coverbrowser.koplugin/covermenu.lua @@ -683,7 +683,7 @@ function CoverMenu:onCloseWidget() end) nb_drawings_since_last_collectgarbage = 0 - -- Call original Menu:onCloseWidget (no subclass seems to override it) + -- Call the object's original onCloseWidget (i.e., Menu's, as none our our expected subclasses currently implement it) Menu.onCloseWidget(self) end