Misc: Natural sorting refactor (#10023)

* Move natural sorting algo to a dedicated sort module to avoid code duplication
* Use a slightly more accurate algorithm, and speed it up by caching intermediary strings
* Calibre: Use natural sorting in metadata search (fix #10009)
reviewable/pr10036/r1
NiLuJe 1 year ago committed by GitHub
parent bb900aa9a7
commit 7863a7ad70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -219,7 +219,13 @@ function FileSearcher:showSearchResults()
local collate = G_reader_settings:readSetting("collate") or "strcoll"
local reverse_collate = G_reader_settings:isTrue("reverse_collate")
local sorting = FileChooser:getSortingFunction(collate, reverse_collate)
-- If we have a FileChooser instance, use it, to be able to make use of its natsort cache
local sorting
if self.ui.file_chooser then
sorting = self.ui.file_chooser:getSortingFunction(collate, reverse_collate)
else
sorting = FileChooser:getSortingFunction(collate, reverse_collate)
end
table.sort(self.results, sorting)
self.search_menu:switchItemTable(T(_("Search results (%1)"), #self.results), self.results)

@ -0,0 +1,133 @@
--[[--
This module contains a collection of comparison functions (or factories for comparison functions) for `table.sort`.
@module sort
]]
local sort = {}
--[[
Natural sorting functions, for use with table.sort
<http://notebook.kulchenko.com/algorithms/alphanumeric-natural-sorting-for-humans-in-lua>
--]]
-- Original implementation by Paul Kulchenko
--[[
local function addLeadingZeroes(d)
local dec, n = string.match(d, "(%.?)0*(.+)")
return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n)
end
function sort.natsort(a, b)
return tostring(a):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b)
< tostring(b):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#a)
end
--]]
-- Hardened (but more expensive) implementation by Egor Skriptunoff, with a UTF-8 tweak by Paul Kulchenko
--[[
local function natsort_conv(s)
local res, dot = "", ""
for n, m, c in tostring(s):gmatch("(0*(%d*))(.?)") do
if n == "" then
dot, c = "", dot..c
else
res = res..(dot == "" and ("%03d%s"):format(#m, m)
or "."..n)
dot, c = c:match("(%.?)(.*)")
end
res = res..c:gsub("[%z\1-\127\192-\255]", "\0%0")
end
return res
end
--]]
-- The above conversion is *fairly* expensive,
-- and table.sort ensures that it'll be called on identical strings multiple times,
-- so keeping a cache of massaged strings makes sense.
-- <https://github.com/koreader/koreader/pull/10023#discussion_r1069776657>
-- We can rely on LRU to avoid explicit cache maintenance concerns
-- (given the type of content we massage, the memory impact is fairly insignificant).
-- The extra persistence this affords us also happens to help with the FM use-case ;).
-- Dumb persistent hash-map => cold, ~200 to 250ms; hot: ~150ms (which roughly matches sorting by numerical file attributes).
-- (Numbers are from the FM sorting 350 entries (mostly composed of author names) on an H2O; an uncached run takes ~650ms).
--[[
local natsort_cache = {}
function sort.natsort(a, b)
local ca, cb = natsort_cache[a], natsort_cache[b]
if not ca then
ca = natsort_conv(a)
natsort_cache[a] = ca
end
if not cb then
cb = natsort_conv(b)
natsort_cache[b] = cb
end
return ca < cb or ca == cb and a < b
end
--]]
-- LRU => cold, ~200 to 250ms; hot ~150 to 175ms (which is barely any slower than a dumb hash-map, yay, LRU and LuaJIT magic).
--[[
local lru = require("ffi/lru")
local natsort_cache = lru.new(1024, nil, false)
function sort.natsort(a, b)
local ca, cb = natsort_cache:get(a), natsort_cache:get(b)
if not ca then
ca = natsort_conv(a)
natsort_cache:set(a, ca)
end
if not cb then
cb = natsort_conv(b)
natsort_cache:set(b, cb)
end
return ca < cb or ca == cb and a < b
end
--]]
--[[--
Builds a natural-order comparison function (e.g., "file2" sorts before "file10") for `table.sort`.

Every compared value is converted into a sortable key string; the keys are memoized in `cache`
(indexed by the compared value itself), so a value that is compared many times is only converted once.

@param cache Optional, table used to memoize the converted keys to speed up sorting
@return The comparison function to feed to `table.sort`
@return The cache in use (the very table that was passed, if any; a newly created one otherwise)
@usage

-- t is an array of strings, we don't want to keep the cache around
table.sort(t, sort.natsort_cmp())

-- t is an array of arrays, we want to sort the strings in the "text" field of the inner arrays, and we want to keep the cache around.
local cmp, cache
cmp, cache = sort.natsort_cmp(cache)
table.sort(t, function(a, b) return cmp(a.text, b.text) end)
]]
function sort.natsort_cmp(cache)
    cache = cache or {}

    -- Converts a value into its sortable key, and stores that key in the cache before returning it.
    local function to_key(value)
        local parts = {}
        -- A "." seen right after a digit run is held back until we know what follows it
        local pending_dot = ""
        -- digits: a run of digits (leading zeroes included); significant: the same run without its leading zeroes;
        -- trailer: the single character that follows (empty at the end of the string)
        for digits, significant, trailer in tostring(value):gmatch("(0*(%d*))(.?)") do
            if digits ~= "" then
                if pending_dot == "" then
                    -- Not preceded by a dot: emit the zero-padded digit count, then the digits (sans leading zeroes)
                    parts[#parts + 1] = ("%03d%s"):format(#significant, significant)
                else
                    -- Preceded by a dot: emit the dot and the digits verbatim (leading zeroes kept)
                    parts[#parts + 1] = "." .. digits
                end
                -- If the trailer is a dot, hold it back for the next iteration
                pending_dot, trailer = trailer:match("(%.?)(.*)")
            else
                -- No digits here: flush the held back dot (if any) in front of the trailer
                trailer = pending_dot .. trailer
                pending_dot = ""
            end
            -- Prefix every byte outside of the 128-191 range (i.e., anything but an UTF-8 continuation byte) with a NUL;
            -- only the first return value of gsub (the string) is kept.
            local escaped = trailer:gsub("[%z\1-\127\192-\255]", "\0%0")
            parts[#parts + 1] = escaped
        end

        local key = table.concat(parts)
        cache[value] = key
        return key
    end

    -- The actual comparison function: orders by key first, and falls back to the raw values on identical keys.
    local function compare(a, b)
        local key_a = cache[a]
        if not key_a then
            key_a = to_key(a)
        end
        local key_b = cache[b]
        if not key_b then
            key_b = to_key(b)
        end

        if key_a ~= key_b then
            return key_a < key_b
        end
        -- Identical keys (e.g., "1" vs. "01"): break the tie on the original values
        return a < b
    end

    return compare, cache
end
return sort

@ -12,6 +12,7 @@ local ffiUtil = require("ffi/util")
local T = ffiUtil.template
local _ = require("gettext")
local Screen = Device.screen
local sort = require("sort")
local util = require("util")
local getFileNameSuffix = util.getFileNameSuffix
local getFriendlySize = util.getFriendlySize
@ -232,14 +233,18 @@ function FileChooser:getSortingFunction(collate, reverse_collate)
return a.percent_finished < b.percent_finished
end
elseif collate == "natural" then
-- adapted from: http://notebook.kulchenko.com/algorithms/alphanumeric-natural-sorting-for-humans-in-lua
local function addLeadingZeroes(d)
local dec, n = string.match(d, "(%.?)0*(.+)")
return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n)
end
sorting = function(a, b)
return tostring(a.name):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b.name)
< tostring(b.name):gsub("%.?%d+",addLeadingZeroes)..("%3d"):format(#a.name)
local natsort
-- Only keep the cache if we're an *instance* of FileChooser
if self ~= FileChooser then
natsort, self.natsort_cache = sort.natsort_cmp(self.natsort_cache)
sorting = function(a, b)
return natsort(a.name, b.name)
end
else
natsort = sort.natsort_cmp()
sorting = function(a, b)
return natsort(a.name, b.name)
end
end
else
sorting = function(a, b)

@ -25,6 +25,7 @@ end
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
local sort = require("sort")
local DataStorage = require("datastorage")
-- the directory KOReader is installed in (and runs from)
@ -55,16 +56,7 @@ local function runUserPatchTasks(dir, priority)
return -- nothing to do
end
local function addLeadingZeroes(d)
local dec, n = string.match(d, "(%.?)0*(.+)")
return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n)
end
local function sorting(a, b)
return tostring(a):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b)
< tostring(b):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#a)
end
table.sort(patches, sorting)
table.sort(patches, sort.natsort_cmp())
for i, entry in ipairs(patches) do
local fullpath = dir .. "/" .. entry

@ -17,8 +17,9 @@ local WidgetContainer = require("ui/widget/container/widgetcontainer")
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
local rapidjson = require("rapidjson")
local util = require("util")
local sort = require("sort")
local time = require("ui/time")
local util = require("util")
local _ = require("gettext")
local T = require("ffi/util").template
@ -159,6 +160,7 @@ end
local CalibreSearch = WidgetContainer:extend{
books = {},
libraries = {},
natsort_cache = {},
last_scan = {},
search_options = {
"cache_metadata",
@ -271,7 +273,7 @@ function CalibreSearch:bookCatalog(t, option)
entry.info = getBookInfo(book)
entry.path = book.rootpath .. "/" .. book.lpath
if series and book.series_index then
local major, minor = string.format("%05.2f", book.series_index):match("([^.]+).([^.]+)")
local major, minor = string.format("%05.2f", book.series_index):match("([^.]+)%.([^.]+)")
if minor ~= "00" then
subseries = true
end
@ -448,7 +450,8 @@ function CalibreSearch:switchResults(t, title, is_child, page)
title = _("Search results")
end
table.sort(t, function(v1,v2) return v1.text < v2.text end)
local natsort = sort.natsort_cmp(self.natsort_cache)
table.sort(t, function(a, b) return natsort(a.text, b.text) end)
if is_child then
local path_entry = {}
@ -549,6 +552,7 @@ end
-- Discards the cached state held by this object:
-- calls delete() on self.cache_books, then replaces self.books and self.natsort_cache with fresh empty tables.
function CalibreSearch:invalidateCache()
self.cache_books:delete()
self.books = {}
-- natsort_cache is the table handed to sort.natsort_cmp when sorting results; start over with an empty one
self.natsort_cache = {}
end
-- get metadata from cache or calibre files

@ -683,7 +683,7 @@ function CoverMenu:onCloseWidget()
end)
nb_drawings_since_last_collectgarbage = 0
-- Call original Menu:onCloseWidget (no subclass seems to override it)
-- Call the object's original onCloseWidget (i.e., Menu's, as none of our expected subclasses currently implement it)
Menu.onCloseWidget(self)
end

Loading…
Cancel
Save