Cache: Rewrite based on lua-lru

Ought to be faster than our naive array-based approach.
Especially for the glyph cache, which holds a fairly large number of elements
and mostly sees cache hits.
(Few things are worse for performance in Lua than calling table.remove and
table.insert anywhere other than at the tail of an array, which the old code did all over the place :/).

DocCache: New module that is now an actual Cache instance instead of a
weird hack. It replaces "Cache" (the instance) as used across Document &
co.
It is the only Cache instance with on-disk persistence.

ImageCache: Update to new Cache.

GlyphCache: Update to new Cache.
Also, actually free glyph blitbuffers (bbs) on eviction.
reviewable/pr7635/r1
NiLuJe 3 years ago
parent ce624be8b8
commit 21b067792d

@ -1,6 +1,6 @@
local Cache = require("cache")
local ConfirmBox = require("ui/widget/confirmbox")
local Device = require("device")
local DocCache = require("document/doccache")
local Event = require("ui/event")
local Geom = require("ui/geometry")
local GestureRange = require("ui/gesturerange")
@ -458,9 +458,9 @@ function ReaderZooming:getZoom(pageno)
or self.zoom_factor
zoom = zoom_w * zoom_factor
end
if zoom and zoom > 10 and not Cache:willAccept(zoom * (self.dimen.w * self.dimen.h + 512)) then
if zoom and zoom > 10 and not DocCache:willAccept(zoom * (self.dimen.w * self.dimen.h + 512)) then
logger.dbg("zoom too large, adjusting")
while not Cache:willAccept(zoom * (self.dimen.w * self.dimen.h + 512)) do
while not DocCache:willAccept(zoom * (self.dimen.w * self.dimen.h + 512)) do
if zoom > 100 then
zoom = zoom - 50
elseif zoom > 10 then

@ -5,10 +5,10 @@ It works using data gathered from a document interface.
]]--
local BD = require("ui/bidi")
local Cache = require("cache")
local ConfirmBox = require("ui/widget/confirmbox")
local Device = require("device")
local DeviceListener = require("device/devicelistener")
local DocCache = require("document/doccache")
local DocSettings = require("docsettings")
local DocumentRegistry = require("document/documentregistry")
local Event = require("ui/event")
@ -733,8 +733,8 @@ function ReaderUI:onClose(full_refresh)
if self.dialog ~= self then
self:saveSettings()
end
-- serialize last used items for later launch
Cache:serialize()
-- Serialize the most recently displayed page for later launch
DocCache:serialize()
if self.document ~= nil then
logger.dbg("closing document")
self:notifyCloseDocument()

@ -1,10 +1,10 @@
--[[
A global LRU cache
A LRU cache, based on https://github.com/starius/lua-lru
]]--
local DataStorage = require("datastorage")
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
local lru = require("ffi/lru")
local md5 = require("ffi/sha2").md5
local CanvasContext = require("document/canvascontext")
@ -12,6 +12,52 @@ if CanvasContext.should_restrict_JIT then
jit.off(true, true)
end
-- Base prototype for LRU caches.
-- The fields below are defaults; instances created through Cache:new{} may override any of them.
local Cache = {
-- Cache configuration:
-- Max storage space, in bytes
size = 8 * 1024 * 1024,
-- Average item size is used to compute the amount of slots in the LRU
-- NOTE(review): 8196 is not a power of two; possibly meant to be 8192 (8 KiB) -- confirm before changing.
avg_itemsize = 8196,
-- Generally, only DocCache uses this
disk_cache = false,
-- Directory prefix for the on-disk cache files; only consulted by the disk cache code paths
cache_path = nil,
}
--- Create a new cache object that inherits from the receiver.
-- @param o optional table of field overrides (e.g. size, avg_itemsize); a fresh table is used when omitted
-- @return the new object; its init method has already been run if one is reachable
function Cache:new(o)
    local instance = o or {}
    setmetatable(instance, self)
    -- Make the receiver act as the prototype for missing fields and methods
    self.__index = self
    if instance.init then
        instance:init()
    end
    return instance
end
--- Set up the backing LRU store and, when enabled, the disk cache snapshot.
function Cache:init()
    -- The slot count is derived from the byte budget and the expected average item size
    self.slots = math.floor(self.size / self.avg_itemsize)
    self.cache = lru.new(self.slots, self.size)

    if not self.disk_cache then
        -- Without a disk cache there is nothing for our own check method to add,
        -- so alias it straight to the LRU's get.
        -- NOTE(review): callers invoke this as cache:check(key), so the receiver handed to
        -- the LRU's get is this Cache object rather than self.cache; this presumably relies
        -- on that get ignoring its first argument -- confirm against ffi/lru.
        self.check = self.cache.get
        return
    end

    -- Remember which items already exist on disk so that check can find them later
    self.cached = self:_getDiskCache()
end
--- Build a snapshot of the files currently present in the disk cache directory.
-- File names are used as keys as-is; elsewhere in this module the snapshot is
-- looked up by the md5 of a cache key, which is also how serialize names its files.
-- @return table mapping file name -> full path; empty when cache_path is unset
--         or does not point to a directory
function Cache:_getDiskCache()
    local cached = {}
    local dir = self.cache_path
    -- lfs.dir raises an error when handed nil or a path that is not a directory;
    -- treat that as "nothing cached on disk" instead of aborting cache setup.
    if not dir or lfs.attributes(dir, "mode") ~= "directory" then
        logger.warn("Cache: no usable disk cache directory at", dir)
        return cached
    end
    for key_md5 in lfs.dir(dir) do
        local file = dir .. key_md5
        -- Skip ".", ".." and any sub-directory: only regular files are cache entries
        if lfs.attributes(file, "mode") == "file" then
            cached[key_md5] = file
        end
    end
    return cached
end
-- For documentation purposes, here's a battle-tested shell version of calcFreeMem
--[[
if grep -q 'MemAvailable' /proc/meminfo ; then
@ -37,7 +83,7 @@ end
--]]
-- And here's our simplified Lua version...
local function calcFreeMem()
function Cache:_calcFreeMem()
local memtotal, memfree, memavailable, buffers, cached
local meminfo = io.open("/proc/meminfo", "r")
@ -101,98 +147,23 @@ local function calcFreeMem()
end
end
local function calcCacheMemSize()
local min = DGLOBAL_CACHE_SIZE_MINIMUM
local max = DGLOBAL_CACHE_SIZE_MAXIMUM
local calc = calcFreeMem() * (DGLOBAL_CACHE_FREE_PROPORTION or 0)
return math.min(max, math.max(min, calc))
end
local cache_path = DataStorage:getDataDir() .. "/cache/"
--[[
-- return a snapshot of disk cached items for subsequent check
--]]
local function getDiskCache()
local cached = {}
for key_md5 in lfs.dir(cache_path) do
local file = cache_path .. key_md5
if lfs.attributes(file, "mode") == "file" then
cached[key_md5] = file
end
end
return cached
end
local Cache = {
-- cache configuration:
max_memsize = calcCacheMemSize(),
-- cache state:
current_memsize = 0,
-- associative cache
cache = {},
-- this will hold the LRU order of the cache
cache_order = {},
-- disk Cache snapshot
cached = getDiskCache(),
}
function Cache:new(o)
o = o or {}
setmetatable(o, self)
self.__index = self
return o
end
-- internal: remove reference in cache_order list
function Cache:_unref(key)
for i = #self.cache_order, 1, -1 do
if self.cache_order[i] == key then
table.remove(self.cache_order, i)
break
end
end
end
-- internal: free cache item
function Cache:_free(key)
self.current_memsize = self.current_memsize - self.cache[key].size
self.cache[key]:onFree()
self.cache[key] = nil
end
-- drop an item named via key from the cache
function Cache:drop(key)
if not self.cache[key] then return end
self:_unref(key)
self:_free(key)
end
function Cache:insert(key, object)
-- make sure that one key only exists once: delete existing
self:drop(key)
-- If this object is single-handledly too large for the cache, we're done
if object.size > self.max_memsize then
-- If this object is single-handedly too large for the cache, don't cache it.
if not self:willAccept(object.size) then
logger.warn("Too much memory would be claimed by caching", key)
return
end
-- If inserting this obect would blow the cache's watermark,
-- start dropping least recently used items first.
-- (they are at the end of the cache_order array)
while self.current_memsize + object.size > self.max_memsize do
local removed_key = table.remove(self.cache_order)
if removed_key then
self:_free(removed_key)
else
logger.warn("Cache accounting is broken")
break
end
end
-- Insert new object in front of the LRU order
table.insert(self.cache_order, 1, key)
self.cache[key] = object
self.current_memsize = self.current_memsize + object.size
self.cache:set(key, object, object.size)
-- Accounting debugging
--[[
print(string.format("Cache %s (%d/%d) [%.2f/%.2f @ ~%db] inserted %db key: %s",
self,
self.cache:used_slots(), self.slots, self.cache:used_size() / 1024 / 1024,
self.size / 1024 / 1024, self.cache:used_size() / self.cache:used_slots(),
object.size, key))
--]]
end
--[[
@ -200,13 +171,9 @@ end
-- if ItemClass is given, disk cache is also checked.
--]]
function Cache:check(key, ItemClass)
if self.cache[key] then
if self.cache_order[1] ~= key then
-- Move key in front of the LRU list (i.e., MRU)
self:_unref(key)
table.insert(self.cache_order, 1, key)
end
return self.cache[key]
local value = self.cache:get(key)
if value then
return value
elseif ItemClass then
local cached = self.cached[md5(key)]
if cached then
@ -225,12 +192,21 @@ function Cache:check(key, ItemClass)
end
end
-- Shortcut when disk_cache is disabled
-- Looks `key` up in the in-memory LRU only and returns whatever the LRU's get returns;
-- the disk cache snapshot is never consulted here.
function Cache:get(key)
return self.cache:get(key)
end
function Cache:willAccept(size)
-- We only allow single objects to fill 75% of the cache
return size*4 < self.max_memsize*3
-- We only allow a single object to fill 75% of the cache
return size*4 < self.size*3
end
function Cache:serialize()
if not self.disk_cache then
return
end
-- Calculate the current disk cache size
local cached_size = 0
local sorted_caches = {}
@ -243,11 +219,9 @@ function Cache:serialize()
-- Only serialize the second most recently used cache item (as the MRU would be the *hinted* page).
local mru_key
local mru_found = 0
for _, key in ipairs(self.cache_order) do
local cache_item = self.cache[key]
for key, item in self.cache:pairs() do
-- Only dump cache items that actually request persistence
if cache_item.persistent and cache_item.dump then
if item.persistent and item.dump then
mru_key = key
mru_found = mru_found + 1
if mru_found >= 2 then
@ -257,12 +231,12 @@ function Cache:serialize()
end
end
if mru_key then
local cache_full_path = cache_path .. md5(mru_key)
local cache_full_path = self.cache_path .. md5(mru_key)
local cache_file_exists = lfs.attributes(cache_full_path)
if not cache_file_exists then
logger.dbg("Dumping cache item", mru_key)
local cache_item = self.cache[mru_key]
local cache_item = self.cache:get(mru_key)
local cache_size = cache_item:dump(cache_full_path)
if cache_size then
cached_size = cached_size + cache_size
@ -271,7 +245,7 @@ function Cache:serialize()
end
-- Allocate the same amount of storage to the disk cache as to the memory cache
while cached_size > self.max_memsize do
while cached_size > self.size do
-- discard the least recently used cache
local discarded = table.remove(sorted_caches)
if discarded then
@ -288,17 +262,12 @@ end
-- Blank the cache
function Cache:clear()
for k, _ in pairs(self.cache) do
self.cache[k]:onFree()
end
self.cache = {}
self.cache_order = {}
self.current_memsize = 0
self.cache:clear()
end
-- Terribly crappy workaround: evict half the cache if we appear to be redlining on free RAM...
function Cache:memoryPressureCheck()
local memfree, memtotal = calcFreeMem()
local memfree, memtotal = self:_calcFreeMem()
-- Nonsensical values? (!Linux), skip this.
if memtotal == 0 then
@ -308,10 +277,7 @@ function Cache:memoryPressureCheck()
-- If less than 20% of the total RAM is free, drop half the Cache...
if memfree / memtotal < 0.20 then
logger.warn("Running low on memory, evicting half of the cache...")
for i = #self.cache_order / 2, 1, -1 do
local removed_key = table.remove(self.cache_order)
self:_free(removed_key)
end
self.cache:chop()
-- And finish by forcing a GC sweep now...
collectgarbage()
@ -321,11 +287,19 @@ end
-- Refresh the disk snapshot (mainly used by ui/data/onetime_migration)
function Cache:refreshSnapshot()
self.cached = getDiskCache()
if not self.disk_cache then
return
end
self.cached = self:_getDiskCache()
end
-- Evict the disk cache (ditto)
function Cache:clearDiskCache()
if not self.disk_cache then
return
end
for _, file in pairs(self.cached) do
os.remove(file)
end

@ -16,6 +16,9 @@ function CacheItem:new(o)
return o
end
-- Called on eviction.
-- We generally use it to free C/FFI resources *immediately* (as opposed to relying on our Userdata/FFI finalizers to do it "later" on GC).
-- c.f., TileCacheItem, GlyphCacheItem & ImageCacheItem
-- The base implementation is intentionally a no-op; item classes that own such resources override it.
function CacheItem:onFree()
end

@ -0,0 +1,24 @@
--[[
"Global" LRU cache used by Document & friends.
--]]
local Cache = require("cache")
local CanvasContext = require("document/canvascontext")
local DataStorage = require("datastorage")
--- Work out the memory budget handed to DocCache as its `size`.
-- Takes the configured proportion (DGLOBAL_CACHE_FREE_PROPORTION, 0 when unset) of the
-- value reported by Cache:_calcFreeMem(), then clamps it between
-- DGLOBAL_CACHE_SIZE_MINIMUM and DGLOBAL_CACHE_SIZE_MAXIMUM.
-- @return the clamped budget
local function calcCacheMemSize()
    local lower_bound = DGLOBAL_CACHE_SIZE_MINIMUM
    local upper_bound = DGLOBAL_CACHE_SIZE_MAXIMUM
    local proportion = DGLOBAL_CACHE_FREE_PROPORTION or 0
    local wanted = Cache:_calcFreeMem() * proportion
    -- Raise to the floor first, then cap: the upper bound wins if the two bounds ever cross
    local at_least_min = math.max(lower_bound, wanted)
    return math.min(upper_bound, at_least_min)
end
-- The Cache instance shared by Document & friends; it is created with the disk cache enabled.
local doccache_size = calcCacheMemSize()
local screen_pixels = CanvasContext:getWidth() * CanvasContext:getHeight()
-- 4 when color rendering is enabled, 1 otherwise
local bytes_per_pixel = CanvasContext.is_color_rendering_enabled and 4 or 1
local DocCache = Cache:new{
    size = doccache_size,
    -- Average item size is a screen's worth of bitmap, mixed with a few much smaller tables
    -- (pgdim, pglinks, etc.), hence the division by 3
    avg_itemsize = math.floor(screen_pixels * bytes_per_pixel / 3),
    disk_cache = true,
    cache_path = DataStorage:getDataDir() .. "/cache/",
}
return DocCache

@ -1,7 +1,7 @@
local Blitbuffer = require("ffi/blitbuffer")
local Cache = require("cache")
local CacheItem = require("cacheitem")
local Configurable = require("configurable")
local DocCache = require("document/doccache")
local DrawContext = require("ffi/drawcontext")
local CanvasContext = require("document/canvascontext")
local Geom = require("ui/geometry")
@ -172,14 +172,14 @@ end
-- this might be overridden by a document implementation
function Document:getNativePageDimensions(pageno)
local hash = "pgdim|"..self.file.."|"..pageno
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if cached then
return cached[1]
end
local page = self._document:openPage(pageno)
local page_size_w, page_size_h = page:getSize(self.dc_null)
local page_size = Geom:new{ w = page_size_w, h = page_size_h }
Cache:insert(hash, CacheItem:new{ page_size })
DocCache:insert(hash, CacheItem:new{ page_size })
page:close()
return page_size
end
@ -372,10 +372,10 @@ end
function Document:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
local hash_excerpt
local hash = self:getFullPageHash(pageno, zoom, rotation, gamma, render_mode, self.render_color)
local tile = Cache:check(hash, TileCacheItem)
local tile = DocCache:check(hash, TileCacheItem)
if not tile then
hash_excerpt = hash.."|"..tostring(rect)
tile = Cache:check(hash_excerpt)
tile = DocCache:check(hash_excerpt)
end
if tile then return tile end
@ -385,7 +385,7 @@ function Document:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
-- this will be the size we actually render
local size = page_size
-- we prefer to render the full page, if it fits into cache
if not Cache:willAccept(size.w * size.h * (self.render_color and 4 or 1) + 512) then
if not DocCache:willAccept(size.w * size.h * (self.render_color and 4 or 1) + 512) then
-- whole page won't fit into cache
logger.dbg("rendering only part of the page")
--- @todo figure out how to better segment the page
@ -430,7 +430,7 @@ function Document:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
local page = self._document:openPage(pageno)
page:draw(dc, tile.bb, size.x, size.y, render_mode)
page:close()
Cache:insert(hash, tile)
DocCache:insert(hash, tile)
self:postRenderPage()
return tile
@ -440,7 +440,7 @@ end
--- @todo this should trigger a background operation
function Document:hintPage(pageno, zoom, rotation, gamma, render_mode)
--- @note: Crappy safeguard around memory issues like in #7627: if we're eating too much RAM, drop half the cache...
Cache:memoryPressureCheck()
DocCache:memoryPressureCheck()
logger.dbg("hinting page", pageno)
self:renderPage(pageno, nil, zoom, rotation, gamma, render_mode)

@ -2,11 +2,11 @@
Interface to k2pdfoptlib backend.
--]]
local Cache = require("cache")
local CacheItem = require("cacheitem")
local CanvasContext = require("document/canvascontext")
local DataStorage = require("datastorage")
local DEBUG = require("dbg")
local DocCache = require("document/doccache")
local Document = require("document/document")
local Geom = require("ui/geometry")
local KOPTContext = require("ffi/koptcontext")
@ -27,11 +27,9 @@ local KoptInterface = {
local ContextCacheItem = CacheItem:new{}
function ContextCacheItem:onFree()
if self.kctx.free then
KoptInterface:waitForContext(self.kctx)
logger.dbg("free koptcontext", self.kctx)
self.kctx:free()
end
KoptInterface:waitForContext(self.kctx)
logger.dbg("ContextCacheItem: free KOPTContext", self.kctx)
self.kctx:free()
end
function ContextCacheItem:dump(filename)
@ -186,7 +184,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
}
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "autobbox|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page = doc._document:openPage(pageno)
local kc = self:createContext(doc, pageno, bbox)
@ -198,7 +196,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
else
bbox = Document.getPageBBox(doc, pageno)
end
Cache:insert(hash, CacheItem:new{ autobbox = bbox, size = 160 })
DocCache:insert(hash, CacheItem:new{ autobbox = bbox, size = 160 })
page:close()
kc:free()
return bbox
@ -215,7 +213,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
local bbox = Document.getPageBBox(doc, pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "semiautobbox|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page = doc._document:openPage(pageno)
local kc = self:createContext(doc, pageno, bbox)
@ -233,7 +231,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
auto_bbox = bbox
end
page:close()
Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox, size = 160 })
DocCache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox, size = 160 })
kc:free()
return auto_bbox
else
@ -251,7 +249,7 @@ function KoptInterface:getCachedContext(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash, ContextCacheItem)
local cached = DocCache:check(kctx_hash, ContextCacheItem)
if not cached then
-- If kctx is not cached, create one and get reflowed bmp in foreground.
local kc = self:createContext(doc, pageno, bbox)
@ -267,7 +265,7 @@ function KoptInterface:getCachedContext(doc, pageno)
local fullwidth, fullheight = kc:getPageDim()
logger.dbg("reflowed page", pageno, "fullwidth:", fullwidth, "fullheight:", fullheight)
self.last_context_size = fullwidth * fullheight + 3072 -- estimation
Cache:insert(kctx_hash, ContextCacheItem:new{
DocCache:insert(kctx_hash, ContextCacheItem:new{
persistent = true,
size = self.last_context_size,
kctx = kc
@ -336,12 +334,12 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
local context_hash = self:getContextHash(doc, pageno, bbox)
local renderpg_hash = "renderpg|"..context_hash
local cached = Cache:check(renderpg_hash)
local cached = DocCache:check(renderpg_hash)
if not cached then
-- do the real reflowing if kctx has not been cached yet
local kc = self:getCachedContext(doc, pageno)
local fullwidth, fullheight = kc:getPageDim()
if not Cache:willAccept(fullwidth * fullheight) then
if not DocCache:willAccept(fullwidth * fullheight) then
-- whole page won't fit into cache
error("aborting, since we don't have enough cache for this page")
end
@ -352,7 +350,7 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
}
tile.bb = kc:dstToBlitBuffer()
tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
Cache:insert(renderpg_hash, tile)
DocCache:insert(renderpg_hash, tile)
return tile
else
return cached
@ -370,7 +368,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
local context_hash = self:getContextHash(doc, pageno, bbox)
local renderpg_hash = "renderoptpg|"..context_hash..zoom
local cached = Cache:check(renderpg_hash, TileCacheItem)
local cached = DocCache:check(renderpg_hash, TileCacheItem)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
local full_page_bbox = {
@ -399,7 +397,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
tile.bb = kc:dstToBlitBuffer()
tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
kc:free()
Cache:insert(renderpg_hash, tile)
DocCache:insert(renderpg_hash, tile)
return tile
else
return cached
@ -429,7 +427,7 @@ function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, rend
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash)
local cached = DocCache:check(kctx_hash)
if not cached then
local kc = self:createContext(doc, pageno, bbox)
local page = doc._document:openPage(pageno)
@ -438,7 +436,7 @@ function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, rend
kc:setPreCache()
page:reflow(kc, 0)
page:close()
Cache:insert(kctx_hash, ContextCacheItem:new{
DocCache:insert(kctx_hash, ContextCacheItem:new{
size = self.last_context_size or self.default_context_size,
kctx = kc,
})
@ -496,16 +494,16 @@ function KoptInterface:getReflowedTextBoxes(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "rfpgboxes|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
--kc:setDebug()
local fullwidth, fullheight = kc:getPageDim()
local boxes, nr_word = kc:getReflowedWordBoxes("dst", 0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ rfpgboxes = boxes, size = 192 * nr_word }) -- estimation
return boxes
end
else
@ -520,16 +518,16 @@ function KoptInterface:getNativeTextBoxes(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "nativepgboxes|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
--kc:setDebug()
local fullwidth, fullheight = kc:getPageDim()
local boxes, nr_word = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ nativepgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ nativepgboxes = boxes, size = 192 * nr_word }) -- estimation
return boxes
end
else
@ -546,17 +544,17 @@ function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "scratchrfpgboxes|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local reflowed_kc = self:waitForContext(cached.kctx)
local fullwidth, fullheight = reflowed_kc:getPageDim()
local kc = self:createContext(doc, pageno)
kc:copyDestBMP(reflowed_kc)
local boxes, nr_word = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes, size = 192 * nr_word }) -- estimation
kc:free()
return boxes
end
@ -589,7 +587,7 @@ Done by OCR pre-processing in Tesseract and Leptonica.
--]]
function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
local hash = "scratchnativepgboxes|"..doc.file.."|"..pageno
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
local bbox = {
@ -602,7 +600,7 @@ function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
local boxes, nr_word = kc:getNativeWordBoxes("src", 0, 0, page_size.w, page_size.h)
Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes, size = 192 * nr_word }) -- estimation
page:close()
kc:free()
return boxes
@ -619,7 +617,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "pageblocks|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
local full_page_bbox = {
@ -633,7 +631,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
kc:findPageBlocks()
Cache:insert(hash, CacheItem:new{ kctx = kc, size = 3072 }) -- estimation
DocCache:insert(hash, CacheItem:new{ kctx = kc, size = 3072 }) -- estimation
page:close()
kctx = kc
else
@ -646,8 +644,8 @@ end
Get word from OCR providing selected word box.
--]]
function KoptInterface:getOCRWord(doc, pageno, wbox)
if not Cache:check(self.ocrengine) then
Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
if not DocCache:check(self.ocrengine) then
DocCache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
end
if doc.configurable.text_wrap == 1 then
return self:getReflewOCRWord(doc, pageno, wbox.sbox)
@ -664,17 +662,17 @@ function KoptInterface:getReflewOCRWord(doc, pageno, rect)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "rfocrword|"..context_hash..rect.x..rect.y..rect.w..rect.h
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
local _, word = pcall(
kc.getTOCRWord, kc, "dst",
rect.x, rect.y, rect.w, rect.h,
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
Cache:insert(hash, CacheItem:new{ rfocrword = word, size = #word + 64 }) -- estimation
DocCache:insert(hash, CacheItem:new{ rfocrword = word, size = #word + 64 }) -- estimation
return word
end
else
@ -689,7 +687,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
self.ocr_lang = doc.configurable.doc_language
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
logger.dbg("hash", hash)
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local bbox = {
x0 = rect.x - math.floor(rect.h * 0.3),
@ -707,7 +705,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
kc.getTOCRWord, kc, "src",
0, 0, word_w, word_h,
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
Cache:insert(hash, CacheItem:new{ ocrword = word, size = #word + 64 }) -- estimation
DocCache:insert(hash, CacheItem:new{ ocrword = word, size = #word + 64 }) -- estimation
logger.dbg("word", word)
page:close()
kc:free()
@ -721,8 +719,8 @@ end
Get text from OCR providing selected text boxes.
--]]
function KoptInterface:getOCRText(doc, pageno, tboxes)
if not Cache:check(self.ocrengine) then
Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
if not DocCache:check(self.ocrengine) then
DocCache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
end
logger.info("Not implemented yet")
end

@ -1,6 +1,6 @@
local Cache = require("cache")
local CacheItem = require("cacheitem")
local CanvasContext = require("document/canvascontext")
local DocCache = require("document/doccache")
local DocSettings = require("docsettings")
local Document = require("document/document")
local DrawContext = require("ffi/drawcontext")
@ -139,7 +139,7 @@ end
function PdfDocument:getUsedBBox(pageno)
local hash = "pgubbox|"..self.file.."|"..self.reflowable_font_size.."|"..pageno
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if cached then
return cached.ubbox
end
@ -152,7 +152,7 @@ function PdfDocument:getUsedBBox(pageno)
if used.x1 > pwidth then used.x1 = pwidth end
if used.y0 < 0 then used.y0 = 0 end
if used.y1 > pheight then used.y1 = pheight end
Cache:insert(hash, CacheItem:new{
DocCache:insert(hash, CacheItem:new{
ubbox = used,
size = 256, -- might be closer to 160
})
@ -162,13 +162,13 @@ end
function PdfDocument:getPageLinks(pageno)
local hash = "pglinks|"..self.file.."|"..self.reflowable_font_size.."|"..pageno
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if cached then
return cached.links
end
local page = self._document:openPage(pageno)
local links = page:getPageLinks()
Cache:insert(hash, CacheItem:new{
DocCache:insert(hash, CacheItem:new{
links = links,
size = 64 + (8 * 32 * #links),
})

@ -6,10 +6,8 @@ local logger = require("logger")
local TileCacheItem = CacheItem:new{}
function TileCacheItem:onFree()
if self.bb.free then
logger.dbg("free blitbuffer", self.bb)
self.bb:free()
end
logger.dbg("TileCacheItem: free blitbuffer", self.bb)
self.bb:free()
end
--- @note: Perhaps one day we'll be able to teach bitser or string.buffer about custom structs with pointers to buffers,

@ -171,9 +171,9 @@ if last_migration_date < 20210409 then
logger.warn("os.rename:", err)
end
-- Make sure Cache gets the memo
local Cache = require("cache")
Cache:refreshSnapshot()
-- Make sure DocCache gets the memo
local DocCache = require("document/doccache")
DocCache:refreshSnapshot()
end
-- Calibre, cache migration, https://github.com/koreader/koreader/pull/7528
@ -193,9 +193,9 @@ if last_migration_date < 20210412 then
logger.warn("os.rename:", err)
end
-- Make sure Cache gets the memo
local Cache = require("cache")
Cache:refreshSnapshot()
-- Make sure DocCache gets the memo
local DocCache = require("document/doccache")
DocCache:refreshSnapshot()
end
-- Calibre, cache encoding format change, https://github.com/koreader/koreader/pull/7543
@ -209,12 +209,12 @@ if last_migration_date < 20210414 then
end
end
-- Cache, migration to Persist, https://github.com/koreader/koreader/pull/7624
-- DocCache, migration to Persist, https://github.com/koreader/koreader/pull/7624
if last_migration_date < 20210503 then
logger.info("Performing one-time migration for 20210503")
local Cache = require("cache")
Cache:clearDiskCache()
local DocCache = require("document/doccache")
DocCache:clearDiskCache()
end
-- We're done, store the current migration date

@ -24,13 +24,18 @@ end
local RenderText = {}
local GlyphCache = Cache:new{
max_memsize = 512*1024,
current_memsize = 0,
cache = {},
-- this will hold the LRU order of the cache
cache_order = {}
-- 1 MiB of glyph cache, with 1024 slots
size = 1 * 1024 * 1024,
avg_itemsize = 1024,
}
-- Cache item type for rendered glyphs; it carries the glyph's blitbuffer in its `bb` field.
local GlyphCacheItem = CacheItem:new{}
-- Eviction hook: release the glyph's blitbuffer right away instead of waiting for the GC.
-- NOTE(review): the same `bb` is also referenced by the rendered_glyph table that getGlyph /
-- getGlyphByIndex return on a cache miss; presumably no caller keeps that table around
-- past the item's eviction -- confirm.
function GlyphCacheItem:onFree()
logger.dbg("GlyphCacheItem: free blitbuffer", self.bb)
self.bb:free()
end
-- iterator over UTF8 encoded characters in a string
local function utf8Chars(input_text)
local function read_next_glyph(input, pos)
@ -92,7 +97,7 @@ function RenderText:getGlyph(face, charcode, bold)
local glyph = GlyphCache:check(hash)
if glyph then
-- cache hit
return glyph[1]
return glyph
end
local rendered_glyph = face.ftface:renderGlyph(charcode, bold)
if face.ftface:checkGlyph(charcode) == 0 then
@ -112,8 +117,15 @@ function RenderText:getGlyph(face, charcode, bold)
logger.warn("error rendering glyph (charcode=", charcode, ") for face", face)
return
end
glyph = CacheItem:new{rendered_glyph}
glyph.size = tonumber(glyph[1].bb.stride) * glyph[1].bb.h + 320
glyph = GlyphCacheItem:new{
bb = rendered_glyph.bb,
l = rendered_glyph.l,
t = rendered_glyph.t,
r = rendered_glyph.r,
ax = rendered_glyph.ax,
ay = rendered_glyph.ay,
}
glyph.size = tonumber(glyph.bb.stride) * glyph.bb.h + 320
GlyphCache:insert(hash, glyph)
return rendered_glyph
end
@ -306,15 +318,22 @@ function RenderText:getGlyphByIndex(face, glyphindex, bold)
local glyph = GlyphCache:check(hash)
if glyph then
-- cache hit
return glyph[1]
return glyph
end
local rendered_glyph = face.ftface:renderGlyphByIndex(glyphindex, bold and face.embolden_half_strength)
if not rendered_glyph then
logger.warn("error rendering glyph (glyphindex=", glyphindex, ") for face", face)
return
end
glyph = CacheItem:new{rendered_glyph}
glyph.size = tonumber(glyph[1].bb.stride) * glyph[1].bb.h + 320
glyph = GlyphCacheItem:new{
bb = rendered_glyph.bb,
l = rendered_glyph.l,
t = rendered_glyph.t,
r = rendered_glyph.r,
ax = rendered_glyph.ax,
ay = rendered_glyph.ay,
}
glyph.size = tonumber(glyph.bb.stride) * glyph.bb.h + 320
GlyphCache:insert(hash, glyph)
return rendered_glyph
end

@ -39,20 +39,19 @@ end
local DPI_SCALE = get_dpi_scale()
local ImageCache = Cache:new{
max_memsize = 8*1024*1024, -- 8M of image cache
current_memsize = 0,
cache = {},
-- this will hold the LRU order of the cache
cache_order = {}
-- 8 MiB of image cache, with 128 slots
-- Overwhelmingly used for our icons, which are tiny in size, and not very numerous (< 100),
-- but also by ImageViewer (on files, which we never do), and ScreenSaver (again, on image files, but not covers),
-- hence the leeway.
size = 8 * 1024 * 1024,
avg_itemsize = 64 * 1024,
}
local ImageCacheItem = CacheItem:new{}
function ImageCacheItem:onFree()
if self.bb.free then
logger.dbg("free image blitbuffer", self.bb)
self.bb:free()
end
logger.dbg("ImageCacheItem: free blitbuffer", self.bb)
self.bb:free()
end
local ImageWidget = Widget:new{

@ -25,15 +25,14 @@ local _ = require("gettext")
local T = require("ffi/util").template
local CatalogCacheItem = CacheItem:new{
size = 1024, -- fixed size for catalog item
size = 1024, -- fixed size for catalog items
}
-- cache catalog parsed from feed xml
local CatalogCache = Cache:new{
max_memsize = 20*1024, -- keep only 20 items
current_memsize = 0,
cache = {},
cache_order = {},
-- Make it 20 slots
size = 20 * CatalogCacheItem.size,
avg_itemsize = CatalogCacheItem.size,
}
local OPDSBrowser = Menu:extend{

@ -1,11 +1,11 @@
describe("Cache module", function()
local DocumentRegistry, Cache
local DocumentRegistry, DocCache
local doc
local max_page = 1
setup(function()
require("commonrequire")
DocumentRegistry = require("document/documentregistry")
Cache = require("cache")
DocCache = require("document/doccache")
local sample_pdf = "spec/front/unit/data/sample.pdf"
doc = DocumentRegistry:openDocument(sample_pdf)
@ -15,22 +15,22 @@ describe("Cache module", function()
end)
it("should clear cache", function()
Cache:clear()
DocCache:clear()
end)
it("should serialize blitbuffer", function()
for pageno = 1, math.min(max_page, doc.info.number_of_pages) do
doc:renderPage(pageno, nil, 1, 0, 1.0, 0)
Cache:serialize()
DocCache:serialize()
end
Cache:clear()
DocCache:clear()
end)
it("should deserialize blitbuffer", function()
for pageno = 1, math.min(max_page, doc.info.number_of_pages) do
doc:hintPage(pageno, 1, 0, 1.0, 0)
end
Cache:clear()
DocCache:clear()
end)
it("should serialize koptcontext", function()
@ -38,9 +38,9 @@ describe("Cache module", function()
for pageno = 1, math.min(max_page, doc.info.number_of_pages) do
doc:renderPage(pageno, nil, 1, 0, 1.0, 0)
doc:getPageDimensions(pageno)
Cache:serialize()
DocCache:serialize()
end
Cache:clear()
DocCache:clear()
doc.configurable.text_wrap = 0
end)
@ -48,6 +48,6 @@ describe("Cache module", function()
for pageno = 1, math.min(max_page, doc.info.number_of_pages) do
doc:renderPage(pageno, nil, 1, 0, 1.0, 0)
end
Cache:clear()
DocCache:clear()
end)
end)

@ -1,10 +1,10 @@
describe("Koptinterface module", function()
local DocumentRegistry, Koptinterface, Cache
local DocCache, DocumentRegistry, Koptinterface
setup(function()
require("commonrequire")
DocCache = require("document/doccache")
DocumentRegistry = require("document/documentregistry")
Koptinterface = require("document/koptinterface")
Cache = require("cache")
end)
local tall_pdf = "spec/front/unit/data/tall.pdf"
@ -19,7 +19,7 @@ describe("Koptinterface module", function()
doc.configurable.text_wrap = 0
complex_doc.configurable.text_wrap = 0
paper_doc.configurable.text_wrap = 0
Cache:clear()
DocCache:clear()
end)
after_each(function()

Loading…
Cancel
Save