@ -2,11 +2,11 @@
Interface to k2pdfoptlib backend .
--]]
local Cache = require ( " cache " )
local CacheItem = require ( " cacheitem " )
local CanvasContext = require ( " document/canvascontext " )
local DataStorage = require ( " datastorage " )
local DEBUG = require ( " dbg " )
local DocCache = require ( " document/doccache " )
local Document = require ( " document/document " )
local Geom = require ( " ui/geometry " )
local KOPTContext = require ( " ffi/koptcontext " )
@ -27,11 +27,9 @@ local KoptInterface = {
local ContextCacheItem = CacheItem : new { }
function ContextCacheItem : onFree ( )
if self.kctx . free then
KoptInterface : waitForContext ( self.kctx )
logger.dbg ( " free koptc ontext" , self.kctx )
logger.dbg ( " ContextCacheItem: free KOPTC ontext" , self.kctx )
self.kctx : free ( )
end
end
function ContextCacheItem : dump ( filename )
@ -186,7 +184,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
}
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " autobbox| " .. context_hash
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local page = doc._document : openPage ( pageno )
local kc = self : createContext ( doc , pageno , bbox )
@ -198,7 +196,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
else
bbox = Document.getPageBBox ( doc , pageno )
end
Cache: insert ( hash , CacheItem : new { autobbox = bbox , size = 160 } )
Doc Cache: insert ( hash , CacheItem : new { autobbox = bbox , size = 160 } )
page : close ( )
kc : free ( )
return bbox
@ -215,7 +213,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
local bbox = Document.getPageBBox ( doc , pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " semiautobbox| " .. context_hash
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local page = doc._document : openPage ( pageno )
local kc = self : createContext ( doc , pageno , bbox )
@ -233,7 +231,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
auto_bbox = bbox
end
page : close ( )
Cache: insert ( hash , CacheItem : new { semiautobbox = auto_bbox , size = 160 } )
Doc Cache: insert ( hash , CacheItem : new { semiautobbox = auto_bbox , size = 160 } )
kc : free ( )
return auto_bbox
else
@ -251,7 +249,7 @@ function KoptInterface:getCachedContext(doc, pageno)
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local kctx_hash = " kctx| " .. context_hash
local cached = Cache: check ( kctx_hash , ContextCacheItem )
local cached = Doc Cache: check ( kctx_hash , ContextCacheItem )
if not cached then
-- If kctx is not cached, create one and get reflowed bmp in foreground.
local kc = self : createContext ( doc , pageno , bbox )
@ -267,7 +265,7 @@ function KoptInterface:getCachedContext(doc, pageno)
local fullwidth , fullheight = kc : getPageDim ( )
logger.dbg ( " reflowed page " , pageno , " fullwidth: " , fullwidth , " fullheight: " , fullheight )
self.last_context_size = fullwidth * fullheight + 3072 -- estimation
Cache: insert ( kctx_hash , ContextCacheItem : new {
Doc Cache: insert ( kctx_hash , ContextCacheItem : new {
persistent = true ,
size = self.last_context_size ,
kctx = kc
@ -336,12 +334,12 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
local context_hash = self : getContextHash ( doc , pageno , bbox )
local renderpg_hash = " renderpg| " .. context_hash
local cached = Cache: check ( renderpg_hash )
local cached = Doc Cache: check ( renderpg_hash )
if not cached then
-- do the real reflowing if kctx has not been cached yet
local kc = self : getCachedContext ( doc , pageno )
local fullwidth , fullheight = kc : getPageDim ( )
if not Cache: willAccept ( fullwidth * fullheight ) then
if not Doc Cache: willAccept ( fullwidth * fullheight ) then
-- whole page won't fit into cache
error ( " aborting, since we don't have enough cache for this page " )
end
@ -352,7 +350,7 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
}
tile.bb = kc : dstToBlitBuffer ( )
tile.size = tonumber ( tile.bb . stride ) * tile.bb . h + 512 -- estimation
Cache: insert ( renderpg_hash , tile )
Doc Cache: insert ( renderpg_hash , tile )
return tile
else
return cached
@ -370,7 +368,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
local context_hash = self : getContextHash ( doc , pageno , bbox )
local renderpg_hash = " renderoptpg| " .. context_hash .. zoom
local cached = Cache: check ( renderpg_hash , TileCacheItem )
local cached = Doc Cache: check ( renderpg_hash , TileCacheItem )
if not cached then
local page_size = Document.getNativePageDimensions ( doc , pageno )
local full_page_bbox = {
@ -399,7 +397,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
tile.bb = kc : dstToBlitBuffer ( )
tile.size = tonumber ( tile.bb . stride ) * tile.bb . h + 512 -- estimation
kc : free ( )
Cache: insert ( renderpg_hash , tile )
Doc Cache: insert ( renderpg_hash , tile )
return tile
else
return cached
@ -429,7 +427,7 @@ function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, rend
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local kctx_hash = " kctx| " .. context_hash
local cached = Cache: check ( kctx_hash )
local cached = Doc Cache: check ( kctx_hash )
if not cached then
local kc = self : createContext ( doc , pageno , bbox )
local page = doc._document : openPage ( pageno )
@ -438,7 +436,7 @@ function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, rend
kc : setPreCache ( )
page : reflow ( kc , 0 )
page : close ( )
Cache: insert ( kctx_hash , ContextCacheItem : new {
Doc Cache: insert ( kctx_hash , ContextCacheItem : new {
size = self.last_context_size or self.default_context_size ,
kctx = kc ,
} )
@ -496,16 +494,16 @@ function KoptInterface:getReflowedTextBoxes(doc, pageno)
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " rfpgboxes| " .. context_hash
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local kctx_hash = " kctx| " .. context_hash
cached = Cache: check ( kctx_hash )
cached = Doc Cache: check ( kctx_hash )
if cached then
local kc = self : waitForContext ( cached.kctx )
--kc:setDebug()
local fullwidth , fullheight = kc : getPageDim ( )
local boxes , nr_word = kc : getReflowedWordBoxes ( " dst " , 0 , 0 , fullwidth , fullheight )
Cache: insert ( hash , CacheItem : new { rfpgboxes = boxes , size = 192 * nr_word } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { rfpgboxes = boxes , size = 192 * nr_word } ) -- estimation
return boxes
end
else
@ -520,16 +518,16 @@ function KoptInterface:getNativeTextBoxes(doc, pageno)
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " nativepgboxes| " .. context_hash
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local kctx_hash = " kctx| " .. context_hash
cached = Cache: check ( kctx_hash )
cached = Doc Cache: check ( kctx_hash )
if cached then
local kc = self : waitForContext ( cached.kctx )
--kc:setDebug()
local fullwidth , fullheight = kc : getPageDim ( )
local boxes , nr_word = kc : getNativeWordBoxes ( " dst " , 0 , 0 , fullwidth , fullheight )
Cache: insert ( hash , CacheItem : new { nativepgboxes = boxes , size = 192 * nr_word } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { nativepgboxes = boxes , size = 192 * nr_word } ) -- estimation
return boxes
end
else
@ -546,17 +544,17 @@ function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno)
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " scratchrfpgboxes| " .. context_hash
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local kctx_hash = " kctx| " .. context_hash
cached = Cache: check ( kctx_hash )
cached = Doc Cache: check ( kctx_hash )
if cached then
local reflowed_kc = self : waitForContext ( cached.kctx )
local fullwidth , fullheight = reflowed_kc : getPageDim ( )
local kc = self : createContext ( doc , pageno )
kc : copyDestBMP ( reflowed_kc )
local boxes , nr_word = kc : getNativeWordBoxes ( " dst " , 0 , 0 , fullwidth , fullheight )
Cache: insert ( hash , CacheItem : new { scratchrfpgboxes = boxes , size = 192 * nr_word } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { scratchrfpgboxes = boxes , size = 192 * nr_word } ) -- estimation
kc : free ( )
return boxes
end
@ -589,7 +587,7 @@ Done by OCR pre-processing in Tesseract and Leptonica.
--]]
function KoptInterface : getNativeTextBoxesFromScratch ( doc , pageno )
local hash = " scratchnativepgboxes| " .. doc.file .. " | " .. pageno
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local page_size = Document.getNativePageDimensions ( doc , pageno )
local bbox = {
@ -602,7 +600,7 @@ function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
local page = doc._document : openPage ( pageno )
page : getPagePix ( kc )
local boxes , nr_word = kc : getNativeWordBoxes ( " src " , 0 , 0 , page_size.w , page_size.h )
Cache: insert ( hash , CacheItem : new { scratchnativepgboxes = boxes , size = 192 * nr_word } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { scratchnativepgboxes = boxes , size = 192 * nr_word } ) -- estimation
page : close ( )
kc : free ( )
return boxes
@ -619,7 +617,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " pageblocks| " .. context_hash
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local page_size = Document.getNativePageDimensions ( doc , pageno )
local full_page_bbox = {
@ -633,7 +631,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
local page = doc._document : openPage ( pageno )
page : getPagePix ( kc )
kc : findPageBlocks ( )
Cache: insert ( hash , CacheItem : new { kctx = kc , size = 3072 } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { kctx = kc , size = 3072 } ) -- estimation
page : close ( )
kctx = kc
else
@ -646,8 +644,8 @@ end
Get word from OCR providing selected word box .
--]]
function KoptInterface : getOCRWord ( doc , pageno , wbox )
if not Cache: check ( self.ocrengine ) then
Cache: insert ( self.ocrengine , OCREngine : new { ocrengine = KOPTContext.new ( ) , size = 3072 } ) -- estimation
if not Doc Cache: check ( self.ocrengine ) then
Doc Cache: insert ( self.ocrengine , OCREngine : new { ocrengine = KOPTContext.new ( ) , size = 3072 } ) -- estimation
end
if doc.configurable . text_wrap == 1 then
return self : getReflewOCRWord ( doc , pageno , wbox.sbox )
@ -664,17 +662,17 @@ function KoptInterface:getReflewOCRWord(doc, pageno, rect)
local bbox = doc : getPageBBox ( pageno )
local context_hash = self : getContextHash ( doc , pageno , bbox )
local hash = " rfocrword| " .. context_hash .. rect.x .. rect.y .. rect.w .. rect.h
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local kctx_hash = " kctx| " .. context_hash
cached = Cache: check ( kctx_hash )
cached = Doc Cache: check ( kctx_hash )
if cached then
local kc = self : waitForContext ( cached.kctx )
local _ , word = pcall (
kc.getTOCRWord , kc , " dst " ,
rect.x , rect.y , rect.w , rect.h ,
self.tessocr_data , self.ocr_lang , self.ocr_type , 0 , 1 )
Cache: insert ( hash , CacheItem : new { rfocrword = word , size = # word + 64 } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { rfocrword = word , size = # word + 64 } ) -- estimation
return word
end
else
@ -689,7 +687,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
self.ocr_lang = doc.configurable . doc_language
local hash = " ocrword| " .. doc.file .. " | " .. pageno .. rect.x .. rect.y .. rect.w .. rect.h
logger.dbg ( " hash " , hash )
local cached = Cache: check ( hash )
local cached = Doc Cache: check ( hash )
if not cached then
local bbox = {
x0 = rect.x - math.floor ( rect.h * 0.3 ) ,
@ -707,7 +705,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
kc.getTOCRWord , kc , " src " ,
0 , 0 , word_w , word_h ,
self.tessocr_data , self.ocr_lang , self.ocr_type , 0 , 1 )
Cache: insert ( hash , CacheItem : new { ocrword = word , size = # word + 64 } ) -- estimation
Doc Cache: insert ( hash , CacheItem : new { ocrword = word , size = # word + 64 } ) -- estimation
logger.dbg ( " word " , word )
page : close ( )
kc : free ( )
@ -721,8 +719,8 @@ end
Get text from OCR providing selected text boxes .
--]]
function KoptInterface : getOCRText ( doc , pageno , tboxes )
if not Cache: check ( self.ocrengine ) then
Cache: insert ( self.ocrengine , OCREngine : new { ocrengine = KOPTContext.new ( ) , size = 3072 } ) -- estimation
if not Doc Cache: check ( self.ocrengine ) then
Doc Cache: insert ( self.ocrengine , OCREngine : new { ocrengine = KOPTContext.new ( ) , size = 3072 } ) -- estimation
end
logger.info ( " Not implemented yet " )
end