use pixels from reflowed page when doing OCR in reflowing mode

I hope this restores the OCR accuracy we had in build 545.
pull/310/head
chrox 11 years ago
parent 2cd5a083ae
commit 9f42289226

@ -69,8 +69,12 @@ function DjvuDocument:getPageBoxesFromPositions(pageno, ppos0, ppos1)
return self.koptinterface:getPageBoxesFromPositions(self, pageno, ppos0, ppos1)
end
function DjvuDocument:getOCRWord(pageno, rect)
return self.koptinterface:getOCRWord(self, pageno, rect)
-- Forward a word-OCR request for the given word box to the kopt interface.
function DjvuDocument:getOCRWord(pageno, wbox)
    local kopt = self.koptinterface
    return kopt:getOCRWord(self, pageno, wbox)
end
-- Forward a text-OCR request for the given text boxes to the kopt interface.
function DjvuDocument:getOCRText(pageno, tboxes)
    local kopt = self.koptinterface
    return kopt:getOCRText(self, pageno, tboxes)
end
function DjvuDocument:getUsedBBox(pageno)

@ -6,6 +6,7 @@ require "ui/screen"
require "ui/reader/readerconfig"
KoptInterface = {
ocrengine = "ocrengine",
tessocr_data = "data",
ocr_lang = "eng",
ocr_type = 3, -- default 0, for more accuracy use 3
@ -419,28 +420,61 @@ function KoptInterface:getTextBoxesFromScratch(doc, pageno)
end
--[[
OCR word inside the rect area of the page
rect should be in native page coordinates
get word from OCR providing selected word box
--]]
function KoptInterface:getOCRWord(doc, pageno, rect)
local ocrengine = "ocrengine"
if not Cache:check(ocrengine) then
local dummy = KOPTContext.new()
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
function KoptInterface:getOCRWord(doc, pageno, wbox)
    -- make sure an OCR engine entry exists in the cache before any OCR call
    local engine_key = self.ocrengine
    if not Cache:check(engine_key) then
        local engine = OCREngine:new{ ocrengine = KOPTContext.new() }
        Cache:insert(engine_key, engine)
    end
    -- text_wrap == 1 selects the reflowed-page OCR path, anything else the
    -- native-page path; both receive the sbox of the selected word box
    local reflowing = doc.configurable.text_wrap == 1
    local box = wbox.sbox
    if reflowing then
        return self:getReflewOCRWord(doc, pageno, box)
    end
    return self:getNativeOCRWord(doc, pageno, box)
end
--[[
get word from OCR in reflowed page

rect (x, y, w, h) is handed unchanged to getTOCRWord on the cached "kctx|"
context of the page.
returns the recognized word, or nil when no such context is cached or when
the OCR call raises an error
--]]
function KoptInterface:getReflewOCRWord(doc, pageno, rect)
    self.ocr_lang = doc.configurable.doc_language
    local bbox = doc:getPageBBox(pageno)
    local context_hash = self:getContextHash(doc, pageno, bbox)
    -- keep the coordinates separated so that different boxes cannot collapse
    -- into the same cache key (e.g. x=1,y=23 versus x=12,y=3)
    local hash = "rfocrword|"..context_hash
        .."|"..rect.x.."|"..rect.y.."|"..rect.w.."|"..rect.h
    local cached_word = Cache:check(hash)
    if cached_word then
        return cached_word.rfocrword
    end
    local cached_context = Cache:check("kctx|"..context_hash)
    if not cached_context then
        -- no cached context for this page, so there is nothing to OCR from
        return nil
    end
    local kc = self:waitForContext(cached_context.kctx)
    local ok, word = pcall(
        kc.getTOCRWord, kc,
        rect.x, rect.y, rect.w, rect.h,
        self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
    if not ok then
        -- on failure `word` holds the error message: never cache or return
        -- it as if it were a recognized word
        DEBUG("OCR in reflowed page failed:", word)
        return nil
    end
    Cache:insert(hash, CacheItem:new{ rfocrword = word })
    return word
end
--[[
get word from OCR in native page
--]]
function KoptInterface:getNativeOCRWord(doc, pageno, rect)
self.ocr_lang = doc.configurable.doc_language
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
local cached = Cache:check(hash)
if not cached then
local bbox = {
x0 = rect.x,
y0 = rect.y,
x1 = rect.x + rect.w,
y1 = rect.y + rect.h,
x0 = rect.x - math.floor(rect.h * 0.3),
y0 = rect.y - math.floor(rect.h * 0.3),
x1 = rect.x + rect.w + math.floor(rect.h * 0.3),
y1 = rect.y + rect.h + math.floor(rect.h * 0.3),
}
local kc = self:createContext(doc, pageno, bbox)
--kc:setZoom(30/rect.h)
kc:setZoom(1.0)
kc:setZoom(30/rect.h)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
local word_w, word_h = kc:getPageDim()
@ -457,6 +491,16 @@ function KoptInterface:getOCRWord(doc, pageno, rect)
end
end
--[[
get text from OCR for the selected text boxes
(stub: only prepares the OCR engine and logs, nothing is returned yet)
--]]
function KoptInterface:getOCRText(doc, pageno, tboxes)
    local engine_key = self.ocrengine
    local engine_cached = Cache:check(engine_key)
    if not engine_cached then
        local engine = OCREngine:new{ ocrengine = KOPTContext.new() }
        Cache:insert(engine_key, engine)
    end
    DEBUG("Not implemented yet")
end
--[[
get index of nearest word box around pos
--]]

@ -60,8 +60,12 @@ function PdfDocument:getPageBoxesFromPositions(pageno, ppos0, ppos1)
return self.koptinterface:getPageBoxesFromPositions(self, pageno, ppos0, ppos1)
end
function PdfDocument:getOCRWord(pageno, rect)
return self.koptinterface:getOCRWord(self, pageno, rect)
-- Forward a word-OCR request for the given word box to the kopt interface.
function PdfDocument:getOCRWord(pageno, wbox)
    local kopt = self.koptinterface
    return kopt:getOCRWord(self, pageno, wbox)
end
-- Forward a text-OCR request for the given text boxes to the kopt interface.
function PdfDocument:getOCRText(pageno, tboxes)
    local kopt = self.koptinterface
    return kopt:getOCRText(self, pageno, tboxes)
end
function PdfDocument:getUsedBBox(pageno)

@ -169,12 +169,7 @@ function ReaderHighlight:lookup(selected_word)
self.ui:handleEvent(Event:new("LookupWord", selected_word.word))
-- or we will do OCR
else
local word_box = selected_word.pbox:copy()
word_box.x = word_box.x - math.floor(word_box.h * 0.1)
word_box.y = word_box.y - math.floor(word_box.h * 0.1)
word_box.w = word_box.w + math.floor(word_box.h * 0.2)
word_box.h = word_box.h + math.floor(word_box.h * 0.2)
local word = self.ui.document:getOCRWord(self.hold_pos.page, word_box)
local word = self.ui.document:getOCRWord(self.hold_pos.page, selected_word)
DEBUG("OCRed word:", word)
self.ui:handleEvent(Event:new("LookupWord", word))
end
@ -182,17 +177,12 @@ end
-- Send the selected text to the UI as events; when selected_text.text is
-- empty, the text is first obtained through the document's OCR calls.
-- NOTE(review): this span comes from a rendered diff without +/- markers, so
-- pre-change and post-change statements appear to be interleaved (LookupWord
-- vs TranslateText, getOCRWord vs getOCRText) — confirm against the real file.
function ReaderHighlight:translate(selected_text)
if selected_text.text ~= "" then
self.ui:handleEvent(Event:new("LookupWord", selected_text.text))
self.ui:handleEvent(Event:new("TranslateText", selected_text.text))
-- or we will do OCR
else
-- copy of the first page box, grown vertically by 0.2 * h above and below
local text_pboxes = selected_text.pboxes[1]:copy()
--text_box.x = text_box.x - math.floor(text_box.h * 0.1)
text_pboxes.y = text_pboxes.y - math.floor(text_pboxes.h * 0.2)
--text_box.w = text_box.w + math.floor(text_box.h * 0.2)
text_pboxes.h = text_pboxes.h + math.floor(text_pboxes.h * 0.4)
local text = self.ui.document:getOCRWord(self.hold_pos.page, text_pboxes)
-- this second `local text` shadows the one declared on the previous line
local text = self.ui.document:getOCRText(self.hold_pos.page, selected_text)
DEBUG("OCRed text:", text)
self.ui:handleEvent(Event:new("LookupWord", text))
self.ui:handleEvent(Event:new("TranslateText", text))
end
end

@ -1 +1 @@
Subproject commit aba8feea83a22768ba7360a5da27b0cc4345eefd
Subproject commit dbe989867af6f187ad0ef40cc1bdae735ea9be86
Loading…
Cancel
Save