htmlToPlainText(): Format paragraphs with indentation (#7027)

Cf. <https://www.mobileread.com/forums/showthread.php?p=4072308#post4072308>.
reviewable/pr7046/r1
Frans de Jonge 3 years ago committed by GitHub
parent ad08ce9849
commit dafaf966e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -1002,7 +1002,7 @@ This may fail on complex HTML (with styles, scripts, comments), but should be fi
function util.htmlToPlainText(text)
-- Replace <br> and <p> with \n
text = text:gsub("%s*<%s*br%s*/?>%s*", "\n") -- <br> and <br/>
text = text:gsub("%s*<%s*p%s*>%s*", "\n") -- <p>
text = text:gsub("%s*<%s*p%s*>%s*", "\n&nbsp;&nbsp;&nbsp;&nbsp;") -- <p>
text = text:gsub("%s*</%s*p%s*>%s*", "\n") -- </p>
text = text:gsub("%s*<%s*p%s*/>%s*", "\n") -- standalone <p/>
-- Remove all HTML tags
@@ -1012,6 +1012,8 @@ function util.htmlToPlainText(text)
-- Trim spaces and new lines at start and end
text = text:gsub("^[\n%s]*", "")
text = text:gsub("[\n%s]*$", "")
-- Trim non-breaking spaces from the start
text = text:gsub("^\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0", "")
return text
end

Loading…
Cancel
Save