# Source file: DeDRM_tools/DeDRM_plugin/stylexml2css.py
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6
12 years ago
import csv
import sys
import os
import getopt
import re
from struct import pack
from struct import unpack
12 years ago
# Set to True to print diagnostic output while styles are being processed.
debug = False


class DocParser(object):
    """Convert the stylesheet section of a flattened Topaz XML document to CSS.

    The document is held as a list of byte-string lines, each either a bare
    tag path or ``tagpath=value``.  All lookups are done on bytes; everything
    that ends up in the generated CSS is converted to ``str``.
    """

    # style tags we know how to convert, mapped to the CSS selector prefix
    stags = {
        b'paragraph' : 'p',
        b'graphic'   : '.graphic'
    }

    # numeric attributes, mapped to the CSS property they become
    attr_val_map = {
        b'hang'            : 'text-indent: ',
        b'indent'          : 'text-indent: ',
        b'line-space'      : 'line-height: ',
        b'margin-bottom'   : 'margin-bottom: ',
        b'margin-left'     : 'margin-left: ',
        b'margin-right'    : 'margin-right: ',
        b'margin-top'      : 'margin-top: ',
        b'space-after'     : 'padding-bottom: ',
    }

    # "<attr>-<value>" text attributes, mapped to complete CSS declarations
    attr_str_map = {
        b'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
        b'align-left'   : 'text-align: left;',
        b'align-right'  : 'text-align: right;',
        b'align-justify' : 'text-align: justify;',
        b'display-inline' : 'display: inline;',
        b'pos-left' : 'text-align: left;',
        b'pos-right' : 'text-align: right;',
        b'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
    }

    # three characters following '.cl-' in the class selector -> heading
    # element that gets an extra copy of the rule
    # NOTE(review): 'h3_' is kept exactly as found; it looks inconsistent with
    # 'h1-' / 'h2-' (spaces become dashes) - confirm against real class names.
    _heading_map = {
        'ch1' : 'h1',
        'ch2' : 'h2',
        'ch3' : 'h3',
        'h1-' : 'h4',
        'h2-' : 'h5',
        'h3_' : 'h6',
    }

    # used by getData(clean=True) to pull the first run of digits from a value
    _digits_only = re.compile(rb'([0-9]+)')

    def __init__(self, flatxml, fontsize, ph, pw):
        """Store the document lines and the page metrics used for scaling.

        flatxml  -- flattened xml as bytes (a str is encoded as utf-8)
        fontsize -- font size, converted with int()
        ph, pw   -- page height and width, converted with int() and stored
                    as floats
        """
        if isinstance(flatxml, str):
            flatxml = flatxml.encode('utf-8')
        self.flatdoc = flatxml.split(b'\n')
        self.fontsize = int(fontsize)
        self.ph = int(ph) * 1.0
        self.pw = int(pw) * 1.0

    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        """Find the first line in [pos, end) whose name ends with tagpath.

        tagpath may be bytes or str (str is encoded as utf-8).  An end of -1
        means "to the end of the document"; larger values are clamped to the
        document length.  Returns (index, value) where value is the bytes
        after the first '=' (b'' if the line has none), or (-1, None) when
        nothing matches.
        """
        # convert once, not once per line
        if isinstance(tagpath, str):
            tagpath = tagpath.encode('utf-8')
        docList = self.flatdoc
        cnt = len(docList)
        end = cnt if end == -1 else min(cnt, end)
        for j in range(pos, end):
            name, _sep, argres = docList[j].partition(b'=')
            if name.endswith(tagpath) :
                return j, argres
        return -1, None

    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        """Return the index of every line whose name ends with tagpath."""
        startpos = []
        pos = 0
        while True :
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res is None :
                return startpos
            startpos.append(foundpos)
            pos = foundpos + 1

    # returns a vector of integers for the tagpath
    def getData(self, tagpath, pos, end, clean=False):
        """Return the '|'-separated value of the first match as a list of ints.

        With clean=True each element is reduced to its first run of digits
        (when it has one) before conversion.  Returns [] when the tag is not
        found or has an empty value.  int() raises ValueError for an element
        that is still not an integer.
        """
        (_foundat, argt) = self.findinDoc(tagpath, pos, end)
        if not argt :
            return []
        argres = []
        for strval in argt.split(b'|'):
            if clean:
                m = self._digits_only.search(strval)
                if m is not None:
                    strval = m.group()
            argres.append(int(strval))
        return argres

    @staticmethod
    def _selector(raw):
        """Turn a raw class name (bytes or None) into a '.cl-name' str ('' for None)."""
        if raw is None:
            return ''
        # lower-case on bytes so only ASCII letters are affected, then decode
        return '.cl-' + raw.replace(b' ', b'-').lower().decode('utf-8', 'replace')

    def _scale_for(self, attr):
        """Return the divisor that turns a raw attribute value into a fraction."""
        if attr in (b'margin-bottom', b'margin-top', b'space-after') :
            return self.ph
        if attr in (b'margin-right', b'indent', b'margin-left', b'hang') :
            return self.pw
        if attr == b'line-space':
            return self.fontsize * 2.0
        print("Scale not defined!")
        return 1.0

    def _collect_rules(self, start, end):
        """Read the attr/value rules of one style located in [start, end).

        Returns (cssargs, keep).  cssargs maps the attribute key (bytes) to
        (css text, fraction); fraction is None for text attributes whose css
        text is already a complete declaration.  keep is True once at least
        one numeric attribute has been stored.
        """
        cssargs = {}
        # reset for every style so the decision never leaks from the previous one
        keep = False
        while True :
            (pos1, attr) = self.findinDoc(b'style.rule.attr', start, end)
            (pos2, val) = self.findinDoc(b'style.rule.value', start, end)

            if debug: print('attr', attr)
            if debug: print('val', val)

            if attr is None : break
            if val is None :
                # rule without a value: treat like an empty value
                val = b''

            if attr in (b'display', b'pos', b'align'):
                # handle text based attributes
                key = attr + b'-' + val
                if key in self.attr_str_map :
                    cssargs[key] = (self.attr_str_map[key], None)
            elif attr in self.attr_val_map :
                # handle value based attributes
                scale = self._scale_for(attr)
                if val == b'':
                    number = 0.0
                else:
                    try:
                        # UnicodeDecodeError is a ValueError subclass
                        number = float(val.decode('ascii'))
                    except ValueError:
                        print("Warning: unrecognised val, ignoring")
                        number = 0.0
                # a hang of zero carries no information, so it is dropped
                if not ((attr == b'hang') and (number == 0)):
                    cssargs[attr] = (self.attr_val_map[attr], number / scale)
                    keep = True

            start = max(pos1, pos2) + 1
        return cssargs, keep

    def process(self):
        """Build the CSS for every convertible style in the stylesheet.

        Returns (csspage, classlst): csspage is the CSS text, classlst holds
        one '.cl-name' selector per line for each class that was emitted.
        Both are str.
        """
        css_lines = [
            '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }',
            '.cl-right { text-align: right; }',
            '.cl-left { text-align: left; }',
            '.cl-justify { text-align: justify; }',
        ]
        class_names = []

        # generate a list of each <style> starting point in the stylesheet
        styleList = self.posinDoc(b'book.stylesheet.style')
        stylecnt = len(styleList)
        # sentinel: the last style runs to the end of the document
        styleList.append(-1)

        # process each style converting what you can
        if debug: print(' ', 'Processing styles.')
        for j in range(stylecnt):
            if debug: print(' ', 'Processing style %d' %(j))
            start = styleList[j]
            end = styleList[j+1]

            (pos, tag) = self.findinDoc(b'style._tag', start, end)
            if tag is None :
                (pos, tag) = self.findinDoc(b'style.type', start, end)

            # Is this something we know how to convert to css
            if tag not in self.stags :
                continue

            # get the style class
            (pos, rawclass) = self.findinDoc(b'style.class', start, end)
            sclass = self._selector(rawclass)
            if debug: print('sclass', sclass)

            # check for any "after class" specifiers
            (pos, rawafter) = self.findinDoc(b'style._after_class', start, end)
            aftclass = self._selector(rawafter)
            if debug: print('aftclass', aftclass)

            cssargs, keep = self._collect_rules(start, end)

            # disable all of the after class tags until I figure out how to handle them
            if aftclass != '' : keep = False
            if not keep :
                continue
            if debug: print('keeping style')

            # make sure line-space does not go below 100% or above 300% since
            # it can be wacky in some styles
            if b'line-space' in cssargs:
                lsval = min(max(cssargs[b'line-space'][1], 1.0), 3.0)
                cssargs[b'line-space'] = (self.attr_val_map[b'line-space'], lsval)

            # handle modifications for css style hanging indents
            if b'hang' in cssargs:
                hval = cssargs[b'hang'][1]
                cssargs[b'hang'] = (self.attr_val_map[b'hang'], -hval)
                mseg = 'margin-left: '
                mval = hval
                if b'margin-left' in cssargs:
                    mseg = cssargs[b'margin-left'][0]
                    mval = cssargs[b'margin-left'][1]
                    if mval < 0: mval = 0
                    mval = hval + mval
                cssargs[b'margin-left'] = (mseg, mval)
                # hang and indent both become text-indent; hang wins
                if b'indent' in cssargs:
                    del cssargs[b'indent']

            decls = []
            for (mseg, mval) in cssargs.values():
                if mval is None:
                    decls.append(mseg + ' ')
                else :
                    decls.append(mseg + '%.1f%%;' % (mval * 100.0) + ' ')
            cssline = sclass + ' { ' + ''.join(decls) + '}'

            if sclass != '' :
                class_names.append(sclass)
                # handle special case of paragraph class used inside chapter heading
                # and non-chapter headings
                heading = self._heading_map.get(sclass[4:7])
                if heading is not None:
                    css_lines.append(heading + cssline)

            if cssline != ' { }':
                css_lines.append(self.stags[tag] + cssline)

        csspage = ''.join(line + '\n' for line in css_lines)
        classlst = ''.join(name + '\n' for name in class_names)
        return csspage, classlst
def convert2CSS(flatxml, fontsize, ph, pw):
    """Report the settings in use, then run a DocParser over flatxml.

    Returns whatever DocParser.process() returns (the css text together
    with the class list).
    """
    settings = (
        ('Using font size:', fontsize),
        ('Using page height:', ph),
        ('Using page width:', pw),
    )
    for label, value in settings:
        print(' ', label, value)

    # create a document parser
    parser = DocParser(flatxml, fontsize, ph, pw)
    if debug:
        print(' ', 'Created DocParser.')

    result = parser.process()
    if debug:
        print(' ', 'Processed DocParser.')
    return result
def getpageIDMap(flatxml):
    """Return the integers stored under 'info.original.pid' in flatxml.

    The value is split on '|' and each element is reduced to its first run
    of digits before conversion.  Page metrics are irrelevant for this
    lookup, so the parser is created with zeros.
    """
    parser = DocParser(flatxml, 0, 0, 0)
    return parser.getData('info.original.pid', 0, -1, True)