diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-05-01 13:51:01 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-06-17 13:21:28 +0200 |
commit | 6e09fe95f07ea2c0a827beda5fc2f2a63751db7f (patch) | |
tree | d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy/Transforms.py | |
parent | ce4b7db76f21559b94943229bbeebd9c37c43f49 (diff) | |
download | Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.tar.bz2 Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.zip |
Embed ooopy (last version: 1.11)
Diffstat (limited to 'ooopy/Transforms.py')
-rw-r--r-- | ooopy/Transforms.py | 1237 |
1 files changed, 1237 insertions, 0 deletions
diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py new file mode 100644 index 000000000..50a6c0db8 --- /dev/null +++ b/ooopy/Transforms.py @@ -0,0 +1,1237 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. +# Reichergasse 131, A-3411 Weidling. +# Web: http://www.runtux.com Email: office@runtux.com +# All rights reserved +# **************************************************************************** +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
class Access_Attribute (autosuper) :
    """ Base class of the attribute accessor objects.

        For performance reasons we do not specify a separate transform
        for each attribute-read or -change operation. Instead we define
        all the attribute accesses we want to perform as objects that
        follow the attribute access api and apply them all using an
        Attribute_Access in one go.

        If a key is given, self.key is set to 'prefix:key' where prefix
        defaults to the name of the concrete class. Without a key,
        self.key stays None.
    """

    def __init__ (self, key = None, prefix = None, ** kw) :
        self.__super.__init__ (key = key, prefix = prefix, **kw)
        self.key = key
        if key :
            if not prefix :
                prefix = self.__class__.__name__
            self.key = ':'.join ((prefix, key))
    # end def __init__

    def register (self, transformer) :
        """ Remember the transformer; derived classes use it as the
            store for the values they read or look up.
        """
        self.transformer = transformer
    # end def register

    def use_value (self, oldval = None) :
        """ Can change the given value by returning the new value. If
            returning None or oldval the attribute stays unchanged.
            Must be overridden, the base class raises
            NotImplementedError.
        """
        # Call syntax: the statement form "raise E, msg" is a syntax
        # error on Python 3, this form works on Python 2 and 3.
        raise NotImplementedError \
            ("use_value must be defined in derived class")
    # end def use_value

# end class Access_Attribute

class Get_Attribute (Access_Attribute) :
    """ An example of not changing an attribute but only storing the
        value in the transformer.

        tag and attr name the element and the attribute to read, key
        is the name under which the value is stored (prefixed with the
        class name by the base class). The transform parameter is only
        stored as self.transform here.
    """

    def __init__ (self, tag, attr, key, transform = None, ** kw) :
        self.__super.__init__ (key = key, **kw)
        self.tag       = tag
        self.attribute = attr
        self.transform = transform
    # end def __init__

    def use_value (self, oldval = None) :
        """ Store oldval (None if the attribute is missing) under
            self.key; returns None so the attribute is never modified.
        """
        self.transformer [self.key] = oldval
        return None
    # end def use_value

# end class Get_Attribute

class Get_Max (Access_Attribute) :
    """ Get the maximum value of an (integer-valued) attribute.

        The maximum is stored as an int in the transformer under
        self.key; it is -1 as long as no value has been seen.
    """

    def __init__ (self, tag, attr, key, transform = None, ** kw) :
        self.__super.__init__ (key = key, **kw)
        self.tag       = tag
        self.attribute = attr
        self.transform = transform
    # end def __init__

    def register (self, transformer) :
        """ Register with the transformer and reset the maximum. """
        self.__super.register (transformer)
        self.transformer [self.key] = -1
    # end def register

    def use_value (self, oldval = None) :
        """ Update the stored maximum from oldval; returns None so the
            attribute is never modified.
        """
        # Elements without the attribute deliver None: nothing to do.
        if oldval is None :
            return None
        # Attribute values are strings: compare numerically, a string
        # comparison would rank "10" below "5" (and comparing the
        # initial -1 with a string fails on Python 3).
        value = int (oldval)
        if self.transformer [self.key] < value :
            self.transformer [self.key] = value
        return None
    # end def use_value

# end class Get_Max
def set_attributes_from_dict (tag, attr, d) :
    """ Convenience function: iterate over a dict and return a list of
        Set_Attribute objects specifying replacement of attributes in
        the dictionary.

        For each item k, v of d one Set_Attribute for the given tag and
        attr is created with oldvalue = k and value = v.
    """
    # items () instead of the Python-2-only iteritems (): works with
    # Python 2 and Python 3.
    return \
        [ Set_Attribute (tag, attr, oldvalue = k, value = v)
          for k, v in d.items ()
        ]
# end def set_attributes_from_dict
class Attribute_Access (Transform) :
    """
        Read or change attributes in an OOo document, e.g., for
        renumbering or for moving anchored objects.
        The constructor gets a list of attribute changer objects. The
        attribute changer API is very simple:

        - Member function "use_value" gets the current value of the
          attribute (None if missing) and returns the new value; when
          it returns None the attribute is left alone
        - The attribute "tag" gives the tag of the elements the changer
          is interested in, None means every element
        - The attribute "attribute" gives the name of the attribute we
          want to read or change.
        For examples of the attribute changer API, see Renumber and
        Reanchor above.
    """
    filename = 'content.xml'
    prio     = 110

    def __init__ (self, attrchangers, filename = None, ** kw) :
        self.filename     = filename if filename else self.filename
        # changers by tag, several changers per tag are allowed; the
        # key None holds the changers that are applied to every element
        self.attrchangers = {None : []}
        self.changers     = attrchangers
        self.__super.__init__ (** kw)
    # end def __init__

    def register (self, transformer) :
        """ Register transformer with all attrchangers. """
        self.__super.register (transformer)
        for changer in self.changers :
            self.attrchangers.setdefault (changer.tag, []).append (changer)
            changer.register (transformer)
    # end def register

    def apply (self, root) :
        """ Visit root and all elements below it and let the matching
            changers read or replace their attribute.
        """
        nodes = [root]
        nodes.extend (root.findall ('.//*'))
        for node in nodes :
            by_tag = self.attrchangers.get (node.tag, [])
            for changer in self.attrchangers [None] + by_tag :
                attribute = changer.attribute
                newval    = changer.use_value (node.get (attribute))
                if newval is None :
                    continue
                node.set (attribute, newval)
    # end def apply

# end class Attribute_Access
class Autoupdate (Transform) :
    """
        This is an example of modifying OOo settings. We set some of the
        AutoUpdate configuration items in OOo to true. We also specify
        that links should be updated when reading.

        This was originally intended to make OOo correctly display fields
        if they were changed with the Field_Replace below
        (similar to pressing F9 after loading the generated document in
        OOo). In particular I usually make spaces depend on field
        contents so that I don't have spurious spaces if a field is
        empty. Now it would be nice if OOo displayed the spaces correctly
        after loading a document (It does update the fields before
        printing, so this is only a cosmetic problem :-). This apparently
        does not work. If anybody knows how to achieve this, please let
        me know: mailto:rsc@runtux.com
    """
    filename = 'settings.xml'
    prio     = 20

    def apply (self, root) :
        """ Set LinkUpdateMode, FieldAutoUpdate and ChartAutoUpdate in
            the config-item-set named 'configuration-settings'. A
            settings tree without such a set is left unchanged.
        """
        name_attr = self.oootag ('config', 'name')
        sets = root.findall \
            ( self.oootag ('office', 'settings')
            + '/'
            + self.oootag ('config', 'config-item-set')
            )
        for config in sets :
            if config.get (name_attr) == 'configuration-settings' :
                break
        else :
            # No matching set (or no set at all): do nothing instead of
            # failing on None or modifying an unrelated set.
            return
        for node in config.findall (self.oootag ('config', 'config-item')) :
            name = node.get (name_attr)
            if name == 'LinkUpdateMode' :  # update when reading
                node.text = '2'
            # update fields when reading
            if name == 'FieldAutoUpdate' or name == 'ChartAutoUpdate' :
                node.text = 'true'
    # end def apply
# end class Autoupdate
class Addpagebreak_Style (Transform) :
    """
        Add a new ad-hoc paragraph style with a page break to the
        automatic styles of the content part of the OOo document. With
        this style a page break can be added to the document by
        appending an empty paragraph that uses the style.

        Paragraph styles are named P<num> by convention. We look
        through all automatic styles for the highest <num> in use and
        name the new style with that number incremented by one. The
        name of the new style is stored in the transformer with
        self.set under 'stylename' (the docs of this module refer to
        it by the key class_name:stylename).
    """
    filename = 'content.xml'
    prio     = 30
    para     = re.compile (r'P([0-9]+)')

    def apply (self, root) :
        """ Insert the page break style below office:automatic-styles
            of root and remember its name.
        """
        style_tag = self.oootag ('style', 'style')
        name_attr = self.oootag ('style', 'name')
        styles    = root.find (self.oootag ('office', 'automatic-styles'))
        # numbers of all existing P<num> styles, 0 if there is none
        numbers   = [0]
        for style in styles.findall ('./' + style_tag) :
            match = self.para.match (style.get (name_attr, ''))
            if match :
                numbers.append (int (match.group (1)))
        stylename = 'P%d' % (max (numbers) + 1)
        style_attributes = \
            { name_attr                                  : stylename
            , self.oootag ('style', 'family')            : 'paragraph'
            , self.oootag ('style', 'parent-style-name') : 'Standard'
            }
        new_style = SubElement (styles, style_tag, style_attributes)
        break_attributes = {self.oootag ('fo', 'break-after') : 'page'}
        SubElement (new_style, self.properties_tag, break_attributes)
        self.set ('stylename', stylename)
    # end def apply
# end class Addpagebreak_Style
class _Body_Concat (Transform) :
    """ Helper methods for transforms that take the text body apart
        into pieces and put it together again. The pieces have to keep
        their sequence in order to not confuse OOo.
    """
    # Per mimetype the sequence of section descriptions used by
    # _divide; each description is a dict with the tags of a section.
    ooo_sections = {}
    for m in mimetypes :
        ooo_sections [m] = \
            [ dict.fromkeys
                ( ( OOo_Tag ('text',   'variable-decls',   m)
                  , OOo_Tag ('text',   'sequence-decls',   m)
                  , OOo_Tag ('text',   'user-field-decls', m)
                  , OOo_Tag ('office', 'forms',            m)
                  )
                , 1
                )
            , dict.fromkeys
                ( ( OOo_Tag ('draw', 'frame',    m)
                  , OOo_Tag ('draw', 'rect',     m)
                  , OOo_Tag ('draw', 'text-box', m)
                  )
                , 1
                )
            ]

    def _textbody (self) :
        """
            Return a new empty text body element. We use the
            office:body (OOo 1.X)/office:text (OOo 2.X) element as a
            container for various transforms...
        """
        return Element (self.textbody_tag)
    # end def _textbody

    def _divide (self, textbody) :
        """ Divide the children of textbody into parts that must keep
            their sequence. The parts are stored as children of
            another text body element in self.copyparts (this
            side-effect is intended). The first part is removed from
            self.copyparts and returned as the declarations.
        """
        sections = self.ooo_sections [self.mimetype]
        parts    = self.copyparts = self._textbody ()
        parts.append (self._textbody ())
        current  = 0
        for child in textbody :
            # Open a new part for each section that doesn't know the
            # tag of child; when all sections are used up everything
            # else goes to the last part.
            while current < len (sections) \
              and child.tag not in sections [current] :
                parts.append (self._textbody ())
                current += 1
            parts [-1].append (child)
        declarations = parts [0]
        del parts [0]
        return declarations
    # end def _divide

    def divide_body (self, root) :
        """ Replace the text body below root with a new empty one
            (self.tbody) and divide the old text body into
            self.declarations and self.bodyparts.
        """
        content_tag = self.oootag ('office', 'document-content')
        container   = root
        if container.tag != content_tag :
            container = root.find (content_tag)
        old_body = container.find (self.oootag ('office', 'body'))
        # OOo 2.X has an office:text inside office:body that contains
        # the real text contents:
        if self.mimetype == mimetypes [1] :
            container = old_body
            old_body  = container.find (self.oootag ('office', 'text'))
        position   = list (container).index (old_body)
        new_body   = self._textbody ()
        self.tbody = new_body
        container [position] = new_body
        self.declarations = self._divide (old_body)
        self.bodyparts    = self.copyparts
    # end def divide_body

    def append_declarations (self) :
        """ Append all elements of self.declarations to self.tbody. """
        for declaration in self.declarations :
            self.tbody.append (declaration)
    # end def append_declarations

    def append_to_body (self, cp) :
        """ Append the children of the i-th item of cp to the i-th
            body part, for all body parts.
        """
        for index, part in enumerate (self.bodyparts) :
            for element in cp [index] :
                part.append (element)
    # end def append_to_body

    def assemble_body (self) :
        """ Append the contents of all body parts to self.tbody. """
        for part in self.bodyparts :
            for element in part :
                self.tbody.append (element)
    # end def assemble_body

    def _get_meta (self, var, classname = 'Get_Attribute', prefix = "") :
        """ get page- and paragraph-count etc. meta-info as an int
            from the transformer key 'classname:<prefix><var>'
        """
        key = ':'.join ((classname, prefix + var))
        return int (self.transformer [key])
    # end def _get_meta

    def _set_meta (self, var, value, classname = 'Set_Attribute', prefix = "") :
        """ set page- and paragraph-count etc. meta-info, stored as a
            string under the transformer key 'classname:<prefix><var>'
        """
        key = ':'.join ((classname, prefix + var))
        self.transformer [key] = str (value)
    # end def _set_meta
# end class _Body_Concat
def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) :
    """ Serialise a style-element of an OOo document (e.g., a
        style:font-decl, style:default-style, etc declaration).
        We remove the name of the style and return something that is a
        representation of the style element which can be used as a
        dictionary key.
        The serialisation format is a tuple containing the tag
        (with prefix prepended) as the first item, the sorted
        attributes (a tuple of key,value pairs) as the second item and
        the following items are serialisations of children.
    """
    attr      = dict (element.attrib)
    stylename = OOo_Tag ('style', 'name', mimetype)
    # the style name must not be part of the serialisation
    attr.pop (stylename, None)
    # sorted () instead of items () + in-place sort: on Python 3
    # items () is a view without a sort method.
    attr      = tuple (sorted (attr.items ()))
    serial    = [prefix + element.tag, attr]
    for e in element :
        serial.append (tree_serialise (e, prefix, mimetype))
    return tuple (serial)
# end def tree_serialise
+ # attribute : + # tag + ref_attrs.update \ + ({ OOo_Tag ('style', 'parent-style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('style', 'master-page-name', m) : + OOo_Tag ('style', 'master-page', m) + , OOo_Tag ('style', 'page-layout-name', m) : # OOo 2.X + OOo_Tag ('style', 'page-layout', m) + , OOo_Tag ('style', 'page-master-name', m) : + OOo_Tag ('style', 'page-master', m) + , OOo_Tag ('table', 'style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('text', 'style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('draw', 'style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('draw', 'text-style-name', m) : + OOo_Tag ('style', 'style', m) + }) + stylefiles = ['styles.xml', 'content.xml'] + oofiles = stylefiles + ['meta.xml'] + + body_decl_sections = ['variable-decl', 'sequence-decl'] + + def __init__ (self, * docs, ** kw) : + self.__super.__init__ (** kw) + self.docs = [] + for doc in docs : + self.docs.append (OOoPy (infile = doc)) + assert (self.docs [-1].mimetype == self.docs [0].mimetype) + # end def __init__ + + def apply_all (self, trees) : + assert (self.docs [0].mimetype == self.transformer.mimetype) + self.serialised = {} + self.stylenames = {} + self.namemaps = [{}] + self.tab_depend = {} + for s in self.ref_attrs.itervalues () : + self.namemaps [0][s] = {} + self.body_decls = {} + for s in self.body_decl_sections : + self.body_decls [s] = {} + self.trees = {} + for f in self.oofiles : + self.trees [f] = [trees [f].getroot ()] + self.sections = {} + for f in self.stylefiles : + self.sections [f] = {} + for node in self.trees [f][0] : + self.sections [f][node.tag] = node + for d in self.docs : + self.namemaps.append ({}) + for s in self.ref_attrs.itervalues () : + self.namemaps [-1][s] = {} + for f in self.oofiles : + self.trees [f].append (d.read (f).getroot ()) + # append a pagebreak style, will be optimized away if duplicate + pbs = Addpagebreak_Style (transformer = self.transformer) + pbs.apply (self.trees 
['content.xml'][0]) + get_attr = [] + for attr in meta_counts : + a = self.oootag ('meta', attr) + t = self.oootag ('meta', 'document-statistic') + get_attr.append (Get_Attribute (t, a, 'concat-' + attr)) + zi = Attribute_Access \ + ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),) + , transformer = self.transformer + ) + zi.apply (self.trees ['content.xml'][0]) + self.zi = Attribute_Access \ + ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'concat-z-index') + , + ) + , transformer = self.transformer + ) + self.getmeta = Attribute_Access \ + (get_attr, filename = 'meta.xml', transformer = self.transformer) + self.pbname = self.transformer \ + [':'.join (('Addpagebreak_Style', 'stylename'))] + for s in self.trees ['styles.xml'][0].findall \ + ('.//' + self.oootag ('style', 'default-style')) : + if s.get (self.oootag ('style', 'family')) == 'paragraph' : + default_style = s + break + self.default_properties = default_style.find \ + ('./' + self.properties_tag) + self.set_pagestyle () + for f in 'styles.xml', 'content.xml' : + self.style_merge (f) + self.body_concat () + self.append_pictures () + # end def apply_all + + def apply_tab_correction (self, node) : + """ Check if node depends on a style which has corrected tabs + if yes, insert all the default tabs *after* the maximum tab + position in that style. 
def _attr_rename (self, idx) :
    """ Method of Concatenate: return an Attribute_Access transform
        that renames cross references for the document with index idx.
        For each referencing attribute in self.ref_attrs the
        replacements are taken from self.namemaps [idx] of the
        referenced tag.
    """
    changers = []
    # items () works with Python 2 and 3 (iteritems is Python 2 only);
    # extend avoids copying the whole list for each attribute as
    # sum (lists, []) does.
    for attr, tag in self.ref_attrs.items () :
        changers.extend \
            (set_attributes_from_dict (None, attr, self.namemaps [idx][tag]))
    return Attribute_Access (changers, transformer = self.transformer)
# end def _attr_rename
def insert_tabs (self, element, max = 0) :
    """ Method of Concatenate: insert tab stops into the current
        element. Optionally after max = the current maximum
        tab-position.

        self.tab_correct is the tab distance as a number followed by
        a unit, e.g. '1.25cm'. Up to 35 style:tab-stop elements at
        multiples of this distance are appended to element, only
        positions greater than max are inserted.
    """
    value = self.tab_correct
    # Split after the last digit. The number must keep its last digit:
    # splitting one character too early turns '1.25cm' into 1.2 and
    # '5cm' and yields wrong positions like '2.45cm'.
    split = len (value)
    while split and not value [split - 1].isdigit () :
        split -= 1
    distance = float (value [:split])
    unit     = value [split:]
    tab_stop = self.oootag ('style', 'tab-stop')
    position = self.oootag ('style', 'position')
    for ts in range (35) :
        pos = distance * (ts + 1)
        if pos > max :
            SubElement \
                ( element
                , tab_stop
                , { position : '%s%s' % (pos, unit) }
                )
# end def insert_tabs
{node.get (sn) : 1} + stps = SubElement \ + (props, self.oootag ('style', 'tab-stops')) + self.insert_tabs (stps) + else : + props.set (k,v) + if len (props) or props.attrib : + node.append (props) + # end def merge_defaultstyle + + def _newname (self, key, oldname) : + stylenum = 0 + if (key, oldname) not in self.stylenames : + self.stylenames [(key, oldname)] = 1 + return oldname + newname = basename = 'Concat_%s' % oldname + while (key, newname) in self.stylenames : + stylenum += 1 + newname = '%s%d' % (basename, stylenum) + self.stylenames [(key, newname)] = 1 + return newname + # end def _newname + + def set_pagestyle (self) : + """ For all documents: search for the first paragraph of the tbody + and get its style. Modify this style to include a reference + to the default page-style if it doesn't contain a reference + to a page style. Insert the new style into the list of + styles and modify the first paragraph to use the new page + style. + This procedure is necessary to make appended documents use + their page style instead of the master page style of the + first document. + FIXME: We should search the style hierarchy backwards for + the style of the first paragraph to check if there is a + reference to a page-style somewhere and not override the + page-style in this case. Otherwise appending complex + documents that use a different page-style for the first page + will not work if the page style is referenced in a style + from which the first paragraph style derives. 
+ """ + for idx in range (1, len (self.docs) + 1) : + croot = self.trees ['content.xml'][idx] + sroot = self.trees ['styles.xml'] [idx] + tbody = self.find_tbody (croot) + para = tbody.find ('./' + self.oootag ('text', 'p')) + if para is None : + para = tbody.find ('./' + self.oootag ('text', 'list')) + tsn = self.oootag ('text', 'style-name') + sname = para.get (tsn) + styles = croot.find (self.oootag ('office', 'automatic-styles')) + ost = sroot.find (self.oootag ('office', 'styles')) + mst = sroot.find (self.oootag ('office', 'master-styles')) + assert mst is not None and len (mst) + assert mst [0].tag == self.oootag ('style', 'master-page') + sntag = self.oootag ('style', 'name') + master = mst [0].get (sntag) + mpn = self.oootag ('style', 'master-page-name') + stytag = self.oootag ('style', 'style') + style = None + for s in styles : + if s.tag == stytag : + # Explicit references to default style converted to + # explicit references to new page style. + if s.get (mpn) == '' : + s.set (mpn, master) + if s.get (sntag) == sname : + style = s + if style is None : + for s in ost : + if s.tag == stytag and s.get (sntag) == sname : + style = s + break + if style is not None and not style.get (mpn) : + newstyle = deepcopy (style) + # Don't register with newname: will be rewritten later + # when appending. We assume that an original doc does + # not already contain a style with _Concat suffix. + newname = sname + '_Concat' + para.set (tsn, newname) + newstyle.set (self.oootag ('style', 'name'), newname) + newstyle.set (mpn, master) + styles.append (newstyle) + # end def set_pagestyle + + def style_merge (self, oofile) : + """ Loop over all the docs in our document list and look up the + styles there. If a style matches an existing style in the + original document, register the style name for later + transformation if the style name in the original document + does not match the style name in the appended document. 
If + no match is found, append style to master document and add + to serialisation. If the style name already exists in the + master document, a new style name is created. Names of + parent styles are changed when appending -- this means that + parent style names already have to be defined earlier in the + document. + + If there is a reference to a parent style that is not yet + defined, and the parent style is defined later, it is + already too late, so an assertion is raised in this case. + OOo seems to ensure declaration order of dependent styles, + so this should not be a problem. + """ + for idx in range (len (self.trees [oofile])) : + namemap = self.namemaps [idx] + root = self.trees [oofile][idx] + delnode = [] + for nodeidx, node in enumerate (root) : + if node.tag not in self.style_containers : + continue + prefix = '' + # font_decls may have same name in styles.xml and content.xml + if node.tag == self.font_decls_tag : + prefix = oofile + default_style = None + for n in node : + if ( n.tag == self.oootag ('style', 'default-style') + and ( n.get (self.oootag ('style', 'family')) + == 'paragraph' + ) + ) : + default_style = n + name = n.get (self.oootag ('style', 'name'), None) + if not name : continue + if ( idx != 0 + and name == 'Standard' + and n.get (self.oootag ('style', 'class')) == 'text' + and ( n.get (self.oootag ('style', 'family')) + == 'paragraph' + ) + ) : + self.merge_defaultstyle (default_style, n) + self.apply_tab_correction (n) + key = prefix + n.tag + if key not in namemap : namemap [key] = {} + tr = self._attr_rename (idx) + tr.apply (n) + sn = tree_serialise (n, prefix, self.mimetype) + if sn in self.serialised : + newname = self.serialised [sn] + if name != newname : + assert \ + ( name not in namemap [key] + or namemap [key][name] == newname + ) + namemap [key][name] = newname + # optimize original doc: remove duplicate styles + if not idx and node.tag != self.font_decls_tag : + pass + #delnode.append (nodeidx) + else : + newname = 
def append_pictures (self) :
    """ For every document in self.docs append a (filename, content)
        tuple to self.transformer.appendfiles for each member of the
        document's zip archive whose name starts with 'Pictures/'.
    """
    for doc in self.docs :
        for info in doc.izip.infolist () :
            picture = info.filename
            if not picture.startswith ('Pictures/') :
                continue
            self.transformer.appendfiles.append \
                ((picture, doc.izip.read (picture)))
# end def append_pictures

# end class Concatenate

def renumber_frames (mimetype) :
    """ Return the Renumber objects for frames, both using the name
        'Frame': draw:text-box (OOo 1.X) and draw:frame (OOo 2.X).
    """
    return \
        [ Renumber (OOo_Tag ('draw', element, mimetype), 'Frame')
          for element in ('text-box', 'frame')
        ]
# end def renumber_frames

def renumber_sections (mimetype) :
    """ Return a list with the Renumber object for text:section. """
    section = OOo_Tag ('text', 'section', mimetype)
    return [Renumber (section)]
# end def renumber_sections

def renumber_tables (mimetype) :
    """ Return a list with the Renumber object for table:table. """
    table = OOo_Tag ('table', 'table', mimetype)
    return [Renumber (table)]
# end def renumber_tables

def renumber_images (mimetype) :
    """ Return a list with the Renumber object for draw:image. """
    image = OOo_Tag ('draw', 'image', mimetype)
    return [Renumber (image)]
# end def renumber_images

def renumber_xml_id (mimetype) :
    """ Return a list with the Renumber object for text:list, using
        the name 'list' and the xml:id attribute. The list is empty
        for the first entry of mimetypes (presumably the OOo 1.X
        format -- to be confirmed against ooopy.Transformer).
    """
    renumberings = []
    if mimetype != mimetypes [0] :
        xmlid = OOo_Tag ('xml', 'id', mimetype)
        renumberings.append \
            (Renumber (OOo_Tag ('text', 'list', mimetype), 'list', xmlid))
    return renumberings
# end def renumber_xml_id

def renumber_all (mimetype) :
    """ Factory function: return one Attribute_Access holding all
        renumberings (frames, sections, tables, images, xml ids) for
        the given mimetype.
    """
    factories = \
        ( renumber_frames
        , renumber_sections
        , renumber_tables
        , renumber_images
        , renumber_xml_id
        )
    renumberings = []
    for factory in factories :
        renumberings.extend (factory (mimetype))
    return Attribute_Access (renumberings)
# end def renumber_all
# The counts handled below are read from meta.xml by an Attribute_Access
# transform and the same information is set again later, after possibly
# being updated by other transforms. Naming convention for storing the
# info retrieved from the OOo document: the attribute name in the
# meta-information is used to store (and later retrieve) the information.

def get_meta (mimetype) :
    """ Factory function: return an Attribute_Access (prio 20, file
        meta.xml) with one Get_Attribute per entry of meta_counts.
        Each Get_Attribute receives the meta:document-statistic tag,
        the tag of the counted attribute and the plain attribute name.
    """
    def getter (attr) :
        attr_tag = OOo_Tag ('meta', attr, mimetype)
        stat_tag = OOo_Tag ('meta', 'document-statistic', mimetype)
        return Get_Attribute (stat_tag, attr_tag, attr)
    # end def getter

    return Attribute_Access \
        ([getter (a) for a in meta_counts], prio = 20, filename = 'meta.xml')
# end def get_meta

def set_meta (mimetype) :
    """ Factory function: return an Attribute_Access (prio 120, file
        meta.xml) with one Set_Attribute per entry of meta_counts.
        Each Set_Attribute receives the meta:document-statistic tag,
        the tag of the counted attribute and the plain attribute name.
    """
    def setter (attr) :
        attr_tag = OOo_Tag ('meta', attr, mimetype)
        stat_tag = OOo_Tag ('meta', 'document-statistic', mimetype)
        return Set_Attribute (stat_tag, attr_tag, attr)
    # end def setter

    return Attribute_Access \
        ([setter (a) for a in meta_counts], prio = 120, filename = 'meta.xml')
# end def set_meta