diff options
Diffstat (limited to 'ooopy/Transforms.py')
| -rw-r--r-- | ooopy/Transforms.py | 1237 | 
1 files changed, 1237 insertions, 0 deletions
| diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py new file mode 100644 index 000000000..50a6c0db8 --- /dev/null +++ b/ooopy/Transforms.py @@ -0,0 +1,1237 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. +# Reichergasse 131, A-3411 Weidling. +# Web: http://www.runtux.com Email: office@runtux.com +# All rights reserved +# **************************************************************************** +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# **************************************************************************** + +from __future__              import absolute_import + +import time +import re +try : +    from xml.etree.ElementTree   import dump, SubElement, Element, tostring +except ImportError : +    from elementtree.ElementTree import dump, SubElement, Element, tostring +from copy                    import deepcopy +from ooopy.OOoPy             import OOoPy, autosuper +from ooopy.Transformer       import files, split_tag, OOo_Tag, Transform +from ooopy.Transformer       import mimetypes, namespace_by_name +from ooopy.Version           import VERSION + +# counts in meta.xml +meta_counts = \ +    ( 'character-count', 'image-count', 'object-count', 'page-count' +    , 'paragraph-count', 'table-count', 'word-count' +    ) + +class Access_Attribute (autosuper) : +    """ For performance reasons we do not specify a separate transform +        for each attribute-read or -change operation. Instead we define +        all the attribute accesses we want to perform as objects that +        follow the attribute access api and apply them all using an +        Attribute_Access in one go. +    """ + +    def __init__ (self, key = None, prefix = None, ** kw) : +        self.__super.__init__ (key = key, prefix = prefix, **kw) +        self.key = key +        if key : +            if not prefix : +                prefix   = self.__class__.__name__ +            self.key = ':'.join ((prefix, key)) +    # end def __init__ + +    def register (self, transformer) : +        self.transformer = transformer +    # end def register + +    def use_value (self, oldval = None) : +        """ Can change the given value by returning the new value. If +            returning None or oldval the attribute stays unchanged. +        """ +        raise NotImplementedError, "use_value must be defined in derived class" +    # end def use_value + +# end class Access_Attribute + +class Get_Attribute (Access_Attribute) : +    """ An example of not changing an attribute but only storing the +        value in the transformer +    """ + +    def __init__ (self, tag, attr, key, transform = None, ** kw) : +        self.__super.__init__ (key = key, **kw) +        self.tag        = tag +        self.attribute  = attr +        self.transform  = transform +    # end def __init__ + +    def use_value (self, oldval = None) : +        self.transformer [self.key] = oldval +        return None +    # end def use_value + +# end def Get_Attribute + +class Get_Max (Access_Attribute) : +    """ Get the maximum value of an attribute """ + +    def __init__ (self, tag, attr, key, transform = None, ** kw) : +        self.__super.__init__ (key = key, **kw) +        self.tag        = tag +        self.attribute  = attr +        self.transform  = transform +    # end def __init__ + +    def register (self, transformer) : +        self.__super.register (transformer) +        self.transformer [self.key] = -1 +    # end def register + +    def use_value (self, oldval = None) : +        if  self.transformer [self.key] < oldval : +            self.transformer [self.key] = oldval +        return None +    # end def use_value + +# end def Get_Max + +class Renumber (Access_Attribute) : +    """ Specifies a renumbering transform. OOo has a 'name' attribute +        for several different tags, e.g., tables, frames, sections etc. +        These names must be unique in the whole document. OOo itself +        solves this by appending a unique number to a basename for each +        element, e.g., sections are named 'Section1', 'Section2', ... +        Renumber transforms can be applied to correct the numbering +        after operations that destroy the unique numbering, e.g., after +        a mailmerge where the same document is repeatedly appended. + +        The force parameter specifies if the new renumbered name should +        be inserted even if the attribute in question does not exist. +    """ + +    def __init__ \ +        (self, tag, name = None, attr = None, start = 1, force = False) : +        self.__super.__init__ () +        tag_ns, tag_name = split_tag (tag) +        self.tag_ns      = tag_ns +        self.tag         = tag +        self.name        = name or tag_name [0].upper () + tag_name [1:] +        self.num         = start +        self.force       = force +        self.attribute   = attr +    # end def __init__ + +    def register (self, transformer) : +        self.__super.register (transformer) +        if not self.attribute : +            self.attribute = OOo_Tag (self.tag_ns, 'name', transformer.mimetype) +    # end def register + +    def use_value (self, oldval = None) : +        if oldval is None and not self.force : +            return +        name = "%s%d" % (self.name, self.num) +        self.num += 1 +        return name +    # end def use_value + +# end class Renumber + +class Set_Attribute (Access_Attribute) : +    """ +        Similar to the renumbering transform in that we are assigning +        new values to some attributes. But in this case we give keys +        into the Transformer dict to replace some tag attributes. +    """ + +    def __init__ \ +        ( self +        , tag +        , attr +        , key       = None +        , transform = None +        , value     = None +        , oldvalue  = None +        , ** kw +        ) : +        self.__super.__init__ (key = key, ** kw) +        self.tag        = tag +        self.attribute  = attr +        self.transform  = transform +        self.value      = value +        self.oldvalue   = oldvalue +    # end def __init__ + +    def use_value (self, oldval) : +        if oldval is None : +            return None +        if self.oldvalue and oldval != self.oldvalue : +            return None +        if self.key and self.transformer.has_key (self.key) : +            return str (self.transformer [self.key]) +        return self.value +    # end def use_value + +# end class Set_Attribute + +def set_attributes_from_dict (tag, attr, d) : +    """ Convenience function: iterate over a dict and return a list of +        Set_Attribute objects specifying replacement of attributes in +        the dictionary +    """ +    return [Set_Attribute (tag, attr, oldvalue = k, value = v) +            for k,v in d.iteritems () +           ] +# end def set_attributes_from_dict + +class Reanchor (Access_Attribute) : +    """ +        Similar to the renumbering transform in that we are assigning +        new values to some attributes. But in this case we want to +        relocate objects that are anchored to a page. +    """ + +    def __init__ (self, offset, tag, attr = None) : +        self.__super.__init__ () +        self.offset     = int (offset) +        self.tag        = tag +        self.attribute  = attr +    # end def __init__ + +    def register (self, transformer) : +        self.__super.register (transformer) +        if not self.attribute : +            self.attribute = \ +                OOo_Tag ('text', 'anchor-page-number', transformer.mimetype) +    # end def register + +    def use_value (self, oldval) : +        if oldval is None : +            return oldval +        return "%d" % (int (oldval) + self.offset) +    # end def use_value + +# end class Reanchor + +# +# general transforms applicable to several .xml files +# + +class Attribute_Access (Transform) : +    """ +        Read or Change attributes in an OOo document. +        Can be used for renumbering, moving anchored objects, etc. +        Expects a list of attribute changer objects that follow the +        attribute changer API. This API is very simple: + +        - Member function "use_value" returns the new value of an +          attribute, or if unchanged the old value +        - The attribute "tag" gives the tag for an element we are +          searching +        - The attribute "attribute" gives the name of the attribute we +          want to read or change. +        For examples of the attribute changer API, see Renumber and +        Reanchor above. +    """ +    filename = 'content.xml' +    prio     = 110 + +    def __init__ (self, attrchangers, filename = None, ** kw) : +        self.filename     = filename or self.filename +        self.attrchangers = {} +        # allow several changers for a single tag +        self.attrchangers [None] = [] +        self.changers = attrchangers +        self.__super.__init__ (** kw) +    # end def __init__ + +    def register (self, transformer) : +        """ Register transformer with all attrchangers. """ +        self.__super.register (transformer) +        for r in self.changers : +            if r.tag not in self.attrchangers : +                self.attrchangers [r.tag] = [] +            self.attrchangers [r.tag].append (r) +            r.register (transformer) +    # end def register + +    def apply (self, root) : +        """ Search for all tags for which we renumber and replace name """ +        for n in [root] + root.findall ('.//*') : +            changers = \ +                self.attrchangers [None] + self.attrchangers.get (n.tag, []) +            for r in changers : +                nval = r.use_value (n.get (r.attribute)) +                if nval is not None : +                    n.set (r.attribute, nval) +    # end def apply + +# end class Attribute_Access + +# +# META-INF/manifest.xml transforms +# + +class Manifest_Append (Transform) : +    """ +        The Transformer stores a list of files (and contents) to append. +        These files are added to the archive later but need to be +        present in the manifest, too. +        The file list in the Transformer currently doesn't store a media +        type (which is one of the parameters in the manifest), the +        current application of this transform is to add pictures -- +        these don't have a media type in the files that were checked. +        So for now we add an empty media type. +    """ +    filename = 'META-INF/manifest.xml' +    prio     = 1000 + +    def apply (self, root) : +        for n, node in enumerate (root) : +            assert node.tag == self.oootag ('manifest', 'file-entry') +            path = node.get (self.oootag ('manifest', 'full-path')) +            assert (path) +            if path == '/' : +                break +        else : +            assert (not "The manifest needs a '/' entry") +        for f, _ in self.transformer.appendfiles : +            e = Element (self.oootag ('manifest', 'file-entry')) +            e.attrib [self.oootag ('manifest', 'full-path')]  = f +            e.attrib [self.oootag ('manifest', 'media-type')] = '' +            root.insert (n + 1, e) +            n += 1 +    # end def apply + +# end class Manifest_Append + +# +# meta.xml transforms +# + +class Editinfo (Transform) : +    """ +        This is an example of modifying OOo meta info (edit information, +        author, etc). We set some of the items (program that generated +        the OOo file, modification time, number of edit cyles and overall +        edit duration).  It's easy to subclass this transform and replace +        the "replace" variable (pun intended) in the derived class. +    """ +    filename = 'meta.xml' +    prio     = 20 +    repl     = \ +        { ('meta', 'generator')        : 'OOoPy field replacement' +        , ('dc',   'date')             : time.strftime ('%Y-%m-%dT%H:%M:%S') +        , ('meta', 'editing-cycles')   : '0' +        , ('meta', 'editing-duration') : 'PT0M0S' +        } +    replace  = {} +    # iterate over all mimetypes, so this works for all known mimetypes +    # of OOo documents. +    for m in mimetypes : +        for params, value in repl.iteritems () : +            replace [OOo_Tag (mimetype = m, *params)] = value + +    def apply (self, root) : +        for node in root.findall (self.oootag ('office', 'meta') + '/*') : +            if self.replace.has_key (node.tag) : +                node.text = self.replace [node.tag] +    # end def apply +# end class Editinfo + +# +# settings.xml transforms +# + +class Autoupdate (Transform) : +    """ +        This is an example of modifying OOo settings. We set some of the +        AutoUpdate configuration items in OOo to true. We also specify +        that links should be updated when reading. + +        This was originally intended to make OOo correctly display fields +        if they were changed with the Field_Replace below +        (similar to pressing F9 after loading the generated document in +        OOo). In particular I usually make spaces depend on field +        contents so that I don't have spurious spaces if a field is +        empty. Now it would be nice if OOo displayed the spaces correctly +        after loading a document (It does update the fields before +        printing, so this is only a cosmetic problem :-). This apparently +        does not work. If anybody knows how to achieve this, please let +        me know: mailto:rsc@runtux.com +    """ +    filename = 'settings.xml' +    prio     = 20 + +    def apply (self, root) : +        config = None +        for config in root.findall \ +            ( self.oootag ('office', 'settings') +            + '/' +            + self.oootag ('config', 'config-item-set') +            ) : +            name = config.get (self.oootag ('config', 'name')) +            if name == 'configuration-settings' : +                break +        for node in config.findall (self.oootag ('config', 'config-item')) : +            name = node.get (self.oootag ('config', 'name')) +            if name == 'LinkUpdateMode' :  # update when reading +                node.text = '2' +            # update fields when reading +            if name == 'FieldAutoUpdate' or name == 'ChartAutoUpdate' : +                node.text = 'true' +    # end def apply +# end class Autoupdate + +# +# content.xml transforms +# + +class Field_Replace (Transform) : +    """ +        Takes a dict of replacement key-value pairs. The key is the name +        of a variable in OOo. Additional replacement key-value pairs may +        be specified in ** kw. Alternatively a callback mechanism for +        variable name lookups is provided. The callback function is +        given the name of a variable in OOo and is expected to return +        the replacement value or None if the variable value should not +        be replaced. +    """ +    filename = 'content.xml' +    prio     = 100 + +    def __init__ (self, prio = None, replace = None, ** kw) : +        """ replace is something behaving like a dict or something +            callable for name lookups +        """ +        self.__super.__init__ (prio, ** kw) +        self.replace  = replace or {} +        self.dict     = kw +    # end def __init__ + +    def apply (self, root) : +        tbody = self.find_tbody (root) +        for tag in 'variable-set', 'variable-get', 'variable-input' : +            for node in tbody.findall ('.//' + self.oootag ('text', tag)) : +                attr = 'name' +                if tag == 'text-input' : +                    attr = 'description' +                name = node.get (self.oootag ('text', attr)) +                if callable (self.replace) : +                    replace = self.replace (name) +                    if replace : +                        node.text = replace +                elif name in self.replace : +                    node.text = self.replace [name] +                elif name in self.dict : +                    node.text = self.dict    [name] +    # end def apply +# end class Field_Replace + +class Addpagebreak_Style (Transform) : +    """ +        This transformation adds a new ad-hoc paragraph style to the +        content part of the OOo document. This is needed to be able to +        add new page breaks to an OOo document. Adding a new page break +        is then a matter of adding an empty paragraph with the given page +        break style. + +        We first look through all defined paragraph styles for +        determining a new paragraph style number. Convention is P<num> +        for paragraph styles. We search the highest number and use this +        incremented by one for the new style to insert. Then we insert +        the new style and store the resulting style name in the +        transformer under the key class_name:stylename where class_name +        is our own class name. +    """ +    filename = 'content.xml' +    prio     = 30 +    para     = re.compile (r'P([0-9]+)') + +    def apply (self, root) : +        max_style = 0 +        styles = root.find (self.oootag ('office', 'automatic-styles')) +        for s in styles.findall ('./' + self.oootag ('style', 'style')) : +            m = self.para.match (s.get (self.oootag ('style', 'name'), '')) +            if m : +                num = int (m.group (1)) +                if num > max_style : +                    max_style = num +        stylename = 'P%d' % (max_style + 1) +        new = SubElement \ +            ( styles +            , self.oootag ('style', 'style') +            , { self.oootag ('style', 'name')              : stylename +              , self.oootag ('style', 'family')            : 'paragraph' +              , self.oootag ('style', 'parent-style-name') : 'Standard' +              } +            ) +        SubElement \ +            ( new +            , self.properties_tag +            , { self.oootag ('fo', 'break-after') : 'page' } +            ) +        self.set ('stylename', stylename) +    # end def apply +# end class Addpagebreak_Style + +class Addpagebreak (Transform) : +    """ +        This transformation adds a page break to the last page of the OOo +        text. This is needed, e.g., when doing mail-merge: We append a +        page break to the tbody and then append the next page. This +        transform needs the name of the paragraph style specifying the +        page break style. Default is to use +        'Addpagebreak_Style:stylename' as the key for +        retrieving the page style. Alternatively the page style or the +        page style key can be specified in the constructor. +    """ +    filename = 'content.xml' +    prio     = 50 + +    def __init__ (self, stylename = None, stylekey = None, ** kw) : +        self.__super.__init__ (** kw) +        self.stylename = stylename +        self.stylekey  = stylekey or 'Addpagebreak_Style:stylename' +    # end def __init__ + +    def apply (self, root) : +        """append to tbody e.g., <text:p text:style-name="P4"/>""" +        tbody     = self.find_tbody (root) +        stylename = self.stylename or self.transformer [self.stylekey] +        SubElement \ +            ( tbody +            , self.oootag ('text', 'p') +            , { self.oootag ('text', 'style-name') : stylename } +            ) +    # end def apply +# end class Addpagebreak + +class Fix_OOo_Tag (Transform) : +    """ +        OOo writer conditions are attributes where the *value* is +        prefixed by an XML namespace. If the ooow namespace declaration +        is not in scope, all conditions will evaluate to false. I +        consider this a bug (a violation of the ideas of XML) of OOo. +        Nevertheless to make conditions work, we insert the ooow +        namespace declaration into the top-level element. +    """ +    filename = 'content.xml' +    prio     = 10000 + +    def apply (self, root) : +        if self.mimetype == mimetypes [1] : +            root.set ('xmlns:ooow', namespace_by_name [self.mimetype]['ooow']) +    # end def apply +# end class Fix_OOo_Tag + +class _Body_Concat (Transform) : +    """ Various methods for modifying the tbody split into various pieces +        that have to keep sequence in order to not confuse OOo. +    """ +    ooo_sections  = {} +    for m in mimetypes : +        ooo_sections [m] = \ +            [ { OOo_Tag ('text', 'variable-decls',   m) : 1 +              , OOo_Tag ('text', 'sequence-decls',   m) : 1 +              , OOo_Tag ('text', 'user-field-decls', m) : 1 +              , OOo_Tag ('office', 'forms',          m) : 1 +              } +            , { OOo_Tag ('draw', 'frame',            m) : 1 +              , OOo_Tag ('draw', 'rect',             m) : 1 +              , OOo_Tag ('draw', 'text-box',         m) : 1 +              } +            ] + +    def _textbody (self) : +        """ +            We use the office:body (OOo 1.X)/office:text (OOo 1.X) +            element as a container for various transforms... +        """ +        return Element (self.textbody_tag) +    # end def _textbody + +    def _divide (self, textbody) : +        """ Divide self.copy into parts that must keep their sequence. +            We use another textbody tag for storing the parts... +            Side-effect of setting self.copyparts is intended. +        """ +        self.copyparts = self._textbody () +        self.copyparts.append (self._textbody ()) +        l = len (self.ooo_sections [self.mimetype]) +        idx = 0 +        for e in textbody : +            while idx < l : +                if e.tag in self.ooo_sections [self.mimetype][idx] : +                    break +                else : +                    self.copyparts.append (self._textbody ()) +                    idx += 1 +            self.copyparts [-1].append (e) +        declarations = self.copyparts [0] +        del self.copyparts [0] +        return declarations +    # end def _divide + +    def divide_body (self, root) : +        cont       = root +        if cont.tag != self.oootag ('office', 'document-content') : +            cont   = root.find  (self.oootag ('office', 'document-content')) +        tbody      = cont.find  (self.oootag ('office', 'body')) +        # OOo 2.X has an office:text inside office:body that contains +        # the real text contents: +        if self.mimetype == mimetypes [1] : +            cont   = tbody +            tbody  = cont.find (self.oootag ('office', 'text')) +        idx        = cont [:].index (tbody) +        self.tbody = cont [idx] = self._textbody () +        self.declarations = self._divide (tbody) +        self.bodyparts    = self.copyparts +    # end def divide_body + +    def append_declarations (self) : +        for e in self.declarations : +            self.tbody.append (e) +    # end def append_declarations + +    def append_to_body (self, cp) : +        for i in range (len (self.bodyparts)) : +            for j in cp [i] : +                self.bodyparts [i].append (j) +    # end def append_to_body + +    def assemble_body (self) : +        for p in self.bodyparts : +            for e in p : +                self.tbody.append (e) +    # end def assemble_body + +    def _get_meta (self, var, classname = 'Get_Attribute', prefix = "") : +        """ get page- and paragraph-count etc. meta-info """ +        return int (self.transformer [':'.join ((classname, prefix + var))]) +    # end def _get_meta + +    def _set_meta (self, var, value, classname = 'Set_Attribute', prefix = "") : +        """ set page- and paragraph-count etc. meta-info """ +        self.transformer [':'.join ((classname, prefix + var))] = str (value) +    # end def _set_meta +# end class _Body_Concat + +class Mailmerge (_Body_Concat) : +    """ +        This transformation is used to create a mailmerge document using +        the current document as the template. In the constructor we get +        an iterator that provides a data set for each item in the +        iteration. Elements the iterator has to provide are either +        something that follows the Mapping Type interface (it looks like +        a dict) or something that is callable and can be used for +        name-value lookups. + +        A precondition for this transform is the application of the +        Addpagebreak_Style to guarantee that we know the style +        for adding a page break to the current document. Alternatively +        the stylename (or the stylekey if a different name should be used +        for lookup in the current transformer) can be given in the +        constructor. +    """ +    filename = 'content.xml' +    prio     = 60 + +    def __init__ \ +        (self, iterator, stylename = None, stylekey = None, ** kw) : +        self.__super.__init__ (** kw) +        self.iterator  = iterator +        self.stylename = stylename +        self.stylekey  = stylekey +    # end def __init__ + +    def apply (self, root) : +        """ +            Copy old tbody, create new empty one and repeatedly append the +            new tbody. +        """ +        pb = Addpagebreak \ +            ( stylename   = self.stylename +            , stylekey    = self.stylekey +            , transformer = self.transformer +            ) +        zi = Attribute_Access \ +            ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),) +            , transformer = self.transformer +            ) +        zi.apply (root) + +        pagecount  = self._get_meta ('page-count') +        z_index    = self._get_meta ('z-index', classname = 'Get_Max') + 1 +        ra         = Attribute_Access \ +            ( ( Reanchor (pagecount, self.oootag ('draw', 'text-box')) +              , Reanchor (pagecount, self.oootag ('draw', 'rect')) +              , Reanchor (pagecount, self.oootag ('draw', 'frame')) +              , Reanchor (z_index, None, self.oootag ('draw', 'z-index')) +              ) +            , transformer = self.transformer # transformer added +            ) +        self.divide_body (root) +        self.bodyparts = [self._textbody () for i in self.copyparts] + +        count = 0 +        for i in self.iterator : +            count += 1 +            fr = Field_Replace (replace = i, transformer = self.transformer) +            # add page break only to non-empty tbody +            # reanchor only after the first mailmerge +            if len (self.tbody) : # tbody non-empty (but existing!) +                pb.apply (self.bodyparts [-1]) +                ra.apply (self.copyparts) +            else : +                self.append_declarations () +            cp = deepcopy (self.copyparts) +            fr.apply (cp) +            self.append_to_body (cp) +        # new page-count: +        for i in meta_counts : +            self._set_meta (i, count * self._get_meta (i)) +        # we have added count-1 paragraphs, because each page-break is a +        # paragraph. +        p = 'paragraph-count' +        self._set_meta \ +            (p, self._get_meta (p, classname = 'Set_Attribute') + (count - 1)) +        self.assemble_body () +    # end def apply +# end class Mailmerge + +def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) : +    """ Serialise a style-element of an OOo document (e.g., a +        style:font-decl, style:default-style, etc declaration). +        We remove the name of the style and return something that is a +        representation of the style element which can be used as a +        dictionary key. +        The serialisation format is a tuple containing the tag as the +        first item, the attributes (as key,value pairs returned by +        items()) as the second item and the following items are +        serialisations of children. +    """ +    attr = dict (element.attrib) +    stylename = OOo_Tag ('style', 'name', mimetype) +    if stylename in attr : del attr [stylename] +    attr = attr.items () +    attr.sort () +    attr = tuple (attr) +    serial = [prefix + element.tag, attr] +    for e in element : +        serial.append (tree_serialise (e, prefix, mimetype)) +    return tuple (serial) +# end def tree_serialise + +class Concatenate (_Body_Concat) : +    """ +        This transformation is used to create a new document from a +        concatenation of several documents.  In the constructor we get a +        list of documents to append to the master document. +    """ +    prio     = 80 +    style_containers = {} +    ref_attrs        = {} +    for m in mimetypes : +        style_containers.update \ +            ({ OOo_Tag ('office', 'font-decls',       m) : 1 +             , OOo_Tag ('office', 'font-face-decls',  m) : 1 +             , OOo_Tag ('office', 'styles',           m) : 1 +             , OOo_Tag ('office', 'automatic-styles', m) : 1 +             , OOo_Tag ('office', 'master-styles',    m) : 1 +            }) +        # Cross-references in OOo document: +        # 'attribute' references another element with 'tag'. +        # If attribute names change, we must replace references, too. +        #     attribute                                : +        #     tag +        ref_attrs.update \ +            ({ OOo_Tag ('style', 'parent-style-name', m) : +               OOo_Tag ('style', 'style',             m) +             , OOo_Tag ('style', 'master-page-name',  m) : +               OOo_Tag ('style', 'master-page',       m) +             , OOo_Tag ('style', 'page-layout-name',  m) : # OOo 2.X +               OOo_Tag ('style', 'page-layout',       m) +             , OOo_Tag ('style', 'page-master-name',  m) : +               OOo_Tag ('style', 'page-master',       m) +             , OOo_Tag ('table', 'style-name',        m) : +               OOo_Tag ('style', 'style',             m) +             , OOo_Tag ('text',  'style-name',        m) : +               OOo_Tag ('style', 'style',             m) +             , OOo_Tag ('draw',  'style-name',        m) : +               OOo_Tag ('style', 'style',             m) +             , OOo_Tag ('draw',  'text-style-name',   m) : +               OOo_Tag ('style', 'style',             m) +            }) +    stylefiles = ['styles.xml', 'content.xml'] +    oofiles    = stylefiles + ['meta.xml'] + +    body_decl_sections = ['variable-decl', 'sequence-decl'] + +    def __init__ (self, * docs, ** kw) : +        self.__super.__init__ (** kw) +        self.docs = [] +        for doc in docs : +            self.docs.append (OOoPy (infile = doc)) +            assert (self.docs [-1].mimetype == self.docs [0].mimetype) +    # end def __init__ + +    def apply_all (self, trees) : +        assert (self.docs [0].mimetype == self.transformer.mimetype) +        self.serialised = {} +        self.stylenames = {} +        self.namemaps   = [{}] +        self.tab_depend = {} +        for s in self.ref_attrs.itervalues () : +            self.namemaps [0][s] = {} +        self.body_decls = {} +        for s in self.body_decl_sections : +            self.body_decls [s] = {} +        self.trees      = {} +        for f in self.oofiles : +            self.trees [f] = [trees [f].getroot ()] +        self.sections   = {} +        for f in self.stylefiles : +            self.sections [f] = {} +            for node in self.trees [f][0] : +                self.sections [f][node.tag] = node +        for d in self.docs : +            self.namemaps.append ({}) +            for s in self.ref_attrs.itervalues () : +                self.namemaps [-1][s] = {} +            for f in self.oofiles : +                self.trees [f].append (d.read (f).getroot ()) +        # append a pagebreak style, will be optimized away if duplicate +        pbs = Addpagebreak_Style (transformer = self.transformer) +        pbs.apply (self.trees ['content.xml'][0]) +        get_attr = [] +        for attr in meta_counts : +            a = self.oootag ('meta', attr) +            t = self.oootag ('meta', 'document-statistic') +            get_attr.append (Get_Attribute (t, a, 'concat-' + attr)) +        zi = Attribute_Access \ +            ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),) +            , transformer = self.transformer +            ) +        zi.apply (self.trees ['content.xml'][0]) +        self.zi = Attribute_Access \ +            ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'concat-z-index') +              , +              ) +            , transformer = self.transformer +            ) +        self.getmeta = Attribute_Access \ +            (get_attr, filename = 'meta.xml', transformer = self.transformer) +        self.pbname = self.transformer \ +            [':'.join (('Addpagebreak_Style', 'stylename'))] +        for s in self.trees ['styles.xml'][0].findall \ +            ('.//' + self.oootag ('style', 'default-style')) : +            if s.get (self.oootag ('style', 'family')) == 'paragraph' : +                default_style = s +                break +        self.default_properties = default_style.find \ +            ('./' + self.properties_tag) +        self.set_pagestyle () +        for f in 'styles.xml', 'content.xml' : +            self.style_merge (f) +        self.body_concat () +        self.append_pictures () +    # end def apply_all + +    def apply_tab_correction (self, node) : +        """ Check if node depends on a style which has corrected tabs +            if yes, insert all the default tabs *after* the maximum tab +            position in that style. +        """ +        tab_stops = self.oootag ('style', 'tab-stops') +        tab_stop  = self.oootag ('style', 'tab-stop') +        tab_pos   = self.oootag ('style', 'position') +        parent    = node.get (self.oootag ('style', 'parent-style-name')) +        if parent in self.tab_depend : +            for prop in node : +                if prop.tag != self.properties_tag : +                    continue +                for sub in prop : +                    if sub.tag == tab_stops : +                        self.tab_depend [parent] = 1 +                        max = 0 +                        for ts in sub : +                            assert (ts.tag == tab_stop) +                            pos = float (ts.get (tab_pos) [:-2]) +                            if max < pos : +                                max = pos +                        self.insert_tabs (sub, max) +    # end def apply_tab_correction + +    def _attr_rename (self, idx) : +        r = sum \ +            ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v]) +                for k,v in self.ref_attrs.iteritems () +              ] +            , [] +            ) +        return Attribute_Access (r, transformer = self.transformer) +    # end def _attr_rename + +    def body_concat (self) : +        count = {} +        for i in meta_counts : +            count [i] = self._get_meta (i) +        count ['z-index'] = self._get_meta \ +            ('z-index', classname = 'Get_Max') + 1 +        pb   = Addpagebreak \ +            (stylename = self.pbname, transformer = self.transformer) +        self.divide_body (self.trees ['content.xml'][0]) +        self.body_decl (self.declarations, append = 0) +        for idx in range (1, len (self.docs) + 1) : +            meta    = self.trees ['meta.xml'][idx] +            content = self.trees ['content.xml'][idx] +            tbody   = self.find_tbody (content) +            self.getmeta.apply (meta) +            self.zi.apply      (tbody) + +            ra = Attribute_Access \ +              ( ( Reanchor  +                    (count ['page-count'], self.oootag ('draw', 'text-box')) +                , Reanchor +                    (count ['page-count'], self.oootag ('draw', 'rect')) +                , Reanchor +                    (count ['page-count'], self.oootag ('draw', 'frame')) +                , Reanchor +                    (count ['z-index'], None, self.oootag ('draw', 'z-index')) +                ) +              , transformer = self.transformer # transformer added +              ) +            for i in meta_counts : +                count [i] += self._get_meta (i, prefix = 'concat-') +            count ['paragraph-count'] += 1 +            count ['z-index'] += self._get_meta \ +                ('z-index', classname = 'Get_Max', prefix = 'concat-') + 1 +            namemap = self.namemaps [idx][self.oootag ('style', 'style')] +            tr      = self._attr_rename (idx) +            pb.apply (self.bodyparts [-1]) +            tr.apply (content) +            ra.apply (content) +            declarations = self._divide (tbody) +            self.body_decl (declarations) +            self.append_to_body (self.copyparts) +        self.append_declarations () +        self.assemble_body       () +        for i in meta_counts : +            self._set_meta (i, count [i]) +    # end def body_concat + +    def body_decl (self, decl_section, append = 1) : +        for sect in self.body_decl_sections : +            s = self.declarations.find \ +                ('.//' + self.oootag ('text', sect + 's')) +            d = self.body_decls [sect] +            t = self.oootag ('text', sect) +            for n in decl_section.findall ('.//' + t) : +                name = n.get (self.oootag ('text', 'name')) +                if name not in d : +                    if append and s is not None : +                        s.append (n) +                    d [name] = 1 +    # end def body_decl + +    def insert_tabs (self, element, max = 0) : +        """ Insert tab stops into the current element. Optionally after +            max = the current maximum tab-position +        """ +        dist_tag = self.oootag ('style', 'tab-stop-distance') +        for k in range (1, len (self.tab_correct)) : +            if self.tab_correct [-k].isdigit() : +                break +        l    = float (self.tab_correct [:-k]) +        unit = self.tab_correct [-k:] +        for ts in range (35) : +            pos = l * (ts + 1) +            if pos > max : +                SubElement \ +                    ( element +                    , self.oootag ('style', 'tab-stop') +                    , { self.oootag ('style', 'position') : '%s%s' % (pos, unit) +                      } +                    ) +    # end def insert_tabs + +    def merge_defaultstyle (self, default_style, node) : +        assert default_style is not None +        assert node is not None +        proppath = './' + self.properties_tag +        defprops = default_style.find (proppath) +        props    = node.find          (proppath) +        sn       = self.oootag ('style', 'name') +        if props is None : +            props = Element (self.properties_tag) +        for k, v in defprops.attrib.iteritems () : +            if self.default_properties.get (k) != v and not props.get (k) : +                if k == self.oootag ('style', 'tab-stop-distance') : +                    self.tab_correct = v +                    self.tab_depend  = {node.get (sn) : 1} +                    stps = SubElement \ +                        (props, self.oootag ('style', 'tab-stops')) +                    self.insert_tabs (stps) +                else : +                    props.set (k,v) +        if len (props) or props.attrib : +            node.append (props) +    # end def merge_defaultstyle + +    def _newname (self, key, oldname) : +        stylenum = 0 +        if (key, oldname) not in self.stylenames : +            self.stylenames [(key, oldname)] = 1 +            return oldname +        newname = basename = 'Concat_%s' % oldname +        while (key, newname) in self.stylenames : +            stylenum += 1 +            newname = '%s%d' % (basename, stylenum) +        self.stylenames [(key, newname)] = 1 +        return newname +    # end def _newname + +    def set_pagestyle (self) : +        """ For all documents: search for the first paragraph of the tbody +            and get its style. Modify this style to include a reference +            to the default page-style if it doesn't contain a reference +            to a page style. Insert the new style into the list of +            styles and modify the first paragraph to use the new page +            style. +            This procedure is necessary to make appended documents use +            their page style instead of the master page style of the +            first document. +            FIXME: We should search the style hierarchy backwards for +            the style of the first paragraph to check if there is a +            reference to a page-style somewhere and not override the +            page-style in this case. Otherwise appending complex +            documents that use a different page-style for the first page +            will not work if the page style is referenced in a style +            from which the first paragraph style derives. +        """ +        for idx in range (1, len (self.docs) + 1) : +            croot  = self.trees  ['content.xml'][idx] +            sroot  = self.trees  ['styles.xml'] [idx] +            tbody  = self.find_tbody (croot) +            para   = tbody.find  ('./' + self.oootag ('text', 'p')) +            if para is None : +                para = tbody.find  ('./' + self.oootag ('text', 'list')) +            tsn    = self.oootag ('text', 'style-name') +            sname  = para.get    (tsn) +            styles = croot.find  (self.oootag ('office', 'automatic-styles')) +            ost    = sroot.find  (self.oootag ('office', 'styles')) +            mst    = sroot.find  (self.oootag ('office', 'master-styles')) +            assert mst is not None and len (mst) +            assert mst [0].tag == self.oootag ('style', 'master-page') +            sntag  = self.oootag ('style', 'name') +            master = mst [0].get (sntag) +            mpn    = self.oootag ('style', 'master-page-name') +            stytag = self.oootag ('style', 'style') +            style  = None +            for s in styles : +                if s.tag == stytag : +                    # Explicit references to default style converted to +                    # explicit references to new page style. +                    if s.get (mpn) == '' : +                        s.set (mpn, master) +                    if s.get (sntag) == sname : +                        style = s +            if style is None : +                for s in ost : +                    if s.tag == stytag and s.get (sntag) == sname : +                        style = s +                        break +            if style is not None and not style.get (mpn) : +                newstyle = deepcopy (style) +                # Don't register with newname: will be rewritten later +                # when appending. We assume that an original doc does +                # not already contain a style with _Concat suffix. +                newname = sname + '_Concat' +                para.set (tsn, newname) +                newstyle.set (self.oootag ('style', 'name'), newname) +                newstyle.set (mpn,                            master) +                styles.append (newstyle) +    # end def set_pagestyle + +    def style_merge (self, oofile) : +        """ Loop over all the docs in our document list and look up the +            styles there. If a style matches an existing style in the +            original document, register the style name for later +            transformation if the style name in the original document +            does not match the style name in the appended document.  If +            no match is found, append style to master document and add +            to serialisation. If the style name already exists in the +            master document, a new style name is created. Names of +            parent styles are changed when appending -- this means that +            parent style names already have to be defined earlier in the +            document. + +            If there is a reference to a parent style that is not yet +            defined, and the parent style is defined later, it is +            already too late, so an assertion is raised in this case. +            OOo seems to ensure declaration order of dependent styles, +            so this should not be a problem. +        """ +        for idx in range (len (self.trees [oofile])) : +            namemap = self.namemaps [idx] +            root    = self.trees    [oofile][idx] +            delnode = [] +            for nodeidx, node in enumerate (root) : +                if node.tag not in self.style_containers : +                    continue +                prefix = '' +                # font_decls may have same name in styles.xml and content.xml +                if node.tag == self.font_decls_tag : +                    prefix = oofile +                default_style = None +                for n in node : +                    if  (   n.tag == self.oootag ('style', 'default-style') +                        and (  n.get (self.oootag ('style', 'family')) +                            == 'paragraph' +                            ) +                        ) : +                        default_style = n +                    name     = n.get (self.oootag ('style', 'name'), None) +                    if not name : continue +                    if  (   idx != 0 +                        and name == 'Standard' +                        and n.get (self.oootag ('style', 'class'))  == 'text' +                        and (  n.get (self.oootag ('style', 'family')) +                            == 'paragraph' +                            ) +                        ) : +                        self.merge_defaultstyle (default_style, n) +                    self.apply_tab_correction (n) +                    key = prefix + n.tag +                    if key not in namemap : namemap [key] = {} +                    tr = self._attr_rename (idx) +                    tr.apply (n) +                    sn  = tree_serialise (n, prefix, self.mimetype) +                    if sn in self.serialised : +                        newname = self.serialised [sn] +                        if name != newname : +                            assert \ +                                (  name not in namemap [key] +                                or namemap [key][name] == newname +                                ) +                            namemap [key][name] = newname +                            # optimize original doc: remove duplicate styles +                            if  not idx and node.tag != self.font_decls_tag : +                                pass +                                #delnode.append (nodeidx) +                    else : +                        newname = self._newname (key, name) +                        self.serialised [sn] = newname +                        if newname != name : +                            n.set (self.oootag ('style', 'name'), newname) +                            dn = self.oootag ('style', 'display-name') +                            disp_name = n.get (dn) +                            if disp_name : +                                n.set (dn, 'Concat ' + disp_name) +                            namemap [key][name] = newname +                        if idx != 0 : +                            self.sections [oofile][node.tag].append (n) +                assert not delnode or not idx +                delnode.reverse () +                for i in delnode : +                    del node [i] +    # end style_merge + +    def append_pictures (self) : +        for doc in self.docs : +            for f in doc.izip.infolist () : +                if f.filename.startswith ('Pictures/') : +                    self.transformer.appendfiles.append \ +                        ((f.filename, doc.izip.read (f.filename))) +    # end def append_pictures +             +# end class Concatenate + +def renumber_frames (mimetype) : +    return \ +        [ Renumber (OOo_Tag ('draw',  'text-box', mimetype), 'Frame') # OOo 1.X +        , Renumber (OOo_Tag ('draw',  'frame',    mimetype), 'Frame') # OOo 2.X +        ] +# end def renumber_frames + +def renumber_sections (mimetype) : +    return [Renumber (OOo_Tag ('text',  'section', mimetype))] +# end def renumber_sections + +def renumber_tables (mimetype) : +    return [Renumber (OOo_Tag ('table', 'table', mimetype))] +# end def renumber_tables + +def renumber_images (mimetype) : +    return [Renumber (OOo_Tag ('draw', 'image', mimetype))] +# end def renumber_images + +def renumber_xml_id (mimetype) : +    if mimetype == mimetypes [0] : +        return [] +    xmlid = OOo_Tag ('xml', 'id', mimetype) +    return [Renumber (OOo_Tag ('text', 'list', mimetype), 'list', xmlid)] +# end def renumber_xml_id + +def renumber_all (mimetype) : +    """ Factory function for all renumberings parameterized with +        mimetype +    """ +    return Attribute_Access \ +        ( renumber_frames   (mimetype) +        + renumber_sections (mimetype) +        + renumber_tables   (mimetype) +        + renumber_images   (mimetype) +        + renumber_xml_id   (mimetype) +        )  +# end def renumber_all + +# used to have a separate Pagecount transform -- generalized to get +# some of the meta information using an Attribute_Access transform +# and set the same information later after possibly being updated by +# other transforms. We use another naming convention here for storing +# the info retrieved from the OOo document: We use the attribute name in +# the meta-information to store (and later retrieve) the information. + +def get_meta (mimetype) : +    """ Factory function for Attribute_Access to get all interesting +        meta-data +    """ +    get_attr = [] +    for attr in meta_counts : +        a = OOo_Tag ('meta', attr, mimetype) +        t = OOo_Tag ('meta', 'document-statistic', mimetype) +        get_attr.append (Get_Attribute (t, a, attr)) +    return Attribute_Access (get_attr, prio =  20, filename = 'meta.xml') +# end def get_meta + +def set_meta (mimetype) : +    """ Factory function for Attribute_Access to set all interesting +        meta-data +    """ +    set_attr = [] +    for attr in meta_counts : +        a = OOo_Tag ('meta', attr, mimetype) +        t = OOo_Tag ('meta', 'document-statistic', mimetype) +        set_attr.append (Set_Attribute (t, a, attr)) +    return Attribute_Access (set_attr, prio = 120, filename = 'meta.xml') +# end def set_meta | 
