Embed ooopy (last version: 1.11)

author: Étienne Loks <etienne.loks@iggdrasil.net> 2019-05-01 13:51:01 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2019-06-17 13:21:28 +0200
commit: 6e09fe95f07ea2c0a827beda5fc2f2a63751db7f (patch)
tree: d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy
parent: ce4b7db76f21559b94943229bbeebd9c37c43f49 (diff)
download: Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.tar.bz2
Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.zip
5 files changed, 2952 insertions, 0 deletions
diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py
new file mode 100644
index 000000000..87e0b8110
--- /dev/null
+++ b/ooopy/OOoPy.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
+# Reichergasse 131, A-3411 Weidling.
+# Web: http://www.runtux.com Email: office@runtux.com
+# All rights reserved
+# ****************************************************************************
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# ****************************************************************************
+
+from __future__              import absolute_import
+
+from zipfile                 import ZipFile, ZIP_DEFLATED, ZipInfo
+try :
+    from StringIO            import StringIO
+except ImportError :
+    from io                  import StringIO
+from datetime                import datetime
+try :
+    from xml.etree.ElementTree   import ElementTree, fromstring, _namespace_map
+except ImportError :
+    from elementtree.ElementTree import ElementTree, fromstring, _namespace_map
+from tempfile                import mkstemp
+from ooopy.Version           import VERSION
+import os
+
+class _autosuper (type) :
+    def __init__ (cls, name, bases, dict) :
+        super   (_autosuper, cls).__init__ (name, bases, dict)
+        setattr (cls, "_%s__super" % name, super (cls))
+    # end def __init__
+# end class _autosuper
+
+class autosuper (object) :
+    __metaclass__ = _autosuper
+    def __init__ (self, *args, **kw) :
+        self.__super.__init__ ()
+    # end def __init__
+# end class autosuper
+
+# Names of the XML members of an OOo document archive: document
+# content, styles, meta information, settings and the archive manifest.
+files = \
+    [ 'content.xml'
+    , 'styles.xml'
+    , 'meta.xml'
+    , 'settings.xml'
+    , 'META-INF/manifest.xml'
+    ]
+
+# Supported document mimetypes. Other tables in this package are keyed
+# by position: mimetypes [0] is the OOo 1.X writer format, mimetypes [1]
+# the OpenDocument text format written by OOo 2.X.
+mimetypes = \
+    [ 'application/vnd.sun.xml.writer'
+    , 'application/vnd.oasis.opendocument.text'
+    ]
+# XML namespace URIs per supported mimetype, keyed by the symbolic
+# prefix: namespace_by_name [mimetype][prefix] is the namespace URI.
+namespace_by_name = \
+  { mimetypes [0] :
+      { 'chart'    : "http://openoffice.org/2000/chart"
+      , 'config'   : "http://openoffice.org/2001/config"
+      , 'dc'       : "http://purl.org/dc/elements/1.1/"
+      , 'dr3d'     : "http://openoffice.org/2000/dr3d"
+      , 'draw'     : "http://openoffice.org/2000/drawing"
+      , 'fo'       : "http://www.w3.org/1999/XSL/Format"
+      , 'form'     : "http://openoffice.org/2000/form"
+      , 'math'     : "http://www.w3.org/1998/Math/MathML"
+      , 'meta'     : "http://openoffice.org/2000/meta"
+      , 'number'   : "http://openoffice.org/2000/datastyle"
+      , 'office'   : "http://openoffice.org/2000/office"
+      , 'script'   : "http://openoffice.org/2000/script"
+      , 'style'    : "http://openoffice.org/2000/style"
+      , 'svg'      : "http://www.w3.org/2000/svg"
+      , 'table'    : "http://openoffice.org/2000/table"
+      , 'text'     : "http://openoffice.org/2000/text"
+      , 'xlink'    : "http://www.w3.org/1999/xlink"
+      , 'manifest' : "http://openoffice.org/2001/manifest"
+      }
+  , mimetypes [1] :
+      { 'chart'    : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
+      , 'config'   : "urn:oasis:names:tc:opendocument:xmlns:config:1.0"
+      , 'dc'       : "http://purl.org/dc/elements/1.1/"
+      , 'dr3d'     : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
+      , 'draw'     : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+      # one value: the two adjacent literals are joined at compile time
+      , 'fo'       : "urn:oasis:names:tc:opendocument:xmlns:"
+                     "xsl-fo-compatible:1.0"
+      , 'form'     : "urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+      , 'math'     : "http://www.w3.org/1998/Math/MathML"
+      , 'meta'     : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
+      , 'number'   : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
+      , 'office'   : "urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+      , 'officeooo': "http://openoffice.org/2009/office"
+      , 'script'   : "urn:oasis:names:tc:opendocument:xmlns:script:1.0"
+      , 'style'    : "urn:oasis:names:tc:opendocument:xmlns:style:1.0"
+      , 'svg'      : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
+      , 'table'    : "urn:oasis:names:tc:opendocument:xmlns:table:1.0"
+      , 'text'     : "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+      , 'xlink'    : "http://www.w3.org/1999/xlink"
+      , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
+      , 'tableooo' : "http://openoffice.org/2009/table"
+      , 'transformation' : "http://www.w3.org/2003/g/data-view#"
+      # OOo 1.X tags and some others:
+      , 'ooo'      : "http://openoffice.org/2004/office"
+      , 'ooow'     : "http://openoffice.org/2004/writer"
+      , 'oooc'     : "http://openoffice.org/2004/calc"
+      , 'o_dom'    : "http://www.w3.org/2001/xml-events"
+      , 'o_xforms' : "http://www.w3.org/2002/xforms"
+      , 'xs'       : "http://www.w3.org/2001/XMLSchema"
+      , 'xsi'      : "http://www.w3.org/2001/XMLSchema-instance"
+      # predefined xml namespace, see
+      # http://www.w3.org/TR/2006/REC-xml-names11-20060816/
+      # "It MAY, but need not, be declared, and MUST NOT be undeclared
+      # or bound to any other namespace name."
+      , 'xml'      : "http://www.w3.org/XML/1998/namespace"
+      }
+  }
+
+for mimetype in namespace_by_name.itervalues () :
+    for k, v in mimetype.iteritems () :
+        if v in _namespace_map :
+            assert (_namespace_map [v] == k)
+        _namespace_map [v] = k
+
+class OOoElementTree (autosuper) :
+    """
+        An ElementTree for OOo document XML members. Behaves like the
+        orginal ElementTree (in fact it delegates almost everything to a
+        real instance of ElementTree) except for the write method, that
+        writes itself back to the OOo XML file in the OOo zip archive it
+        came from.
+    """
+    def __init__ (self, ooopy, zname, root) :
+        self.ooopy = ooopy
+        self.zname = zname
+        self.tree  = ElementTree (root)
+    # end def __init__
+
+    def write (self) :
+        self.ooopy.write (self.zname, self.tree)
+    # end def write
+
+    def __getattr__ (self, name) :
+        """
+            Delegate everything to our ElementTree attribute.
+        """
+        if not name.startswith ('__') :
+            result = getattr (self.tree, name)
+            setattr (self, name, result)
+            return result
+        raise AttributeError (name)
+    # end def __getattr__
+
+# end class OOoElementTree
+
+class OOoPy (autosuper) :
+    """
+        Wrapper for OpenOffice.org zip files (all OOo documents are
+        really zip files internally).
+
+        from ooopy.OOoPy import OOoPy
+        >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw')
+        >>> o.mimetype
+        'application/vnd.sun.xml.writer'
+        >>> for f in files :
+        ...     e = o.read (f)
+        ...     e.write ()
+        ...
+        >>> o.close ()
+        >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt')
+        >>> o.mimetype
+        'application/vnd.oasis.opendocument.text'
+        >>> for f in files :
+        ...     e = o.read (f)
+        ...     e.write ()
+        ...
+        >>> o.append_file ('Pictures/empty', '')
+        >>> o.close ()
+        >>> o = OOoPy (infile = 'out2.odt')
+        >>> for f in o.izip.infolist () :
+        ...     print f.filename, f.create_system, f.compress_type
+        mimetype 0 8
+        content.xml 0 8
+        styles.xml 0 8
+        meta.xml 0 8
+        settings.xml 0 8
+        META-INF/manifest.xml 0 8
+        Pictures/empty 0 8
+        Configurations2/statusbar/ 0 0
+        Configurations2/accelerator/current.xml 0 8
+        Configurations2/floater/ 0 0
+        Configurations2/popupmenu/ 0 0
+        Configurations2/progressbar/ 0 0
+        Configurations2/menubar/ 0 0
+        Configurations2/toolbar/ 0 0
+        Configurations2/images/Bitmaps/ 0 0
+        Thumbnails/thumbnail.png 0 8
+    """
+    def __init__ \
+        ( self
+        , infile     = None
+        , outfile    = None
+        , write_mode = 'w'
+        , mimetype   = None
+        ) :
+        """
+            Open an OOo document, if no outfile is given, we open the
+            file read-only. Otherwise the outfile has to be different
+            from the infile -- the python ZipFile can't deal with
+            read-write access. In case an outfile is given, we open it
+            in "w" mode as a zip file, unless write_mode is specified
+            (the only allowed case would be "a" for appending to an
+            existing file, see pythons ZipFile documentation for
+            details). If no infile is given, the user is responsible for
+            providing all necessary files in the resulting output file.
+
+            It seems that OOo needs to have the mimetype as the first
+            archive member (at least with mimetype as the first member
+            it works, the order may not be arbitrary) to recognize a zip
+            archive as an OOo file. When copying from a given infile, we
+            use the same order of elements in the resulting output. When
+            creating new elements we make sure the mimetype is the first
+            in the resulting archive.
+
+            Note that both, infile and outfile can either be filenames
+            or file-like objects (e.g. StringIO).
+
+            The mimetype is automatically determined if an infile is
+            given. If only writing is desired, the mimetype should be
+            set.
+        """
+        assert (infile != outfile)
+        self.izip = self.ozip = None
+        if infile :
+            self.izip    = ZipFile (infile,  'r',        ZIP_DEFLATED)
+        if outfile :
+            self.ozip    = ZipFile (outfile, write_mode, ZIP_DEFLATED)
+            self.written = {}
+        if mimetype :
+            self.mimetype = mimetype
+        elif self.izip :
+            self.mimetype = self.izip.read ('mimetype')
+    # end def __init__
+
+    def read (self, zname) :
+        """
+            return an OOoElementTree object for the given OOo document
+            archive member name. Currently an OOo document contains the
+            following XML files::
+
+             * content.xml: the text of the OOo document
+             * styles.xml: style definitions
+             * meta.xml: meta-information (author, last changed, ...)
+             * settings.xml: settings in OOo
+             * META-INF/manifest.xml: contents of the archive
+
+            There is an additional file "mimetype" that always contains
+            the string "application/vnd.sun.xml.writer" for OOo 1.X files
+            and the string "application/vnd.oasis.opendocument.text" for
+            OOo 2.X files.
+        """
+        assert (self.izip)
+        return OOoElementTree (self, zname, fromstring (self.izip.read (zname)))
+    # end def read
+
+    def _write (self, zname, str) :
+        now  = datetime.utcnow ().timetuple ()
+        info = ZipInfo (zname, date_time = now)
+        info.create_system = 0 # pretend to be fat
+        info.compress_type = ZIP_DEFLATED
+        self.ozip.writestr (info, str)
+        self.written [zname] = 1
+    # end def _write
+
+    def write (self, zname, etree) :
+        assert (self.ozip)
+        # assure mimetype is the first member in new archive
+        if 'mimetype' not in self.written :
+            self._write ('mimetype', self.mimetype)
+        str = StringIO ()
+        etree.write (str)
+        self._write (zname, str.getvalue ())
+    # end def write
+
+    def append_file (self, zname, str) :
+        """ Official interface to _write: Append a file to the end of
+            the archive.
+        """
+        if zname not in self.written :
+            self._write (zname, str)
+    # end def append_file
+
+    def close (self) :
+        """
+            Close the zip files. According to documentation of zipfile in
+            the standard python lib, this has to be done to be sure
+            everything is written. We copy over the not-yet written files
+            from izip before closing ozip.
+        """
+        if self.izip and self.ozip :
+            for f in self.izip.infolist () :
+                if f.filename not in self.written :
+                    self.ozip.writestr (f, self.izip.read (f.filename))
+        for i in self.izip, self.ozip :
+            if i : i.close ()
+        self.izip = self.ozip = None
+    # end def close
+
+    __del__ = close # auto-close on deletion of object
+# end class OOoPy
diff --git a/ooopy/Transformer.py b/ooopy/Transformer.py
new file mode 100644
index 000000000..dbbab125d
--- /dev/null
+++ b/ooopy/Transformer.py
@@ -0,0 +1,1397 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
+# Reichergasse 131, A-3411 Weidling.
+# Web: http://www.runtux.com Email: office@runtux.com
+# All rights reserved
+# ****************************************************************************
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# ****************************************************************************
+
+from __future__              import absolute_import
+
+import time
+import re
+try :
+    from xml.etree.ElementTree   import dump, SubElement, Element, tostring
+    from xml.etree.ElementTree   import _namespace_map
+except ImportError :
+    from elementtree.ElementTree import dump, SubElement, Element, tostring
+    from elementtree.ElementTree import _namespace_map
+from copy                    import deepcopy
+from ooopy.OOoPy             import OOoPy, autosuper
+from ooopy.OOoPy             import files, mimetypes, namespace_by_name
+from ooopy.Version           import VERSION
+
+def OOo_Tag (namespace, name, mimetype) :
+    """Return combined XML tag
+    
+       >>> OOo_Tag ('xml', 'id', mimetypes [1])
+       '{http://www.w3.org/XML/1998/namespace}id'
+       >>> OOo_Tag ('text', 'list', mimetypes [1])
+       '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list'
+    """
+    return "{%s}%s" % (namespace_by_name [mimetype][namespace], name)
+# end def OOo_Tag
+
+def split_tag (tag) :
+    """ Split tag into symbolic namespace and name part -- inverse
+        operation of OOo_Tag.
+    """
+    ns, t = tag.split ('}')
+    return (_namespace_map [ns [1:]], t)
+# end def split_tag
+
+class Transform (autosuper) :
+    """
+        Base class for individual transforms on OOo files. An individual
+        transform needs a filename variable for specifying the OOo file
+        the transform should be applied to and an optional prio.
+        Individual transforms are applied according to their prio
+        setting, higher prio means later application of a transform.
+
+        The filename variable must specify one of the XML files which are
+        part of the OOo document (see files variable above). As
+        the names imply, content.xml contains the contents of the
+        document (text and ad-hoc style definitions), styles.xml contains
+        the style definitions, meta.xml contains meta information like
+        author, editing time, etc. and settings.xml is used to store
+        OOo's settings (menu Tools->Configure).
+    """
+    prio = 100
+    textbody_names = \
+        { mimetypes [0] : 'body'
+        , mimetypes [1] : 'text'
+        }
+    paragraph_props = \
+        { mimetypes [0] : 'properties'
+        , mimetypes [1] : 'paragraph-properties'
+        }
+    font_decls = \
+        { mimetypes [0] : 'font-decls'
+        , mimetypes [1] : 'font-face-decls'
+        }
+
+    def __init__ (self, prio = None, transformer = None) :
+        if prio is not None :
+            self.prio    = prio
+        self.transformer = None
+        if transformer :
+            self.register (transformer)
+    # end def __init__
+
+    def apply (self, root) :
+        """ Apply myself to the element given as root """
+        raise NotImplementedError, 'derived transforms must implement "apply"'
+    # end def apply
+
+    def apply_all (self, trees) :
+        """ Apply myself to all the files given in trees. The variable
+            trees contains a dictionary of ElementTree indexed by the
+            name of the OOo File.
+            The standard case is that only one file (namely
+            self.filename) is used.
+        """
+        assert (self.filename)
+        self.apply (trees [self.filename].getroot ())
+    # end def apply_all
+
+    def find_tbody (self, root) :
+        """ Find the node which really contains the text -- different
+            for different OOo versions.
+        """
+        tbody = root
+        if tbody.tag != self.textbody_tag :
+            tbody = tbody.find ('.//' + self.textbody_tag)
+        return tbody
+    # end def find_tbody
+
+    def register (self, transformer) :
+        """ Registering with a transformer means being able to access
+            variables stored in the tranformer by other transforms.
+
+            Also needed for tag-computation: The transformer knows which
+            version of OOo document we are processing.
+        """
+        self.transformer     = transformer
+        mt                   = self.mimetype = transformer.mimetype
+        self.textbody_name   = self.textbody_names [mt]
+        self.paragraph_props = self.paragraph_props [mt]
+        self.properties_tag  = self.oootag ('style', self.paragraph_props)
+        self.textbody_tag    = self.oootag ('office', self.textbody_name)
+        self.font_decls_tag  = self.oootag ('office', self.font_decls [mt])
+    # end def register
+
+    def oootag (self, namespace, name) :
+        """ Compute long tag version """
+        return OOo_Tag (namespace, name, self.mimetype)
+    # end def oootag
+
+    def set (self, variable, value) :
+        """ Set variable in our transformer using naming convention. """
+        self.transformer [self._varname (variable)] = value
+    # end def set
+
+    def _varname (self, name) :
+        """ For fulfilling the naming convention of the transformer
+            dictionary (every entry in this dictionary should be prefixed
+            with the class name of the transform) we have this
+            convenience method.
+            Returns variable name prefixed with own class name.
+        """
+        return ":".join ((self.__class__.__name__, name))
+    # end def _varname
+
+# end class Transform
+
+class Transformer (autosuper) :
+    """
+        Class for applying a set of transforms to a given ooopy object.
+        The transforms are applied to the specified file in priority
+        order. When applying transforms we have a mechanism for
+        communication of transforms. We give the transformer to the
+        individual transforms as a parameter. The transforms may use the
+        transformer like a dictionary for storing values and retrieving
+        values left by previous transforms.
+        As a naming convention each transform should use its class name
+        as a prefix for storing values in the dictionary.
+        >>> import Transforms
+        >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts
+        >>> try :
+        ...     from io import StringIO, BytesIO
+        ...     StringIO = BytesIO
+        ... except ImportError :
+        ...     from StringIO import StringIO
+        >>> sio = BytesIO ()
+        >>> o   = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
+        >>> m   = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
+        'Standard'
+        >>> def cb (name) :
+        ...     r = { 'street'     : 'Beispielstrasse 42'
+        ...         , 'firstname'  : 'Hugo'
+        ...         , 'salutation' : 'Frau'
+        ...         }
+        ...     if r.has_key (name) : return r [name]
+        ...     return None
+        ... 
+        >>> p = get_meta (m)
+        >>> t = Transformer (m, p)
+        >>> t ['a'] = 'a'
+        >>> t ['a']
+        'a'
+        >>> t.transform (o)
+        >>> p.set ('a', 'b')
+        >>> t ['Attribute_Access:a']
+        'b'
+        >>> t   = Transformer (
+        ...       m
+        ...     , Transforms.Autoupdate ()
+        ...     , Transforms.Editinfo   ()  
+        ...     , Transforms.Field_Replace (prio = 99, replace = cb)
+        ...     , Transforms.Field_Replace
+        ...         ( replace =
+        ...             { 'salutation' : ''
+        ...             , 'firstname'  : 'Erika'
+        ...             , 'lastname'   : 'Musterfrau'
+        ...             , 'country'    : 'D' 
+        ...             , 'postalcode' : '00815'
+        ...             , 'city'       : 'Niemandsdorf'
+        ...             }
+        ...         )
+        ...     , Transforms.Addpagebreak_Style ()
+        ...     , Transforms.Addpagebreak       ()
+        ...     )
+        >>> t.transform (o)
+        >>> o.close ()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("testout.sxw", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> c = o.read ('content.xml')
+        >>> m = o.mimetype
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+        >>> for node in body.findall (vset) :
+        ...     name = node.get (OOo_Tag ('text', 'name', m))
+        ...     print name, ':', node.text
+        salutation : None
+        firstname : Erika
+        lastname : Musterfrau
+        street : Beispielstrasse 42
+        country : D
+        postalcode : 00815
+        city : Niemandsdorf
+        salutation : None
+        firstname : Erika
+        lastname : Musterfrau
+        street : Beispielstrasse 42
+        country : D
+        postalcode : 00815
+        city : Niemandsdorf
+        >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
+        'P2'
+        >>> sio = StringIO ()
+        >>> o   = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
+        >>> c = o.read ('content.xml')
+        >>> t   = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Addpagebreak_Style ()
+        ...     , Transforms.Mailmerge
+        ...       ( iterator = 
+        ...         ( dict (firstname = 'Erika', lastname = 'Nobody')
+        ...         , dict (firstname = 'Eric',  lastname = 'Wizard')
+        ...         , cb
+        ...         )
+        ...       )
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     )
+        >>> t.transform (o)
+        >>> for i in meta_counts :
+        ...     print i, t [':'.join (('Set_Attribute', i))]
+        character-count 951
+        image-count 0
+        object-count 0
+        page-count 3
+        paragraph-count 113
+        table-count 3
+        word-count 162
+        >>> name = t ['Addpagebreak_Style:stylename']
+        >>> name
+        'P2'
+        >>> o.close ()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("testout2.sxw", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', m))
+        >>> for n in body.findall ('.//*') :
+        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+        ...     if zidx :
+        ...         print ':'.join(split_tag (n.tag)), zidx
+        draw:text-box 0
+        draw:rect 1
+        draw:text-box 3
+        draw:rect 4
+        draw:text-box 6
+        draw:rect 7
+        draw:text-box 2
+        draw:text-box 5
+        draw:text-box 8
+        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+        ...     if n.get (OOo_Tag ('text', 'style-name', m)) == name :
+        ...         print n.tag
+        {http://openoffice.org/2000/text}p
+        {http://openoffice.org/2000/text}p
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
+        >>> for n in body.findall (vset) :
+        ...     if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
+        ...         name = n.get (OOo_Tag ('text', 'name', m))
+        ...         print name, ':', n.text
+        firstname : Erika
+        lastname : Nobody
+        firstname : Eric
+        lastname : Wizard
+        firstname : Hugo
+        lastname : Testman
+        firstname : Erika
+        lastname : Nobody
+        firstname : Eric
+        lastname : Wizard
+        firstname : Hugo
+        lastname : Testman
+        >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
+        ...     print n.get (OOo_Tag ('draw', 'name', m)),
+        ...     print n.get (OOo_Tag ('text', 'anchor-page-number', m))
+        Frame1 1
+        Frame2 2
+        Frame3 3
+        Frame4 None
+        Frame5 None
+        Frame6 None
+        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+        ...     print n.get (OOo_Tag ('text', 'name', m))
+        Section1
+        Section2
+        Section3
+        Section4
+        Section5
+        Section6
+        Section7
+        Section8
+        Section9
+        Section10
+        Section11
+        Section12
+        Section13
+        Section14
+        Section15
+        Section16
+        Section17
+        Section18
+        >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
+        ...     print n.get (OOo_Tag ('table', 'name', m))
+        Table1
+        Table2
+        Table3
+        >>> r = o.read ('meta.xml')
+        >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
+        >>> for i in meta_counts :
+        ...     print i, repr (meta.get (OOo_Tag ('meta', i, m)))
+        character-count '951'
+        image-count '0'
+        object-count '0'
+        page-count '3'
+        paragraph-count '113'
+        table-count '3'
+        word-count '162'
+        >>> o.close ()
+        >>> sio = StringIO ()
+        >>> o   = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
+        >>> tf  = ('testfiles/test.sxw', 'testfiles/rechng.sxw')
+        >>> t   = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Concatenate (*tf)
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     )
+        >>> t.transform (o)
+        >>> for i in meta_counts :
+        ...     print i, repr (t [':'.join (('Set_Attribute', i))])
+        character-count '1131'
+        image-count '0'
+        object-count '0'
+        page-count '3'
+        paragraph-count '168'
+        table-count '2'
+        word-count '160'
+        >>> o.close ()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("testout3.sxw", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> s = o.read ('styles.xml')
+        >>> for n in c.findall ('./*/*') :
+        ...     name = n.get (OOo_Tag ('style', 'name', m))
+        ...     if name :
+        ...         parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
+        ...         print '"%s", "%s"' % (name, parent)
+        "Tahoma1", "None"
+        "Bitstream Vera Sans", "None"
+        "Tahoma", "None"
+        "Nimbus Roman No9 L", "None"
+        "Courier New", "None"
+        "Arial Black", "None"
+        "New Century Schoolbook", "None"
+        "Helvetica", "None"
+        "Table1", "None"
+        "Table1.A", "None"
+        "Table1.A1", "None"
+        "Table1.E1", "None"
+        "Table1.A2", "None"
+        "Table1.E2", "None"
+        "P1", "None"
+        "fr1", "Frame"
+        "fr2", "None"
+        "fr3", "Frame"
+        "Sect1", "None"
+        "gr1", "None"
+        "P2", "Standard"
+        "Standard_Concat", "None"
+        "Concat_P1", "Concat_Frame contents"
+        "Concat_P2", "Concat_Frame contents"
+        "P3", "Concat_Frame contents"
+        "P4", "Concat_Frame contents"
+        "P5", "Concat_Standard"
+        "P6", "Concat_Standard"
+        "P7", "Concat_Frame contents"
+        "P8", "Concat_Frame contents"
+        "P9", "Concat_Frame contents"
+        "P10", "Concat_Frame contents"
+        "P11", "Concat_Frame contents"
+        "P12", "Concat_Frame contents"
+        "P13", "Concat_Frame contents"
+        "P15", "Concat_Standard"
+        "P16", "Concat_Standard"
+        "P17", "Concat_Standard"
+        "P18", "Concat_Standard"
+        "P19", "Concat_Standard"
+        "P20", "Concat_Standard"
+        "P21", "Concat_Standard"
+        "P22", "Concat_Standard"
+        "P23", "Concat_Standard"
+        "T1", "None"
+        "Concat_fr1", "Concat_Frame"
+        "Concat_fr2", "Concat_Frame"
+        "Concat_fr3", "Concat_Frame"
+        "fr4", "Concat_Frame"
+        "fr5", "Concat_Frame"
+        "fr6", "Concat_Frame"
+        "Concat_Sect1", "None"
+        "N0", "None"
+        "N2", "None"
+        "P15_Concat", "Concat_Standard"
+        >>> for n in s.findall ('./*/*') :
+        ...     name = n.get (OOo_Tag ('style', 'name', m))
+        ...     if name :
+        ...         parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
+        ...         print '"%s", "%s"' % (name, parent)
+        "Tahoma1", "None"
+        "Bitstream Vera Sans", "None"
+        "Tahoma", "None"
+        "Nimbus Roman No9 L", "None"
+        "Courier New", "None"
+        "Arial Black", "None"
+        "New Century Schoolbook", "None"
+        "Helvetica", "None"
+        "Standard", "None"
+        "Text body", "Standard"
+        "List", "Text body"
+        "Table Contents", "Text body"
+        "Table Heading", "Table Contents"
+        "Caption", "Standard"
+        "Frame contents", "Text body"
+        "Index", "Standard"
+        "Frame", "None"
+        "OLE", "None"
+        "Concat_Standard", "None"
+        "Concat_Text body", "Concat_Standard"
+        "Concat_List", "Concat_Text body"
+        "Concat_Caption", "Concat_Standard"
+        "Concat_Frame contents", "Concat_Text body"
+        "Concat_Index", "Concat_Standard"
+        "Horizontal Line", "Concat_Standard"
+        "Internet link", "None"
+        "Visited Internet Link", "None"
+        "Concat_Frame", "None"
+        "Concat_OLE", "None"
+        "pm1", "None"
+        "Concat_pm1", "None"
+        "Standard", "None"
+        "Concat_Standard", "None"
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
+        ...     name = n.get (OOo_Tag ('text', 'name', m))
+        ...     print name
+        salutation
+        firstname
+        lastname
+        street
+        country
+        postalcode
+        city
+        date
+        invoice.invoice_no
+        invoice.abo.aboprice.abotype.description
+        address.salutation
+        address.title
+        address.firstname
+        address.lastname
+        address.function
+        address.street
+        address.country
+        address.postalcode
+        address.city
+        invoice.subscriber.salutation
+        invoice.subscriber.title
+        invoice.subscriber.firstname
+        invoice.subscriber.lastname
+        invoice.subscriber.function
+        invoice.subscriber.street
+        invoice.subscriber.country
+        invoice.subscriber.postalcode
+        invoice.subscriber.city
+        invoice.period_start
+        invoice.period_end
+        invoice.currency.name
+        invoice.amount
+        invoice.subscriber.initial
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
+        ...     name = n.get (OOo_Tag ('text', 'name', m))
+        ...     print name
+        Illustration
+        Table
+        Text
+        Drawing
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+        ...     name = n.get (OOo_Tag ('text', 'style-name', m))
+        ...     if not name or name.startswith ('Concat') :
+        ...         print ">%s<" % name
+        >Concat_P1<
+        >Concat_P2<
+        >Concat_Frame contents<
+        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
+        ...     attrs = 'name', 'style-name', 'z-index'
+        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+        ...     print attrs
+        ['Frame1', 'fr1', '0', '1']
+        ['Frame2', 'fr1', '3', '2']
+        ['Frame3', 'Concat_fr1', '6', '3']
+        ['Frame4', 'Concat_fr2', '7', '3']
+        ['Frame5', 'Concat_fr3', '8', '3']
+        ['Frame6', 'Concat_fr1', '9', '3']
+        ['Frame7', 'fr4', '10', '3']
+        ['Frame8', 'fr4', '11', '3']
+        ['Frame9', 'fr4', '12', '3']
+        ['Frame10', 'fr4', '13', '3']
+        ['Frame11', 'fr4', '14', '3']
+        ['Frame12', 'fr4', '15', '3']
+        ['Frame13', 'fr5', '16', '3']
+        ['Frame14', 'fr4', '18', '3']
+        ['Frame15', 'fr4', '19', '3']
+        ['Frame16', 'fr4', '20', '3']
+        ['Frame17', 'fr6', '17', '3']
+        ['Frame18', 'fr4', '23', '3']
+        ['Frame19', 'fr3', '2', None]
+        ['Frame20', 'fr3', '5', None]
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+        ...     attrs = 'name', 'style-name'
+        ...     attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
+        ...     print attrs
+        ['Section1', 'Sect1']
+        ['Section2', 'Sect1']
+        ['Section3', 'Sect1']
+        ['Section4', 'Sect1']
+        ['Section5', 'Sect1']
+        ['Section6', 'Sect1']
+        ['Section7', 'Concat_Sect1']
+        ['Section8', 'Concat_Sect1']
+        ['Section9', 'Concat_Sect1']
+        ['Section10', 'Concat_Sect1']
+        ['Section11', 'Concat_Sect1']
+        ['Section12', 'Concat_Sect1']
+        ['Section13', 'Concat_Sect1']
+        ['Section14', 'Concat_Sect1']
+        ['Section15', 'Concat_Sect1']
+        ['Section16', 'Concat_Sect1']
+        ['Section17', 'Concat_Sect1']
+        ['Section18', 'Concat_Sect1']
+        ['Section19', 'Concat_Sect1']
+        ['Section20', 'Concat_Sect1']
+        ['Section21', 'Concat_Sect1']
+        ['Section22', 'Concat_Sect1']
+        ['Section23', 'Concat_Sect1']
+        ['Section24', 'Concat_Sect1']
+        ['Section25', 'Concat_Sect1']
+        ['Section26', 'Concat_Sect1']
+        ['Section27', 'Concat_Sect1']
+        ['Section28', 'Sect1']
+        ['Section29', 'Sect1']
+        ['Section30', 'Sect1']
+        ['Section31', 'Sect1']
+        ['Section32', 'Sect1']
+        ['Section33', 'Sect1']
+        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
+        ...     attrs = 'style-name', 'text-style-name', 'z-index'
+        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+        ...     print attrs
+        ['gr1', 'P1', '1', '1']
+        ['gr1', 'P1', '4', '2']
+        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
+        ...     attrs = 'style-name', 'text-style-name', 'z-index'
+        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+        ...     print attrs
+        ['gr1', 'P1', '24']
+        ['gr1', 'P1', '22']
+        ['gr1', 'P1', '21']
+        >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
+        ...     if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
+        ...         attrs = 'name', 'class', 'family'
+        ...         attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
+        ...         print attrs
+        ...         props = n.find ('./' + OOo_Tag ('style', 'properties', m))
+        ...         if props is not None and len (props) :
+        ...             props [0].tag
+        ['Concat_Standard', 'text', 'paragraph']
+        '{http://openoffice.org/2000/style}tab-stops'
+        ['Concat_Text body', 'text', 'paragraph']
+        ['Concat_List', 'list', 'paragraph']
+        ['Concat_Caption', 'extra', 'paragraph']
+        ['Concat_Frame contents', 'extra', 'paragraph']
+        ['Concat_Index', 'index', 'paragraph']
+        ['Concat_Frame', None, 'graphics']
+        ['Concat_OLE', None, 'graphics']
+        >>> for n in c.findall ('.//*') :
+        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+        ...     if zidx :
+        ...         print ':'.join(split_tag (n.tag)), zidx
+        draw:text-box 0
+        draw:rect 1
+        draw:text-box 3
+        draw:rect 4
+        draw:text-box 6
+        draw:text-box 7
+        draw:text-box 8
+        draw:text-box 9
+        draw:text-box 10
+        draw:text-box 11
+        draw:text-box 12
+        draw:text-box 13
+        draw:text-box 14
+        draw:text-box 15
+        draw:text-box 16
+        draw:text-box 18
+        draw:text-box 19
+        draw:text-box 20
+        draw:text-box 17
+        draw:text-box 23
+        draw:line 24
+        draw:text-box 2
+        draw:text-box 5
+        draw:line 22
+        draw:line 21
+        >>> sio = StringIO ()
+        >>> o   = OOoPy (infile = 'testfiles/carta.stw', outfile = sio)
+        >>> t = Transformer (
+        ...     o.mimetype
+        ...   , get_meta (o.mimetype)
+        ...   , Transforms.Addpagebreak_Style ()
+        ...   , Transforms.Mailmerge
+        ...     ( iterator = 
+        ...         ( dict
+        ...             ( Spett = "Spettabile"
+        ...             , contraente = "First person"
+        ...             , indirizzo = "street? 1"
+        ...             , tipo = "racc. A.C."
+        ...             , luogo = "Varese"
+        ...             , oggetto = "Saluti"
+        ...             )
+        ...         , dict
+        ...             ( Spett = "Egregio"
+        ...             , contraente = "Second Person"
+        ...             , indirizzo = "street? 2"
+        ...             , tipo = "Raccomandata"
+        ...             , luogo = "Gavirate"
+        ...             , oggetto = "Ossequi"
+        ...             )
+        ...         )
+        ...     )
+        ...   , renumber_all (o.mimetype)
+        ...   , set_meta (o.mimetype)
+        ...   , Transforms.Fix_OOo_Tag ()
+        ...   )
+        >>> t.transform(o)
+        >>> o.close()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("carta-out.stw", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+        >>> for node in body.findall (vset) :
+        ...     name = node.get (OOo_Tag ('text', 'name', m))
+        ...     print name, ':', node.text
+        Spett : Spettabile
+        contraente : First person
+        indirizzo : street? 1
+        Spett : Egregio
+        contraente : Second Person
+        indirizzo : street? 2
+        tipo : racc. A.C.
+        luogo : Varese
+        oggetto : Saluti
+        tipo : Raccomandata
+        luogo : Gavirate
+        oggetto : Ossequi
+        >>> sio = StringIO ()
+        >>> o   = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
+        >>> t   = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Addpagebreak_Style ()
+        ...     , Transforms.Mailmerge
+        ...       ( iterator = 
+        ...         ( dict (firstname = 'Erika', lastname = 'Nobody')
+        ...         , dict (firstname = 'Eric',  lastname = 'Wizard')
+        ...         , cb
+        ...         )
+        ...       )
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     )
+        >>> t.transform (o)
+        >>> for i in meta_counts :
+        ...     print i, t [':'.join (('Set_Attribute', i))]
+        character-count 951
+        image-count 0
+        object-count 0
+        page-count 3
+        paragraph-count 53
+        table-count 3
+        word-count 162
+        >>> name = t ['Addpagebreak_Style:stylename']
+        >>> name
+        'P2'
+        >>> o.close ()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("testout.odt", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', m))
+        >>> for n in body.findall ('.//*') :
+        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+        ...     if zidx :
+        ...         print ':'.join(split_tag (n.tag)), zidx
+        draw:frame 0
+        draw:rect 1
+        draw:frame 3
+        draw:rect 4
+        draw:frame 6
+        draw:rect 7
+        draw:frame 2
+        draw:frame 5
+        draw:frame 8
+        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+        ...     if n.get (OOo_Tag ('text', 'style-name', m)) == name :
+        ...         print n.tag
+        {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
+        {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
+        >>> for n in body.findall (vset) :
+        ...     if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
+        ...         name = n.get (OOo_Tag ('text', 'name', m))
+        ...         print name, ':', n.text
+        firstname : Erika
+        lastname : Nobody
+        firstname : Eric
+        lastname : Wizard
+        firstname : Hugo
+        lastname : Testman
+        firstname : Erika
+        lastname : Nobody
+        firstname : Eric
+        lastname : Wizard
+        firstname : Hugo
+        lastname : Testman
+        >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
+        ...     print n.get (OOo_Tag ('draw', 'name', m)),
+        ...     print n.get (OOo_Tag ('text', 'anchor-page-number', m))
+        Frame1 1
+        Frame2 2
+        Frame3 3
+        Frame4 None
+        Frame5 None
+        Frame6 None
+        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+        ...     print n.get (OOo_Tag ('text', 'name', m))
+        Section1
+        Section2
+        Section3
+        Section4
+        Section5
+        Section6
+        Section7
+        Section8
+        Section9
+        Section10
+        Section11
+        Section12
+        Section13
+        Section14
+        Section15
+        Section16
+        Section17
+        Section18
+        >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
+        ...     print n.get (OOo_Tag ('table', 'name', m))
+        Table1
+        Table2
+        Table3
+        >>> r = o.read ('meta.xml')
+        >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
+        >>> for i in meta_counts :
+        ...     print i, repr (meta.get (OOo_Tag ('meta', i, m)))
+        character-count '951'
+        image-count '0'
+        object-count '0'
+        page-count '3'
+        paragraph-count '53'
+        table-count '3'
+        word-count '162'
+        >>> o.close ()
+        >>> sio = StringIO ()
+        >>> o   = OOoPy (infile = 'testfiles/carta.odt', outfile = sio)
+        >>> t = Transformer (
+        ...     o.mimetype
+        ...   , get_meta (o.mimetype)
+        ...   , Transforms.Addpagebreak_Style ()
+        ...   , Transforms.Mailmerge
+        ...     ( iterator = 
+        ...         ( dict
+        ...             ( Spett = "Spettabile"
+        ...             , contraente = "First person"
+        ...             , indirizzo = "street? 1"
+        ...             , tipo = "racc. A.C."
+        ...             , luogo = "Varese"
+        ...             , oggetto = "Saluti"
+        ...             )
+        ...         , dict
+        ...             ( Spett = "Egregio"
+        ...             , contraente = "Second Person"
+        ...             , indirizzo = "street? 2"
+        ...             , tipo = "Raccomandata"
+        ...             , luogo = "Gavirate"
+        ...             , oggetto = "Ossequi"
+        ...             )
+        ...         )
+        ...     )
+        ...   , renumber_all (o.mimetype)
+        ...   , set_meta (o.mimetype)
+        ...   , Transforms.Fix_OOo_Tag ()
+        ...   )
+        >>> t.transform(o)
+        >>> o.close()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("carta-out.odt", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+        >>> for node in body.findall (vset) :
+        ...     name = node.get (OOo_Tag ('text', 'name', m))
+        ...     print name, ':', node.text
+        Spett : Spettabile
+        contraente : First person
+        indirizzo : street? 1
+        Spett : Egregio
+        contraente : Second Person
+        indirizzo : street? 2
+        tipo : racc. A.C.
+        luogo : Varese
+        oggetto : Saluti
+        tipo : Raccomandata
+        luogo : Gavirate
+        oggetto : Ossequi
+        >>> sio = StringIO ()
+        >>> o   = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
+        >>> tf  = ('testfiles/test.odt', 'testfiles/rechng.odt')
+        >>> t   = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Concatenate (*tf)
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     )
+        >>> t.transform (o)
+        >>> for i in meta_counts :
+        ...     print i, repr (t [':'.join (('Set_Attribute', i))])
+        character-count '1131'
+        image-count '0'
+        object-count '0'
+        page-count '3'
+        paragraph-count '80'
+        table-count '2'
+        word-count '159'
+        >>> o.close ()
+        >>> ov  = sio.getvalue ()
+        >>> f   = open ("testout3.odt", "wb")
+        >>> f.write (ov)
+        >>> f.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> s = o.read ('styles.xml')
+        >>> for n in c.findall ('./*/*') :
+        ...     name = n.get (OOo_Tag ('style', 'name', m))
+        ...     if name :
+        ...         parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
+        ...         print '"%s", "%s"' % (name, parent)
+        "Tahoma1", "None"
+        "Bitstream Vera Sans", "None"
+        "Tahoma", "None"
+        "Nimbus Roman No9 L", "None"
+        "Courier New", "None"
+        "Arial Black", "None"
+        "New Century Schoolbook", "None"
+        "Times New Roman", "None"
+        "Arial", "None"
+        "Helvetica", "None"
+        "Table1", "None"
+        "Table1.A", "None"
+        "Table1.A1", "None"
+        "Table1.E1", "None"
+        "Table1.A2", "None"
+        "Table1.E2", "None"
+        "P1", "None"
+        "fr1", "Frame"
+        "fr2", "Frame"
+        "Sect1", "None"
+        "gr1", "None"
+        "P2", "Standard"
+        "Standard_Concat", "None"
+        "Concat_P1", "Concat_Frame_20_contents"
+        "Concat_P2", "Concat_Frame_20_contents"
+        "P3", "Concat_Frame_20_contents"
+        "P4", "Concat_Standard"
+        "P5", "Concat_Standard"
+        "P6", "Concat_Frame_20_contents"
+        "P7", "Concat_Frame_20_contents"
+        "P8", "Concat_Frame_20_contents"
+        "P9", "Concat_Frame_20_contents"
+        "P10", "Concat_Frame_20_contents"
+        "P11", "Concat_Frame_20_contents"
+        "P12", "Concat_Frame_20_contents"
+        "P14", "Concat_Standard"
+        "P15", "Concat_Standard"
+        "P16", "Concat_Standard"
+        "P17", "Concat_Standard"
+        "P18", "Concat_Standard"
+        "P19", "Concat_Standard"
+        "P20", "Concat_Standard"
+        "P21", "Concat_Standard"
+        "P22", "Concat_Standard"
+        "P23", "Concat_Standard"
+        "Concat_fr1", "Frame"
+        "Concat_fr2", "Frame"
+        "fr3", "Frame"
+        "fr4", "Frame"
+        "fr5", "Frame"
+        "fr6", "Frame"
+        "Concat_gr1", "None"
+        "N0", "None"
+        "N2", "None"
+        "P14_Concat", "Concat_Standard"
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
+        ...     name = n.get (OOo_Tag ('text', 'name', m))
+        ...     print name
+        salutation
+        firstname
+        lastname
+        street
+        country
+        postalcode
+        city
+        date
+        invoice.invoice_no
+        invoice.abo.aboprice.abotype.description
+        address.salutation
+        address.title
+        address.firstname
+        address.lastname
+        address.function
+        address.street
+        address.country
+        address.postalcode
+        address.city
+        invoice.subscriber.salutation
+        invoice.subscriber.title
+        invoice.subscriber.firstname
+        invoice.subscriber.lastname
+        invoice.subscriber.function
+        invoice.subscriber.street
+        invoice.subscriber.country
+        invoice.subscriber.postalcode
+        invoice.subscriber.city
+        invoice.period_start
+        invoice.period_end
+        invoice.currency.name
+        invoice.amount
+        invoice.subscriber.initial
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
+        ...     name = n.get (OOo_Tag ('text', 'name', m))
+        ...     print name
+        Illustration
+        Table
+        Text
+        Drawing
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+        ...     name = n.get (OOo_Tag ('text', 'style-name', m))
+        ...     if not name or name.startswith ('Concat') :
+        ...         print ':'.join(split_tag (n.tag)), ">%s<" % name
+        text:p >None<
+        text:p >None<
+        text:p >Concat_P1<
+        text:p >Concat_P1<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_P2<
+        text:p >Concat_Frame_20_contents<
+        text:p >None<
+        text:p >None<
+        text:p >None<
+        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
+        ...     attrs = 'name', 'style-name', 'z-index'
+        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+        ...     print attrs
+        ['Frame1', 'fr1', '0', '1']
+        ['Frame2', 'fr1', '3', '2']
+        ['Frame3', 'Concat_fr1', '6', '3']
+        ['Frame4', 'Concat_fr2', '7', '3']
+        ['Frame5', 'fr3', '8', '3']
+        ['Frame6', 'Concat_fr1', '9', '3']
+        ['Frame7', 'fr4', '10', '3']
+        ['Frame8', 'fr4', '11', '3']
+        ['Frame9', 'fr4', '12', '3']
+        ['Frame10', 'fr4', '13', '3']
+        ['Frame11', 'fr4', '14', '3']
+        ['Frame12', 'fr4', '15', '3']
+        ['Frame13', 'fr5', '16', '3']
+        ['Frame14', 'fr4', '18', '3']
+        ['Frame15', 'fr4', '19', '3']
+        ['Frame16', 'fr4', '20', '3']
+        ['Frame17', 'fr6', '17', '3']
+        ['Frame18', 'fr4', '23', '3']
+        ['Frame19', 'fr2', '2', None]
+        ['Frame20', 'fr2', '5', None]
+        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+        ...     attrs = 'name', 'style-name'
+        ...     attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
+        ...     print attrs
+        ['Section1', 'Sect1']
+        ['Section2', 'Sect1']
+        ['Section3', 'Sect1']
+        ['Section4', 'Sect1']
+        ['Section5', 'Sect1']
+        ['Section6', 'Sect1']
+        ['Section7', 'Sect1']
+        ['Section8', 'Sect1']
+        ['Section9', 'Sect1']
+        ['Section10', 'Sect1']
+        ['Section11', 'Sect1']
+        ['Section12', 'Sect1']
+        ['Section13', 'Sect1']
+        ['Section14', 'Sect1']
+        ['Section15', 'Sect1']
+        ['Section16', 'Sect1']
+        ['Section17', 'Sect1']
+        ['Section18', 'Sect1']
+        ['Section19', 'Sect1']
+        ['Section20', 'Sect1']
+        ['Section21', 'Sect1']
+        ['Section22', 'Sect1']
+        ['Section23', 'Sect1']
+        ['Section24', 'Sect1']
+        ['Section25', 'Sect1']
+        ['Section26', 'Sect1']
+        ['Section27', 'Sect1']
+        ['Section28', 'Sect1']
+        ['Section29', 'Sect1']
+        ['Section30', 'Sect1']
+        ['Section31', 'Sect1']
+        ['Section32', 'Sect1']
+        ['Section33', 'Sect1']
+        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
+        ...     attrs = 'style-name', 'text-style-name', 'z-index'
+        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+        ...     print attrs
+        ['gr1', 'P1', '1', '1']
+        ['gr1', 'P1', '4', '2']
+        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
+        ...     attrs = 'style-name', 'text-style-name', 'z-index'
+        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+        ...     print attrs
+        ['Concat_gr1', 'P1', '24']
+        ['Concat_gr1', 'P1', '22']
+        ['Concat_gr1', 'P1', '21']
+        >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
+        ...     if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
+        ...         attrs = 'name', 'display-name', 'class', 'family'
+        ...         attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
+        ...         print attrs
+        ...         props = n.find ('./' + OOo_Tag ('style', 'properties', m))
+        ...         if props is not None and len (props) :
+        ...             props [0].tag
+        ['Concat_Standard', None, 'text', 'paragraph']
+        ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph']
+        ['Concat_List', None, 'list', 'paragraph']
+        ['Concat_Caption', None, 'extra', 'paragraph']
+        ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph']
+        ['Concat_Index', None, 'index', 'paragraph']
+        >>> for n in c.findall ('.//*') :
+        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+        ...     if zidx :
+        ...         print ':'.join(split_tag (n.tag)), zidx
+        draw:frame 0
+        draw:rect 1
+        draw:frame 3
+        draw:rect 4
+        draw:frame 6
+        draw:frame 7
+        draw:frame 8
+        draw:frame 9
+        draw:frame 10
+        draw:frame 11
+        draw:frame 12
+        draw:frame 13
+        draw:frame 14
+        draw:frame 15
+        draw:frame 16
+        draw:frame 18
+        draw:frame 19
+        draw:frame 20
+        draw:frame 17
+        draw:frame 23
+        draw:line 24
+        draw:frame 2
+        draw:frame 5
+        draw:line 22
+        draw:line 21
+        >>> from os import system
+        >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt '
+        ...         '-o testout.odt '
+        ...         'salutation=Frau firstname=Erika lastname=Musterfrau '
+        ...         'country=D postalcode=00815 city=Niemandsdorf '
+        ...         'street="Beispielstrasse 42"')
+        0
+        >>> o = OOoPy (infile = 'testout.odt')
+        >>> c = o.read ('content.xml')
+        >>> m = o.mimetype
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+        >>> for node in body.findall (vset) :
+        ...     name = node.get (OOo_Tag ('text', 'name', m))
+        ...     print name, ':', node.text
+        salutation : Frau
+        firstname : Erika
+        lastname : Musterfrau
+        street : Beispielstrasse 42
+        country : D
+        postalcode : 00815
+        city : Niemandsdorf
+        salutation : Frau
+        firstname : Erika
+        lastname : Musterfrau
+        street : Beispielstrasse 42
+        country : D
+        postalcode : 00815
+        city : Niemandsdorf
+        >>> o.close ()
+        >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' "
+        ...         "testfiles/carta.odt testfiles/x.csv")
+        0
+        >>> o = OOoPy (infile = 'testout.odt')
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+        >>> for node in body.findall (vset) :
+        ...     name = node.get (OOo_Tag ('text', 'name', m))
+        ...     print name, ':', node.text
+        Spett : Spettabile
+        contraente : First person
+        indirizzo : street? 1
+        Spett : Egregio
+        contraente : Second Person
+        indirizzo : street? 2
+        tipo : racc. A.C.
+        luogo : Varese
+        oggetto : Saluti
+        tipo : Raccomandata
+        luogo : Gavirate
+        oggetto : Ossequi
+        >>> o.close ()
+        >>> infile = 'testfiles/testenum.odt'
+        >>> o   = OOoPy (infile = infile, outfile = 'xyzzy.odt')
+        >>> t   = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Addpagebreak_Style ()
+        ...     , Transforms.Mailmerge
+        ...       ( iterator = 
+        ...         ( dict (firstname = 'Erika', lastname = 'Nobody')
+        ...         , dict (firstname = 'Eric',  lastname = 'Wizard')
+        ...         , cb
+        ...         )
+        ...       )
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     )
+        >>> t.transform (o)
+        >>> o.close ()
+        >>> o = OOoPy (infile = 'xyzzy.odt')
+        >>> m = o.mimetype
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> textlist = './/' + OOo_Tag ('text', 'list', m)
+        >>> for node in body.findall (textlist) :
+        ...     id = node.get (OOo_Tag ('xml', 'id', m))
+        ...     print 'xml:id', ':', id
+        xml:id : list1
+        xml:id : list2
+        xml:id : list3
+        >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt')
+        >>> m = o.mimetype
+        >>> t = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Concatenate ('testfiles/page2.odt')
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     , Transforms.Manifest_Append ()
+        ...     )
+        >>> t.transform (o)
+        >>> o.close ()
+        >>> o = OOoPy (infile = 'xyzzy.odt')
+        >>> c = o.read ('META-INF/manifest.xml')
+        >>> for node in c.getroot () :
+        ...     fe = node.get (OOo_Tag ('manifest', 'full-path', m))
+        ...     print fe
+        /
+        Pictures/10000000000000C80000007941B1A419.jpg
+        Pictures/10000000000000DC000000B02E191635.jpg
+        Pictures/10000000000000DC000000A337377AAA.jpg
+        meta.xml
+        settings.xml
+        content.xml
+        Thumbnails/thumbnail.png
+        layout-cache
+        manifest.rdf
+        Configurations2/accelerator/current.xml
+        Configurations2/
+        styles.xml
+        >>> for f in o.izip.infolist () :
+        ...     print f.filename
+        mimetype
+        settings.xml
+        META-INF/manifest.xml
+        content.xml
+        meta.xml
+        styles.xml
+        Pictures/10000000000000C80000007941B1A419.jpg
+        Pictures/10000000000000DC000000B02E191635.jpg
+        Pictures/10000000000000DC000000A337377AAA.jpg
+        Thumbnails/thumbnail.png
+        layout-cache
+        manifest.rdf
+        Configurations2/images/Bitmaps/
+        Configurations2/accelerator/current.xml
+        >>> sio = StringIO ()
+        >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio)
+        >>> m = o.mimetype
+        >>> t = Transformer (
+        ...       o.mimetype
+        ...     , get_meta (o.mimetype)
+        ...     , Transforms.Concatenate ('testfiles/tbl_second.odt')
+        ...     , renumber_all (o.mimetype)
+        ...     , set_meta (o.mimetype)
+        ...     , Transforms.Fix_OOo_Tag ()
+        ...     , Transforms.Manifest_Append ()
+        ...     )
+        >>> t.transform (o)
+        >>> o.close ()
+        >>> o = OOoPy (infile = sio)
+        >>> c = o.read ('content.xml')
+        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+        >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m)
+        >>> for table in body.findall (tbls) :
+        ...     name = table.get (OOo_Tag ('table', 'style-name', mimetype = m))
+        ...     if name :
+        ...         print name
+        ...     for t in table.findall ('.//') :
+        ...         name = t.get (OOo_Tag ('table', 'style-name', mimetype = m))
+        ...         if name :
+        ...             print name
+        Tabella1
+        Tabella1.A
+        Tabella1.A1
+        Tabella1.B1
+        Tabella1.A2
+        Tabella1.B2
+        Tabella1
+        Tabella1.A
+        Tabella1.A1
+        Tabella1.B1
+        Tabella1.A2
+        Tabella1.B2
+    """
+
+    def __init__ (self, mimetype, *tf) :
+        assert (mimetype in mimetypes)
+        self.mimetype     = mimetype
+        self.transforms   = {}
+        for t in tf :
+            self.insert (t)
+        self.dictionary   = {}
+        self.has_key      = self.dictionary.has_key
+        self.__contains__ = self.has_key
+        # 2-tuples of filename, content
+        self.appendfiles  = []
+    # end def __init__
+
+    def insert (self, transform) :
+        """Insert a new transform"""
+        t = transform
+        if t.prio not in self.transforms :
+            self.transforms [t.prio] = []
+        self.transforms [t.prio].append (t)
+        t.register (self)
+    # end def append
+
+    def transform (self, ooopy) :
+        """
+            Apply all the transforms in priority order.
+            Priority order is global over all transforms.
+        """
+        self.trees = {}
+        for f in files :
+            self.trees [f] = ooopy.read (f)
+        #self.dictionary = {} # clear dict when transforming another ooopy
+        prios = self.transforms.keys ()
+        prios.sort ()
+        for p in prios :
+            for t in self.transforms [p] :
+                t.apply_all (self.trees)
+        for e in self.trees.itervalues () :
+            e.write ()
+        for fname, fcontent in self.appendfiles :
+            e.ooopy.append_file (fname, fcontent)
+    # end def transform
+
+    def __getitem__ (self, key) :
+        return self.dictionary [key]
+    # end def __getitem__
+
+    def __setitem__ (self, key, value) :
+        self.dictionary [key] = value
+    # end def __setitem__
+# end class Transformer
diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py
new file mode 100644
index 000000000..50a6c0db8
--- /dev/null
+++ b/ooopy/Transforms.py
@@ -0,0 +1,1237 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
+# Reichergasse 131, A-3411 Weidling.
+# Web: http://www.runtux.com Email: office@runtux.com
+# All rights reserved
+# ****************************************************************************
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# ****************************************************************************
+
+from __future__              import absolute_import
+
+import time
+import re
+try :
+    from xml.etree.ElementTree   import dump, SubElement, Element, tostring
+except ImportError :
+    from elementtree.ElementTree import dump, SubElement, Element, tostring
+from copy                    import deepcopy
+from ooopy.OOoPy             import OOoPy, autosuper
+from ooopy.Transformer       import files, split_tag, OOo_Tag, Transform
+from ooopy.Transformer       import mimetypes, namespace_by_name
+from ooopy.Version           import VERSION
+
+# Names of the document statistics counters in meta.xml. Mailmerge and
+# Concatenate below iterate over this tuple when they handle the counts
+# of the document they generate.
+meta_counts = \
+    ( 'character-count', 'image-count', 'object-count', 'page-count'
+    , 'paragraph-count', 'table-count', 'word-count'
+    )
+
+class Access_Attribute (autosuper) :
+    """ For performance reasons we do not specify a separate transform
+        for each attribute-read or -change operation. Instead we define
+        all the attribute accesses we want to perform as objects that
+        follow the attribute access api and apply them all using an
+        Attribute_Access in one go.
+    """
+
+    def __init__ (self, key = None, prefix = None, ** kw) :
+        """ key is the name under which a derived class stores or looks
+            up its value in the transformer. A non-empty key is
+            qualified as "prefix:key"; prefix defaults to the name of
+            the concrete class. Without a key, self.key stays as given.
+        """
+        self.__super.__init__ (key = key, prefix = prefix, **kw)
+        self.key = key
+        if key :
+            if not prefix :
+                prefix   = self.__class__.__name__
+            self.key = ':'.join ((prefix, key))
+    # end def __init__
+
+    def register (self, transformer) :
+        """ Remember the transformer this object stores values in. """
+        self.transformer = transformer
+    # end def register
+
+    def use_value (self, oldval = None) :
+        """ Can change the given value by returning the new value. If
+            returning None or oldval the attribute stays unchanged.
+            Raises NotImplementedError unless overridden.
+        """
+        # Call syntax instead of "raise X, msg": the latter is a syntax
+        # error in Python 3 and would make the whole module unimportable.
+        raise NotImplementedError \
+            ("use_value must be defined in derived class")
+    # end def use_value
+
+# end class Access_Attribute
+
+class Get_Attribute (Access_Attribute) :
+    """ An example of not changing an attribute but only storing the
+        value in the transformer
+    """
+
+    def __init__ (self, tag, attr, key, transform = None, ** kw) :
+        # tag and attr name the element tag and the attribute that
+        # Attribute_Access.apply looks up for this object; the value is
+        # stored under the key built by Access_Attribute.__init__
+        # ("<prefix>:<key>", prefix defaulting to the class name).
+        self.__super.__init__ (key = key, **kw)
+        self.tag        = tag
+        self.attribute  = attr
+        # transform is only stored here, nothing in this class reads it
+        self.transform  = transform
+    # end def __init__
+
+    def use_value (self, oldval = None) :
+        # Remember the value seen (None if the attribute is missing on
+        # the element) and return None so the attribute stays unchanged.
+        self.transformer [self.key] = oldval
+        return None
+    # end def use_value
+
+# end class Get_Attribute
+
+class Get_Max (Access_Attribute) :
+    """ Get the maximum (integer) value of an attribute. The result is
+        kept in the transformer under the key of this object and is -1
+        if no element carried the attribute.
+    """
+
+    def __init__ (self, tag, attr, key, transform = None, ** kw) :
+        """ tag and attr name the element tag and the attribute to
+            inspect, key is the name under which the maximum is stored.
+            transform is only stored, this class does not read it.
+        """
+        self.__super.__init__ (key = key, **kw)
+        self.tag        = tag
+        self.attribute  = attr
+        self.transform  = transform
+    # end def __init__
+
+    def register (self, transformer) :
+        """ Start out with -1 as the maximum seen so far. """
+        self.__super.register (transformer)
+        self.transformer [self.key] = -1
+    # end def register
+
+    def use_value (self, oldval = None) :
+        """ Record oldval if it exceeds the maximum seen so far. Never
+            changes the attribute (always returns None). Raises
+            ValueError if the attribute value is not an integer.
+        """
+        if oldval is None :
+            # element does not have the attribute, nothing to compare
+            return None
+        # Attribute values arrive as strings. Compare numerically: as
+        # strings '9' would rank above '10', and Python 3 refuses to
+        # compare the initial -1 with a string (or None) at all.
+        value = int (oldval)
+        if  self.transformer [self.key] < value :
+            self.transformer [self.key] = value
+        return None
+    # end def use_value
+
+# end class Get_Max
+
+class Renumber (Access_Attribute) :
+    """ Renumbering of names that have to be unique in a document.
+        Several OOo tags (tables, frames, sections, ...) carry a 'name'
+        attribute that must not occur twice in the whole document; OOo
+        achieves this by appending a running number to a base name
+        ('Section1', 'Section2', ...). Operations like a mailmerge,
+        which append the same document repeatedly, destroy that
+        uniqueness -- a Renumber hands out fresh names afterwards.
+
+        With force set, the new name is also assigned to elements that
+        did not have the attribute before.
+    """
+
+    def __init__ \
+        (self, tag, name = None, attr = None, start = 1, force = False) :
+        """ tag selects the elements, name is the base name (default:
+            local tag name with the first letter in upper case), attr
+            the attribute to set and start the first number to use.
+        """
+        self.__super.__init__ ()
+        namespace, local = split_tag (tag)
+        if not name :
+            name = local [0].upper () + local [1:]
+        self.tag_ns      = namespace
+        self.tag         = tag
+        self.name        = name
+        self.num         = start
+        self.force       = force
+        self.attribute   = attr
+    # end def __init__
+
+    def register (self, transformer) :
+        """ Default the attribute to 'name' in the namespace of the
+            tag; this needs the mimetype of the transformer.
+        """
+        self.__super.register (transformer)
+        self.attribute = self.attribute or \
+            OOo_Tag (self.tag_ns, 'name', transformer.mimetype)
+    # end def register
+
+    def use_value (self, oldval = None) :
+        """ Return the next free name, or None (no change) for an
+            element without the attribute unless force is set.
+        """
+        if self.force or oldval is not None :
+            fresh = "%s%d" % (self.name, self.num)
+            self.num += 1
+            return fresh
+        return None
+    # end def use_value
+
+# end class Renumber
+
+class Set_Attribute (Access_Attribute) :
+    """
+        Assign new values to attributes, much like the renumbering
+        transform does. The new value is either looked up in the
+        Transformer dict via a key or given directly as value.
+    """
+
+    def __init__ \
+        ( self
+        , tag
+        , attr
+        , key       = None
+        , transform = None
+        , value     = None
+        , oldvalue  = None
+        , ** kw
+        ) :
+        """ tag and attr select element and attribute. key names the
+            entry in the transformer that takes precedence over value.
+            With oldvalue given, only attributes currently holding
+            that value are changed.
+        """
+        self.__super.__init__ (key = key, ** kw)
+        self.tag        = tag
+        self.attribute  = attr
+        self.transform  = transform
+        self.value      = value
+        self.oldvalue   = oldvalue
+    # end def __init__
+
+    def use_value (self, oldval) :
+        """ Return the replacement for oldval or None for no change. """
+        keep = oldval is None or (self.oldvalue and oldval != self.oldvalue)
+        if keep :
+            return None
+        if not self.key or not self.transformer.has_key (self.key) :
+            return self.value
+        return str (self.transformer [self.key])
+    # end def use_value
+
+# end class Set_Attribute
+
+def set_attributes_from_dict (tag, attr, d) :
+    """ Convenience function: iterate over a dict and return a list of
+        Set_Attribute objects specifying replacement of attributes in
+        the dictionary: for every item of d, attribute attr of
+        elements with the given tag is set to the item's value where
+        it currently holds the item's key.
+    """
+    # items () instead of iteritems (): the latter is Python 2 only
+    return [Set_Attribute (tag, attr, oldvalue = k, value = v)
+            for k,v in d.items ()
+           ]
+# end def set_attributes_from_dict
+
+class Reanchor (Access_Attribute) :
+    """
+        Similar to the renumbering transform in that we are assigning
+        new values to some attributes. But in this case we want to
+        relocate objects that are anchored to a page.
+    """
+
+    def __init__ (self, offset, tag, attr = None) :
+        # offset (converted to int) is added to the attribute value of
+        # the elements selected by tag; without attr the attribute
+        # defaults to text:anchor-page-number, see register.
+        self.__super.__init__ ()
+        self.offset     = int (offset)
+        self.tag        = tag
+        self.attribute  = attr
+    # end def __init__
+
+    def register (self, transformer) :
+        # The default attribute name is built with the mimetype of the
+        # transformer, so it can only be computed once that is known.
+        self.__super.register (transformer)
+        if not self.attribute :
+            self.attribute = \
+                OOo_Tag ('text', 'anchor-page-number', transformer.mimetype)
+    # end def register
+
+    def use_value (self, oldval) :
+        # Elements without the attribute are left alone; otherwise the
+        # new value is the old one shifted by offset, formatted as a
+        # decimal string.
+        if oldval is None :
+            return oldval
+        return "%d" % (int (oldval) + self.offset)
+    # end def use_value
+
+# end class Reanchor
+
+#
+# general transforms applicable to several .xml files
+#
+
+class Attribute_Access (Transform) :
+    """
+        Read or change attributes in an OOo document, e.g. for
+        renumbering or for moving anchored objects. The transform is
+        given a list of attribute changer objects which follow a very
+        simple API:
+
+        - The method "use_value" is called with the current value of
+          the attribute and returns the new value (None: no change)
+        - The attribute "tag" names the tag of the elements to look
+          at (None: all elements)
+        - The attribute "attribute" names the attribute to read or
+          change.
+        Renumber and Reanchor above are examples of attribute changers.
+    """
+    filename = 'content.xml'
+    prio     = 110
+
+    def __init__ (self, attrchangers, filename = None, ** kw) :
+        """ attrchangers is the sequence of attribute changer objects,
+            filename optionally overrides the file to work on.
+        """
+        self.filename     = filename or self.filename
+        # changers by tag, several changers per tag are allowed; the
+        # entry for None holds the changers applied to every element
+        self.attrchangers = {None : []}
+        self.changers     = attrchangers
+        self.__super.__init__ (** kw)
+    # end def __init__
+
+    def register (self, transformer) :
+        """ Sort the changers by tag and register the transformer
+            with each of them.
+        """
+        self.__super.register (transformer)
+        for changer in self.changers :
+            self.attrchangers.setdefault (changer.tag, []).append (changer)
+            changer.register (transformer)
+    # end def register
+
+    def apply (self, root) :
+        """ Visit root and all its descendants and let the matching
+            changers inspect or replace their attribute.
+        """
+        for_all = self.attrchangers [None]
+        nodes   = [root]
+        nodes.extend (root.findall ('.//*'))
+        for node in nodes :
+            for_tag = self.attrchangers.get (node.tag, [])
+            for changer in for_all + for_tag :
+                new = changer.use_value (node.get (changer.attribute))
+                if new is None :
+                    continue
+                node.set (changer.attribute, new)
+    # end def apply
+
+# end class Attribute_Access
+
+#
+# META-INF/manifest.xml transforms
+#
+
+class Manifest_Append (Transform) :
+    """
+        The Transformer stores a list of files (and contents) to append.
+        These files are added to the archive later but need to be
+        present in the manifest, too.
+        The file list in the Transformer currently doesn't store a media
+        type (which is one of the parameters in the manifest), the
+        current application of this transform is to add pictures --
+        these don't have a media type in the files that were checked.
+        So for now we add an empty media type.
+    """
+    filename = 'META-INF/manifest.xml'
+    prio     = 1000
+
+    def apply (self, root) :
+        # Find the index n of the entry for '/'. The else branch of the
+        # loop fires an assertion when the manifest has no such entry.
+        # NOTE: all checks here are asserts and are skipped under
+        # python -O.
+        for n, node in enumerate (root) :
+            assert node.tag == self.oootag ('manifest', 'file-entry')
+            path = node.get (self.oootag ('manifest', 'full-path'))
+            assert (path)
+            if path == '/' :
+                break
+        else :
+            assert (not "The manifest needs a '/' entry")
+        # Insert one file-entry per appended file right behind the '/'
+        # entry; n is advanced so the entries keep the order of
+        # appendfiles.
+        for f, _ in self.transformer.appendfiles :
+            e = Element (self.oootag ('manifest', 'file-entry'))
+            e.attrib [self.oootag ('manifest', 'full-path')]  = f
+            e.attrib [self.oootag ('manifest', 'media-type')] = ''
+            root.insert (n + 1, e)
+            n += 1
+    # end def apply
+
+# end class Manifest_Append
+
+#
+# meta.xml transforms
+#
+
+class Editinfo (Transform) :
+    """
+        This is an example of modifying OOo meta info (edit information,
+        author, etc). We set some of the items (program that generated
+        the OOo file, modification time, number of edit cycles and overall
+        edit duration).  It's easy to subclass this transform and replace
+        the "replace" variable (pun intended) in the derived class.
+    """
+    filename = 'meta.xml'
+    prio     = 20
+    # NOTE: this dict is built once, when the class body is executed,
+    # so the date is the time of that execution and not of apply.
+    repl     = \
+        { ('meta', 'generator')        : 'OOoPy field replacement'
+        , ('dc',   'date')             : time.strftime ('%Y-%m-%dT%H:%M:%S')
+        , ('meta', 'editing-cycles')   : '0'
+        , ('meta', 'editing-duration') : 'PT0M0S'
+        }
+    replace  = {}
+    # iterate over all mimetypes, so this works for all known mimetypes
+    # of OOo documents.
+    for m in mimetypes :
+        # items () instead of iteritems (): the latter is Python 2 only
+        for params, value in repl.items () :
+            replace [OOo_Tag (mimetype = m, *params)] = value
+
+    def apply (self, root) :
+        """ Set the text of every child of office:meta whose tag is a
+            key of "replace" to the corresponding value.
+        """
+        for node in root.findall (self.oootag ('office', 'meta') + '/*') :
+            # "in" instead of has_key, which Python 3 dicts lack
+            if node.tag in self.replace :
+                node.text = self.replace [node.tag]
+    # end def apply
+# end class Editinfo
+
+#
+# settings.xml transforms
+#
+
+class Autoupdate (Transform) :
+    """
+        This is an example of modifying OOo settings. We set some of the
+        AutoUpdate configuration items in OOo to true. We also specify
+        that links should be updated when reading.
+
+        This was originally intended to make OOo correctly display fields
+        if they were changed with the Field_Replace below
+        (similar to pressing F9 after loading the generated document in
+        OOo). In particular I usually make spaces depend on field
+        contents so that I don't have spurious spaces if a field is
+        empty. Now it would be nice if OOo displayed the spaces correctly
+        after loading a document (It does update the fields before
+        printing, so this is only a cosmetic problem :-). This apparently
+        does not work. If anybody knows how to achieve this, please let
+        me know: mailto:rsc@runtux.com
+    """
+    filename = 'settings.xml'
+    prio     = 20
+
+    def apply (self, root) :
+        """ Switch on link, field and chart updating in the item set
+            named 'configuration-settings'. Does nothing if the
+            document has no such item set.
+        """
+        item_sets = root.findall \
+            ( self.oootag ('office', 'settings')
+            + '/'
+            + self.oootag ('config', 'config-item-set')
+            )
+        for config in item_sets :
+            name = config.get (self.oootag ('config', 'name'))
+            if name == 'configuration-settings' :
+                break
+        else :
+            # No configuration-settings found. Without this guard we
+            # would either fail on None (no item set at all) or modify
+            # the last, unrelated item set.
+            return
+        for node in config.findall (self.oootag ('config', 'config-item')) :
+            name = node.get (self.oootag ('config', 'name'))
+            if name == 'LinkUpdateMode' :  # update when reading
+                node.text = '2'
+            # update fields when reading
+            if name == 'FieldAutoUpdate' or name == 'ChartAutoUpdate' :
+                node.text = 'true'
+    # end def apply
+# end class Autoupdate
+
+#
+# content.xml transforms
+#
+
+class Field_Replace (Transform) :
+    """
+        Takes a dict of replacement key-value pairs. The key is the name
+        of a variable in OOo. Additional replacement key-value pairs may
+        be specified in ** kw. Alternatively a callback mechanism for
+        variable name lookups is provided. The callback function is
+        given the name of a variable in OOo and is expected to return
+        the replacement value or None if the variable value should not
+        be replaced.
+    """
+    filename = 'content.xml'
+    prio     = 100
+
+    def __init__ (self, prio = None, replace = None, ** kw) :
+        """ replace is something behaving like a dict or something
+            callable for name lookups
+        """
+        self.__super.__init__ (prio, ** kw)
+        self.replace  = replace or {}
+        # the keyword arguments double as additional replacements
+        self.dict     = kw
+    # end def __init__
+
+    def apply (self, root) :
+        """ Replace the text of all variable-set, variable-get and
+            variable-input fields in the text body for which a
+            replacement is available.
+        """
+        tbody = self.find_tbody (root)
+        for tag in 'variable-set', 'variable-get', 'variable-input' :
+            for node in tbody.findall ('.//' + self.oootag ('text', tag)) :
+                attr = 'name'
+                # NOTE(review): 'text-input' is not among the tags
+                # iterated above, so this branch is never taken. It
+                # looks like text-input fields were meant to be looked
+                # up by their description -- confirm before adding the
+                # tag to the loop.
+                if tag == 'text-input' :
+                    attr = 'description'
+                name = node.get (self.oootag ('text', attr))
+                if callable (self.replace) :
+                    replace = self.replace (name)
+                    # Only None means "do not replace" (see the class
+                    # docstring). An empty string is a valid value and
+                    # is applied just like in the dict case below.
+                    if replace is not None :
+                        node.text = replace
+                elif name in self.replace :
+                    node.text = self.replace [name]
+                elif name in self.dict :
+                    node.text = self.dict    [name]
+    # end def apply
+# end class Field_Replace
+
+class Addpagebreak_Style (Transform) :
+    """
+        Adds an ad-hoc paragraph style with a page break to the
+        content part of an OOo document. With such a style in place, a
+        page break is simply an empty paragraph using that style.
+
+        Paragraph styles are named P<num> by convention. We look at
+        all automatic styles, take the highest number found and use
+        the next one for the style we insert. The name of the new
+        style is stored in the transformer under the key
+        class_name:stylename where class_name is our own class name.
+    """
+    filename = 'content.xml'
+    prio     = 30
+    para     = re.compile (r'P([0-9]+)')
+
+    def apply (self, root) :
+        """ Insert the page break style into office:automatic-styles
+            of root and publish its name.
+        """
+        name_attr = self.oootag ('style', 'name')
+        style_tag = self.oootag ('style', 'style')
+        styles    = root.find (self.oootag ('office', 'automatic-styles'))
+        # numbers of the P<num> styles present, 0 if there is none
+        numbers   = [0]
+        for style in styles.findall ('./' + style_tag) :
+            found = self.para.match (style.get (name_attr, ''))
+            if found :
+                numbers.append (int (found.group (1)))
+        stylename  = 'P%d' % (max (numbers) + 1)
+        attributes = \
+            { name_attr                                  : stylename
+            , self.oootag ('style', 'family')            : 'paragraph'
+            , self.oootag ('style', 'parent-style-name') : 'Standard'
+            }
+        pagebreak  = SubElement (styles, style_tag, attributes)
+        SubElement \
+            ( pagebreak
+            , self.properties_tag
+            , { self.oootag ('fo', 'break-after') : 'page' }
+            )
+        self.set ('stylename', stylename)
+    # end def apply
+# end class Addpagebreak_Style
+
+class Addpagebreak (Transform) :
+    """
+        This transformation adds a page break to the last page of the OOo
+        text. This is needed, e.g., when doing mail-merge: We append a
+        page break to the tbody and then append the next page. This
+        transform needs the name of the paragraph style specifying the
+        page break style. Default is to use
+        'Addpagebreak_Style:stylename' as the key for
+        retrieving the page style. Alternatively the page style or the
+        page style key can be specified in the constructor.
+    """
+    filename = 'content.xml'
+    prio     = 50
+
+    def __init__ (self, stylename = None, stylekey = None, ** kw) :
+        # stylename, if given, is used directly; otherwise stylekey
+        # (default 'Addpagebreak_Style:stylename') is looked up in the
+        # transformer when the transform is applied.
+        self.__super.__init__ (** kw)
+        self.stylename = stylename
+        self.stylekey  = stylekey or 'Addpagebreak_Style:stylename'
+    # end def __init__
+
+    def apply (self, root) :
+        """append to tbody e.g., <text:p text:style-name="P4"/>"""
+        tbody     = self.find_tbody (root)
+        # the lookup raises KeyError from the transformer if neither a
+        # stylename was given nor the key has been set
+        stylename = self.stylename or self.transformer [self.stylekey]
+        SubElement \
+            ( tbody
+            , self.oootag ('text', 'p')
+            , { self.oootag ('text', 'style-name') : stylename }
+            )
+    # end def apply
+# end class Addpagebreak
+
+class Fix_OOo_Tag (Transform) :
+    """
+        OOo writer conditions are attributes where the *value* is
+        prefixed by an XML namespace. If the ooow namespace declaration
+        is not in scope, all conditions will evaluate to false. I
+        consider this a bug (a violation of the ideas of XML) of OOo.
+        Nevertheless to make conditions work, we insert the ooow
+        namespace declaration into the top-level element.
+    """
+    filename = 'content.xml'
+    prio     = 10000
+
+    def apply (self, root) :
+        # Only done for mimetypes [1] -- presumably the OOo 2.X text
+        # format, cf. the same test in _Body_Concat.divide_body.
+        # The declaration is written as a plain attribute of root.
+        if self.mimetype == mimetypes [1] :
+            root.set ('xmlns:ooow', namespace_by_name [self.mimetype]['ooow'])
+    # end def apply
+# end class Fix_OOo_Tag
+
+class _Body_Concat (Transform) :
+    """ Various methods for modifying the tbody split into various pieces
+        that have to keep sequence in order to not confuse OOo.
+    """
+    # Per mimetype: the groups of tags that have to stay together, in
+    # the order in which they are expected at the start of the text
+    # body (first the declarations, then the draw objects). Only the
+    # keys of the dicts are used (for membership tests in _divide).
+    ooo_sections  = {}
+    for m in mimetypes :
+        ooo_sections [m] = \
+            [ { OOo_Tag ('text', 'variable-decls',   m) : 1
+              , OOo_Tag ('text', 'sequence-decls',   m) : 1
+              , OOo_Tag ('text', 'user-field-decls', m) : 1
+              , OOo_Tag ('office', 'forms',          m) : 1
+              }
+            , { OOo_Tag ('draw', 'frame',            m) : 1
+              , OOo_Tag ('draw', 'rect',             m) : 1
+              , OOo_Tag ('draw', 'text-box',         m) : 1
+              }
+            ]
+
+    def _textbody (self) :
+        """
+            We use the office:body (OOo 1.X)/office:text (OOo 2.X)
+            element as a container for various transforms...
+        """
+        return Element (self.textbody_tag)
+    # end def _textbody
+
+    def _divide (self, textbody) :
+        """ Divide self.copy into parts that must keep their sequence.
+            We use another textbody tag for storing the parts...
+            Side-effect of setting self.copyparts is intended.
+        """
+        # copyparts is a container element whose children are the
+        # parts; it starts out with one (still empty) part.
+        self.copyparts = self._textbody ()
+        self.copyparts.append (self._textbody ())
+        l = len (self.ooo_sections [self.mimetype])
+        idx = 0
+        for e in textbody :
+            # Advance to the first section (from idx on) that contains
+            # the tag of e, opening a new part for every section
+            # skipped. Once idx reaches l, everything that follows
+            # ends up in the last part.
+            while idx < l :
+                if e.tag in self.ooo_sections [self.mimetype][idx] :
+                    break
+                else :
+                    self.copyparts.append (self._textbody ())
+                    idx += 1
+            self.copyparts [-1].append (e)
+        # The first part (elements of the first section) is returned
+        # separately and removed from copyparts.
+        declarations = self.copyparts [0]
+        del self.copyparts [0]
+        return declarations
+    # end def _divide
+
+    def divide_body (self, root) :
+        # Locate the text body below root, replace it in its parent by
+        # a new empty one (self.tbody) and split the old body into
+        # self.declarations and self.bodyparts.
+        cont       = root
+        if cont.tag != self.oootag ('office', 'document-content') :
+            cont   = root.find  (self.oootag ('office', 'document-content'))
+        tbody      = cont.find  (self.oootag ('office', 'body'))
+        # OOo 2.X has an office:text inside office:body that contains
+        # the real text contents:
+        if self.mimetype == mimetypes [1] :
+            cont   = tbody
+            tbody  = cont.find (self.oootag ('office', 'text'))
+        idx        = cont [:].index (tbody)
+        self.tbody = cont [idx] = self._textbody ()
+        self.declarations = self._divide (tbody)
+        self.bodyparts    = self.copyparts
+    # end def divide_body
+
+    def append_declarations (self) :
+        # Append the elements of the declarations part to the new body.
+        for e in self.declarations :
+            self.tbody.append (e)
+    # end def append_declarations
+
+    def append_to_body (self, cp) :
+        # Append the children of the i-th part of cp to the i-th part
+        # of self.bodyparts; cp needs at least as many parts as
+        # self.bodyparts.
+        for i in range (len (self.bodyparts)) :
+            for j in cp [i] :
+                self.bodyparts [i].append (j)
+    # end def append_to_body
+
+    def assemble_body (self) :
+        # Append the contents of all parts, part by part, to the new
+        # body.
+        for p in self.bodyparts :
+            for e in p :
+                self.tbody.append (e)
+    # end def assemble_body
+
+    def _get_meta (self, var, classname = 'Get_Attribute', prefix = "") :
+        """ get page- and paragraph-count etc. meta-info """
+        # the key is "classname:prefix+var", the value is returned as int
+        return int (self.transformer [':'.join ((classname, prefix + var))])
+    # end def _get_meta
+
+    def _set_meta (self, var, value, classname = 'Set_Attribute', prefix = "") :
+        """ set page- and paragraph-count etc. meta-info """
+        # the key is "classname:prefix+var", the value is stored as string
+        self.transformer [':'.join ((classname, prefix + var))] = str (value)
+    # end def _set_meta
+# end class _Body_Concat
+
+class Mailmerge (_Body_Concat) :
+    """
+        This transformation is used to create a mailmerge document using
+        the current document as the template. In the constructor we get
+        an iterator that provides a data set for each item in the
+        iteration. Elements the iterator has to provide are either
+        something that follows the Mapping Type interface (it looks like
+        a dict) or something that is callable and can be used for
+        name-value lookups.
+
+        A precondition for this transform is the application of the
+        Addpagebreak_Style to guarantee that we know the style
+        for adding a page break to the current document. Alternatively
+        the stylename (or the stylekey if a different name should be used
+        for lookup in the current transformer) can be given in the
+        constructor.
+    """
+    filename = 'content.xml'
+    prio     = 60
+
+    def __init__ \
+        (self, iterator, stylename = None, stylekey = None, ** kw) :
+        # iterator yields one replacement mapping or callable per copy
+        # of the template; stylename and stylekey are handed on to the
+        # Addpagebreak used in apply.
+        self.__super.__init__ (** kw)
+        self.iterator  = iterator
+        self.stylename = stylename
+        self.stylekey  = stylekey
+    # end def __init__
+
+    def apply (self, root) :
+        """
+            Copy old tbody, create new empty one and repeatedly append the
+            new tbody.
+        """
+        pb = Addpagebreak \
+            ( stylename   = self.stylename
+            , stylekey    = self.stylekey
+            , transformer = self.transformer
+            )
+        # determine the highest draw:z-index in the document; the
+        # result is stored in the transformer under 'Get_Max:z-index'
+        zi = Attribute_Access \
+            ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),)
+            , transformer = self.transformer
+            )
+        zi.apply (root)
+
+        # 'Get_Attribute:page-count' is not set in this method --
+        # presumably by a Get_Attribute applied to meta.xml by an
+        # earlier transform; verify against the caller.
+        pagecount  = self._get_meta ('page-count')
+        z_index    = self._get_meta ('z-index', classname = 'Get_Max') + 1
+        # shifts page anchors by one document length and all z-indices
+        # by the number of z-indices in use
+        ra         = Attribute_Access \
+            ( ( Reanchor (pagecount, self.oootag ('draw', 'text-box'))
+              , Reanchor (pagecount, self.oootag ('draw', 'rect'))
+              , Reanchor (pagecount, self.oootag ('draw', 'frame'))
+              , Reanchor (z_index, None, self.oootag ('draw', 'z-index'))
+              )
+            , transformer = self.transformer # transformer added
+            )
+        self.divide_body (root)
+        # copyparts keeps the template, bodyparts collects the output
+        self.bodyparts = [self._textbody () for i in self.copyparts]
+
+        count = 0
+        for i in self.iterator :
+            count += 1
+            fr = Field_Replace (replace = i, transformer = self.transformer)
+            # add page break only to non-empty tbody
+            # reanchor only after the first mailmerge
+            # NOTE(review): tbody only becomes non-empty through the
+            # declarations appended in the else branch; for a template
+            # without declarations neither page breaks nor reanchoring
+            # would ever happen -- confirm this cannot occur.
+            if len (self.tbody) : # tbody non-empty (but existing!)
+                pb.apply (self.bodyparts [-1])
+                # applied to the template itself, so the shift
+                # accumulates from copy to copy
+                ra.apply (self.copyparts)
+            else :
+                self.append_declarations ()
+            cp = deepcopy (self.copyparts)
+            fr.apply (cp)
+            self.append_to_body (cp)
+        # new counts: every counter in meta_counts is multiplied by the
+        # number of copies and stored under 'Set_Attribute:<counter>'
+        for i in meta_counts :
+            self._set_meta (i, count * self._get_meta (i))
+        # we have added count-1 paragraphs, because each page-break is a
+        # paragraph.
+        p = 'paragraph-count'
+        self._set_meta \
+            (p, self._get_meta (p, classname = 'Set_Attribute') + (count - 1))
+        self.assemble_body ()
+    # end def apply
+# end class Mailmerge
+
+def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) :
+    """ Serialise a style-element of an OOo document (e.g., a
+        style:font-decl, style:default-style, etc declaration).
+        We remove the name of the style and return something that is a
+        representation of the style element which can be used as a
+        dictionary key.
+        The serialisation format is a tuple containing the tag as the
+        first item (with prefix prepended), the attributes (as a sorted
+        tuple of key,value pairs) as the second item and the following
+        items are serialisations of children.
+    """
+    attr = dict (element.attrib)
+    # the style name is not part of the identity of a style
+    attr.pop (OOo_Tag ('style', 'name', mimetype), None)
+    # sorted () instead of items () followed by sort (): in Python 3
+    # items () is a view without a sort method.
+    attr = tuple (sorted (attr.items ()))
+    serial = [prefix + element.tag, attr]
+    for e in element :
+        serial.append (tree_serialise (e, prefix, mimetype))
+    return tuple (serial)
+# end def tree_serialise
+
+class Concatenate (_Body_Concat) :
+    """
+        This transformation is used to create a new document from a
+        concatenation of several documents.  In the constructor we get a
+        list of documents to append to the master document.
+    """
+    prio     = 80
+    style_containers = {}
+    ref_attrs        = {}
+    for m in mimetypes :
+        style_containers.update \
+            ({ OOo_Tag ('office', 'font-decls',       m) : 1
+             , OOo_Tag ('office', 'font-face-decls',  m) : 1
+             , OOo_Tag ('office', 'styles',           m) : 1
+             , OOo_Tag ('office', 'automatic-styles', m) : 1
+             , OOo_Tag ('office', 'master-styles',    m) : 1
+            })
+        # Cross-references in OOo document:
+        # 'attribute' references another element with 'tag'.
+        # If attribute names change, we must replace references, too.
+        #     attribute                                :
+        #     tag
+        ref_attrs.update \
+            ({ OOo_Tag ('style', 'parent-style-name', m) :
+               OOo_Tag ('style', 'style',             m)
+             , OOo_Tag ('style', 'master-page-name',  m) :
+               OOo_Tag ('style', 'master-page',       m)
+             , OOo_Tag ('style', 'page-layout-name',  m) : # OOo 2.X
+               OOo_Tag ('style', 'page-layout',       m)
+             , OOo_Tag ('style', 'page-master-name',  m) :
+               OOo_Tag ('style', 'page-master',       m)
+             , OOo_Tag ('table', 'style-name',        m) :
+               OOo_Tag ('style', 'style',             m)
+             , OOo_Tag ('text',  'style-name',        m) :
+               OOo_Tag ('style', 'style',             m)
+             , OOo_Tag ('draw',  'style-name',        m) :
+               OOo_Tag ('style', 'style',             m)
+             , OOo_Tag ('draw',  'text-style-name',   m) :
+               OOo_Tag ('style', 'style',             m)
+            })
+    stylefiles = ['styles.xml', 'content.xml']
+    oofiles    = stylefiles + ['meta.xml']
+
+    body_decl_sections = ['variable-decl', 'sequence-decl']
+
+    def __init__ (self, * docs, ** kw) :
+        # Every positional argument is opened as an OOoPy input file.
+        # All documents must have the mimetype of the first one; this
+        # is only checked with an assert, i.e., not under python -O.
+        self.__super.__init__ (** kw)
+        self.docs = []
+        for doc in docs :
+            self.docs.append (OOoPy (infile = doc))
+            assert (self.docs [-1].mimetype == self.docs [0].mimetype)
+    # end def __init__
+
+    def apply_all (self, trees) :
+        """ Merge the documents given in the constructor into the
+            document being transformed.
+
+            trees maps file names to the parsed trees of the master
+            document. For every file in oofiles the root of the master
+            tree and the roots of all appended documents are collected
+            in self.trees [file] (master first); then page style,
+            styles, body and pictures are merged.
+            Raises ValueError if styles.xml of the master document has
+            no default style for paragraphs.
+        """
+        assert (self.docs [0].mimetype == self.transformer.mimetype)
+        self.serialised = {}
+        self.stylenames = {}
+        # one name map per document (index 0: master document), each
+        # with an empty dict per tag referenced in ref_attrs
+        self.namemaps   = [{}]
+        self.tab_depend = {}
+        # values () instead of itervalues (), which Python 3 lacks
+        for s in self.ref_attrs.values () :
+            self.namemaps [0][s] = {}
+        self.body_decls = {}
+        for s in self.body_decl_sections :
+            self.body_decls [s] = {}
+        self.trees      = {}
+        for f in self.oofiles :
+            self.trees [f] = [trees [f].getroot ()]
+        # top-level nodes of the master's style files by tag
+        self.sections   = {}
+        for f in self.stylefiles :
+            self.sections [f] = {}
+            for node in self.trees [f][0] :
+                self.sections [f][node.tag] = node
+        for d in self.docs :
+            self.namemaps.append ({})
+            for s in self.ref_attrs.values () :
+                self.namemaps [-1][s] = {}
+            for f in self.oofiles :
+                self.trees [f].append (d.read (f).getroot ())
+        # append a pagebreak style, will be optimized away if duplicate
+        pbs = Addpagebreak_Style (transformer = self.transformer)
+        pbs.apply (self.trees ['content.xml'][0])
+        # readers for the statistics counters of the appended
+        # documents, stored under 'Get_Attribute:concat-<counter>'
+        get_attr = []
+        for attr in meta_counts :
+            a = self.oootag ('meta', attr)
+            t = self.oootag ('meta', 'document-statistic')
+            get_attr.append (Get_Attribute (t, a, 'concat-' + attr))
+        # highest z-index of the master document ...
+        zi = Attribute_Access \
+            ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),)
+            , transformer = self.transformer
+            )
+        zi.apply (self.trees ['content.xml'][0])
+        # ... and a separate reader kept for later use (not applied
+        # in this method)
+        self.zi = Attribute_Access \
+            ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'concat-z-index')
+              ,
+              )
+            , transformer = self.transformer
+            )
+        self.getmeta = Attribute_Access \
+            (get_attr, filename = 'meta.xml', transformer = self.transformer)
+        self.pbname = self.transformer \
+            [':'.join (('Addpagebreak_Style', 'stylename'))]
+        for s in self.trees ['styles.xml'][0].findall \
+            ('.//' + self.oootag ('style', 'default-style')) :
+            if s.get (self.oootag ('style', 'family')) == 'paragraph' :
+                default_style = s
+                break
+        else :
+            # fail with a clear message instead of a NameError on the
+            # unbound default_style below
+            raise ValueError \
+                ("styles.xml has no default-style of family 'paragraph'")
+        self.default_properties = default_style.find \
+            ('./' + self.properties_tag)
+        self.set_pagestyle ()
+        for f in 'styles.xml', 'content.xml' :
+            self.style_merge (f)
+        self.body_concat ()
+        self.append_pictures ()
+    # end def apply_all
+
+    def apply_tab_correction (self, node) :
+        """ If the parent style of node is registered in
+            self.tab_depend, add the default tab stops to every
+            tab-stops element found in the properties of node, but
+            only *after* the largest tab position already present
+            there. The last two characters of a position are taken to
+            be its unit.
+        """
+        tab_stops = self.oootag ('style', 'tab-stops')
+        tab_stop  = self.oootag ('style', 'tab-stop')
+        tab_pos   = self.oootag ('style', 'position')
+        parent    = node.get (self.oootag ('style', 'parent-style-name'))
+        if parent not in self.tab_depend :
+            return
+        for prop in node :
+            if prop.tag != self.properties_tag :
+                continue
+            for sub in prop :
+                if sub.tag != tab_stops :
+                    continue
+                self.tab_depend [parent] = 1
+                furthest = 0
+                for ts in sub :
+                    assert (ts.tag == tab_stop)
+                    furthest = max (furthest, float (ts.get (tab_pos) [:-2]))
+                self.insert_tabs (sub, furthest)
+    # end def apply_tab_correction
+
+    def _attr_rename (self, idx) :
+        """ Build an Attribute_Access transform for document number
+            idx: for every (attribute, style element) pair in
+            self.ref_attrs the results of set_attributes_from_dict
+            for that attribute and the name map of the style element
+            are collected into one list.
+        """
+        namemap = self.namemaps [idx]
+        r = []
+        # items () instead of iteritems (): works on Python 2 and 3;
+        # extend () avoids the quadratic sum (lists, []) idiom.
+        for k, v in self.ref_attrs.items () :
+            r.extend (set_attributes_from_dict (None, k, namemap [v]))
+        return Attribute_Access (r, transformer = self.transformer)
+    # end def _attr_rename
+
+    def body_concat (self) :
+        """ Append the body of every additional document to the body
+            of the first document and write the summed-up statistics
+            back with _set_meta. Uses self.trees, self.getmeta,
+            self.zi and self.pbname as set up by apply_all.
+        """
+        # running totals, seeded with the values of the first document
+        count = {}
+        for i in meta_counts :
+            count [i] = self._get_meta (i)
+        # z-index offset: one above the maximum stored as 'z-index'
+        count ['z-index'] = self._get_meta \
+            ('z-index', classname = 'Get_Max') + 1
+        pb   = Addpagebreak \
+            (stylename = self.pbname, transformer = self.transformer)
+        self.divide_body (self.trees ['content.xml'][0])
+        # register declarations of the first document without appending
+        self.body_decl (self.declarations, append = 0)
+        for idx in range (1, len (self.docs) + 1) :
+            meta    = self.trees ['meta.xml'][idx]
+            content = self.trees ['content.xml'][idx]
+            tbody   = self.find_tbody (content)
+            # fetch statistics and maximum z-index of this document
+            # (stored with the 'concat-' prefix, see apply_all)
+            self.getmeta.apply (meta)
+            self.zi.apply      (tbody)
+
+            # built before the totals are updated below, so the offsets
+            # are the totals of everything concatenated so far
+            ra = Attribute_Access \
+              ( ( Reanchor 
+                    (count ['page-count'], self.oootag ('draw', 'text-box'))
+                , Reanchor
+                    (count ['page-count'], self.oootag ('draw', 'rect'))
+                , Reanchor
+                    (count ['page-count'], self.oootag ('draw', 'frame'))
+                , Reanchor
+                    (count ['z-index'], None, self.oootag ('draw', 'z-index'))
+                )
+              , transformer = self.transformer # transformer added
+              )
+            for i in meta_counts :
+                count [i] += self._get_meta (i, prefix = 'concat-')
+            # presumably accounts for the paragraph inserted by the page
+            # break -- TODO confirm against Addpagebreak
+            count ['paragraph-count'] += 1
+            count ['z-index'] += self._get_meta \
+                ('z-index', classname = 'Get_Max', prefix = 'concat-') + 1
+            # NOTE(review): namemap is not used in the rest of this loop
+            namemap = self.namemaps [idx][self.oootag ('style', 'style')]
+            tr      = self._attr_rename (idx)
+            # page break goes to the end of what was assembled so far
+            pb.apply (self.bodyparts [-1])
+            # rename style references, then re-anchor drawing objects
+            tr.apply (content)
+            ra.apply (content)
+            declarations = self._divide (tbody)
+            self.body_decl (declarations)
+            # self.copyparts is presumably filled by _divide above --
+            # TODO confirm
+            self.append_to_body (self.copyparts)
+        self.append_declarations ()
+        self.assemble_body       ()
+        for i in meta_counts :
+            self._set_meta (i, count [i])
+    # end def body_concat
+
+    def body_decl (self, decl_section, append = 1) :
+        """ Register the declarations found in decl_section for every
+            kind listed in body_decl_sections. A declaration whose
+            name is not yet known is recorded in self.body_decls and,
+            if append is true and self.declarations has a matching
+            container, appended to that container.
+        """
+        for kind in self.body_decl_sections :
+            container = self.declarations.find \
+                ('.//' + self.oootag ('text', kind + 's'))
+            known     = self.body_decls [kind]
+            decl_tag  = self.oootag ('text', kind)
+            for decl in decl_section.findall ('.//' + decl_tag) :
+                name = decl.get (self.oootag ('text', 'name'))
+                if name in known :
+                    continue
+                if append and container is not None :
+                    container.append (decl)
+                known [name] = 1
+    # end def body_decl
+
+    def insert_tabs (self, element, max = 0) :
+        """ Append default tab stops to element: one for each of the
+            first 35 multiples of self.tab_correct (a length such as
+            '1.25cm'). Only positions greater than max, the current
+            maximum tab position (assumed to be in the same unit), are
+            inserted.
+        """
+        value = self.tab_correct
+        # Split the length into number and unit right after the last
+        # digit. The former code split one character too early, so
+        # '1.25cm' became 1.2 and '5cm' and all but the first stop
+        # were wrong.
+        split = len (value)
+        while split and not value [split - 1].isdigit () :
+            split -= 1
+        dist    = float (value [:split])
+        unit    = value [split:]
+        tab_tag = self.oootag ('style', 'tab-stop')
+        pos_tag = self.oootag ('style', 'position')
+        for ts in range (35) :
+            pos = dist * (ts + 1)
+            if pos > max :
+                SubElement (element, tab_tag, {pos_tag : '%s%s' % (pos, unit)})
+    # end def insert_tabs
+
+    def merge_defaultstyle (self, default_style, node) :
+        """ Copy into the properties of node every property of
+            default_style that differs from self.default_properties
+            and that node does not set itself. A differing
+            tab-stop-distance is not copied: it is remembered in
+            self.tab_correct, node is registered in self.tab_depend
+            and explicit tab stops are inserted instead. A properties
+            element is created for node if it has none; it is only
+            added to node if it ends up non-empty.
+        """
+        assert default_style is not None
+        assert node is not None
+        proppath = './' + self.properties_tag
+        defprops = default_style.find (proppath)
+        if defprops is None :
+            # default style without properties: nothing to merge
+            return
+        props    = node.find          (proppath)
+        sn       = self.oootag ('style', 'name')
+        is_new   = props is None
+        if is_new :
+            props = Element (self.properties_tag)
+        # items () instead of iteritems (): works on Python 2 and 3
+        for k, v in defprops.attrib.items () :
+            if self.default_properties.get (k) != v and not props.get (k) :
+                if k == self.oootag ('style', 'tab-stop-distance') :
+                    self.tab_correct = v
+                    self.tab_depend  = {node.get (sn) : 1}
+                    stps = SubElement \
+                        (props, self.oootag ('style', 'tab-stops'))
+                    self.insert_tabs (stps)
+                else :
+                    props.set (k,v)
+        # A properties element found above already is a child of node;
+        # appending it again would duplicate it in the tree.
+        if is_new and (len (props) or props.attrib) :
+            node.append (props)
+    # end def merge_defaultstyle
+
+    def _newname (self, key, oldname) :
+        """ Return a name for oldname that is unused under key in
+            self.stylenames and register it there. An unused oldname
+            is returned unchanged; otherwise 'Concat_' + oldname is
+            tried, then the same with 1, 2, ... appended.
+        """
+        taken = self.stylenames
+        if (key, oldname) not in taken :
+            taken [(key, oldname)] = 1
+            return oldname
+        base      = 'Concat_%s' % oldname
+        candidate = base
+        counter   = 0
+        while (key, candidate) in taken :
+            counter  += 1
+            candidate = '%s%d' % (base, counter)
+        taken [(key, candidate)] = 1
+        return candidate
+    # end def _newname
+
+    def set_pagestyle (self) :
+        """ For all documents: search for the first paragraph of the tbody
+            and get its style. Modify this style to include a reference
+            to the default page-style if it doesn't contain a reference
+            to a page style. Insert the new style into the list of
+            styles and modify the first paragraph to use the new page
+            style.
+            This procedure is necessary to make appended documents use
+            their page style instead of the master page style of the
+            first document.
+            If the body contains neither a paragraph nor a list, or the
+            element found has no style name, no new style is created
+            for that document; empty master-page references in its
+            automatic styles are still converted.
+            FIXME: We should search the style hierarchy backwards for
+            the style of the first paragraph to check if there is a
+            reference to a page-style somewhere and not override the
+            page-style in this case. Otherwise appending complex
+            documents that use a different page-style for the first page
+            will not work if the page style is referenced in a style
+            from which the first paragraph style derives.
+        """
+        for idx in range (1, len (self.docs) + 1) :
+            croot  = self.trees  ['content.xml'][idx]
+            sroot  = self.trees  ['styles.xml'] [idx]
+            tbody  = self.find_tbody (croot)
+            para   = tbody.find  ('./' + self.oootag ('text', 'p'))
+            if para is None :
+                para = tbody.find  ('./' + self.oootag ('text', 'list'))
+            tsn    = self.oootag ('text', 'style-name')
+            # find () returns None if nothing matched: formerly this
+            # raised AttributeError on para.get
+            sname  = para.get (tsn) if para is not None else None
+            styles = croot.find  (self.oootag ('office', 'automatic-styles'))
+            ost    = sroot.find  (self.oootag ('office', 'styles'))
+            mst    = sroot.find  (self.oootag ('office', 'master-styles'))
+            assert mst is not None and len (mst)
+            assert mst [0].tag == self.oootag ('style', 'master-page')
+            sntag  = self.oootag ('style', 'name')
+            # name of the first master page of the appended document
+            master = mst [0].get (sntag)
+            mpn    = self.oootag ('style', 'master-page-name')
+            stytag = self.oootag ('style', 'style')
+            style  = None
+            for s in styles :
+                if s.tag == stytag :
+                    # Explicit references to default style converted to
+                    # explicit references to new page style.
+                    if s.get (mpn) == '' :
+                        s.set (mpn, master)
+                    if sname is not None and s.get (sntag) == sname :
+                        style = s
+            # not an automatic style: look in the styles of styles.xml
+            if style is None and sname is not None :
+                for s in ost :
+                    if s.tag == stytag and s.get (sntag) == sname :
+                        style = s
+                        break
+            if style is not None and not style.get (mpn) :
+                newstyle = deepcopy (style)
+                # Don't register with newname: will be rewritten later
+                # when appending. We assume that an original doc does
+                # not already contain a style with _Concat suffix.
+                newname = sname + '_Concat'
+                para.set (tsn, newname)
+                newstyle.set (sntag, newname)
+                newstyle.set (mpn,   master)
+                styles.append (newstyle)
+    # end def set_pagestyle
+
+    def style_merge (self, oofile) :
+        """ Loop over all the docs in our document list and look up the
+            styles there. If a style matches an existing style in the
+            original document, register the style name for later
+            transformation if the style name in the original document
+            does not match the style name in the appended document.  If
+            no match is found, append style to master document and add
+            to serialisation. If the style name already exists in the
+            master document, a new style name is created. Names of
+            parent styles are changed when appending -- this means that
+            parent style names already have to be defined earlier in the
+            document.
+
+            If there is a reference to a parent style that is not yet
+            defined, and the parent style is defined later, it is
+            already too late, so an assertion is raised in this case.
+            OOo seems to ensure declaration order of dependent styles,
+            so this should not be a problem.
+
+            oofile is the name of the file to process; apply_all calls
+            this for 'styles.xml' and 'content.xml'. Index 0 of
+            self.trees [oofile] is the first (master) document.
+        """
+        for idx in range (len (self.trees [oofile])) :
+            namemap = self.namemaps [idx]
+            root    = self.trees    [oofile][idx]
+            # NOTE(review): delnode is never filled (the append below is
+            # commented out), so the deletion loop at the end is a no-op
+            delnode = []
+            for nodeidx, node in enumerate (root) :
+                if node.tag not in self.style_containers :
+                    continue
+                prefix = ''
+                # font_decls may have same name in styles.xml and content.xml
+                if node.tag == self.font_decls_tag :
+                    prefix = oofile
+                # last paragraph default-style seen so far in this container
+                default_style = None
+                for n in node :
+                    if  (   n.tag == self.oootag ('style', 'default-style')
+                        and (  n.get (self.oootag ('style', 'family'))
+                            == 'paragraph'
+                            )
+                        ) :
+                        default_style = n
+                    name     = n.get (self.oootag ('style', 'name'), None)
+                    # entries without a name cannot be referenced: skip
+                    if not name : continue
+                    # the 'Standard' paragraph style of an appended
+                    # document gets the differing defaults merged in
+                    if  (   idx != 0
+                        and name == 'Standard'
+                        and n.get (self.oootag ('style', 'class'))  == 'text'
+                        and (  n.get (self.oootag ('style', 'family'))
+                            == 'paragraph'
+                            )
+                        ) :
+                        self.merge_defaultstyle (default_style, n)
+                    self.apply_tab_correction (n)
+                    key = prefix + n.tag
+                    if key not in namemap : namemap [key] = {}
+                    # rename references (e.g. to parent styles) with the
+                    # names mapped so far, before serialising
+                    tr = self._attr_rename (idx)
+                    tr.apply (n)
+                    sn  = tree_serialise (n, prefix, self.mimetype)
+                    if sn in self.serialised :
+                        # same serialisation already seen: reuse its name
+                        newname = self.serialised [sn]
+                        if name != newname :
+                            assert \
+                                (  name not in namemap [key]
+                                or namemap [key][name] == newname
+                                )
+                            namemap [key][name] = newname
+                            # optimize original doc: remove duplicate styles
+                            if  not idx and node.tag != self.font_decls_tag :
+                                pass
+                                #delnode.append (nodeidx)
+                    else :
+                        # not seen before: register, renaming on a clash
+                        newname = self._newname (key, name)
+                        self.serialised [sn] = newname
+                        if newname != name :
+                            n.set (self.oootag ('style', 'name'), newname)
+                            dn = self.oootag ('style', 'display-name')
+                            disp_name = n.get (dn)
+                            if disp_name :
+                                n.set (dn, 'Concat ' + disp_name)
+                            namemap [key][name] = newname
+                        # entries of appended documents are added to the
+                        # matching container of the first document
+                        if idx != 0 :
+                            self.sections [oofile][node.tag].append (n)
+                assert not delnode or not idx
+                delnode.reverse ()
+                for i in delnode :
+                    del node [i]
+    # end style_merge
+
+    def append_pictures (self) :
+        """ Queue every archive member of the appended documents whose
+            name starts with 'Pictures/' as a (filename, content)
+            pair in self.transformer.appendfiles.
+        """
+        for doc in self.docs :
+            archive = doc.izip
+            for info in archive.infolist () :
+                fname = info.filename
+                if not fname.startswith ('Pictures/') :
+                    continue
+                self.transformer.appendfiles.append \
+                    ((fname, archive.read (fname)))
+    # end def append_pictures
+            
+# end class Concatenate
+
+def renumber_frames (mimetype) :
+    """ Factory function: list of Renumber objects named 'Frame' for
+        the two frame element tags, parameterized with mimetype.
+    """
+    frame_tags = \
+        ( OOo_Tag ('draw',  'text-box', mimetype) # OOo 1.X
+        , OOo_Tag ('draw',  'frame',    mimetype) # OOo 2.X
+        )
+    return [Renumber (tag, 'Frame') for tag in frame_tags]
+# end def renumber_frames
+
+def renumber_sections (mimetype) :
+    """ Factory function: one-element list with the Renumber object
+        for text section elements of the given mimetype.
+    """
+    section_tag = OOo_Tag ('text',  'section', mimetype)
+    return [Renumber (section_tag)]
+# end def renumber_sections
+
+def renumber_tables (mimetype) :
+    """ Factory function: one-element list with the Renumber object
+        for table elements of the given mimetype.
+    """
+    table_tag = OOo_Tag ('table', 'table', mimetype)
+    return [Renumber (table_tag)]
+# end def renumber_tables
+
+def renumber_images (mimetype) :
+    """ Factory function: one-element list with the Renumber object
+        for draw image elements of the given mimetype.
+    """
+    image_tag = OOo_Tag ('draw', 'image', mimetype)
+    return [Renumber (image_tag)]
+# end def renumber_images
+
+def renumber_xml_id (mimetype) :
+    """ Factory function: Renumber object named 'list' for text list
+        elements, constructed with the xml id tag as third argument.
+        For the first entry of mimetypes the result is an empty list.
+    """
+    renumberings = []
+    if mimetype != mimetypes [0] :
+        xmlid    = OOo_Tag ('xml', 'id', mimetype)
+        list_tag = OOo_Tag ('text', 'list', mimetype)
+        renumberings.append (Renumber (list_tag, 'list', xmlid))
+    return renumberings
+# end def renumber_xml_id
+
+def renumber_all (mimetype) :
+    """ Factory function: a single Attribute_Access transform holding
+        the renumberings for frames, sections, tables, images and
+        xml ids (in this order), all parameterized with mimetype.
+    """
+    factories = \
+        ( renumber_frames
+        , renumber_sections
+        , renumber_tables
+        , renumber_images
+        , renumber_xml_id
+        )
+    renumberings = []
+    for factory in factories :
+        renumberings.extend (factory (mimetype))
+    return Attribute_Access (renumberings)
+# end def renumber_all
+
+# There used to be a separate Pagecount transform. It was generalized:
+# an Attribute_Access transform now gets some of the meta information,
+# and the same information is set again later, after other transforms
+# have possibly updated it. A different naming convention is used here
+# for storing the info retrieved from the OOo document: the attribute
+# name in the meta-information is used to store (and later retrieve) it.
+
+def get_meta (mimetype) :
+    """ Factory function: Attribute_Access transform (prio 20, file
+        meta.xml) with one Get_Attribute per entry of meta_counts,
+        each reading that attribute of the document-statistic
+        element and using the attribute name as third argument.
+    """
+    def getter (name) :
+        attr_tag = OOo_Tag ('meta', name, mimetype)
+        stat_tag = OOo_Tag ('meta', 'document-statistic', mimetype)
+        return Get_Attribute (stat_tag, attr_tag, name)
+    return Attribute_Access \
+        ( [getter (name) for name in meta_counts]
+        , prio     = 20
+        , filename = 'meta.xml'
+        )
+# end def get_meta
+
+def set_meta (mimetype) :
+    """ Factory function: Attribute_Access transform (prio 120, file
+        meta.xml) with one Set_Attribute per entry of meta_counts,
+        each addressing that attribute of the document-statistic
+        element and using the attribute name as third argument.
+    """
+    def setter (name) :
+        attr_tag = OOo_Tag ('meta', name, mimetype)
+        stat_tag = OOo_Tag ('meta', 'document-statistic', mimetype)
+        return Set_Attribute (stat_tag, attr_tag, name)
+    return Attribute_Access \
+        ( [setter (name) for name in meta_counts]
+        , prio     = 120
+        , filename = 'meta.xml'
+        )
+# end def set_meta
diff --git a/ooopy/Version.py b/ooopy/Version.py
new file mode 100644
index 000000000..495ca242a
--- /dev/null
+++ b/ooopy/Version.py
@@ -0,0 +1 @@
+VERSION="1.11"
diff --git a/ooopy/__init__.py b/ooopy/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/ooopy/__init__.py
author	Étienne Loks <etienne.loks@iggdrasil.net>	2019-05-01 13:51:01 +0200
committer	Étienne Loks <etienne.loks@iggdrasil.net>	2019-06-17 13:21:28 +0200
commit	6e09fe95f07ea2c0a827beda5fc2f2a63751db7f (patch)
tree	d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy
parent	ce4b7db76f21559b94943229bbeebd9c37c43f49 (diff)
download	Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.tar.bz2 Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.zip