diff options
Diffstat (limited to 'ooopy')
| -rw-r--r-- | ooopy/OOoPy.py | 430 | ||||
| -rw-r--r-- | ooopy/Transformer.py | 1462 | ||||
| -rw-r--r-- | ooopy/Transforms.py | 366 | ||||
| -rw-r--r-- | ooopy/Version.py | 2 | 
4 files changed, 487 insertions, 1773 deletions
| diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py index 87e0b8110..aaa152606 100644 --- a/ooopy/OOoPy.py +++ b/ooopy/OOoPy.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*-  # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.  # Reichergasse 131, A-3411 Weidling.  # Web: http://www.runtux.com Email: office@runtux.com @@ -21,297 +21,229 @@  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  # **************************************************************************** -from __future__              import absolute_import -from zipfile                 import ZipFile, ZIP_DEFLATED, ZipInfo -try : -    from StringIO            import StringIO -except ImportError : -    from io                  import StringIO -from datetime                import datetime -try : -    from xml.etree.ElementTree   import ElementTree, fromstring, _namespace_map -except ImportError : -    from elementtree.ElementTree import ElementTree, fromstring, _namespace_map -from tempfile                import mkstemp -from ooopy.Version           import VERSION -import os +from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo +from io import BytesIO +from datetime import datetime +from xml.etree.ElementTree   import ElementTree, fromstring, _namespace_map + + +class _autosuper (type): +    def __init__(cls, name, bases, dict): +        super(_autosuper, cls).__init__(name, bases, dict) +        setattr(cls, "_%s__super" % name, super(cls)) + + +class autosuper(metaclass=_autosuper): +    def __init__(self, *args, **kw): +        self.__super.__init__() + + +files = [ +    'content.xml', 'styles.xml', 'meta.xml', 'settings.xml', +    'META-INF/manifest.xml' +] + +mimetypes = ['application/vnd.sun.xml.writer', +             'application/vnd.oasis.opendocument.text'] + +namespace_by_name = { +    mimetypes [0]: { +        'chart': "http://openoffice.org/2000/chart", +        'config': "http://openoffice.org/2001/config", +        'dc': "http://purl.org/dc/elements/1.1/", +        'dr3d': "http://openoffice.org/2000/dr3d", +        'draw': "http://openoffice.org/2000/drawing", +        'fo': "http://www.w3.org/1999/XSL/Format", +        'form': "http://openoffice.org/2000/form", +        'math': "http://www.w3.org/1998/Math/MathML", +        'meta': "http://openoffice.org/2000/meta", +        'number': "http://openoffice.org/2000/datastyle", +        'office': "http://openoffice.org/2000/office", +        'script': "http://openoffice.org/2000/script", +        'style': "http://openoffice.org/2000/style", +        'svg': "http://www.w3.org/2000/svg", +        'table': "http://openoffice.org/2000/table", +        'text': "http://openoffice.org/2000/text", +        'xlink': "http://www.w3.org/1999/xlink", +        'manifest': "http://openoffice.org/2001/manifest"}, +    mimetypes[1]: { +        'chart': "urn:oasis:names:tc:opendocument:xmlns:chart:1.0", +         'config': "urn:oasis:names:tc:opendocument:xmlns:config:1.0", +         'dc': "http://purl.org/dc/elements/1.1/", +         'dr3d': "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0", +         'draw': "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0", +         'fo': "urn:oasis:names:tc:opendocument:xmlns:" "xsl-fo-compatible:1.0", +         'form': "urn:oasis:names:tc:opendocument:xmlns:form:1.0", +         'math': "http://www.w3.org/1998/Math/MathML", +         'meta': "urn:oasis:names:tc:opendocument:xmlns:meta:1.0", +         'number': "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0", +         'office': "urn:oasis:names:tc:opendocument:xmlns:office:1.0", +         'officeooo': "http://openoffice.org/2009/office", +         'script': "urn:oasis:names:tc:opendocument:xmlns:script:1.0", +         'style': "urn:oasis:names:tc:opendocument:xmlns:style:1.0", +         'svg': "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0", +         'table': "urn:oasis:names:tc:opendocument:xmlns:table:1.0", +         'text': "urn:oasis:names:tc:opendocument:xmlns:text:1.0", +         'xlink': "http://www.w3.org/1999/xlink", +         'manifest': "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0", +         'tableooo': "http://openoffice.org/2009/table", +         'transformation': "http://www.w3.org/2003/g/data-view#", +         # OOo 1.X tags and some others: +         'ooo': "http://openoffice.org/2004/office", +         'ooow': "http://openoffice.org/2004/writer", +         'oooc': "http://openoffice.org/2004/calc", +         'o_dom': "http://www.w3.org/2001/xml-events", +         'o_xforms': "http://www.w3.org/2002/xforms", +         'xs': "http://www.w3.org/2001/XMLSchema", +         'xsi': "http://www.w3.org/2001/XMLSchema-instance", +         # predefined xml namespace, see +         # http://www.w3.org/TR/2006/REC-xml-names11-20060816/ +         # "It MAY, but need not, be declared, and MUST NOT be undeclared +         # or bound to any other namespace name." +         'xml': "http://www.w3.org/XML/1998/namespace" +    } +} + +for mimetype in namespace_by_name.values(): +    for k, v in mimetype.items(): +        if v in _namespace_map: +            assert _namespace_map[v] == k +        _namespace_map[v] = k -class _autosuper (type) : -    def __init__ (cls, name, bases, dict) : -        super   (_autosuper, cls).__init__ (name, bases, dict) -        setattr (cls, "_%s__super" % name, super (cls)) -    # end def __init__ -# end class _autosuper - -class autosuper (object) : -    __metaclass__ = _autosuper -    def __init__ (self, *args, **kw) : -        self.__super.__init__ () -    # end def __init__ -# end class autosuper - -files = \ -    [ 'content.xml' -    , 'styles.xml' -    , 'meta.xml' -    , 'settings.xml' -    , 'META-INF/manifest.xml' -    ] - -mimetypes = \ -    [ 'application/vnd.sun.xml.writer' -    , 'application/vnd.oasis.opendocument.text' -    ] -namespace_by_name = \ -  { mimetypes [0] : -      { 'chart'    : "http://openoffice.org/2000/chart" -      , 'config'   : "http://openoffice.org/2001/config" -      , 'dc'       : "http://purl.org/dc/elements/1.1/" -      , 'dr3d'     : "http://openoffice.org/2000/dr3d" -      , 'draw'     : "http://openoffice.org/2000/drawing" -      , 'fo'       : "http://www.w3.org/1999/XSL/Format" -      , 'form'     : "http://openoffice.org/2000/form" -      , 'math'     : "http://www.w3.org/1998/Math/MathML" -      , 'meta'     : "http://openoffice.org/2000/meta" -      , 'number'   : "http://openoffice.org/2000/datastyle" -      , 'office'   : "http://openoffice.org/2000/office" -      , 'script'   : "http://openoffice.org/2000/script" -      , 'style'    : "http://openoffice.org/2000/style" -      , 'svg'      : "http://www.w3.org/2000/svg" -      , 'table'    : "http://openoffice.org/2000/table" -      , 'text'     : "http://openoffice.org/2000/text" -      , 'xlink'    : "http://www.w3.org/1999/xlink" -      , 'manifest' : "http://openoffice.org/2001/manifest" -      } -  , mimetypes [1] : -      { 'chart'    : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" -      , 'config'   : "urn:oasis:names:tc:opendocument:xmlns:config:1.0" -      , 'dc'       : "http://purl.org/dc/elements/1.1/" -      , 'dr3d'     : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" -      , 'draw'     : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" -      , 'fo'       : "urn:oasis:names:tc:opendocument:xmlns:" -                     "xsl-fo-compatible:1.0" -      , 'form'     : "urn:oasis:names:tc:opendocument:xmlns:form:1.0" -      , 'math'     : "http://www.w3.org/1998/Math/MathML" -      , 'meta'     : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" -      , 'number'   : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" -      , 'office'   : "urn:oasis:names:tc:opendocument:xmlns:office:1.0" -      , 'officeooo': "http://openoffice.org/2009/office" -      , 'script'   : "urn:oasis:names:tc:opendocument:xmlns:script:1.0" -      , 'style'    : "urn:oasis:names:tc:opendocument:xmlns:style:1.0" -      , 'svg'      : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" -      , 'table'    : "urn:oasis:names:tc:opendocument:xmlns:table:1.0" -      , 'text'     : "urn:oasis:names:tc:opendocument:xmlns:text:1.0" -      , 'xlink'    : "http://www.w3.org/1999/xlink" -      , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" -      , 'tableooo' : "http://openoffice.org/2009/table" -      , 'transformation' : "http://www.w3.org/2003/g/data-view#" -      # OOo 1.X tags and some others: -      , 'ooo'      : "http://openoffice.org/2004/office" -      , 'ooow'     : "http://openoffice.org/2004/writer" -      , 'oooc'     : "http://openoffice.org/2004/calc" -      , 'o_dom'    : "http://www.w3.org/2001/xml-events" -      , 'o_xforms' : "http://www.w3.org/2002/xforms" -      , 'xs'       : "http://www.w3.org/2001/XMLSchema" -      , 'xsi'      : "http://www.w3.org/2001/XMLSchema-instance" -      # predefined xml namespace, see -      # http://www.w3.org/TR/2006/REC-xml-names11-20060816/ -      # "It MAY, but need not, be declared, and MUST NOT be undeclared -      # or bound to any other namespace name." -      , 'xml'      : "http://www.w3.org/XML/1998/namespace" -      } -  } - -for mimetype in namespace_by_name.itervalues () : -    for k, v in mimetype.iteritems () : -        if v in _namespace_map : -            assert (_namespace_map [v] == k) -        _namespace_map [v] = k  class OOoElementTree (autosuper) :      """ -        An ElementTree for OOo document XML members. Behaves like the -        orginal ElementTree (in fact it delegates almost everything to a -        real instance of ElementTree) except for the write method, that -        writes itself back to the OOo XML file in the OOo zip archive it -        came from. +    An ElementTree for OOo document XML members. Behaves like the +    orginal ElementTree (in fact it delegates almost everything to a +    real instance of ElementTree) except for the write method, that +    writes itself back to the OOo XML file in the OOo zip archive it +    came from.      """ -    def __init__ (self, ooopy, zname, root) : +    def __init__(self, ooopy, zname, root):          self.ooopy = ooopy          self.zname = zname -        self.tree  = ElementTree (root) -    # end def __init__ +        self.tree = ElementTree(root) -    def write (self) : +    def write(self):          self.ooopy.write (self.zname, self.tree) -    # end def write -    def __getattr__ (self, name) : +    def __getattr__(self, name) :          """ -            Delegate everything to our ElementTree attribute. +        Delegate everything to our ElementTree attribute.          """          if not name.startswith ('__') :              result = getattr (self.tree, name)              setattr (self, name, result)              return result          raise AttributeError (name) -    # end def __getattr__ -# end class OOoElementTree -class OOoPy (autosuper) : +class OOoPy(autosuper):      """ -        Wrapper for OpenOffice.org zip files (all OOo documents are -        really zip files internally). - -        from ooopy.OOoPy import OOoPy -        >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw') -        >>> o.mimetype -        'application/vnd.sun.xml.writer' -        >>> for f in files : -        ...     e = o.read (f) -        ...     e.write () -        ... -        >>> o.close () -        >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt') -        >>> o.mimetype -        'application/vnd.oasis.opendocument.text' -        >>> for f in files : -        ...     e = o.read (f) -        ...     e.write () -        ... -        >>> o.append_file ('Pictures/empty', '') -        >>> o.close () -        >>> o = OOoPy (infile = 'out2.odt') -        >>> for f in o.izip.infolist () : -        ...     print f.filename, f.create_system, f.compress_type -        mimetype 0 8 -        content.xml 0 8 -        styles.xml 0 8 -        meta.xml 0 8 -        settings.xml 0 8 -        META-INF/manifest.xml 0 8 -        Pictures/empty 0 8 -        Configurations2/statusbar/ 0 0 -        Configurations2/accelerator/current.xml 0 8 -        Configurations2/floater/ 0 0 -        Configurations2/popupmenu/ 0 0 -        Configurations2/progressbar/ 0 0 -        Configurations2/menubar/ 0 0 -        Configurations2/toolbar/ 0 0 -        Configurations2/images/Bitmaps/ 0 0 -        Thumbnails/thumbnail.png 0 8 +    Wrapper for OpenOffice.org zip files (all OOo documents are +    really zip files internally).      """ -    def __init__ \ -        ( self -        , infile     = None -        , outfile    = None -        , write_mode = 'w' -        , mimetype   = None -        ) : +    def __init__(self, infile=None, outfile=None, write_mode='w', +                 mimetype=None):          """ -            Open an OOo document, if no outfile is given, we open the -            file read-only. Otherwise the outfile has to be different -            from the infile -- the python ZipFile can't deal with -            read-write access. In case an outfile is given, we open it -            in "w" mode as a zip file, unless write_mode is specified -            (the only allowed case would be "a" for appending to an -            existing file, see pythons ZipFile documentation for -            details). If no infile is given, the user is responsible for -            providing all necessary files in the resulting output file. - -            It seems that OOo needs to have the mimetype as the first -            archive member (at least with mimetype as the first member -            it works, the order may not be arbitrary) to recognize a zip -            archive as an OOo file. When copying from a given infile, we -            use the same order of elements in the resulting output. When -            creating new elements we make sure the mimetype is the first -            in the resulting archive. - -            Note that both, infile and outfile can either be filenames -            or file-like objects (e.g. StringIO). - -            The mimetype is automatically determined if an infile is -            given. If only writing is desired, the mimetype should be -            set. +        Open an OOo document, if no outfile is given, we open the +        file read-only. Otherwise the outfile has to be different +        from the infile -- the python ZipFile can't deal with +        read-write access. In case an outfile is given, we open it +        in "w" mode as a zip file, unless write_mode is specified +        (the only allowed case would be "a" for appending to an +        existing file, see pythons ZipFile documentation for +        details). If no infile is given, the user is responsible for +        providing all necessary files in the resulting output file. + +        It seems that OOo needs to have the mimetype as the first +        archive member (at least with mimetype as the first member +        it works, the order may not be arbitrary) to recognize a zip +        archive as an OOo file. When copying from a given infile, we +        use the same order of elements in the resulting output. When +        creating new elements we make sure the mimetype is the first +        in the resulting archive. + +        Note that both, infile and outfile can either be filenames +        or file-like objects (e.g. StringIO). + +        The mimetype is automatically determined if an infile is +        given. If only writing is desired, the mimetype should be +        set.          """          assert (infile != outfile)          self.izip = self.ozip = None -        if infile : -            self.izip    = ZipFile (infile,  'r',        ZIP_DEFLATED) +        if infile: +            self.izip = ZipFile(infile, 'r', ZIP_DEFLATED)          if outfile : -            self.ozip    = ZipFile (outfile, write_mode, ZIP_DEFLATED) +            self.ozip = ZipFile(outfile, write_mode, ZIP_DEFLATED)              self.written = {} -        if mimetype : +        if mimetype:              self.mimetype = mimetype -        elif self.izip : -            self.mimetype = self.izip.read ('mimetype') -    # end def __init__ +        elif self.izip: +            self.mimetype = self.izip.read('mimetype') +        if isinstance(self.mimetype, bytes): +            self.mimetype = self.mimetype.decode() -    def read (self, zname) : +    def read(self, zname):          """ -            return an OOoElementTree object for the given OOo document -            archive member name. Currently an OOo document contains the -            following XML files:: - -             * content.xml: the text of the OOo document -             * styles.xml: style definitions -             * meta.xml: meta-information (author, last changed, ...) -             * settings.xml: settings in OOo -             * META-INF/manifest.xml: contents of the archive - -            There is an additional file "mimetype" that always contains -            the string "application/vnd.sun.xml.writer" for OOo 1.X files -            and the string "application/vnd.oasis.opendocument.text" for -            OOo 2.X files. +        return an OOoElementTree object for the given OOo document +        archive member name. Currently an OOo document contains the +        following XML files:: + +         * content.xml: the text of the OOo document +         * styles.xml: style definitions +         * meta.xml: meta-information (author, last changed, ...) +         * settings.xml: settings in OOo +         * META-INF/manifest.xml: contents of the archive + +        There is an additional file "mimetype" that always contains +        the string "application/vnd.sun.xml.writer" for OOo 1.X files +        and the string "application/vnd.oasis.opendocument.text" for +        OOo 2.X files.          """ -        assert (self.izip) +        assert self.izip          return OOoElementTree (self, zname, fromstring (self.izip.read (zname))) -    # end def read -    def _write (self, zname, str) : -        now  = datetime.utcnow ().timetuple () -        info = ZipInfo (zname, date_time = now) -        info.create_system = 0 # pretend to be fat +    def _write(self, zname, str): +        now = datetime.utcnow().timetuple() +        info = ZipInfo(zname, date_time=now) +        info.create_system = 0  # pretend to be fat          info.compress_type = ZIP_DEFLATED -        self.ozip.writestr (info, str) +        self.ozip.writestr(info, str)          self.written [zname] = 1 -    # end def _write -    def write (self, zname, etree) : -        assert (self.ozip) +    def write(self, zname, etree): +        assert self.ozip          # assure mimetype is the first member in new archive -        if 'mimetype' not in self.written : -            self._write ('mimetype', self.mimetype) -        str = StringIO () -        etree.write (str) -        self._write (zname, str.getvalue ()) -    # end def write +        if 'mimetype' not in self.written: +            self._write('mimetype', self.mimetype) +        str = BytesIO() +        etree.write(str) +        self._write(zname, str.getvalue()) -    def append_file (self, zname, str) : -        """ Official interface to _write: Append a file to the end of -            the archive. +    def append_file (self, zname, str): +        """ +        Official interface to _write: Append a file to the end of the archive.          """ -        if zname not in self.written : +        if zname not in self.written:              self._write (zname, str) -    # end def append_file -    def close (self) : +    def close(self):          """ -            Close the zip files. According to documentation of zipfile in -            the standard python lib, this has to be done to be sure -            everything is written. We copy over the not-yet written files -            from izip before closing ozip. +        Close the zip files. According to documentation of zipfile in +        the standard python lib, this has to be done to be sure +        everything is written. We copy over the not-yet written files +        from izip before closing ozip.          """ -        if self.izip and self.ozip : -            for f in self.izip.infolist () : +        if self.izip and self.ozip: +            for f in self.izip.infolist():                  if f.filename not in self.written : -                    self.ozip.writestr (f, self.izip.read (f.filename)) -        for i in self.izip, self.ozip : -            if i : i.close () +                    self.ozip.writestr(f, self.izip.read(f.filename)) +        for i in self.izip, self.ozip: +            if i: +                i.close()          self.izip = self.ozip = None -    # end def close -    __del__ = close # auto-close on deletion of object -# end class OOoPy +    __del__ = close  # auto-close on deletion of object diff --git a/ooopy/Transformer.py b/ooopy/Transformer.py index dbbab125d..4e21bb331 100644 --- a/ooopy/Transformer.py +++ b/ooopy/Transformer.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*-  # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.  # Reichergasse 131, A-3411 Weidling.  # Web: http://www.runtux.com Email: office@runtux.com @@ -21,1377 +21,181 @@  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  # **************************************************************************** -from __future__              import absolute_import +# import time +# import re +# from xml.etree.ElementTree import dump, SubElement, Element, tostring +from xml.etree.ElementTree import _namespace_map +# from copy import deepcopy +from ooopy.OOoPy import autosuper  # , OOoPy +from ooopy.OOoPy import files, mimetypes, namespace_by_name +# from ooopy.Version           import VERSION -import time -import re -try : -    from xml.etree.ElementTree   import dump, SubElement, Element, tostring -    from xml.etree.ElementTree   import _namespace_map -except ImportError : -    from elementtree.ElementTree import dump, SubElement, Element, tostring -    from elementtree.ElementTree import _namespace_map -from copy                    import deepcopy -from ooopy.OOoPy             import OOoPy, autosuper -from ooopy.OOoPy             import files, mimetypes, namespace_by_name -from ooopy.Version           import VERSION -def OOo_Tag (namespace, name, mimetype) : -    """Return combined XML tag -     -       >>> OOo_Tag ('xml', 'id', mimetypes [1]) -       '{http://www.w3.org/XML/1998/namespace}id' -       >>> OOo_Tag ('text', 'list', mimetypes [1]) -       '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list' +def OOo_Tag(namespace, name, mimetype):      """ -    return "{%s}%s" % (namespace_by_name [mimetype][namespace], name) -# end def OOo_Tag +    Return combined XML tag +    """ +    return "{%s}%s" % (namespace_by_name[mimetype][namespace], name) + -def split_tag (tag) : -    """ Split tag into symbolic namespace and name part -- inverse -        operation of OOo_Tag. +def split_tag(tag): +    """ +    Split tag into symbolic namespace and name part -- inverse +    operation of OOo_Tag.      """      ns, t = tag.split ('}') -    return (_namespace_map [ns [1:]], t) -# end def split_tag +    return _namespace_map [ns [1:]], t -class Transform (autosuper) : -    """ -        Base class for individual transforms on OOo files. An individual -        transform needs a filename variable for specifying the OOo file -        the transform should be applied to and an optional prio. -        Individual transforms are applied according to their prio -        setting, higher prio means later application of a transform. -        The filename variable must specify one of the XML files which are -        part of the OOo document (see files variable above). As -        the names imply, content.xml contains the contents of the -        document (text and ad-hoc style definitions), styles.xml contains -        the style definitions, meta.xml contains meta information like -        author, editing time, etc. and settings.xml is used to store -        OOo's settings (menu Tools->Configure). +class Transform(autosuper): +    """ +    Base class for individual transforms on OOo files. An individual +    transform needs a filename variable for specifying the OOo file +    the transform should be applied to and an optional prio. +    Individual transforms are applied according to their prio +    setting, higher prio means later application of a transform. + +    The filename variable must specify one of the XML files which are +    part of the OOo document (see files variable above). As +    the names imply, content.xml contains the contents of the +    document (text and ad-hoc style definitions), styles.xml contains +    the style definitions, meta.xml contains meta information like +    author, editing time, etc. and settings.xml is used to store +    OOo's settings (menu Tools->Configure).      """      prio = 100 -    textbody_names = \ -        { mimetypes [0] : 'body' -        , mimetypes [1] : 'text' -        } -    paragraph_props = \ -        { mimetypes [0] : 'properties' -        , mimetypes [1] : 'paragraph-properties' -        } -    font_decls = \ -        { mimetypes [0] : 'font-decls' -        , mimetypes [1] : 'font-face-decls' -        } - -    def __init__ (self, prio = None, transformer = None) : -        if prio is not None : -            self.prio    = prio +    textbody_names = { mimetypes[0]: 'body', mimetypes[1]: 'text'} +    paragraph_props = { +        mimetypes[0]: 'properties', +        mimetypes[1]: 'paragraph-properties' +    } +    font_decls = { +        mimetypes[0]: 'font-decls', +        mimetypes[1]: 'font-face-decls' +    } + +    def __init__(self, prio=None, transformer=None): +        if prio is not None: +            self.prio = prio          self.transformer = None -        if transformer : -            self.register (transformer) -    # end def __init__ +        if transformer: +            self.register(transformer)      def apply (self, root) :          """ Apply myself to the element given as root """ -        raise NotImplementedError, 'derived transforms must implement "apply"' -    # end def apply +        raise NotImplementedError('derived transforms must implement "apply"')      def apply_all (self, trees) : -        """ Apply myself to all the files given in trees. The variable -            trees contains a dictionary of ElementTree indexed by the -            name of the OOo File. -            The standard case is that only one file (namely -            self.filename) is used.          """ -        assert (self.filename) -        self.apply (trees [self.filename].getroot ()) -    # end def apply_all +        Apply myself to all the files given in trees. The variable +        trees contains a dictionary of ElementTree indexed by the +        name of the OOo File. +        The standard case is that only one file (namely +        self.filename) is used. +        """ +        assert self.filename +        self.apply(trees[self.filename].getroot()) -    def find_tbody (self, root) : -        """ Find the node which really contains the text -- different -            for different OOo versions. +    def find_tbody(self, root) : +        """ +        Find the node which really contains the text -- different +        for different OOo versions.          """          tbody = root -        if tbody.tag != self.textbody_tag : -            tbody = tbody.find ('.//' + self.textbody_tag) +        if tbody.tag != self.textbody_tag: +            tbody = tbody.find('.//' + self.textbody_tag)          return tbody -    # end def find_tbody - -    def register (self, transformer) : -        """ Registering with a transformer means being able to access -            variables stored in the tranformer by other transforms. -            Also needed for tag-computation: The transformer knows which -            version of OOo document we are processing. +    def register(self, transformer) :          """ -        self.transformer     = transformer -        mt                   = self.mimetype = transformer.mimetype -        self.textbody_name   = self.textbody_names [mt] +        Registering with a transformer means being able to access +        variables stored in the tranformer by other transforms. +        Also needed for tag-computation: The transformer knows which +        version of OOo document we are processing. +        """ +        self.transformer = transformer +        mt = self.mimetype = transformer.mimetype +        self.textbody_name = self.textbody_names [mt]          self.paragraph_props = self.paragraph_props [mt] -        self.properties_tag  = self.oootag ('style', self.paragraph_props) -        self.textbody_tag    = self.oootag ('office', self.textbody_name) -        self.font_decls_tag  = self.oootag ('office', self.font_decls [mt]) -    # end def register +        self.properties_tag = self.oootag('style', self.paragraph_props) +        self.textbody_tag = self.oootag('office', self.textbody_name) +        self.font_decls_tag = self.oootag('office', self.font_decls [mt]) -    def oootag (self, namespace, name) : +    def oootag(self, namespace, name):          """ Compute long tag version """ -        return OOo_Tag (namespace, name, self.mimetype) -    # end def oootag +        return OOo_Tag(namespace, name, self.mimetype) -    def set (self, variable, value) : +    def set(self, variable, value) :          """ Set variable in our transformer using naming convention. """          self.transformer [self._varname (variable)] = value -    # end def set      def _varname (self, name) : -        """ For fulfilling the naming convention of the transformer -            dictionary (every entry in this dictionary should be prefixed -            with the class name of the transform) we have this -            convenience method. -            Returns variable name prefixed with own class name.          """ -        return ":".join ((self.__class__.__name__, name)) -    # end def _varname +        For fulfilling the naming convention of the transformer +        dictionary (every entry in this dictionary should be prefixed +        with the class name of the transform) we have this +        convenience method. +        Returns variable name prefixed with own class name. +        """ +        return ":".join((self.__class__.__name__, name)) -# end class Transform -class Transformer (autosuper) : +class Transformer(autosuper):      """ -        Class for applying a set of transforms to a given ooopy object. -        The transforms are applied to the specified file in priority -        order. When applying transforms we have a mechanism for -        communication of transforms. We give the transformer to the -        individual transforms as a parameter. The transforms may use the -        transformer like a dictionary for storing values and retrieving -        values left by previous transforms. -        As a naming convention each transform should use its class name -        as a prefix for storing values in the dictionary. -        >>> import Transforms -        >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts -        >>> try : -        ...     from io import StringIO, BytesIO -        ...     StringIO = BytesIO -        ... except ImportError : -        ...     from StringIO import StringIO -        >>> sio = BytesIO () -        >>> o   = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) -        >>> m   = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) -        'Standard' -        >>> def cb (name) : -        ...     r = { 'street'     : 'Beispielstrasse 42' -        ...         , 'firstname'  : 'Hugo' -        ...         , 'salutation' : 'Frau' -        ...         } -        ...     if r.has_key (name) : return r [name] -        ...     return None -        ...  -        >>> p = get_meta (m) -        >>> t = Transformer (m, p) -        >>> t ['a'] = 'a' -        >>> t ['a'] -        'a' -        >>> t.transform (o) -        >>> p.set ('a', 'b') -        >>> t ['Attribute_Access:a'] -        'b' -        >>> t   = Transformer ( -        ...       m -        ...     , Transforms.Autoupdate () -        ...     , Transforms.Editinfo   ()   -        ...     , Transforms.Field_Replace (prio = 99, replace = cb) -        ...     , Transforms.Field_Replace -        ...         ( replace = -        ...             { 'salutation' : '' -        ...             , 'firstname'  : 'Erika' -        ...             , 'lastname'   : 'Musterfrau' -        ...             , 'country'    : 'D'  -        ...             , 'postalcode' : '00815' -        ...             , 'city'       : 'Niemandsdorf' -        ...             } -        ...         ) -        ...     , Transforms.Addpagebreak_Style () -        ...     , Transforms.Addpagebreak       () -        ...     ) -        >>> t.transform (o) -        >>> o.close () -        >>> ov  = sio.getvalue () -        >>> f   = open ("testout.sxw", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> c = o.read ('content.xml') -        >>> m = o.mimetype -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) -        >>> for node in body.findall (vset) : -        ...     name = node.get (OOo_Tag ('text', 'name', m)) -        ...     print name, ':', node.text -        salutation : None -        firstname : Erika -        lastname : Musterfrau -        street : Beispielstrasse 42 -        country : D -        postalcode : 00815 -        city : Niemandsdorf -        salutation : None -        firstname : Erika -        lastname : Musterfrau -        street : Beispielstrasse 42 -        country : D -        postalcode : 00815 -        city : Niemandsdorf -        >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) -        'P2' -        >>> sio = StringIO () -        >>> o   = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) -        >>> c = o.read ('content.xml') -        >>> t   = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Addpagebreak_Style () -        ...     , Transforms.Mailmerge -        ...       ( iterator =  -        ...         ( dict (firstname = 'Erika', lastname = 'Nobody') -        ...         , dict (firstname = 'Eric',  lastname = 'Wizard') -        ...         , cb -        ...         ) -        ...       ) -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     ) -        >>> t.transform (o) -        >>> for i in meta_counts : -        ...     print i, t [':'.join (('Set_Attribute', i))] -        character-count 951 -        image-count 0 -        object-count 0 -        page-count 3 -        paragraph-count 113 -        table-count 3 -        word-count 162 -        >>> name = t ['Addpagebreak_Style:stylename'] -        >>> name -        'P2' -        >>> o.close () -        >>> ov  = sio.getvalue () -        >>> f   = open ("testout2.sxw", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', m)) -        >>> for n in body.findall ('.//*') : -        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m)) -        ...     if zidx : -        ...         print ':'.join(split_tag (n.tag)), zidx -        draw:text-box 0 -        draw:rect 1 -        draw:text-box 3 -        draw:rect 4 -        draw:text-box 6 -        draw:rect 7 -        draw:text-box 2 -        draw:text-box 5 -        draw:text-box 8 -        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : -        ...     if n.get (OOo_Tag ('text', 'style-name', m)) == name : -        ...         print n.tag -        {http://openoffice.org/2000/text}p -        {http://openoffice.org/2000/text}p -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) -        >>> for n in body.findall (vset) : -        ...     if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : -        ...         name = n.get (OOo_Tag ('text', 'name', m)) -        ...         print name, ':', n.text -        firstname : Erika -        lastname : Nobody -        firstname : Eric -        lastname : Wizard -        firstname : Hugo -        lastname : Testman -        firstname : Erika -        lastname : Nobody -        firstname : Eric -        lastname : Wizard -        firstname : Hugo -        lastname : Testman -        >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : -        ...     print n.get (OOo_Tag ('draw', 'name', m)), -        ...     print n.get (OOo_Tag ('text', 'anchor-page-number', m)) -        Frame1 1 -        Frame2 2 -        Frame3 3 -        Frame4 None -        Frame5 None -        Frame6 None -        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : -        ...     print n.get (OOo_Tag ('text', 'name', m)) -        Section1 -        Section2 -        Section3 -        Section4 -        Section5 -        Section6 -        Section7 -        Section8 -        Section9 -        Section10 -        Section11 -        Section12 -        Section13 -        Section14 -        Section15 -        Section16 -        Section17 -        Section18 -        >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : -        ...     print n.get (OOo_Tag ('table', 'name', m)) -        Table1 -        Table2 -        Table3 -        >>> r = o.read ('meta.xml') -        >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) -        >>> for i in meta_counts : -        ...     print i, repr (meta.get (OOo_Tag ('meta', i, m))) -        character-count '951' -        image-count '0' -        object-count '0' -        page-count '3' -        paragraph-count '113' -        table-count '3' -        word-count '162' -        >>> o.close () -        >>> sio = StringIO () -        >>> o   = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) -        >>> tf  = ('testfiles/test.sxw', 'testfiles/rechng.sxw') -        >>> t   = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Concatenate (*tf) -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     ) -        >>> t.transform (o) -        >>> for i in meta_counts : -        ...     print i, repr (t [':'.join (('Set_Attribute', i))]) -        character-count '1131' -        image-count '0' -        object-count '0' -        page-count '3' -        paragraph-count '168' -        table-count '2' -        word-count '160' -        >>> o.close () -        >>> ov  = sio.getvalue () -        >>> f   = open ("testout3.sxw", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> s = o.read ('styles.xml') -        >>> for n in c.findall ('./*/*') : -        ...     name = n.get (OOo_Tag ('style', 'name', m)) -        ...     if name : -        ...         parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) -        ...         print '"%s", "%s"' % (name, parent) -        "Tahoma1", "None" -        "Bitstream Vera Sans", "None" -        "Tahoma", "None" -        "Nimbus Roman No9 L", "None" -        "Courier New", "None" -        "Arial Black", "None" -        "New Century Schoolbook", "None" -        "Helvetica", "None" -        "Table1", "None" -        "Table1.A", "None" -        "Table1.A1", "None" -        "Table1.E1", "None" -        "Table1.A2", "None" -        "Table1.E2", "None" -        "P1", "None" -        "fr1", "Frame" -        "fr2", "None" -        "fr3", "Frame" -        "Sect1", "None" -        "gr1", "None" -        "P2", "Standard" -        "Standard_Concat", "None" -        "Concat_P1", "Concat_Frame contents" -        "Concat_P2", "Concat_Frame contents" -        "P3", "Concat_Frame contents" -        "P4", "Concat_Frame contents" -        "P5", "Concat_Standard" -        "P6", "Concat_Standard" -        "P7", "Concat_Frame contents" -        "P8", "Concat_Frame contents" -        "P9", "Concat_Frame contents" -        "P10", "Concat_Frame contents" -        "P11", "Concat_Frame contents" -        "P12", "Concat_Frame contents" -        "P13", "Concat_Frame contents" -        "P15", "Concat_Standard" -        "P16", "Concat_Standard" -        "P17", "Concat_Standard" -        "P18", "Concat_Standard" -        "P19", "Concat_Standard" -        "P20", "Concat_Standard" -        "P21", "Concat_Standard" -        "P22", "Concat_Standard" -        "P23", "Concat_Standard" -        "T1", "None" -        "Concat_fr1", "Concat_Frame" -        "Concat_fr2", "Concat_Frame" -        "Concat_fr3", "Concat_Frame" -        "fr4", "Concat_Frame" -        "fr5", "Concat_Frame" -        "fr6", "Concat_Frame" -        "Concat_Sect1", "None" -        "N0", "None" -        "N2", "None" -        "P15_Concat", "Concat_Standard" -        >>> for n in s.findall ('./*/*') : -        ...     name = n.get (OOo_Tag ('style', 'name', m)) -        ...     if name : -        ...         parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) -        ...         print '"%s", "%s"' % (name, parent) -        "Tahoma1", "None" -        "Bitstream Vera Sans", "None" -        "Tahoma", "None" -        "Nimbus Roman No9 L", "None" -        "Courier New", "None" -        "Arial Black", "None" -        "New Century Schoolbook", "None" -        "Helvetica", "None" -        "Standard", "None" -        "Text body", "Standard" -        "List", "Text body" -        "Table Contents", "Text body" -        "Table Heading", "Table Contents" -        "Caption", "Standard" -        "Frame contents", "Text body" -        "Index", "Standard" -        "Frame", "None" -        "OLE", "None" -        "Concat_Standard", "None" -        "Concat_Text body", "Concat_Standard" -        "Concat_List", "Concat_Text body" -        "Concat_Caption", "Concat_Standard" -        "Concat_Frame contents", "Concat_Text body" -        "Concat_Index", "Concat_Standard" -        "Horizontal Line", "Concat_Standard" -        "Internet link", "None" -        "Visited Internet Link", "None" -        "Concat_Frame", "None" -        "Concat_OLE", "None" -        "pm1", "None" -        "Concat_pm1", "None" -        "Standard", "None" -        "Concat_Standard", "None" -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : -        ...     name = n.get (OOo_Tag ('text', 'name', m)) -        ...     print name -        salutation -        firstname -        lastname -        street -        country -        postalcode -        city -        date -        invoice.invoice_no -        invoice.abo.aboprice.abotype.description -        address.salutation -        address.title -        address.firstname -        address.lastname -        address.function -        address.street -        address.country -        address.postalcode -        address.city -        invoice.subscriber.salutation -        invoice.subscriber.title -        invoice.subscriber.firstname -        invoice.subscriber.lastname -        invoice.subscriber.function -        invoice.subscriber.street -        invoice.subscriber.country -        invoice.subscriber.postalcode -        invoice.subscriber.city -        invoice.period_start -        invoice.period_end -        invoice.currency.name -        invoice.amount -        invoice.subscriber.initial -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : -        ...     name = n.get (OOo_Tag ('text', 'name', m)) -        ...     print name -        Illustration -        Table -        Text -        Drawing -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : -        ...     name = n.get (OOo_Tag ('text', 'style-name', m)) -        ...     if not name or name.startswith ('Concat') : -        ...         print ">%s<" % name -        >Concat_P1< -        >Concat_P2< -        >Concat_Frame contents< -        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : -        ...     attrs = 'name', 'style-name', 'z-index' -        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] -        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) -        ...     print attrs -        ['Frame1', 'fr1', '0', '1'] -        ['Frame2', 'fr1', '3', '2'] -        ['Frame3', 'Concat_fr1', '6', '3'] -        ['Frame4', 'Concat_fr2', '7', '3'] -        ['Frame5', 'Concat_fr3', '8', '3'] -        ['Frame6', 'Concat_fr1', '9', '3'] -        ['Frame7', 'fr4', '10', '3'] -        ['Frame8', 'fr4', '11', '3'] -        ['Frame9', 'fr4', '12', '3'] -        ['Frame10', 'fr4', '13', '3'] -        ['Frame11', 'fr4', '14', '3'] -        ['Frame12', 'fr4', '15', '3'] -        ['Frame13', 'fr5', '16', '3'] -        ['Frame14', 'fr4', '18', '3'] -        ['Frame15', 'fr4', '19', '3'] -        ['Frame16', 'fr4', '20', '3'] -        ['Frame17', 'fr6', '17', '3'] -        ['Frame18', 'fr4', '23', '3'] -        ['Frame19', 'fr3', '2', None] -        ['Frame20', 'fr3', '5', None] -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : -        ...     attrs = 'name', 'style-name' -        ...     attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] -        ...     print attrs -        ['Section1', 'Sect1'] -        ['Section2', 'Sect1'] -        ['Section3', 'Sect1'] -        ['Section4', 'Sect1'] -        ['Section5', 'Sect1'] -        ['Section6', 'Sect1'] -        ['Section7', 'Concat_Sect1'] -        ['Section8', 'Concat_Sect1'] -        ['Section9', 'Concat_Sect1'] -        ['Section10', 'Concat_Sect1'] -        ['Section11', 'Concat_Sect1'] -        ['Section12', 'Concat_Sect1'] -        ['Section13', 'Concat_Sect1'] -        ['Section14', 'Concat_Sect1'] -        ['Section15', 'Concat_Sect1'] -        ['Section16', 'Concat_Sect1'] -        ['Section17', 'Concat_Sect1'] -        ['Section18', 'Concat_Sect1'] -        ['Section19', 'Concat_Sect1'] -        ['Section20', 'Concat_Sect1'] -        ['Section21', 'Concat_Sect1'] -        ['Section22', 'Concat_Sect1'] -        ['Section23', 'Concat_Sect1'] -        ['Section24', 'Concat_Sect1'] -        ['Section25', 'Concat_Sect1'] -        ['Section26', 'Concat_Sect1'] -        ['Section27', 'Concat_Sect1'] -        ['Section28', 'Sect1'] -        ['Section29', 'Sect1'] -        ['Section30', 'Sect1'] -        ['Section31', 'Sect1'] -        ['Section32', 'Sect1'] -        ['Section33', 'Sect1'] -        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : -        ...     attrs = 'style-name', 'text-style-name', 'z-index' -        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] -        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) -        ...     print attrs -        ['gr1', 'P1', '1', '1'] -        ['gr1', 'P1', '4', '2'] -        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : -        ...     attrs = 'style-name', 'text-style-name', 'z-index' -        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] -        ...     print attrs -        ['gr1', 'P1', '24'] -        ['gr1', 'P1', '22'] -        ['gr1', 'P1', '21'] -        >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : -        ...     if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : -        ...         attrs = 'name', 'class', 'family' -        ...         attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] -        ...         print attrs -        ...         props = n.find ('./' + OOo_Tag ('style', 'properties', m)) -        ...         if props is not None and len (props) : -        ...             props [0].tag -        ['Concat_Standard', 'text', 'paragraph'] -        '{http://openoffice.org/2000/style}tab-stops' -        ['Concat_Text body', 'text', 'paragraph'] -        ['Concat_List', 'list', 'paragraph'] -        ['Concat_Caption', 'extra', 'paragraph'] -        ['Concat_Frame contents', 'extra', 'paragraph'] -        ['Concat_Index', 'index', 'paragraph'] -        ['Concat_Frame', None, 'graphics'] -        ['Concat_OLE', None, 'graphics'] -        >>> for n in c.findall ('.//*') : -        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m)) -        ...     if zidx : -        ...         print ':'.join(split_tag (n.tag)), zidx -        draw:text-box 0 -        draw:rect 1 -        draw:text-box 3 -        draw:rect 4 -        draw:text-box 6 -        draw:text-box 7 -        draw:text-box 8 -        draw:text-box 9 -        draw:text-box 10 -        draw:text-box 11 -        draw:text-box 12 -        draw:text-box 13 -        draw:text-box 14 -        draw:text-box 15 -        draw:text-box 16 -        draw:text-box 18 -        draw:text-box 19 -        draw:text-box 20 -        draw:text-box 17 -        draw:text-box 23 -        draw:line 24 -        draw:text-box 2 -        draw:text-box 5 -        draw:line 22 -        draw:line 21 -        >>> sio = StringIO () -        >>> o   = OOoPy (infile = 'testfiles/carta.stw', outfile = sio) -        >>> t = Transformer ( -        ...     o.mimetype -        ...   , get_meta (o.mimetype) -        ...   , Transforms.Addpagebreak_Style () -        ...   , Transforms.Mailmerge -        ...     ( iterator =  -        ...         ( dict -        ...             ( Spett = "Spettabile" -        ...             , contraente = "First person" -        ...             , indirizzo = "street? 1" -        ...             , tipo = "racc. A.C." -        ...             , luogo = "Varese" -        ...             , oggetto = "Saluti" -        ...             ) -        ...         , dict -        ...             ( Spett = "Egregio" -        ...             , contraente = "Second Person" -        ...             , indirizzo = "street? 2" -        ...             , tipo = "Raccomandata" -        ...             , luogo = "Gavirate" -        ...             , oggetto = "Ossequi" -        ...             ) -        ...         ) -        ...     ) -        ...   , renumber_all (o.mimetype) -        ...   , set_meta (o.mimetype) -        ...   , Transforms.Fix_OOo_Tag () -        ...   ) -        >>> t.transform(o) -        >>> o.close() -        >>> ov  = sio.getvalue () -        >>> f   = open ("carta-out.stw", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) -        >>> for node in body.findall (vset) : -        ...     name = node.get (OOo_Tag ('text', 'name', m)) -        ...     print name, ':', node.text -        Spett : Spettabile -        contraente : First person -        indirizzo : street? 1 -        Spett : Egregio -        contraente : Second Person -        indirizzo : street? 2 -        tipo : racc. A.C. -        luogo : Varese -        oggetto : Saluti -        tipo : Raccomandata -        luogo : Gavirate -        oggetto : Ossequi -        >>> sio = StringIO () -        >>> o   = OOoPy (infile = 'testfiles/test.odt', outfile = sio) -        >>> t   = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Addpagebreak_Style () -        ...     , Transforms.Mailmerge -        ...       ( iterator =  -        ...         ( dict (firstname = 'Erika', lastname = 'Nobody') -        ...         , dict (firstname = 'Eric',  lastname = 'Wizard') -        ...         , cb -        ...         ) -        ...       ) -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     ) -        >>> t.transform (o) -        >>> for i in meta_counts : -        ...     print i, t [':'.join (('Set_Attribute', i))] -        character-count 951 -        image-count 0 -        object-count 0 -        page-count 3 -        paragraph-count 53 -        table-count 3 -        word-count 162 -        >>> name = t ['Addpagebreak_Style:stylename'] -        >>> name -        'P2' -        >>> o.close () -        >>> ov  = sio.getvalue () -        >>> f   = open ("testout.odt", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', m)) -        >>> for n in body.findall ('.//*') : -        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m)) -        ...     if zidx : -        ...         print ':'.join(split_tag (n.tag)), zidx -        draw:frame 0 -        draw:rect 1 -        draw:frame 3 -        draw:rect 4 -        draw:frame 6 -        draw:rect 7 -        draw:frame 2 -        draw:frame 5 -        draw:frame 8 -        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : -        ...     if n.get (OOo_Tag ('text', 'style-name', m)) == name : -        ...         print n.tag -        {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p -        {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) -        >>> for n in body.findall (vset) : -        ...     if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : -        ...         name = n.get (OOo_Tag ('text', 'name', m)) -        ...         print name, ':', n.text -        firstname : Erika -        lastname : Nobody -        firstname : Eric -        lastname : Wizard -        firstname : Hugo -        lastname : Testman -        firstname : Erika -        lastname : Nobody -        firstname : Eric -        lastname : Wizard -        firstname : Hugo -        lastname : Testman -        >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : -        ...     print n.get (OOo_Tag ('draw', 'name', m)), -        ...     print n.get (OOo_Tag ('text', 'anchor-page-number', m)) -        Frame1 1 -        Frame2 2 -        Frame3 3 -        Frame4 None -        Frame5 None -        Frame6 None -        >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : -        ...     print n.get (OOo_Tag ('text', 'name', m)) -        Section1 -        Section2 -        Section3 -        Section4 -        Section5 -        Section6 -        Section7 -        Section8 -        Section9 -        Section10 -        Section11 -        Section12 -        Section13 -        Section14 -        Section15 -        Section16 -        Section17 -        Section18 -        >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : -        ...     print n.get (OOo_Tag ('table', 'name', m)) -        Table1 -        Table2 -        Table3 -        >>> r = o.read ('meta.xml') -        >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) -        >>> for i in meta_counts : -        ...     print i, repr (meta.get (OOo_Tag ('meta', i, m))) -        character-count '951' -        image-count '0' -        object-count '0' -        page-count '3' -        paragraph-count '53' -        table-count '3' -        word-count '162' -        >>> o.close () -        >>> sio = StringIO () -        >>> o   = OOoPy (infile = 'testfiles/carta.odt', outfile = sio) -        >>> t = Transformer ( -        ...     o.mimetype -        ...   , get_meta (o.mimetype) -        ...   , Transforms.Addpagebreak_Style () -        ...   , Transforms.Mailmerge -        ...     ( iterator =  -        ...         ( dict -        ...             ( Spett = "Spettabile" -        ...             , contraente = "First person" -        ...             , indirizzo = "street? 1" -        ...             , tipo = "racc. A.C." -        ...             , luogo = "Varese" -        ...             , oggetto = "Saluti" -        ...             ) -        ...         , dict -        ...             ( Spett = "Egregio" -        ...             , contraente = "Second Person" -        ...             , indirizzo = "street? 2" -        ...             , tipo = "Raccomandata" -        ...             , luogo = "Gavirate" -        ...             , oggetto = "Ossequi" -        ...             ) -        ...         ) -        ...     ) -        ...   , renumber_all (o.mimetype) -        ...   , set_meta (o.mimetype) -        ...   , Transforms.Fix_OOo_Tag () -        ...   ) -        >>> t.transform(o) -        >>> o.close() -        >>> ov  = sio.getvalue () -        >>> f   = open ("carta-out.odt", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) -        >>> for node in body.findall (vset) : -        ...     name = node.get (OOo_Tag ('text', 'name', m)) -        ...     print name, ':', node.text -        Spett : Spettabile -        contraente : First person -        indirizzo : street? 1 -        Spett : Egregio -        contraente : Second Person -        indirizzo : street? 2 -        tipo : racc. A.C. -        luogo : Varese -        oggetto : Saluti -        tipo : Raccomandata -        luogo : Gavirate -        oggetto : Ossequi -        >>> sio = StringIO () -        >>> o   = OOoPy (infile = 'testfiles/test.odt', outfile = sio) -        >>> tf  = ('testfiles/test.odt', 'testfiles/rechng.odt') -        >>> t   = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Concatenate (*tf) -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     ) -        >>> t.transform (o) -        >>> for i in meta_counts : -        ...     print i, repr (t [':'.join (('Set_Attribute', i))]) -        character-count '1131' -        image-count '0' -        object-count '0' -        page-count '3' -        paragraph-count '80' -        table-count '2' -        word-count '159' -        >>> o.close () -        >>> ov  = sio.getvalue () -        >>> f   = open ("testout3.odt", "wb") -        >>> f.write (ov) -        >>> f.close () -        >>> o = OOoPy (infile = sio) -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> s = o.read ('styles.xml') -        >>> for n in c.findall ('./*/*') : -        ...     name = n.get (OOo_Tag ('style', 'name', m)) -        ...     if name : -        ...         parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) -        ...         print '"%s", "%s"' % (name, parent) -        "Tahoma1", "None" -        "Bitstream Vera Sans", "None" -        "Tahoma", "None" -        "Nimbus Roman No9 L", "None" -        "Courier New", "None" -        "Arial Black", "None" -        "New Century Schoolbook", "None" -        "Times New Roman", "None" -        "Arial", "None" -        "Helvetica", "None" -        "Table1", "None" -        "Table1.A", "None" -        "Table1.A1", "None" -        "Table1.E1", "None" -        "Table1.A2", "None" -        "Table1.E2", "None" -        "P1", "None" -        "fr1", "Frame" -        "fr2", "Frame" -        "Sect1", "None" -        "gr1", "None" -        "P2", "Standard" -        "Standard_Concat", "None" -        "Concat_P1", "Concat_Frame_20_contents" -        "Concat_P2", "Concat_Frame_20_contents" -        "P3", "Concat_Frame_20_contents" -        "P4", "Concat_Standard" -        "P5", "Concat_Standard" -        "P6", "Concat_Frame_20_contents" -        "P7", "Concat_Frame_20_contents" -        "P8", "Concat_Frame_20_contents" -        "P9", "Concat_Frame_20_contents" -        "P10", "Concat_Frame_20_contents" -        "P11", "Concat_Frame_20_contents" -        "P12", "Concat_Frame_20_contents" -        "P14", "Concat_Standard" -        "P15", "Concat_Standard" -        "P16", "Concat_Standard" -        "P17", "Concat_Standard" -        "P18", "Concat_Standard" -        "P19", "Concat_Standard" -        "P20", "Concat_Standard" -        "P21", "Concat_Standard" -        "P22", "Concat_Standard" -        "P23", "Concat_Standard" -        "Concat_fr1", "Frame" -        "Concat_fr2", "Frame" -        "fr3", "Frame" -        "fr4", "Frame" -        "fr5", "Frame" -        "fr6", "Frame" -        "Concat_gr1", "None" -        "N0", "None" -        "N2", "None" -        "P14_Concat", "Concat_Standard" -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : -        ...     name = n.get (OOo_Tag ('text', 'name', m)) -        ...     print name -        salutation -        firstname -        lastname -        street -        country -        postalcode -        city -        date -        invoice.invoice_no -        invoice.abo.aboprice.abotype.description -        address.salutation -        address.title -        address.firstname -        address.lastname -        address.function -        address.street -        address.country -        address.postalcode -        address.city -        invoice.subscriber.salutation -        invoice.subscriber.title -        invoice.subscriber.firstname -        invoice.subscriber.lastname -        invoice.subscriber.function -        invoice.subscriber.street -        invoice.subscriber.country -        invoice.subscriber.postalcode -        invoice.subscriber.city -        invoice.period_start -        invoice.period_end -        invoice.currency.name -        invoice.amount -        invoice.subscriber.initial -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : -        ...     name = n.get (OOo_Tag ('text', 'name', m)) -        ...     print name -        Illustration -        Table -        Text -        Drawing -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : -        ...     name = n.get (OOo_Tag ('text', 'style-name', m)) -        ...     if not name or name.startswith ('Concat') : -        ...         print ':'.join(split_tag (n.tag)), ">%s<" % name -        text:p >None< -        text:p >None< -        text:p >Concat_P1< -        text:p >Concat_P1< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_P2< -        text:p >Concat_Frame_20_contents< -        text:p >None< -        text:p >None< -        text:p >None< -        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : -        ...     attrs = 'name', 'style-name', 'z-index' -        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] -        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) -        ...     print attrs -        ['Frame1', 'fr1', '0', '1'] -        ['Frame2', 'fr1', '3', '2'] -        ['Frame3', 'Concat_fr1', '6', '3'] -        ['Frame4', 'Concat_fr2', '7', '3'] -        ['Frame5', 'fr3', '8', '3'] -        ['Frame6', 'Concat_fr1', '9', '3'] -        ['Frame7', 'fr4', '10', '3'] -        ['Frame8', 'fr4', '11', '3'] -        ['Frame9', 'fr4', '12', '3'] -        ['Frame10', 'fr4', '13', '3'] -        ['Frame11', 'fr4', '14', '3'] -        ['Frame12', 'fr4', '15', '3'] -        ['Frame13', 'fr5', '16', '3'] -        ['Frame14', 'fr4', '18', '3'] -        ['Frame15', 'fr4', '19', '3'] -        ['Frame16', 'fr4', '20', '3'] -        ['Frame17', 'fr6', '17', '3'] -        ['Frame18', 'fr4', '23', '3'] -        ['Frame19', 'fr2', '2', None] -        ['Frame20', 'fr2', '5', None] -        >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : -        ...     attrs = 'name', 'style-name' -        ...     attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] -        ...     print attrs -        ['Section1', 'Sect1'] -        ['Section2', 'Sect1'] -        ['Section3', 'Sect1'] -        ['Section4', 'Sect1'] -        ['Section5', 'Sect1'] -        ['Section6', 'Sect1'] -        ['Section7', 'Sect1'] -        ['Section8', 'Sect1'] -        ['Section9', 'Sect1'] -        ['Section10', 'Sect1'] -        ['Section11', 'Sect1'] -        ['Section12', 'Sect1'] -        ['Section13', 'Sect1'] -        ['Section14', 'Sect1'] -        ['Section15', 'Sect1'] -        ['Section16', 'Sect1'] -        ['Section17', 'Sect1'] -        ['Section18', 'Sect1'] -        ['Section19', 'Sect1'] -        ['Section20', 'Sect1'] -        ['Section21', 'Sect1'] -        ['Section22', 'Sect1'] -        ['Section23', 'Sect1'] -        ['Section24', 'Sect1'] -        ['Section25', 'Sect1'] -        ['Section26', 'Sect1'] -        ['Section27', 'Sect1'] -        ['Section28', 'Sect1'] -        ['Section29', 'Sect1'] -        ['Section30', 'Sect1'] -        ['Section31', 'Sect1'] -        ['Section32', 'Sect1'] -        ['Section33', 'Sect1'] -        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : -        ...     attrs = 'style-name', 'text-style-name', 'z-index' -        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] -        ...     attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) -        ...     print attrs -        ['gr1', 'P1', '1', '1'] -        ['gr1', 'P1', '4', '2'] -        >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : -        ...     attrs = 'style-name', 'text-style-name', 'z-index' -        ...     attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] -        ...     print attrs -        ['Concat_gr1', 'P1', '24'] -        ['Concat_gr1', 'P1', '22'] -        ['Concat_gr1', 'P1', '21'] -        >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : -        ...     if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : -        ...         attrs = 'name', 'display-name', 'class', 'family' -        ...         attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] -        ...         print attrs -        ...         props = n.find ('./' + OOo_Tag ('style', 'properties', m)) -        ...         if props is not None and len (props) : -        ...             props [0].tag -        ['Concat_Standard', None, 'text', 'paragraph'] -        ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph'] -        ['Concat_List', None, 'list', 'paragraph'] -        ['Concat_Caption', None, 'extra', 'paragraph'] -        ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph'] -        ['Concat_Index', None, 'index', 'paragraph'] -        >>> for n in c.findall ('.//*') : -        ...     zidx = n.get (OOo_Tag ('draw', 'z-index', m)) -        ...     if zidx : -        ...         print ':'.join(split_tag (n.tag)), zidx -        draw:frame 0 -        draw:rect 1 -        draw:frame 3 -        draw:rect 4 -        draw:frame 6 -        draw:frame 7 -        draw:frame 8 -        draw:frame 9 -        draw:frame 10 -        draw:frame 11 -        draw:frame 12 -        draw:frame 13 -        draw:frame 14 -        draw:frame 15 -        draw:frame 16 -        draw:frame 18 -        draw:frame 19 -        draw:frame 20 -        draw:frame 17 -        draw:frame 23 -        draw:line 24 -        draw:frame 2 -        draw:frame 5 -        draw:line 22 -        draw:line 21 -        >>> from os import system -        >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt ' -        ...         '-o testout.odt ' -        ...         'salutation=Frau firstname=Erika lastname=Musterfrau ' -        ...         'country=D postalcode=00815 city=Niemandsdorf ' -        ...         'street="Beispielstrasse 42"') -        0 -        >>> o = OOoPy (infile = 'testout.odt') -        >>> c = o.read ('content.xml') -        >>> m = o.mimetype -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) -        >>> for node in body.findall (vset) : -        ...     name = node.get (OOo_Tag ('text', 'name', m)) -        ...     print name, ':', node.text -        salutation : Frau -        firstname : Erika -        lastname : Musterfrau -        street : Beispielstrasse 42 -        country : D -        postalcode : 00815 -        city : Niemandsdorf -        salutation : Frau -        firstname : Erika -        lastname : Musterfrau -        street : Beispielstrasse 42 -        country : D -        postalcode : 00815 -        city : Niemandsdorf -        >>> o.close () -        >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' " -        ...         "testfiles/carta.odt testfiles/x.csv") -        0 -        >>> o = OOoPy (infile = 'testout.odt') -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) -        >>> for node in body.findall (vset) : -        ...     name = node.get (OOo_Tag ('text', 'name', m)) -        ...     print name, ':', node.text -        Spett : Spettabile -        contraente : First person -        indirizzo : street? 1 -        Spett : Egregio -        contraente : Second Person -        indirizzo : street? 2 -        tipo : racc. A.C. -        luogo : Varese -        oggetto : Saluti -        tipo : Raccomandata -        luogo : Gavirate -        oggetto : Ossequi -        >>> o.close () -        >>> infile = 'testfiles/testenum.odt' -        >>> o   = OOoPy (infile = infile, outfile = 'xyzzy.odt') -        >>> t   = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Addpagebreak_Style () -        ...     , Transforms.Mailmerge -        ...       ( iterator =  -        ...         ( dict (firstname = 'Erika', lastname = 'Nobody') -        ...         , dict (firstname = 'Eric',  lastname = 'Wizard') -        ...         , cb -        ...         ) -        ...       ) -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     ) -        >>> t.transform (o) -        >>> o.close () -        >>> o = OOoPy (infile = 'xyzzy.odt') -        >>> m = o.mimetype -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> textlist = './/' + OOo_Tag ('text', 'list', m) -        >>> for node in body.findall (textlist) : -        ...     id = node.get (OOo_Tag ('xml', 'id', m)) -        ...     print 'xml:id', ':', id -        xml:id : list1 -        xml:id : list2 -        xml:id : list3 -        >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt') -        >>> m = o.mimetype -        >>> t = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Concatenate ('testfiles/page2.odt') -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     , Transforms.Manifest_Append () -        ...     ) -        >>> t.transform (o) -        >>> o.close () -        >>> o = OOoPy (infile = 'xyzzy.odt') -        >>> c = o.read ('META-INF/manifest.xml') -        >>> for node in c.getroot () : -        ...     fe = node.get (OOo_Tag ('manifest', 'full-path', m)) -        ...     print fe -        / -        Pictures/10000000000000C80000007941B1A419.jpg -        Pictures/10000000000000DC000000B02E191635.jpg -        Pictures/10000000000000DC000000A337377AAA.jpg -        meta.xml -        settings.xml -        content.xml -        Thumbnails/thumbnail.png -        layout-cache -        manifest.rdf -        Configurations2/accelerator/current.xml -        Configurations2/ -        styles.xml -        >>> for f in o.izip.infolist () : -        ...     print f.filename -        mimetype -        settings.xml -        META-INF/manifest.xml -        content.xml -        meta.xml -        styles.xml -        Pictures/10000000000000C80000007941B1A419.jpg -        Pictures/10000000000000DC000000B02E191635.jpg -        Pictures/10000000000000DC000000A337377AAA.jpg -        Thumbnails/thumbnail.png -        layout-cache -        manifest.rdf -        Configurations2/images/Bitmaps/ -        Configurations2/accelerator/current.xml -        >>> sio = StringIO () -        >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio) -        >>> m = o.mimetype -        >>> t = Transformer ( -        ...       o.mimetype -        ...     , get_meta (o.mimetype) -        ...     , Transforms.Concatenate ('testfiles/tbl_second.odt') -        ...     , renumber_all (o.mimetype) -        ...     , set_meta (o.mimetype) -        ...     , Transforms.Fix_OOo_Tag () -        ...     , Transforms.Manifest_Append () -        ...     ) -        >>> t.transform (o) -        >>> o.close () -        >>> o = OOoPy (infile = sio) -        >>> c = o.read ('content.xml') -        >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) -        >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m) -        >>> for table in body.findall (tbls) : -        ...     name = table.get (OOo_Tag ('table', 'style-name', mimetype = m)) -        ...     if name : -        ...         print name -        ...     for t in table.findall ('.//') : -        ...         name = t.get (OOo_Tag ('table', 'style-name', mimetype = m)) -        ...         if name : -        ...             print name -        Tabella1 -        Tabella1.A -        Tabella1.A1 -        Tabella1.B1 -        Tabella1.A2 -        Tabella1.B2 -        Tabella1 -        Tabella1.A -        Tabella1.A1 -        Tabella1.B1 -        Tabella1.A2 -        Tabella1.B2 +    Class for applying a set of transforms to a given ooopy object. +    The transforms are applied to the specified file in priority +    order. When applying transforms we have a mechanism for +    communication of transforms. We give the transformer to the +    individual transforms as a parameter. The transforms may use the +    transformer like a dictionary for storing values and retrieving +    values left by previous transforms. +    As a naming convention each transform should use its class name +    as a prefix for storing values in the dictionary.      """ -    def __init__ (self, mimetype, *tf) : +    def __init__(self, mimetype, *tf):          assert (mimetype in mimetypes) -        self.mimetype     = mimetype -        self.transforms   = {} -        for t in tf : -            self.insert (t) -        self.dictionary   = {} -        self.has_key      = self.dictionary.has_key +        self.mimetype = mimetype +        self.transforms = {} +        for t in tf: +            self.insert(t) +        self.dictionary = {}          self.__contains__ = self.has_key          # 2-tuples of filename, content -        self.appendfiles  = [] -    # end def __init__ +        self.appendfiles = [] -    def insert (self, transform) : +    def has_key(self, key): +        return key in self.dictionary.keys() + +    def insert(self, transform):          """Insert a new transform"""          t = transform -        if t.prio not in self.transforms : -            self.transforms [t.prio] = [] -        self.transforms [t.prio].append (t) -        t.register (self) -    # end def append +        if t.prio not in self.transforms: +            self.transforms[t.prio] = [] +        self.transforms[t.prio].append(t) +        t.register(self) -    def transform (self, ooopy) : +    def transform(self, ooopy):          """ -            Apply all the transforms in priority order. -            Priority order is global over all transforms. +        Apply all the transforms in priority order. +        Priority order is global over all transforms.          """          self.trees = {} -        for f in files : -            self.trees [f] = ooopy.read (f) -        #self.dictionary = {} # clear dict when transforming another ooopy -        prios = self.transforms.keys () -        prios.sort () -        for p in prios : -            for t in self.transforms [p] : -                t.apply_all (self.trees) -        for e in self.trees.itervalues () : -            e.write () -        for fname, fcontent in self.appendfiles : -            e.ooopy.append_file (fname, fcontent) -    # end def transform - -    def __getitem__ (self, key) : -        return self.dictionary [key] -    # end def __getitem__ - -    def __setitem__ (self, key, value) : -        self.dictionary [key] = value -    # end def __setitem__ -# end class Transformer +        for f in files: +            self.trees[f] = ooopy.read(f) +        # self.dictionary = {} # clear dict when transforming another ooopy +        prios = list(self.transforms.keys()) +        prios.sort() +        for p in prios: +            for t in self.transforms[p]: +                t.apply_all(self.trees) +        for e in self.trees.values(): +            e.write() +        for fname, fcontent in self.appendfiles: +            e.ooopy.append_file(fname, fcontent) + +    def __getitem__(self, key): +        return self.dictionary[key] + +    def __setitem__(self, key, value): +        self.dictionary[key] = value diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py index 50a6c0db8..37e7179e8 100644 --- a/ooopy/Transforms.py +++ b/ooopy/Transforms.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*-  # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.  # Reichergasse 131, A-3411 Weidling.  # Web: http://www.runtux.com Email: office@runtux.com @@ -21,114 +21,102 @@  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  # **************************************************************************** -from __future__              import absolute_import -  import time  import re -try : -    from xml.etree.ElementTree   import dump, SubElement, Element, tostring -except ImportError : -    from elementtree.ElementTree import dump, SubElement, Element, tostring -from copy                    import deepcopy -from ooopy.OOoPy             import OOoPy, autosuper -from ooopy.Transformer       import files, split_tag, OOo_Tag, Transform -from ooopy.Transformer       import mimetypes, namespace_by_name -from ooopy.Version           import VERSION +from xml.etree.ElementTree import SubElement, Element  # , dump , tostring +from copy import deepcopy +from ooopy.OOoPy import OOoPy, autosuper +from ooopy.Transformer import split_tag, OOo_Tag, Transform , mimetypes, \ +    namespace_by_name # , # files +# from ooopy.Version           import VERSION  # counts in meta.xml -meta_counts = \ -    ( 'character-count', 'image-count', 'object-count', 'page-count' -    , 'paragraph-count', 'table-count', 'word-count' -    ) - -class Access_Attribute (autosuper) : -    """ For performance reasons we do not specify a separate transform -        for each attribute-read or -change operation. Instead we define -        all the attribute accesses we want to perform as objects that -        follow the attribute access api and apply them all using an -        Attribute_Access in one go. +meta_counts = ('character-count', 'image-count', 'object-count', 'page-count', +               'paragraph-count', 'table-count', 'word-count') + + +class Access_Attribute(autosuper): +    """ +    For performance reasons we do not specify a separate transform +    for each attribute-read or -change operation. Instead we define +    all the attribute accesses we want to perform as objects that +    follow the attribute access api and apply them all using an +    Attribute_Access in one go.      """ -    def __init__ (self, key = None, prefix = None, ** kw) : -        self.__super.__init__ (key = key, prefix = prefix, **kw) +    def __init__(self, key=None, prefix=None, **kw): +        self.__super.__init__(key=key, prefix=prefix, **kw)          self.key = key -        if key : -            if not prefix : -                prefix   = self.__class__.__name__ -            self.key = ':'.join ((prefix, key)) -    # end def __init__ +        if key: +            if not prefix: +                prefix = self.__class__.__name__ +            self.key = ':'.join((prefix, key)) -    def register (self, transformer) : +    def register (self, transformer):          self.transformer = transformer -    # end def register -    def use_value (self, oldval = None) : -        """ Can change the given value by returning the new value. If -            returning None or oldval the attribute stays unchanged. +    def use_value (self, oldval=None):          """ -        raise NotImplementedError, "use_value must be defined in derived class" -    # end def use_value +        Can change the given value by returning the new value. If +        returning None or oldval the attribute stays unchanged. +        """ +        raise NotImplementedError("use_value must be defined in derived class") -# end class Access_Attribute -class Get_Attribute (Access_Attribute) : -    """ An example of not changing an attribute but only storing the -        value in the transformer +class Get_Attribute(Access_Attribute): +    """ +    An example of not changing an attribute but only storing the +    value in the transformer      """ -    def __init__ (self, tag, attr, key, transform = None, ** kw) : -        self.__super.__init__ (key = key, **kw) -        self.tag        = tag -        self.attribute  = attr -        self.transform  = transform -    # end def __init__ +    def __init__(self, tag, attr, key, transform=None, ** kw): +        self.__super.__init__(key=key, **kw) +        self.tag = tag +        self.attribute = attr +        self.transform = transform -    def use_value (self, oldval = None) : -        self.transformer [self.key] = oldval +    def use_value(self, oldval=None): +        self.transformer[self.key] = oldval          return None -    # end def use_value -# end def Get_Attribute -class Get_Max (Access_Attribute) : +class Get_Max(Access_Attribute):      """ Get the maximum value of an attribute """ -    def __init__ (self, tag, attr, key, transform = None, ** kw) : -        self.__super.__init__ (key = key, **kw) -        self.tag        = tag -        self.attribute  = attr -        self.transform  = transform -    # end def __init__ - -    def register (self, transformer) : -        self.__super.register (transformer) -        self.transformer [self.key] = -1 -    # end def register - -    def use_value (self, oldval = None) : -        if  self.transformer [self.key] < oldval : -            self.transformer [self.key] = oldval +    def __init__(self, tag, attr, key, transform=None, ** kw): +        self.__super.__init__(key=key, **kw) +        self.tag = tag +        self.attribute = attr +        self.transform = transform + +    def register(self, transformer): +        self.__super.register(transformer) +        self.transformer[self.key] = -1 + +    def use_value(self, oldval=None): +        if oldval: +            oldval = int(oldval) +        if (self.transformer[self.key] or 0) < (oldval or 0): +            self.transformer[self.key] = oldval          return None -    # end def use_value - -# end def Get_Max -class Renumber (Access_Attribute) : -    """ Specifies a renumbering transform. OOo has a 'name' attribute -        for several different tags, e.g., tables, frames, sections etc. -        These names must be unique in the whole document. OOo itself -        solves this by appending a unique number to a basename for each -        element, e.g., sections are named 'Section1', 'Section2', ... -        Renumber transforms can be applied to correct the numbering -        after operations that destroy the unique numbering, e.g., after -        a mailmerge where the same document is repeatedly appended. -        The force parameter specifies if the new renumbered name should -        be inserted even if the attribute in question does not exist. +class Renumber (Access_Attribute): +    """ +    Specifies a renumbering transform. OOo has a 'name' attribute +    for several different tags, e.g., tables, frames, sections etc. +    These names must be unique in the whole document. OOo itself +    solves this by appending a unique number to a basename for each +    element, e.g., sections are named 'Section1', 'Section2', ... +    Renumber transforms can be applied to correct the numbering +    after operations that destroy the unique numbering, e.g., after +    a mailmerge where the same document is repeatedly appended. + +    The force parameter specifies if the new renumbered name should +    be inserted even if the attribute in question does not exist.      """ -    def __init__ \ -        (self, tag, name = None, attr = None, start = 1, force = False) : +    def __init__(self, tag, name=None, attr=None, start=1, force=False):          self.__super.__init__ ()          tag_ns, tag_name = split_tag (tag)          self.tag_ns      = tag_ns @@ -178,29 +166,26 @@ class Set_Attribute (Access_Attribute) :          self.transform  = transform          self.value      = value          self.oldvalue   = oldvalue -    # end def __init__ -    def use_value (self, oldval) : -        if oldval is None : +    def use_value(self, oldval): +        if oldval is None:              return None -        if self.oldvalue and oldval != self.oldvalue : +        if self.oldvalue and oldval != self.oldvalue:              return None -        if self.key and self.transformer.has_key (self.key) : -            return str (self.transformer [self.key]) +        if self.key and self.transformer.has_key(self.key): +            return str(self.transformer[self.key])          return self.value -    # end def use_value -# end class Set_Attribute  def set_attributes_from_dict (tag, attr, d) :      """ Convenience function: iterate over a dict and return a list of          Set_Attribute objects specifying replacement of attributes in          the dictionary      """ -    return [Set_Attribute (tag, attr, oldvalue = k, value = v) -            for k,v in d.iteritems () -           ] -# end def set_attributes_from_dict +    return [ +        Set_Attribute(tag, attr, oldvalue=k, value=v) +        for k, v in d.items() +    ]  class Reanchor (Access_Attribute) :      """ @@ -327,41 +312,37 @@ class Manifest_Append (Transform) :  # meta.xml transforms  # +  class Editinfo (Transform) :      """ -        This is an example of modifying OOo meta info (edit information, -        author, etc). We set some of the items (program that generated -        the OOo file, modification time, number of edit cyles and overall -        edit duration).  It's easy to subclass this transform and replace -        the "replace" variable (pun intended) in the derived class. +    This is an example of modifying OOo meta info (edit information, +    author, etc). We set some of the items (program that generated +    the OOo file, modification time, number of edit cyles and overall +    edit duration).  It's easy to subclass this transform and replace +    the "replace" variable (pun intended) in the derived class.      """      filename = 'meta.xml' -    prio     = 20 -    repl     = \ -        { ('meta', 'generator')        : 'OOoPy field replacement' -        , ('dc',   'date')             : time.strftime ('%Y-%m-%dT%H:%M:%S') -        , ('meta', 'editing-cycles')   : '0' -        , ('meta', 'editing-duration') : 'PT0M0S' -        } -    replace  = {} +    prio = 20 +    repl = { +        ('meta', 'generator'): 'OOoPy field replacement', +        ('dc', 'date'): time.strftime ('%Y-%m-%dT%H:%M:%S'), +        ('meta', 'editing-cycles'): '0', +        ('meta', 'editing-duration'): 'PT0M0S' +    } +    replace = {}      # iterate over all mimetypes, so this works for all known mimetypes      # of OOo documents. -    for m in mimetypes : -        for params, value in repl.iteritems () : -            replace [OOo_Tag (mimetype = m, *params)] = value +    for m in mimetypes: +        for params, value in repl.items(): +            replace [OOo_Tag (mimetype=m, *params)] = value -    def apply (self, root) : -        for node in root.findall (self.oootag ('office', 'meta') + '/*') : -            if self.replace.has_key (node.tag) : -                node.text = self.replace [node.tag] -    # end def apply -# end class Editinfo +    def apply(self, root) : +        for node in root.findall (self.oootag ('office', 'meta') + '/*'): +            if node.tag in self.replace.keys(): +                node.text = self.replace[node.tag] -# -# settings.xml transforms -# -class Autoupdate (Transform) : +class Autoupdate (Transform):      """          This is an example of modifying OOo settings. We set some of the          AutoUpdate configuration items in OOo to true. We also specify @@ -718,28 +699,31 @@ class Mailmerge (_Body_Concat) :      # end def apply  # end class Mailmerge +  def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) : -    """ Serialise a style-element of an OOo document (e.g., a -        style:font-decl, style:default-style, etc declaration). -        We remove the name of the style and return something that is a -        representation of the style element which can be used as a -        dictionary key. -        The serialisation format is a tuple containing the tag as the -        first item, the attributes (as key,value pairs returned by -        items()) as the second item and the following items are -        serialisations of children.      """ -    attr = dict (element.attrib) -    stylename = OOo_Tag ('style', 'name', mimetype) -    if stylename in attr : del attr [stylename] -    attr = attr.items () -    attr.sort () -    attr = tuple (attr) +    Serialise a style-element of an OOo document (e.g., a +    style:font-decl, style:default-style, etc declaration). +    We remove the name of the style and return something that is a +    representation of the style element which can be used as a +    dictionary key. +    The serialisation format is a tuple containing the tag as the +    first item, the attributes (as key,value pairs returned by +    items()) as the second item and the following items are +    serialisations of children. +    """ +    attr = dict(element.attrib) +    stylename = OOo_Tag('style', 'name', mimetype) +    if stylename in attr: +        del attr[stylename] +    attr = list(attr.items()) +    attr.sort() +    attr = tuple(attr)      serial = [prefix + element.tag, attr] -    for e in element : -        serial.append (tree_serialise (e, prefix, mimetype)) -    return tuple (serial) -# end def tree_serialise +    for e in element: +        serial.append(tree_serialise (e, prefix, mimetype)) +    return tuple(serial) +  class Concatenate (_Body_Concat) :      """ @@ -794,31 +778,31 @@ class Concatenate (_Body_Concat) :              assert (self.docs [-1].mimetype == self.docs [0].mimetype)      # end def __init__ -    def apply_all (self, trees) : -        assert (self.docs [0].mimetype == self.transformer.mimetype) +    def apply_all(self, trees): +        assert (self.docs[0].mimetype == self.transformer.mimetype)          self.serialised = {}          self.stylenames = {} -        self.namemaps   = [{}] +        self.namemaps = [{}]          self.tab_depend = {} -        for s in self.ref_attrs.itervalues () : -            self.namemaps [0][s] = {} +        for s in self.ref_attrs.values(): +            self.namemaps[0][s] = {}          self.body_decls = {} -        for s in self.body_decl_sections : -            self.body_decls [s] = {} -        self.trees      = {} -        for f in self.oofiles : -            self.trees [f] = [trees [f].getroot ()] -        self.sections   = {} +        for s in self.body_decl_sections: +            self.body_decls[s] = {} +        self.trees = {} +        for f in self.oofiles: +            self.trees[f] = [trees[f].getroot()] +        self.sections = {}          for f in self.stylefiles : -            self.sections [f] = {} +            self.sections[f] = {}              for node in self.trees [f][0] :                  self.sections [f][node.tag] = node -        for d in self.docs : -            self.namemaps.append ({}) -            for s in self.ref_attrs.itervalues () : -                self.namemaps [-1][s] = {} -            for f in self.oofiles : -                self.trees [f].append (d.read (f).getroot ()) +        for d in self.docs: +            self.namemaps.append({}) +            for s in self.ref_attrs.values(): +                self.namemaps[-1][s] = {} +            for f in self.oofiles: +                self.trees[f].append(d.read(f).getroot())          # append a pagebreak style, will be optimized away if duplicate          pbs = Addpagebreak_Style (transformer = self.transformer)          pbs.apply (self.trees ['content.xml'][0]) @@ -879,17 +863,12 @@ class Concatenate (_Body_Concat) :                              if max < pos :                                  max = pos                          self.insert_tabs (sub, max) -    # end def apply_tab_correction - -    def _attr_rename (self, idx) : -        r = sum \ -            ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v]) -                for k,v in self.ref_attrs.iteritems () -              ] -            , [] -            ) -        return Attribute_Access (r, transformer = self.transformer) -    # end def _attr_rename + +    def _attr_rename(self, idx): +        r = sum( +            [set_attributes_from_dict(None, k, self.namemaps [idx][v]) +             for k, v in self.ref_attrs.items()], []) +        return Attribute_Access(r, transformer=self.transformer)      def body_concat (self) :          count = {} @@ -974,30 +953,28 @@ class Concatenate (_Body_Concat) :                      )      # end def insert_tabs -    def merge_defaultstyle (self, default_style, node) : +    def merge_defaultstyle(self, default_style, node):          assert default_style is not None          assert node is not None          proppath = './' + self.properties_tag          defprops = default_style.find (proppath) -        props    = node.find          (proppath) -        sn       = self.oootag ('style', 'name') -        if props is None : -            props = Element (self.properties_tag) -        for k, v in defprops.attrib.iteritems () : -            if self.default_properties.get (k) != v and not props.get (k) : -                if k == self.oootag ('style', 'tab-stop-distance') : +        props = node.find(proppath) +        sn = self.oootag('style', 'name') +        if props is None: +            props = Element(self.properties_tag) +        for k, v in defprops.attrib.items(): +            if self.default_properties.get(k) != v and not props.get(k): +                if k == self.oootag('style', 'tab-stop-distance'):                      self.tab_correct = v -                    self.tab_depend  = {node.get (sn) : 1} -                    stps = SubElement \ -                        (props, self.oootag ('style', 'tab-stops')) -                    self.insert_tabs (stps) +                    self.tab_depend = {node.get(sn): 1} +                    stps = SubElement(props, self.oootag('style', 'tab-stops')) +                    self.insert_tabs(stps)                  else : -                    props.set (k,v) -        if len (props) or props.attrib : -            node.append (props) -    # end def merge_defaultstyle +                    props.set(k, v) +        if len(props) or props.attrib: +            node.append(props) -    def _newname (self, key, oldname) : +    def _newname(self, key, oldname):          stylenum = 0          if (key, oldname) not in self.stylenames :              self.stylenames [(key, oldname)] = 1 @@ -1212,17 +1189,18 @@ def renumber_all (mimetype) :  # the info retrieved from the OOo document: We use the attribute name in  # the meta-information to store (and later retrieve) the information. -def get_meta (mimetype) : -    """ Factory function for Attribute_Access to get all interesting -        meta-data + +def get_meta(mimetype): +    """ +    Factory function for Attribute_Access to get all interesting meta-data      """      get_attr = []      for attr in meta_counts : -        a = OOo_Tag ('meta', attr, mimetype) -        t = OOo_Tag ('meta', 'document-statistic', mimetype) +        a = OOo_Tag('meta', attr, mimetype) +        t = OOo_Tag('meta', 'document-statistic', mimetype)          get_attr.append (Get_Attribute (t, a, attr)) -    return Attribute_Access (get_attr, prio =  20, filename = 'meta.xml') -# end def get_meta +    return Attribute_Access (get_attr, prio= 20, filename='meta.xml') +  def set_meta (mimetype) :      """ Factory function for Attribute_Access to set all interesting diff --git a/ooopy/Version.py b/ooopy/Version.py index 495ca242a..3c6f39aef 100644 --- a/ooopy/Version.py +++ b/ooopy/Version.py @@ -1 +1 @@ -VERSION="1.11" +VERSION = "1.11-python3" | 
