summary | refs | log | tree | commit | diff
path: root/ooopy
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2019-05-01 13:54:20 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2019-06-17 13:21:28 +0200
commit edbbd133068b0d5d0d101a9b9eff1658fbcc0384 (patch)
tree 3fac64755afa9cd223626d5dd432ab1bcec83513 /ooopy
parent a18087eff29bb316af47bb5fe53a59c43edc57f0 (diff)
download Ishtar-edbbd133068b0d5d0d101a9b9eff1658fbcc0384.tar.bz2
Ishtar-edbbd133068b0d5d0d101a9b9eff1658fbcc0384.zip
Quick adaptation of ooopy for python3
Diffstat (limited to 'ooopy')
-rw-r--r-- ooopy/OOoPy.py 430
-rw-r--r-- ooopy/Transformer.py 1462
-rw-r--r-- ooopy/Transforms.py 366
-rw-r--r-- ooopy/Version.py 2
4 files changed, 487 insertions, 1773 deletions
diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py
index 87e0b8110..aaa152606 100644
--- a/ooopy/OOoPy.py
+++ b/ooopy/OOoPy.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: iso-8859-1 -*-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
# Reichergasse 131, A-3411 Weidling.
# Web: http://www.runtux.com Email: office@runtux.com
@@ -21,297 +21,229 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
-from __future__ import absolute_import
-from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
-try :
- from StringIO import StringIO
-except ImportError :
- from io import StringIO
-from datetime import datetime
-try :
- from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map
-except ImportError :
- from elementtree.ElementTree import ElementTree, fromstring, _namespace_map
-from tempfile import mkstemp
-from ooopy.Version import VERSION
-import os
+from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
+from io import BytesIO
+from datetime import datetime
+from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map
+
+
+class _autosuper (type):
+ def __init__(cls, name, bases, dict):
+ super(_autosuper, cls).__init__(name, bases, dict)
+ setattr(cls, "_%s__super" % name, super(cls))
+
+
+class autosuper(metaclass=_autosuper):
+ def __init__(self, *args, **kw):
+ self.__super.__init__()
+
+
+files = [
+ 'content.xml', 'styles.xml', 'meta.xml', 'settings.xml',
+ 'META-INF/manifest.xml'
+]
+
+mimetypes = ['application/vnd.sun.xml.writer',
+ 'application/vnd.oasis.opendocument.text']
+
+namespace_by_name = {
+ mimetypes [0]: {
+ 'chart': "http://openoffice.org/2000/chart",
+ 'config': "http://openoffice.org/2001/config",
+ 'dc': "http://purl.org/dc/elements/1.1/",
+ 'dr3d': "http://openoffice.org/2000/dr3d",
+ 'draw': "http://openoffice.org/2000/drawing",
+ 'fo': "http://www.w3.org/1999/XSL/Format",
+ 'form': "http://openoffice.org/2000/form",
+ 'math': "http://www.w3.org/1998/Math/MathML",
+ 'meta': "http://openoffice.org/2000/meta",
+ 'number': "http://openoffice.org/2000/datastyle",
+ 'office': "http://openoffice.org/2000/office",
+ 'script': "http://openoffice.org/2000/script",
+ 'style': "http://openoffice.org/2000/style",
+ 'svg': "http://www.w3.org/2000/svg",
+ 'table': "http://openoffice.org/2000/table",
+ 'text': "http://openoffice.org/2000/text",
+ 'xlink': "http://www.w3.org/1999/xlink",
+ 'manifest': "http://openoffice.org/2001/manifest"},
+ mimetypes[1]: {
+ 'chart': "urn:oasis:names:tc:opendocument:xmlns:chart:1.0",
+ 'config': "urn:oasis:names:tc:opendocument:xmlns:config:1.0",
+ 'dc': "http://purl.org/dc/elements/1.1/",
+ 'dr3d': "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0",
+ 'draw': "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
+ 'fo': "urn:oasis:names:tc:opendocument:xmlns:" "xsl-fo-compatible:1.0",
+ 'form': "urn:oasis:names:tc:opendocument:xmlns:form:1.0",
+ 'math': "http://www.w3.org/1998/Math/MathML",
+ 'meta': "urn:oasis:names:tc:opendocument:xmlns:meta:1.0",
+ 'number': "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0",
+ 'office': "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
+ 'officeooo': "http://openoffice.org/2009/office",
+ 'script': "urn:oasis:names:tc:opendocument:xmlns:script:1.0",
+ 'style': "urn:oasis:names:tc:opendocument:xmlns:style:1.0",
+ 'svg': "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0",
+ 'table': "urn:oasis:names:tc:opendocument:xmlns:table:1.0",
+ 'text': "urn:oasis:names:tc:opendocument:xmlns:text:1.0",
+ 'xlink': "http://www.w3.org/1999/xlink",
+ 'manifest': "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0",
+ 'tableooo': "http://openoffice.org/2009/table",
+ 'transformation': "http://www.w3.org/2003/g/data-view#",
+ # OOo 1.X tags and some others:
+ 'ooo': "http://openoffice.org/2004/office",
+ 'ooow': "http://openoffice.org/2004/writer",
+ 'oooc': "http://openoffice.org/2004/calc",
+ 'o_dom': "http://www.w3.org/2001/xml-events",
+ 'o_xforms': "http://www.w3.org/2002/xforms",
+ 'xs': "http://www.w3.org/2001/XMLSchema",
+ 'xsi': "http://www.w3.org/2001/XMLSchema-instance",
+ # predefined xml namespace, see
+ # http://www.w3.org/TR/2006/REC-xml-names11-20060816/
+ # "It MAY, but need not, be declared, and MUST NOT be undeclared
+ # or bound to any other namespace name."
+ 'xml': "http://www.w3.org/XML/1998/namespace"
+ }
+}
+
+for mimetype in namespace_by_name.values():
+ for k, v in mimetype.items():
+ if v in _namespace_map:
+ assert _namespace_map[v] == k
+ _namespace_map[v] = k
-class _autosuper (type) :
- def __init__ (cls, name, bases, dict) :
- super (_autosuper, cls).__init__ (name, bases, dict)
- setattr (cls, "_%s__super" % name, super (cls))
- # end def __init__
-# end class _autosuper
-
-class autosuper (object) :
- __metaclass__ = _autosuper
- def __init__ (self, *args, **kw) :
- self.__super.__init__ ()
- # end def __init__
-# end class autosuper
-
-files = \
- [ 'content.xml'
- , 'styles.xml'
- , 'meta.xml'
- , 'settings.xml'
- , 'META-INF/manifest.xml'
- ]
-
-mimetypes = \
- [ 'application/vnd.sun.xml.writer'
- , 'application/vnd.oasis.opendocument.text'
- ]
-namespace_by_name = \
- { mimetypes [0] :
- { 'chart' : "http://openoffice.org/2000/chart"
- , 'config' : "http://openoffice.org/2001/config"
- , 'dc' : "http://purl.org/dc/elements/1.1/"
- , 'dr3d' : "http://openoffice.org/2000/dr3d"
- , 'draw' : "http://openoffice.org/2000/drawing"
- , 'fo' : "http://www.w3.org/1999/XSL/Format"
- , 'form' : "http://openoffice.org/2000/form"
- , 'math' : "http://www.w3.org/1998/Math/MathML"
- , 'meta' : "http://openoffice.org/2000/meta"
- , 'number' : "http://openoffice.org/2000/datastyle"
- , 'office' : "http://openoffice.org/2000/office"
- , 'script' : "http://openoffice.org/2000/script"
- , 'style' : "http://openoffice.org/2000/style"
- , 'svg' : "http://www.w3.org/2000/svg"
- , 'table' : "http://openoffice.org/2000/table"
- , 'text' : "http://openoffice.org/2000/text"
- , 'xlink' : "http://www.w3.org/1999/xlink"
- , 'manifest' : "http://openoffice.org/2001/manifest"
- }
- , mimetypes [1] :
- { 'chart' : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
- , 'config' : "urn:oasis:names:tc:opendocument:xmlns:config:1.0"
- , 'dc' : "http://purl.org/dc/elements/1.1/"
- , 'dr3d' : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
- , 'draw' : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
- , 'fo' : "urn:oasis:names:tc:opendocument:xmlns:"
- "xsl-fo-compatible:1.0"
- , 'form' : "urn:oasis:names:tc:opendocument:xmlns:form:1.0"
- , 'math' : "http://www.w3.org/1998/Math/MathML"
- , 'meta' : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
- , 'number' : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
- , 'office' : "urn:oasis:names:tc:opendocument:xmlns:office:1.0"
- , 'officeooo': "http://openoffice.org/2009/office"
- , 'script' : "urn:oasis:names:tc:opendocument:xmlns:script:1.0"
- , 'style' : "urn:oasis:names:tc:opendocument:xmlns:style:1.0"
- , 'svg' : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
- , 'table' : "urn:oasis:names:tc:opendocument:xmlns:table:1.0"
- , 'text' : "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
- , 'xlink' : "http://www.w3.org/1999/xlink"
- , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
- , 'tableooo' : "http://openoffice.org/2009/table"
- , 'transformation' : "http://www.w3.org/2003/g/data-view#"
- # OOo 1.X tags and some others:
- , 'ooo' : "http://openoffice.org/2004/office"
- , 'ooow' : "http://openoffice.org/2004/writer"
- , 'oooc' : "http://openoffice.org/2004/calc"
- , 'o_dom' : "http://www.w3.org/2001/xml-events"
- , 'o_xforms' : "http://www.w3.org/2002/xforms"
- , 'xs' : "http://www.w3.org/2001/XMLSchema"
- , 'xsi' : "http://www.w3.org/2001/XMLSchema-instance"
- # predefined xml namespace, see
- # http://www.w3.org/TR/2006/REC-xml-names11-20060816/
- # "It MAY, but need not, be declared, and MUST NOT be undeclared
- # or bound to any other namespace name."
- , 'xml' : "http://www.w3.org/XML/1998/namespace"
- }
- }
-
-for mimetype in namespace_by_name.itervalues () :
- for k, v in mimetype.iteritems () :
- if v in _namespace_map :
- assert (_namespace_map [v] == k)
- _namespace_map [v] = k
class OOoElementTree (autosuper) :
"""
- An ElementTree for OOo document XML members. Behaves like the
- orginal ElementTree (in fact it delegates almost everything to a
- real instance of ElementTree) except for the write method, that
- writes itself back to the OOo XML file in the OOo zip archive it
- came from.
+ An ElementTree for OOo document XML members. Behaves like the
+ orginal ElementTree (in fact it delegates almost everything to a
+ real instance of ElementTree) except for the write method, that
+ writes itself back to the OOo XML file in the OOo zip archive it
+ came from.
"""
- def __init__ (self, ooopy, zname, root) :
+ def __init__(self, ooopy, zname, root):
self.ooopy = ooopy
self.zname = zname
- self.tree = ElementTree (root)
- # end def __init__
+ self.tree = ElementTree(root)
- def write (self) :
+ def write(self):
self.ooopy.write (self.zname, self.tree)
- # end def write
- def __getattr__ (self, name) :
+ def __getattr__(self, name) :
"""
- Delegate everything to our ElementTree attribute.
+ Delegate everything to our ElementTree attribute.
"""
if not name.startswith ('__') :
result = getattr (self.tree, name)
setattr (self, name, result)
return result
raise AttributeError (name)
- # end def __getattr__
-# end class OOoElementTree
-class OOoPy (autosuper) :
+class OOoPy(autosuper):
"""
- Wrapper for OpenOffice.org zip files (all OOo documents are
- really zip files internally).
-
- from ooopy.OOoPy import OOoPy
- >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw')
- >>> o.mimetype
- 'application/vnd.sun.xml.writer'
- >>> for f in files :
- ... e = o.read (f)
- ... e.write ()
- ...
- >>> o.close ()
- >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt')
- >>> o.mimetype
- 'application/vnd.oasis.opendocument.text'
- >>> for f in files :
- ... e = o.read (f)
- ... e.write ()
- ...
- >>> o.append_file ('Pictures/empty', '')
- >>> o.close ()
- >>> o = OOoPy (infile = 'out2.odt')
- >>> for f in o.izip.infolist () :
- ... print f.filename, f.create_system, f.compress_type
- mimetype 0 8
- content.xml 0 8
- styles.xml 0 8
- meta.xml 0 8
- settings.xml 0 8
- META-INF/manifest.xml 0 8
- Pictures/empty 0 8
- Configurations2/statusbar/ 0 0
- Configurations2/accelerator/current.xml 0 8
- Configurations2/floater/ 0 0
- Configurations2/popupmenu/ 0 0
- Configurations2/progressbar/ 0 0
- Configurations2/menubar/ 0 0
- Configurations2/toolbar/ 0 0
- Configurations2/images/Bitmaps/ 0 0
- Thumbnails/thumbnail.png 0 8
+ Wrapper for OpenOffice.org zip files (all OOo documents are
+ really zip files internally).
"""
- def __init__ \
- ( self
- , infile = None
- , outfile = None
- , write_mode = 'w'
- , mimetype = None
- ) :
+ def __init__(self, infile=None, outfile=None, write_mode='w',
+ mimetype=None):
"""
- Open an OOo document, if no outfile is given, we open the
- file read-only. Otherwise the outfile has to be different
- from the infile -- the python ZipFile can't deal with
- read-write access. In case an outfile is given, we open it
- in "w" mode as a zip file, unless write_mode is specified
- (the only allowed case would be "a" for appending to an
- existing file, see pythons ZipFile documentation for
- details). If no infile is given, the user is responsible for
- providing all necessary files in the resulting output file.
-
- It seems that OOo needs to have the mimetype as the first
- archive member (at least with mimetype as the first member
- it works, the order may not be arbitrary) to recognize a zip
- archive as an OOo file. When copying from a given infile, we
- use the same order of elements in the resulting output. When
- creating new elements we make sure the mimetype is the first
- in the resulting archive.
-
- Note that both, infile and outfile can either be filenames
- or file-like objects (e.g. StringIO).
-
- The mimetype is automatically determined if an infile is
- given. If only writing is desired, the mimetype should be
- set.
+ Open an OOo document, if no outfile is given, we open the
+ file read-only. Otherwise the outfile has to be different
+ from the infile -- the python ZipFile can't deal with
+ read-write access. In case an outfile is given, we open it
+ in "w" mode as a zip file, unless write_mode is specified
+ (the only allowed case would be "a" for appending to an
+ existing file, see pythons ZipFile documentation for
+ details). If no infile is given, the user is responsible for
+ providing all necessary files in the resulting output file.
+
+ It seems that OOo needs to have the mimetype as the first
+ archive member (at least with mimetype as the first member
+ it works, the order may not be arbitrary) to recognize a zip
+ archive as an OOo file. When copying from a given infile, we
+ use the same order of elements in the resulting output. When
+ creating new elements we make sure the mimetype is the first
+ in the resulting archive.
+
+ Note that both, infile and outfile can either be filenames
+ or file-like objects (e.g. StringIO).
+
+ The mimetype is automatically determined if an infile is
+ given. If only writing is desired, the mimetype should be
+ set.
"""
assert (infile != outfile)
self.izip = self.ozip = None
- if infile :
- self.izip = ZipFile (infile, 'r', ZIP_DEFLATED)
+ if infile:
+ self.izip = ZipFile(infile, 'r', ZIP_DEFLATED)
if outfile :
- self.ozip = ZipFile (outfile, write_mode, ZIP_DEFLATED)
+ self.ozip = ZipFile(outfile, write_mode, ZIP_DEFLATED)
self.written = {}
- if mimetype :
+ if mimetype:
self.mimetype = mimetype
- elif self.izip :
- self.mimetype = self.izip.read ('mimetype')
- # end def __init__
+ elif self.izip:
+ self.mimetype = self.izip.read('mimetype')
+ if isinstance(self.mimetype, bytes):
+ self.mimetype = self.mimetype.decode()
- def read (self, zname) :
+ def read(self, zname):
"""
- return an OOoElementTree object for the given OOo document
- archive member name. Currently an OOo document contains the
- following XML files::
-
- * content.xml: the text of the OOo document
- * styles.xml: style definitions
- * meta.xml: meta-information (author, last changed, ...)
- * settings.xml: settings in OOo
- * META-INF/manifest.xml: contents of the archive
-
- There is an additional file "mimetype" that always contains
- the string "application/vnd.sun.xml.writer" for OOo 1.X files
- and the string "application/vnd.oasis.opendocument.text" for
- OOo 2.X files.
+ return an OOoElementTree object for the given OOo document
+ archive member name. Currently an OOo document contains the
+ following XML files::
+
+ * content.xml: the text of the OOo document
+ * styles.xml: style definitions
+ * meta.xml: meta-information (author, last changed, ...)
+ * settings.xml: settings in OOo
+ * META-INF/manifest.xml: contents of the archive
+
+ There is an additional file "mimetype" that always contains
+ the string "application/vnd.sun.xml.writer" for OOo 1.X files
+ and the string "application/vnd.oasis.opendocument.text" for
+ OOo 2.X files.
"""
- assert (self.izip)
+ assert self.izip
return OOoElementTree (self, zname, fromstring (self.izip.read (zname)))
- # end def read
- def _write (self, zname, str) :
- now = datetime.utcnow ().timetuple ()
- info = ZipInfo (zname, date_time = now)
- info.create_system = 0 # pretend to be fat
+ def _write(self, zname, str):
+ now = datetime.utcnow().timetuple()
+ info = ZipInfo(zname, date_time=now)
+ info.create_system = 0 # pretend to be fat
info.compress_type = ZIP_DEFLATED
- self.ozip.writestr (info, str)
+ self.ozip.writestr(info, str)
self.written [zname] = 1
- # end def _write
- def write (self, zname, etree) :
- assert (self.ozip)
+ def write(self, zname, etree):
+ assert self.ozip
# assure mimetype is the first member in new archive
- if 'mimetype' not in self.written :
- self._write ('mimetype', self.mimetype)
- str = StringIO ()
- etree.write (str)
- self._write (zname, str.getvalue ())
- # end def write
+ if 'mimetype' not in self.written:
+ self._write('mimetype', self.mimetype)
+ str = BytesIO()
+ etree.write(str)
+ self._write(zname, str.getvalue())
- def append_file (self, zname, str) :
- """ Official interface to _write: Append a file to the end of
- the archive.
+ def append_file (self, zname, str):
+ """
+ Official interface to _write: Append a file to the end of the archive.
"""
- if zname not in self.written :
+ if zname not in self.written:
self._write (zname, str)
- # end def append_file
- def close (self) :
+ def close(self):
"""
- Close the zip files. According to documentation of zipfile in
- the standard python lib, this has to be done to be sure
- everything is written. We copy over the not-yet written files
- from izip before closing ozip.
+ Close the zip files. According to documentation of zipfile in
+ the standard python lib, this has to be done to be sure
+ everything is written. We copy over the not-yet written files
+ from izip before closing ozip.
"""
- if self.izip and self.ozip :
- for f in self.izip.infolist () :
+ if self.izip and self.ozip:
+ for f in self.izip.infolist():
if f.filename not in self.written :
- self.ozip.writestr (f, self.izip.read (f.filename))
- for i in self.izip, self.ozip :
- if i : i.close ()
+ self.ozip.writestr(f, self.izip.read(f.filename))
+ for i in self.izip, self.ozip:
+ if i:
+ i.close()
self.izip = self.ozip = None
- # end def close
- __del__ = close # auto-close on deletion of object
-# end class OOoPy
+ __del__ = close # auto-close on deletion of object
diff --git a/ooopy/Transformer.py b/ooopy/Transformer.py
index dbbab125d..4e21bb331 100644
--- a/ooopy/Transformer.py
+++ b/ooopy/Transformer.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: iso-8859-1 -*-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
# Reichergasse 131, A-3411 Weidling.
# Web: http://www.runtux.com Email: office@runtux.com
@@ -21,1377 +21,181 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
-from __future__ import absolute_import
+# import time
+# import re
+# from xml.etree.ElementTree import dump, SubElement, Element, tostring
+from xml.etree.ElementTree import _namespace_map
+# from copy import deepcopy
+from ooopy.OOoPy import autosuper # , OOoPy
+from ooopy.OOoPy import files, mimetypes, namespace_by_name
+# from ooopy.Version import VERSION
-import time
-import re
-try :
- from xml.etree.ElementTree import dump, SubElement, Element, tostring
- from xml.etree.ElementTree import _namespace_map
-except ImportError :
- from elementtree.ElementTree import dump, SubElement, Element, tostring
- from elementtree.ElementTree import _namespace_map
-from copy import deepcopy
-from ooopy.OOoPy import OOoPy, autosuper
-from ooopy.OOoPy import files, mimetypes, namespace_by_name
-from ooopy.Version import VERSION
-def OOo_Tag (namespace, name, mimetype) :
- """Return combined XML tag
-
- >>> OOo_Tag ('xml', 'id', mimetypes [1])
- '{http://www.w3.org/XML/1998/namespace}id'
- >>> OOo_Tag ('text', 'list', mimetypes [1])
- '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list'
+def OOo_Tag(namespace, name, mimetype):
"""
- return "{%s}%s" % (namespace_by_name [mimetype][namespace], name)
-# end def OOo_Tag
+ Return combined XML tag
+ """
+ return "{%s}%s" % (namespace_by_name[mimetype][namespace], name)
+
-def split_tag (tag) :
- """ Split tag into symbolic namespace and name part -- inverse
- operation of OOo_Tag.
+def split_tag(tag):
+ """
+ Split tag into symbolic namespace and name part -- inverse
+ operation of OOo_Tag.
"""
ns, t = tag.split ('}')
- return (_namespace_map [ns [1:]], t)
-# end def split_tag
+ return _namespace_map [ns [1:]], t
-class Transform (autosuper) :
- """
- Base class for individual transforms on OOo files. An individual
- transform needs a filename variable for specifying the OOo file
- the transform should be applied to and an optional prio.
- Individual transforms are applied according to their prio
- setting, higher prio means later application of a transform.
- The filename variable must specify one of the XML files which are
- part of the OOo document (see files variable above). As
- the names imply, content.xml contains the contents of the
- document (text and ad-hoc style definitions), styles.xml contains
- the style definitions, meta.xml contains meta information like
- author, editing time, etc. and settings.xml is used to store
- OOo's settings (menu Tools->Configure).
+class Transform(autosuper):
+ """
+ Base class for individual transforms on OOo files. An individual
+ transform needs a filename variable for specifying the OOo file
+ the transform should be applied to and an optional prio.
+ Individual transforms are applied according to their prio
+ setting, higher prio means later application of a transform.
+
+ The filename variable must specify one of the XML files which are
+ part of the OOo document (see files variable above). As
+ the names imply, content.xml contains the contents of the
+ document (text and ad-hoc style definitions), styles.xml contains
+ the style definitions, meta.xml contains meta information like
+ author, editing time, etc. and settings.xml is used to store
+ OOo's settings (menu Tools->Configure).
"""
prio = 100
- textbody_names = \
- { mimetypes [0] : 'body'
- , mimetypes [1] : 'text'
- }
- paragraph_props = \
- { mimetypes [0] : 'properties'
- , mimetypes [1] : 'paragraph-properties'
- }
- font_decls = \
- { mimetypes [0] : 'font-decls'
- , mimetypes [1] : 'font-face-decls'
- }
-
- def __init__ (self, prio = None, transformer = None) :
- if prio is not None :
- self.prio = prio
+ textbody_names = { mimetypes[0]: 'body', mimetypes[1]: 'text'}
+ paragraph_props = {
+ mimetypes[0]: 'properties',
+ mimetypes[1]: 'paragraph-properties'
+ }
+ font_decls = {
+ mimetypes[0]: 'font-decls',
+ mimetypes[1]: 'font-face-decls'
+ }
+
+ def __init__(self, prio=None, transformer=None):
+ if prio is not None:
+ self.prio = prio
self.transformer = None
- if transformer :
- self.register (transformer)
- # end def __init__
+ if transformer:
+ self.register(transformer)
def apply (self, root) :
""" Apply myself to the element given as root """
- raise NotImplementedError, 'derived transforms must implement "apply"'
- # end def apply
+ raise NotImplementedError('derived transforms must implement "apply"')
def apply_all (self, trees) :
- """ Apply myself to all the files given in trees. The variable
- trees contains a dictionary of ElementTree indexed by the
- name of the OOo File.
- The standard case is that only one file (namely
- self.filename) is used.
"""
- assert (self.filename)
- self.apply (trees [self.filename].getroot ())
- # end def apply_all
+ Apply myself to all the files given in trees. The variable
+ trees contains a dictionary of ElementTree indexed by the
+ name of the OOo File.
+ The standard case is that only one file (namely
+ self.filename) is used.
+ """
+ assert self.filename
+ self.apply(trees[self.filename].getroot())
- def find_tbody (self, root) :
- """ Find the node which really contains the text -- different
- for different OOo versions.
+ def find_tbody(self, root) :
+ """
+ Find the node which really contains the text -- different
+ for different OOo versions.
"""
tbody = root
- if tbody.tag != self.textbody_tag :
- tbody = tbody.find ('.//' + self.textbody_tag)
+ if tbody.tag != self.textbody_tag:
+ tbody = tbody.find('.//' + self.textbody_tag)
return tbody
- # end def find_tbody
-
- def register (self, transformer) :
- """ Registering with a transformer means being able to access
- variables stored in the tranformer by other transforms.
- Also needed for tag-computation: The transformer knows which
- version of OOo document we are processing.
+ def register(self, transformer) :
"""
- self.transformer = transformer
- mt = self.mimetype = transformer.mimetype
- self.textbody_name = self.textbody_names [mt]
+ Registering with a transformer means being able to access
+ variables stored in the tranformer by other transforms.
+ Also needed for tag-computation: The transformer knows which
+ version of OOo document we are processing.
+ """
+ self.transformer = transformer
+ mt = self.mimetype = transformer.mimetype
+ self.textbody_name = self.textbody_names [mt]
self.paragraph_props = self.paragraph_props [mt]
- self.properties_tag = self.oootag ('style', self.paragraph_props)
- self.textbody_tag = self.oootag ('office', self.textbody_name)
- self.font_decls_tag = self.oootag ('office', self.font_decls [mt])
- # end def register
+ self.properties_tag = self.oootag('style', self.paragraph_props)
+ self.textbody_tag = self.oootag('office', self.textbody_name)
+ self.font_decls_tag = self.oootag('office', self.font_decls [mt])
- def oootag (self, namespace, name) :
+ def oootag(self, namespace, name):
""" Compute long tag version """
- return OOo_Tag (namespace, name, self.mimetype)
- # end def oootag
+ return OOo_Tag(namespace, name, self.mimetype)
- def set (self, variable, value) :
+ def set(self, variable, value) :
""" Set variable in our transformer using naming convention. """
self.transformer [self._varname (variable)] = value
- # end def set
def _varname (self, name) :
- """ For fulfilling the naming convention of the transformer
- dictionary (every entry in this dictionary should be prefixed
- with the class name of the transform) we have this
- convenience method.
- Returns variable name prefixed with own class name.
"""
- return ":".join ((self.__class__.__name__, name))
- # end def _varname
+ For fulfilling the naming convention of the transformer
+ dictionary (every entry in this dictionary should be prefixed
+ with the class name of the transform) we have this
+ convenience method.
+ Returns variable name prefixed with own class name.
+ """
+ return ":".join((self.__class__.__name__, name))
-# end class Transform
-class Transformer (autosuper) :
+class Transformer(autosuper):
"""
- Class for applying a set of transforms to a given ooopy object.
- The transforms are applied to the specified file in priority
- order. When applying transforms we have a mechanism for
- communication of transforms. We give the transformer to the
- individual transforms as a parameter. The transforms may use the
- transformer like a dictionary for storing values and retrieving
- values left by previous transforms.
- As a naming convention each transform should use its class name
- as a prefix for storing values in the dictionary.
- >>> import Transforms
- >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts
- >>> try :
- ... from io import StringIO, BytesIO
- ... StringIO = BytesIO
- ... except ImportError :
- ... from StringIO import StringIO
- >>> sio = BytesIO ()
- >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
- 'Standard'
- >>> def cb (name) :
- ... r = { 'street' : 'Beispielstrasse 42'
- ... , 'firstname' : 'Hugo'
- ... , 'salutation' : 'Frau'
- ... }
- ... if r.has_key (name) : return r [name]
- ... return None
- ...
- >>> p = get_meta (m)
- >>> t = Transformer (m, p)
- >>> t ['a'] = 'a'
- >>> t ['a']
- 'a'
- >>> t.transform (o)
- >>> p.set ('a', 'b')
- >>> t ['Attribute_Access:a']
- 'b'
- >>> t = Transformer (
- ... m
- ... , Transforms.Autoupdate ()
- ... , Transforms.Editinfo ()
- ... , Transforms.Field_Replace (prio = 99, replace = cb)
- ... , Transforms.Field_Replace
- ... ( replace =
- ... { 'salutation' : ''
- ... , 'firstname' : 'Erika'
- ... , 'lastname' : 'Musterfrau'
- ... , 'country' : 'D'
- ... , 'postalcode' : '00815'
- ... , 'city' : 'Niemandsdorf'
- ... }
- ... )
- ... , Transforms.Addpagebreak_Style ()
- ... , Transforms.Addpagebreak ()
- ... )
- >>> t.transform (o)
- >>> o.close ()
- >>> ov = sio.getvalue ()
- >>> f = open ("testout.sxw", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> c = o.read ('content.xml')
- >>> m = o.mimetype
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
- >>> for node in body.findall (vset) :
- ... name = node.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', node.text
- salutation : None
- firstname : Erika
- lastname : Musterfrau
- street : Beispielstrasse 42
- country : D
- postalcode : 00815
- city : Niemandsdorf
- salutation : None
- firstname : Erika
- lastname : Musterfrau
- street : Beispielstrasse 42
- country : D
- postalcode : 00815
- city : Niemandsdorf
- >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
- 'P2'
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
- >>> c = o.read ('content.xml')
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Addpagebreak_Style ()
- ... , Transforms.Mailmerge
- ... ( iterator =
- ... ( dict (firstname = 'Erika', lastname = 'Nobody')
- ... , dict (firstname = 'Eric', lastname = 'Wizard')
- ... , cb
- ... )
- ... )
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform (o)
- >>> for i in meta_counts :
- ... print i, t [':'.join (('Set_Attribute', i))]
- character-count 951
- image-count 0
- object-count 0
- page-count 3
- paragraph-count 113
- table-count 3
- word-count 162
- >>> name = t ['Addpagebreak_Style:stylename']
- >>> name
- 'P2'
- >>> o.close ()
- >>> ov = sio.getvalue ()
- >>> f = open ("testout2.sxw", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', m))
- >>> for n in body.findall ('.//*') :
- ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
- ... if zidx :
- ... print ':'.join(split_tag (n.tag)), zidx
- draw:text-box 0
- draw:rect 1
- draw:text-box 3
- draw:rect 4
- draw:text-box 6
- draw:rect 7
- draw:text-box 2
- draw:text-box 5
- draw:text-box 8
- >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
- ... if n.get (OOo_Tag ('text', 'style-name', m)) == name :
- ... print n.tag
- {http://openoffice.org/2000/text}p
- {http://openoffice.org/2000/text}p
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
- >>> for n in body.findall (vset) :
- ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
- ... name = n.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', n.text
- firstname : Erika
- lastname : Nobody
- firstname : Eric
- lastname : Wizard
- firstname : Hugo
- lastname : Testman
- firstname : Erika
- lastname : Nobody
- firstname : Eric
- lastname : Wizard
- firstname : Hugo
- lastname : Testman
- >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
- ... print n.get (OOo_Tag ('draw', 'name', m)),
- ... print n.get (OOo_Tag ('text', 'anchor-page-number', m))
- Frame1 1
- Frame2 2
- Frame3 3
- Frame4 None
- Frame5 None
- Frame6 None
- >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
- ... print n.get (OOo_Tag ('text', 'name', m))
- Section1
- Section2
- Section3
- Section4
- Section5
- Section6
- Section7
- Section8
- Section9
- Section10
- Section11
- Section12
- Section13
- Section14
- Section15
- Section16
- Section17
- Section18
- >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
- ... print n.get (OOo_Tag ('table', 'name', m))
- Table1
- Table2
- Table3
- >>> r = o.read ('meta.xml')
- >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
- >>> for i in meta_counts :
- ... print i, repr (meta.get (OOo_Tag ('meta', i, m)))
- character-count '951'
- image-count '0'
- object-count '0'
- page-count '3'
- paragraph-count '113'
- table-count '3'
- word-count '162'
- >>> o.close ()
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
- >>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw')
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Concatenate (*tf)
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform (o)
- >>> for i in meta_counts :
- ... print i, repr (t [':'.join (('Set_Attribute', i))])
- character-count '1131'
- image-count '0'
- object-count '0'
- page-count '3'
- paragraph-count '168'
- table-count '2'
- word-count '160'
- >>> o.close ()
- >>> ov = sio.getvalue ()
- >>> f = open ("testout3.sxw", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> s = o.read ('styles.xml')
- >>> for n in c.findall ('./*/*') :
- ... name = n.get (OOo_Tag ('style', 'name', m))
- ... if name :
- ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
- ... print '"%s", "%s"' % (name, parent)
- "Tahoma1", "None"
- "Bitstream Vera Sans", "None"
- "Tahoma", "None"
- "Nimbus Roman No9 L", "None"
- "Courier New", "None"
- "Arial Black", "None"
- "New Century Schoolbook", "None"
- "Helvetica", "None"
- "Table1", "None"
- "Table1.A", "None"
- "Table1.A1", "None"
- "Table1.E1", "None"
- "Table1.A2", "None"
- "Table1.E2", "None"
- "P1", "None"
- "fr1", "Frame"
- "fr2", "None"
- "fr3", "Frame"
- "Sect1", "None"
- "gr1", "None"
- "P2", "Standard"
- "Standard_Concat", "None"
- "Concat_P1", "Concat_Frame contents"
- "Concat_P2", "Concat_Frame contents"
- "P3", "Concat_Frame contents"
- "P4", "Concat_Frame contents"
- "P5", "Concat_Standard"
- "P6", "Concat_Standard"
- "P7", "Concat_Frame contents"
- "P8", "Concat_Frame contents"
- "P9", "Concat_Frame contents"
- "P10", "Concat_Frame contents"
- "P11", "Concat_Frame contents"
- "P12", "Concat_Frame contents"
- "P13", "Concat_Frame contents"
- "P15", "Concat_Standard"
- "P16", "Concat_Standard"
- "P17", "Concat_Standard"
- "P18", "Concat_Standard"
- "P19", "Concat_Standard"
- "P20", "Concat_Standard"
- "P21", "Concat_Standard"
- "P22", "Concat_Standard"
- "P23", "Concat_Standard"
- "T1", "None"
- "Concat_fr1", "Concat_Frame"
- "Concat_fr2", "Concat_Frame"
- "Concat_fr3", "Concat_Frame"
- "fr4", "Concat_Frame"
- "fr5", "Concat_Frame"
- "fr6", "Concat_Frame"
- "Concat_Sect1", "None"
- "N0", "None"
- "N2", "None"
- "P15_Concat", "Concat_Standard"
- >>> for n in s.findall ('./*/*') :
- ... name = n.get (OOo_Tag ('style', 'name', m))
- ... if name :
- ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
- ... print '"%s", "%s"' % (name, parent)
- "Tahoma1", "None"
- "Bitstream Vera Sans", "None"
- "Tahoma", "None"
- "Nimbus Roman No9 L", "None"
- "Courier New", "None"
- "Arial Black", "None"
- "New Century Schoolbook", "None"
- "Helvetica", "None"
- "Standard", "None"
- "Text body", "Standard"
- "List", "Text body"
- "Table Contents", "Text body"
- "Table Heading", "Table Contents"
- "Caption", "Standard"
- "Frame contents", "Text body"
- "Index", "Standard"
- "Frame", "None"
- "OLE", "None"
- "Concat_Standard", "None"
- "Concat_Text body", "Concat_Standard"
- "Concat_List", "Concat_Text body"
- "Concat_Caption", "Concat_Standard"
- "Concat_Frame contents", "Concat_Text body"
- "Concat_Index", "Concat_Standard"
- "Horizontal Line", "Concat_Standard"
- "Internet link", "None"
- "Visited Internet Link", "None"
- "Concat_Frame", "None"
- "Concat_OLE", "None"
- "pm1", "None"
- "Concat_pm1", "None"
- "Standard", "None"
- "Concat_Standard", "None"
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
- ... name = n.get (OOo_Tag ('text', 'name', m))
- ... print name
- salutation
- firstname
- lastname
- street
- country
- postalcode
- city
- date
- invoice.invoice_no
- invoice.abo.aboprice.abotype.description
- address.salutation
- address.title
- address.firstname
- address.lastname
- address.function
- address.street
- address.country
- address.postalcode
- address.city
- invoice.subscriber.salutation
- invoice.subscriber.title
- invoice.subscriber.firstname
- invoice.subscriber.lastname
- invoice.subscriber.function
- invoice.subscriber.street
- invoice.subscriber.country
- invoice.subscriber.postalcode
- invoice.subscriber.city
- invoice.period_start
- invoice.period_end
- invoice.currency.name
- invoice.amount
- invoice.subscriber.initial
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
- ... name = n.get (OOo_Tag ('text', 'name', m))
- ... print name
- Illustration
- Table
- Text
- Drawing
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
- ... name = n.get (OOo_Tag ('text', 'style-name', m))
- ... if not name or name.startswith ('Concat') :
- ... print ">%s<" % name
- >Concat_P1<
- >Concat_P2<
- >Concat_Frame contents<
- >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
- ... attrs = 'name', 'style-name', 'z-index'
- ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
- ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
- ... print attrs
- ['Frame1', 'fr1', '0', '1']
- ['Frame2', 'fr1', '3', '2']
- ['Frame3', 'Concat_fr1', '6', '3']
- ['Frame4', 'Concat_fr2', '7', '3']
- ['Frame5', 'Concat_fr3', '8', '3']
- ['Frame6', 'Concat_fr1', '9', '3']
- ['Frame7', 'fr4', '10', '3']
- ['Frame8', 'fr4', '11', '3']
- ['Frame9', 'fr4', '12', '3']
- ['Frame10', 'fr4', '13', '3']
- ['Frame11', 'fr4', '14', '3']
- ['Frame12', 'fr4', '15', '3']
- ['Frame13', 'fr5', '16', '3']
- ['Frame14', 'fr4', '18', '3']
- ['Frame15', 'fr4', '19', '3']
- ['Frame16', 'fr4', '20', '3']
- ['Frame17', 'fr6', '17', '3']
- ['Frame18', 'fr4', '23', '3']
- ['Frame19', 'fr3', '2', None]
- ['Frame20', 'fr3', '5', None]
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
- ... attrs = 'name', 'style-name'
- ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
- ... print attrs
- ['Section1', 'Sect1']
- ['Section2', 'Sect1']
- ['Section3', 'Sect1']
- ['Section4', 'Sect1']
- ['Section5', 'Sect1']
- ['Section6', 'Sect1']
- ['Section7', 'Concat_Sect1']
- ['Section8', 'Concat_Sect1']
- ['Section9', 'Concat_Sect1']
- ['Section10', 'Concat_Sect1']
- ['Section11', 'Concat_Sect1']
- ['Section12', 'Concat_Sect1']
- ['Section13', 'Concat_Sect1']
- ['Section14', 'Concat_Sect1']
- ['Section15', 'Concat_Sect1']
- ['Section16', 'Concat_Sect1']
- ['Section17', 'Concat_Sect1']
- ['Section18', 'Concat_Sect1']
- ['Section19', 'Concat_Sect1']
- ['Section20', 'Concat_Sect1']
- ['Section21', 'Concat_Sect1']
- ['Section22', 'Concat_Sect1']
- ['Section23', 'Concat_Sect1']
- ['Section24', 'Concat_Sect1']
- ['Section25', 'Concat_Sect1']
- ['Section26', 'Concat_Sect1']
- ['Section27', 'Concat_Sect1']
- ['Section28', 'Sect1']
- ['Section29', 'Sect1']
- ['Section30', 'Sect1']
- ['Section31', 'Sect1']
- ['Section32', 'Sect1']
- ['Section33', 'Sect1']
- >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
- ... attrs = 'style-name', 'text-style-name', 'z-index'
- ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
- ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
- ... print attrs
- ['gr1', 'P1', '1', '1']
- ['gr1', 'P1', '4', '2']
- >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
- ... attrs = 'style-name', 'text-style-name', 'z-index'
- ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
- ... print attrs
- ['gr1', 'P1', '24']
- ['gr1', 'P1', '22']
- ['gr1', 'P1', '21']
- >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
- ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
- ... attrs = 'name', 'class', 'family'
- ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
- ... print attrs
- ... props = n.find ('./' + OOo_Tag ('style', 'properties', m))
- ... if props is not None and len (props) :
- ... props [0].tag
- ['Concat_Standard', 'text', 'paragraph']
- '{http://openoffice.org/2000/style}tab-stops'
- ['Concat_Text body', 'text', 'paragraph']
- ['Concat_List', 'list', 'paragraph']
- ['Concat_Caption', 'extra', 'paragraph']
- ['Concat_Frame contents', 'extra', 'paragraph']
- ['Concat_Index', 'index', 'paragraph']
- ['Concat_Frame', None, 'graphics']
- ['Concat_OLE', None, 'graphics']
- >>> for n in c.findall ('.//*') :
- ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
- ... if zidx :
- ... print ':'.join(split_tag (n.tag)), zidx
- draw:text-box 0
- draw:rect 1
- draw:text-box 3
- draw:rect 4
- draw:text-box 6
- draw:text-box 7
- draw:text-box 8
- draw:text-box 9
- draw:text-box 10
- draw:text-box 11
- draw:text-box 12
- draw:text-box 13
- draw:text-box 14
- draw:text-box 15
- draw:text-box 16
- draw:text-box 18
- draw:text-box 19
- draw:text-box 20
- draw:text-box 17
- draw:text-box 23
- draw:line 24
- draw:text-box 2
- draw:text-box 5
- draw:line 22
- draw:line 21
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio)
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Addpagebreak_Style ()
- ... , Transforms.Mailmerge
- ... ( iterator =
- ... ( dict
- ... ( Spett = "Spettabile"
- ... , contraente = "First person"
- ... , indirizzo = "street? 1"
- ... , tipo = "racc. A.C."
- ... , luogo = "Varese"
- ... , oggetto = "Saluti"
- ... )
- ... , dict
- ... ( Spett = "Egregio"
- ... , contraente = "Second Person"
- ... , indirizzo = "street? 2"
- ... , tipo = "Raccomandata"
- ... , luogo = "Gavirate"
- ... , oggetto = "Ossequi"
- ... )
- ... )
- ... )
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform(o)
- >>> o.close()
- >>> ov = sio.getvalue ()
- >>> f = open ("carta-out.stw", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
- >>> for node in body.findall (vset) :
- ... name = node.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', node.text
- Spett : Spettabile
- contraente : First person
- indirizzo : street? 1
- Spett : Egregio
- contraente : Second Person
- indirizzo : street? 2
- tipo : racc. A.C.
- luogo : Varese
- oggetto : Saluti
- tipo : Raccomandata
- luogo : Gavirate
- oggetto : Ossequi
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Addpagebreak_Style ()
- ... , Transforms.Mailmerge
- ... ( iterator =
- ... ( dict (firstname = 'Erika', lastname = 'Nobody')
- ... , dict (firstname = 'Eric', lastname = 'Wizard')
- ... , cb
- ... )
- ... )
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform (o)
- >>> for i in meta_counts :
- ... print i, t [':'.join (('Set_Attribute', i))]
- character-count 951
- image-count 0
- object-count 0
- page-count 3
- paragraph-count 53
- table-count 3
- word-count 162
- >>> name = t ['Addpagebreak_Style:stylename']
- >>> name
- 'P2'
- >>> o.close ()
- >>> ov = sio.getvalue ()
- >>> f = open ("testout.odt", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', m))
- >>> for n in body.findall ('.//*') :
- ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
- ... if zidx :
- ... print ':'.join(split_tag (n.tag)), zidx
- draw:frame 0
- draw:rect 1
- draw:frame 3
- draw:rect 4
- draw:frame 6
- draw:rect 7
- draw:frame 2
- draw:frame 5
- draw:frame 8
- >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
- ... if n.get (OOo_Tag ('text', 'style-name', m)) == name :
- ... print n.tag
- {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
- {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
- >>> for n in body.findall (vset) :
- ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
- ... name = n.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', n.text
- firstname : Erika
- lastname : Nobody
- firstname : Eric
- lastname : Wizard
- firstname : Hugo
- lastname : Testman
- firstname : Erika
- lastname : Nobody
- firstname : Eric
- lastname : Wizard
- firstname : Hugo
- lastname : Testman
- >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
- ... print n.get (OOo_Tag ('draw', 'name', m)),
- ... print n.get (OOo_Tag ('text', 'anchor-page-number', m))
- Frame1 1
- Frame2 2
- Frame3 3
- Frame4 None
- Frame5 None
- Frame6 None
- >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
- ... print n.get (OOo_Tag ('text', 'name', m))
- Section1
- Section2
- Section3
- Section4
- Section5
- Section6
- Section7
- Section8
- Section9
- Section10
- Section11
- Section12
- Section13
- Section14
- Section15
- Section16
- Section17
- Section18
- >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
- ... print n.get (OOo_Tag ('table', 'name', m))
- Table1
- Table2
- Table3
- >>> r = o.read ('meta.xml')
- >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
- >>> for i in meta_counts :
- ... print i, repr (meta.get (OOo_Tag ('meta', i, m)))
- character-count '951'
- image-count '0'
- object-count '0'
- page-count '3'
- paragraph-count '53'
- table-count '3'
- word-count '162'
- >>> o.close ()
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio)
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Addpagebreak_Style ()
- ... , Transforms.Mailmerge
- ... ( iterator =
- ... ( dict
- ... ( Spett = "Spettabile"
- ... , contraente = "First person"
- ... , indirizzo = "street? 1"
- ... , tipo = "racc. A.C."
- ... , luogo = "Varese"
- ... , oggetto = "Saluti"
- ... )
- ... , dict
- ... ( Spett = "Egregio"
- ... , contraente = "Second Person"
- ... , indirizzo = "street? 2"
- ... , tipo = "Raccomandata"
- ... , luogo = "Gavirate"
- ... , oggetto = "Ossequi"
- ... )
- ... )
- ... )
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform(o)
- >>> o.close()
- >>> ov = sio.getvalue ()
- >>> f = open ("carta-out.odt", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
- >>> for node in body.findall (vset) :
- ... name = node.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', node.text
- Spett : Spettabile
- contraente : First person
- indirizzo : street? 1
- Spett : Egregio
- contraente : Second Person
- indirizzo : street? 2
- tipo : racc. A.C.
- luogo : Varese
- oggetto : Saluti
- tipo : Raccomandata
- luogo : Gavirate
- oggetto : Ossequi
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
- >>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt')
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Concatenate (*tf)
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform (o)
- >>> for i in meta_counts :
- ... print i, repr (t [':'.join (('Set_Attribute', i))])
- character-count '1131'
- image-count '0'
- object-count '0'
- page-count '3'
- paragraph-count '80'
- table-count '2'
- word-count '159'
- >>> o.close ()
- >>> ov = sio.getvalue ()
- >>> f = open ("testout3.odt", "wb")
- >>> f.write (ov)
- >>> f.close ()
- >>> o = OOoPy (infile = sio)
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> s = o.read ('styles.xml')
- >>> for n in c.findall ('./*/*') :
- ... name = n.get (OOo_Tag ('style', 'name', m))
- ... if name :
- ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
- ... print '"%s", "%s"' % (name, parent)
- "Tahoma1", "None"
- "Bitstream Vera Sans", "None"
- "Tahoma", "None"
- "Nimbus Roman No9 L", "None"
- "Courier New", "None"
- "Arial Black", "None"
- "New Century Schoolbook", "None"
- "Times New Roman", "None"
- "Arial", "None"
- "Helvetica", "None"
- "Table1", "None"
- "Table1.A", "None"
- "Table1.A1", "None"
- "Table1.E1", "None"
- "Table1.A2", "None"
- "Table1.E2", "None"
- "P1", "None"
- "fr1", "Frame"
- "fr2", "Frame"
- "Sect1", "None"
- "gr1", "None"
- "P2", "Standard"
- "Standard_Concat", "None"
- "Concat_P1", "Concat_Frame_20_contents"
- "Concat_P2", "Concat_Frame_20_contents"
- "P3", "Concat_Frame_20_contents"
- "P4", "Concat_Standard"
- "P5", "Concat_Standard"
- "P6", "Concat_Frame_20_contents"
- "P7", "Concat_Frame_20_contents"
- "P8", "Concat_Frame_20_contents"
- "P9", "Concat_Frame_20_contents"
- "P10", "Concat_Frame_20_contents"
- "P11", "Concat_Frame_20_contents"
- "P12", "Concat_Frame_20_contents"
- "P14", "Concat_Standard"
- "P15", "Concat_Standard"
- "P16", "Concat_Standard"
- "P17", "Concat_Standard"
- "P18", "Concat_Standard"
- "P19", "Concat_Standard"
- "P20", "Concat_Standard"
- "P21", "Concat_Standard"
- "P22", "Concat_Standard"
- "P23", "Concat_Standard"
- "Concat_fr1", "Frame"
- "Concat_fr2", "Frame"
- "fr3", "Frame"
- "fr4", "Frame"
- "fr5", "Frame"
- "fr6", "Frame"
- "Concat_gr1", "None"
- "N0", "None"
- "N2", "None"
- "P14_Concat", "Concat_Standard"
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
- ... name = n.get (OOo_Tag ('text', 'name', m))
- ... print name
- salutation
- firstname
- lastname
- street
- country
- postalcode
- city
- date
- invoice.invoice_no
- invoice.abo.aboprice.abotype.description
- address.salutation
- address.title
- address.firstname
- address.lastname
- address.function
- address.street
- address.country
- address.postalcode
- address.city
- invoice.subscriber.salutation
- invoice.subscriber.title
- invoice.subscriber.firstname
- invoice.subscriber.lastname
- invoice.subscriber.function
- invoice.subscriber.street
- invoice.subscriber.country
- invoice.subscriber.postalcode
- invoice.subscriber.city
- invoice.period_start
- invoice.period_end
- invoice.currency.name
- invoice.amount
- invoice.subscriber.initial
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
- ... name = n.get (OOo_Tag ('text', 'name', m))
- ... print name
- Illustration
- Table
- Text
- Drawing
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
- ... name = n.get (OOo_Tag ('text', 'style-name', m))
- ... if not name or name.startswith ('Concat') :
- ... print ':'.join(split_tag (n.tag)), ">%s<" % name
- text:p >None<
- text:p >None<
- text:p >Concat_P1<
- text:p >Concat_P1<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_P2<
- text:p >Concat_Frame_20_contents<
- text:p >None<
- text:p >None<
- text:p >None<
- >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
- ... attrs = 'name', 'style-name', 'z-index'
- ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
- ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
- ... print attrs
- ['Frame1', 'fr1', '0', '1']
- ['Frame2', 'fr1', '3', '2']
- ['Frame3', 'Concat_fr1', '6', '3']
- ['Frame4', 'Concat_fr2', '7', '3']
- ['Frame5', 'fr3', '8', '3']
- ['Frame6', 'Concat_fr1', '9', '3']
- ['Frame7', 'fr4', '10', '3']
- ['Frame8', 'fr4', '11', '3']
- ['Frame9', 'fr4', '12', '3']
- ['Frame10', 'fr4', '13', '3']
- ['Frame11', 'fr4', '14', '3']
- ['Frame12', 'fr4', '15', '3']
- ['Frame13', 'fr5', '16', '3']
- ['Frame14', 'fr4', '18', '3']
- ['Frame15', 'fr4', '19', '3']
- ['Frame16', 'fr4', '20', '3']
- ['Frame17', 'fr6', '17', '3']
- ['Frame18', 'fr4', '23', '3']
- ['Frame19', 'fr2', '2', None]
- ['Frame20', 'fr2', '5', None]
- >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
- ... attrs = 'name', 'style-name'
- ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
- ... print attrs
- ['Section1', 'Sect1']
- ['Section2', 'Sect1']
- ['Section3', 'Sect1']
- ['Section4', 'Sect1']
- ['Section5', 'Sect1']
- ['Section6', 'Sect1']
- ['Section7', 'Sect1']
- ['Section8', 'Sect1']
- ['Section9', 'Sect1']
- ['Section10', 'Sect1']
- ['Section11', 'Sect1']
- ['Section12', 'Sect1']
- ['Section13', 'Sect1']
- ['Section14', 'Sect1']
- ['Section15', 'Sect1']
- ['Section16', 'Sect1']
- ['Section17', 'Sect1']
- ['Section18', 'Sect1']
- ['Section19', 'Sect1']
- ['Section20', 'Sect1']
- ['Section21', 'Sect1']
- ['Section22', 'Sect1']
- ['Section23', 'Sect1']
- ['Section24', 'Sect1']
- ['Section25', 'Sect1']
- ['Section26', 'Sect1']
- ['Section27', 'Sect1']
- ['Section28', 'Sect1']
- ['Section29', 'Sect1']
- ['Section30', 'Sect1']
- ['Section31', 'Sect1']
- ['Section32', 'Sect1']
- ['Section33', 'Sect1']
- >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
- ... attrs = 'style-name', 'text-style-name', 'z-index'
- ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
- ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
- ... print attrs
- ['gr1', 'P1', '1', '1']
- ['gr1', 'P1', '4', '2']
- >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
- ... attrs = 'style-name', 'text-style-name', 'z-index'
- ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
- ... print attrs
- ['Concat_gr1', 'P1', '24']
- ['Concat_gr1', 'P1', '22']
- ['Concat_gr1', 'P1', '21']
- >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
- ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
- ... attrs = 'name', 'display-name', 'class', 'family'
- ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
- ... print attrs
- ... props = n.find ('./' + OOo_Tag ('style', 'properties', m))
- ... if props is not None and len (props) :
- ... props [0].tag
- ['Concat_Standard', None, 'text', 'paragraph']
- ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph']
- ['Concat_List', None, 'list', 'paragraph']
- ['Concat_Caption', None, 'extra', 'paragraph']
- ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph']
- ['Concat_Index', None, 'index', 'paragraph']
- >>> for n in c.findall ('.//*') :
- ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
- ... if zidx :
- ... print ':'.join(split_tag (n.tag)), zidx
- draw:frame 0
- draw:rect 1
- draw:frame 3
- draw:rect 4
- draw:frame 6
- draw:frame 7
- draw:frame 8
- draw:frame 9
- draw:frame 10
- draw:frame 11
- draw:frame 12
- draw:frame 13
- draw:frame 14
- draw:frame 15
- draw:frame 16
- draw:frame 18
- draw:frame 19
- draw:frame 20
- draw:frame 17
- draw:frame 23
- draw:line 24
- draw:frame 2
- draw:frame 5
- draw:line 22
- draw:line 21
- >>> from os import system
- >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt '
- ... '-o testout.odt '
- ... 'salutation=Frau firstname=Erika lastname=Musterfrau '
- ... 'country=D postalcode=00815 city=Niemandsdorf '
- ... 'street="Beispielstrasse 42"')
- 0
- >>> o = OOoPy (infile = 'testout.odt')
- >>> c = o.read ('content.xml')
- >>> m = o.mimetype
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
- >>> for node in body.findall (vset) :
- ... name = node.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', node.text
- salutation : Frau
- firstname : Erika
- lastname : Musterfrau
- street : Beispielstrasse 42
- country : D
- postalcode : 00815
- city : Niemandsdorf
- salutation : Frau
- firstname : Erika
- lastname : Musterfrau
- street : Beispielstrasse 42
- country : D
- postalcode : 00815
- city : Niemandsdorf
- >>> o.close ()
- >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' "
- ... "testfiles/carta.odt testfiles/x.csv")
- 0
- >>> o = OOoPy (infile = 'testout.odt')
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
- >>> for node in body.findall (vset) :
- ... name = node.get (OOo_Tag ('text', 'name', m))
- ... print name, ':', node.text
- Spett : Spettabile
- contraente : First person
- indirizzo : street? 1
- Spett : Egregio
- contraente : Second Person
- indirizzo : street? 2
- tipo : racc. A.C.
- luogo : Varese
- oggetto : Saluti
- tipo : Raccomandata
- luogo : Gavirate
- oggetto : Ossequi
- >>> o.close ()
- >>> infile = 'testfiles/testenum.odt'
- >>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt')
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Addpagebreak_Style ()
- ... , Transforms.Mailmerge
- ... ( iterator =
- ... ( dict (firstname = 'Erika', lastname = 'Nobody')
- ... , dict (firstname = 'Eric', lastname = 'Wizard')
- ... , cb
- ... )
- ... )
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... )
- >>> t.transform (o)
- >>> o.close ()
- >>> o = OOoPy (infile = 'xyzzy.odt')
- >>> m = o.mimetype
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> textlist = './/' + OOo_Tag ('text', 'list', m)
- >>> for node in body.findall (textlist) :
- ... id = node.get (OOo_Tag ('xml', 'id', m))
- ... print 'xml:id', ':', id
- xml:id : list1
- xml:id : list2
- xml:id : list3
- >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt')
- >>> m = o.mimetype
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Concatenate ('testfiles/page2.odt')
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... , Transforms.Manifest_Append ()
- ... )
- >>> t.transform (o)
- >>> o.close ()
- >>> o = OOoPy (infile = 'xyzzy.odt')
- >>> c = o.read ('META-INF/manifest.xml')
- >>> for node in c.getroot () :
- ... fe = node.get (OOo_Tag ('manifest', 'full-path', m))
- ... print fe
- /
- Pictures/10000000000000C80000007941B1A419.jpg
- Pictures/10000000000000DC000000B02E191635.jpg
- Pictures/10000000000000DC000000A337377AAA.jpg
- meta.xml
- settings.xml
- content.xml
- Thumbnails/thumbnail.png
- layout-cache
- manifest.rdf
- Configurations2/accelerator/current.xml
- Configurations2/
- styles.xml
- >>> for f in o.izip.infolist () :
- ... print f.filename
- mimetype
- settings.xml
- META-INF/manifest.xml
- content.xml
- meta.xml
- styles.xml
- Pictures/10000000000000C80000007941B1A419.jpg
- Pictures/10000000000000DC000000B02E191635.jpg
- Pictures/10000000000000DC000000A337377AAA.jpg
- Thumbnails/thumbnail.png
- layout-cache
- manifest.rdf
- Configurations2/images/Bitmaps/
- Configurations2/accelerator/current.xml
- >>> sio = StringIO ()
- >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio)
- >>> m = o.mimetype
- >>> t = Transformer (
- ... o.mimetype
- ... , get_meta (o.mimetype)
- ... , Transforms.Concatenate ('testfiles/tbl_second.odt')
- ... , renumber_all (o.mimetype)
- ... , set_meta (o.mimetype)
- ... , Transforms.Fix_OOo_Tag ()
- ... , Transforms.Manifest_Append ()
- ... )
- >>> t.transform (o)
- >>> o.close ()
- >>> o = OOoPy (infile = sio)
- >>> c = o.read ('content.xml')
- >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
- >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m)
- >>> for table in body.findall (tbls) :
- ... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m))
- ... if name :
- ... print name
- ... for t in table.findall ('.//') :
- ... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m))
- ... if name :
- ... print name
- Tabella1
- Tabella1.A
- Tabella1.A1
- Tabella1.B1
- Tabella1.A2
- Tabella1.B2
- Tabella1
- Tabella1.A
- Tabella1.A1
- Tabella1.B1
- Tabella1.A2
- Tabella1.B2
+ Class for applying a set of transforms to a given ooopy object.
+ The transforms are applied to the specified file in priority
+ order. When applying transforms we have a mechanism for
+ communication between transforms. We give the transformer to the
+ individual transforms as a parameter. The transforms may use the
+ transformer like a dictionary for storing values and retrieving
+ values left by previous transforms.
+ As a naming convention each transform should use its class name
+ as a prefix for storing values in the dictionary.
"""
- def __init__ (self, mimetype, *tf) :
+ def __init__(self, mimetype, *tf):
assert (mimetype in mimetypes)
- self.mimetype = mimetype
- self.transforms = {}
- for t in tf :
- self.insert (t)
- self.dictionary = {}
- self.has_key = self.dictionary.has_key
+ self.mimetype = mimetype
+ self.transforms = {}
+ for t in tf:
+ self.insert(t)
+ self.dictionary = {}
self.__contains__ = self.has_key
# 2-tuples of filename, content
- self.appendfiles = []
- # end def __init__
+ self.appendfiles = []
- def insert (self, transform) :
+ def has_key(self, key):
+ return key in self.dictionary.keys()
+
+ def insert(self, transform):
"""Insert a new transform"""
t = transform
- if t.prio not in self.transforms :
- self.transforms [t.prio] = []
- self.transforms [t.prio].append (t)
- t.register (self)
- # end def append
+ if t.prio not in self.transforms:
+ self.transforms[t.prio] = []
+ self.transforms[t.prio].append(t)
+ t.register(self)
- def transform (self, ooopy) :
+ def transform(self, ooopy):
"""
- Apply all the transforms in priority order.
- Priority order is global over all transforms.
+ Apply all the transforms in priority order.
+ Priority order is global over all transforms.
"""
self.trees = {}
- for f in files :
- self.trees [f] = ooopy.read (f)
- #self.dictionary = {} # clear dict when transforming another ooopy
- prios = self.transforms.keys ()
- prios.sort ()
- for p in prios :
- for t in self.transforms [p] :
- t.apply_all (self.trees)
- for e in self.trees.itervalues () :
- e.write ()
- for fname, fcontent in self.appendfiles :
- e.ooopy.append_file (fname, fcontent)
- # end def transform
-
- def __getitem__ (self, key) :
- return self.dictionary [key]
- # end def __getitem__
-
- def __setitem__ (self, key, value) :
- self.dictionary [key] = value
- # end def __setitem__
-# end class Transformer
+ for f in files:
+ self.trees[f] = ooopy.read(f)
+ # self.dictionary = {} # clear dict when transforming another ooopy
+ prios = list(self.transforms.keys())
+ prios.sort()
+ for p in prios:
+ for t in self.transforms[p]:
+ t.apply_all(self.trees)
+ for e in self.trees.values():
+ e.write()
+ for fname, fcontent in self.appendfiles:
+ e.ooopy.append_file(fname, fcontent)
+
+ def __getitem__(self, key):
+ return self.dictionary[key]
+
+ def __setitem__(self, key, value):
+ self.dictionary[key] = value
diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py
index 50a6c0db8..37e7179e8 100644
--- a/ooopy/Transforms.py
+++ b/ooopy/Transforms.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: iso-8859-1 -*-
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
# Reichergasse 131, A-3411 Weidling.
# Web: http://www.runtux.com Email: office@runtux.com
@@ -21,114 +21,102 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
-from __future__ import absolute_import
-
import time
import re
-try :
- from xml.etree.ElementTree import dump, SubElement, Element, tostring
-except ImportError :
- from elementtree.ElementTree import dump, SubElement, Element, tostring
-from copy import deepcopy
-from ooopy.OOoPy import OOoPy, autosuper
-from ooopy.Transformer import files, split_tag, OOo_Tag, Transform
-from ooopy.Transformer import mimetypes, namespace_by_name
-from ooopy.Version import VERSION
+from xml.etree.ElementTree import SubElement, Element # , dump , tostring
+from copy import deepcopy
+from ooopy.OOoPy import OOoPy, autosuper
+from ooopy.Transformer import split_tag, OOo_Tag, Transform , mimetypes, \
+ namespace_by_name # , # files
+# from ooopy.Version import VERSION
# counts in meta.xml
-meta_counts = \
- ( 'character-count', 'image-count', 'object-count', 'page-count'
- , 'paragraph-count', 'table-count', 'word-count'
- )
-
-class Access_Attribute (autosuper) :
- """ For performance reasons we do not specify a separate transform
- for each attribute-read or -change operation. Instead we define
- all the attribute accesses we want to perform as objects that
- follow the attribute access api and apply them all using an
- Attribute_Access in one go.
+meta_counts = ('character-count', 'image-count', 'object-count', 'page-count',
+ 'paragraph-count', 'table-count', 'word-count')
+
+
+class Access_Attribute(autosuper):
+ """
+ For performance reasons we do not specify a separate transform
+ for each attribute-read or -change operation. Instead we define
+ all the attribute accesses we want to perform as objects that
+ follow the attribute access api and apply them all using an
+ Attribute_Access in one go.
"""
- def __init__ (self, key = None, prefix = None, ** kw) :
- self.__super.__init__ (key = key, prefix = prefix, **kw)
+ def __init__(self, key=None, prefix=None, **kw):
+ self.__super.__init__(key=key, prefix=prefix, **kw)
self.key = key
- if key :
- if not prefix :
- prefix = self.__class__.__name__
- self.key = ':'.join ((prefix, key))
- # end def __init__
+ if key:
+ if not prefix:
+ prefix = self.__class__.__name__
+ self.key = ':'.join((prefix, key))
- def register (self, transformer) :
+ def register (self, transformer):
self.transformer = transformer
- # end def register
- def use_value (self, oldval = None) :
- """ Can change the given value by returning the new value. If
- returning None or oldval the attribute stays unchanged.
+ def use_value (self, oldval=None):
"""
- raise NotImplementedError, "use_value must be defined in derived class"
- # end def use_value
+ Can change the given value by returning the new value. If
+ None or oldval is returned, the attribute stays unchanged.
+ """
+ raise NotImplementedError("use_value must be defined in derived class")
-# end class Access_Attribute
-class Get_Attribute (Access_Attribute) :
- """ An example of not changing an attribute but only storing the
- value in the transformer
+class Get_Attribute(Access_Attribute):
+ """
+ An example of not changing an attribute but only storing the
+ value in the transformer
"""
- def __init__ (self, tag, attr, key, transform = None, ** kw) :
- self.__super.__init__ (key = key, **kw)
- self.tag = tag
- self.attribute = attr
- self.transform = transform
- # end def __init__
+ def __init__(self, tag, attr, key, transform=None, ** kw):
+ self.__super.__init__(key=key, **kw)
+ self.tag = tag
+ self.attribute = attr
+ self.transform = transform
- def use_value (self, oldval = None) :
- self.transformer [self.key] = oldval
+ def use_value(self, oldval=None):
+ self.transformer[self.key] = oldval
return None
- # end def use_value
-# end def Get_Attribute
-class Get_Max (Access_Attribute) :
+class Get_Max(Access_Attribute):
""" Get the maximum value of an attribute """
- def __init__ (self, tag, attr, key, transform = None, ** kw) :
- self.__super.__init__ (key = key, **kw)
- self.tag = tag
- self.attribute = attr
- self.transform = transform
- # end def __init__
-
- def register (self, transformer) :
- self.__super.register (transformer)
- self.transformer [self.key] = -1
- # end def register
-
- def use_value (self, oldval = None) :
- if self.transformer [self.key] < oldval :
- self.transformer [self.key] = oldval
+ def __init__(self, tag, attr, key, transform=None, ** kw):
+ self.__super.__init__(key=key, **kw)
+ self.tag = tag
+ self.attribute = attr
+ self.transform = transform
+
+ def register(self, transformer):
+ self.__super.register(transformer)
+ self.transformer[self.key] = -1
+
+ def use_value(self, oldval=None):
+ if oldval:
+ oldval = int(oldval)
+ if (self.transformer[self.key] or 0) < (oldval or 0):
+ self.transformer[self.key] = oldval
return None
- # end def use_value
-
-# end def Get_Max
-class Renumber (Access_Attribute) :
- """ Specifies a renumbering transform. OOo has a 'name' attribute
- for several different tags, e.g., tables, frames, sections etc.
- These names must be unique in the whole document. OOo itself
- solves this by appending a unique number to a basename for each
- element, e.g., sections are named 'Section1', 'Section2', ...
- Renumber transforms can be applied to correct the numbering
- after operations that destroy the unique numbering, e.g., after
- a mailmerge where the same document is repeatedly appended.
- The force parameter specifies if the new renumbered name should
- be inserted even if the attribute in question does not exist.
+class Renumber (Access_Attribute):
+ """
+ Specifies a renumbering transform. OOo has a 'name' attribute
+ for several different tags, e.g., tables, frames, sections etc.
+ These names must be unique in the whole document. OOo itself
+ solves this by appending a unique number to a basename for each
+ element, e.g., sections are named 'Section1', 'Section2', ...
+ Renumber transforms can be applied to correct the numbering
+ after operations that destroy the unique numbering, e.g., after
+ a mailmerge where the same document is repeatedly appended.
+
+ The force parameter specifies if the new renumbered name should
+ be inserted even if the attribute in question does not exist.
"""
- def __init__ \
- (self, tag, name = None, attr = None, start = 1, force = False) :
+ def __init__(self, tag, name=None, attr=None, start=1, force=False):
self.__super.__init__ ()
tag_ns, tag_name = split_tag (tag)
self.tag_ns = tag_ns
@@ -178,29 +166,26 @@ class Set_Attribute (Access_Attribute) :
self.transform = transform
self.value = value
self.oldvalue = oldvalue
- # end def __init__
- def use_value (self, oldval) :
- if oldval is None :
+ def use_value(self, oldval):
+ if oldval is None:
return None
- if self.oldvalue and oldval != self.oldvalue :
+ if self.oldvalue and oldval != self.oldvalue:
return None
- if self.key and self.transformer.has_key (self.key) :
- return str (self.transformer [self.key])
+ if self.key and self.transformer.has_key(self.key):
+ return str(self.transformer[self.key])
return self.value
- # end def use_value
-# end class Set_Attribute
def set_attributes_from_dict (tag, attr, d) :
""" Convenience function: iterate over a dict and return a list of
Set_Attribute objects specifying replacement of attributes in
the dictionary
"""
- return [Set_Attribute (tag, attr, oldvalue = k, value = v)
- for k,v in d.iteritems ()
- ]
-# end def set_attributes_from_dict
+ return [
+ Set_Attribute(tag, attr, oldvalue=k, value=v)
+ for k, v in d.items()
+ ]
class Reanchor (Access_Attribute) :
"""
@@ -327,41 +312,37 @@ class Manifest_Append (Transform) :
# meta.xml transforms
#
+
class Editinfo (Transform) :
"""
- This is an example of modifying OOo meta info (edit information,
- author, etc). We set some of the items (program that generated
- the OOo file, modification time, number of edit cyles and overall
- edit duration). It's easy to subclass this transform and replace
- the "replace" variable (pun intended) in the derived class.
+ This is an example of modifying OOo meta info (edit information,
+ author, etc). We set some of the items (program that generated
+ the OOo file, modification time, number of edit cycles and overall
+ edit duration). It's easy to subclass this transform and replace
+ the "replace" variable (pun intended) in the derived class.
"""
filename = 'meta.xml'
- prio = 20
- repl = \
- { ('meta', 'generator') : 'OOoPy field replacement'
- , ('dc', 'date') : time.strftime ('%Y-%m-%dT%H:%M:%S')
- , ('meta', 'editing-cycles') : '0'
- , ('meta', 'editing-duration') : 'PT0M0S'
- }
- replace = {}
+ prio = 20
+ repl = {
+ ('meta', 'generator'): 'OOoPy field replacement',
+ ('dc', 'date'): time.strftime ('%Y-%m-%dT%H:%M:%S'),
+ ('meta', 'editing-cycles'): '0',
+ ('meta', 'editing-duration'): 'PT0M0S'
+ }
+ replace = {}
# iterate over all mimetypes, so this works for all known mimetypes
# of OOo documents.
- for m in mimetypes :
- for params, value in repl.iteritems () :
- replace [OOo_Tag (mimetype = m, *params)] = value
+ for m in mimetypes:
+ for params, value in repl.items():
+ replace [OOo_Tag (mimetype=m, *params)] = value
- def apply (self, root) :
- for node in root.findall (self.oootag ('office', 'meta') + '/*') :
- if self.replace.has_key (node.tag) :
- node.text = self.replace [node.tag]
- # end def apply
-# end class Editinfo
+ def apply(self, root) :
+ for node in root.findall (self.oootag ('office', 'meta') + '/*'):
+ if node.tag in self.replace.keys():
+ node.text = self.replace[node.tag]
-#
-# settings.xml transforms
-#
-class Autoupdate (Transform) :
+class Autoupdate (Transform):
"""
This is an example of modifying OOo settings. We set some of the
AutoUpdate configuration items in OOo to true. We also specify
@@ -718,28 +699,31 @@ class Mailmerge (_Body_Concat) :
# end def apply
# end class Mailmerge
+
def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) :
- """ Serialise a style-element of an OOo document (e.g., a
- style:font-decl, style:default-style, etc declaration).
- We remove the name of the style and return something that is a
- representation of the style element which can be used as a
- dictionary key.
- The serialisation format is a tuple containing the tag as the
- first item, the attributes (as key,value pairs returned by
- items()) as the second item and the following items are
- serialisations of children.
"""
- attr = dict (element.attrib)
- stylename = OOo_Tag ('style', 'name', mimetype)
- if stylename in attr : del attr [stylename]
- attr = attr.items ()
- attr.sort ()
- attr = tuple (attr)
+ Serialise a style-element of an OOo document (e.g., a
+ style:font-decl, style:default-style, etc declaration).
+ We remove the name of the style and return something that is a
+ representation of the style element which can be used as a
+ dictionary key.
+ The serialisation format is a tuple containing the tag as the
+ first item, the attributes (as key,value pairs returned by
+ items()) as the second item and the following items are
+ serialisations of children.
+ """
+ attr = dict(element.attrib)
+ stylename = OOo_Tag('style', 'name', mimetype)
+ if stylename in attr:
+ del attr[stylename]
+ attr = list(attr.items())
+ attr.sort()
+ attr = tuple(attr)
serial = [prefix + element.tag, attr]
- for e in element :
- serial.append (tree_serialise (e, prefix, mimetype))
- return tuple (serial)
-# end def tree_serialise
+ for e in element:
+ serial.append(tree_serialise (e, prefix, mimetype))
+ return tuple(serial)
+
class Concatenate (_Body_Concat) :
"""
@@ -794,31 +778,31 @@ class Concatenate (_Body_Concat) :
assert (self.docs [-1].mimetype == self.docs [0].mimetype)
# end def __init__
- def apply_all (self, trees) :
- assert (self.docs [0].mimetype == self.transformer.mimetype)
+ def apply_all(self, trees):
+ assert (self.docs[0].mimetype == self.transformer.mimetype)
self.serialised = {}
self.stylenames = {}
- self.namemaps = [{}]
+ self.namemaps = [{}]
self.tab_depend = {}
- for s in self.ref_attrs.itervalues () :
- self.namemaps [0][s] = {}
+ for s in self.ref_attrs.values():
+ self.namemaps[0][s] = {}
self.body_decls = {}
- for s in self.body_decl_sections :
- self.body_decls [s] = {}
- self.trees = {}
- for f in self.oofiles :
- self.trees [f] = [trees [f].getroot ()]
- self.sections = {}
+ for s in self.body_decl_sections:
+ self.body_decls[s] = {}
+ self.trees = {}
+ for f in self.oofiles:
+ self.trees[f] = [trees[f].getroot()]
+ self.sections = {}
for f in self.stylefiles :
- self.sections [f] = {}
+ self.sections[f] = {}
for node in self.trees [f][0] :
self.sections [f][node.tag] = node
- for d in self.docs :
- self.namemaps.append ({})
- for s in self.ref_attrs.itervalues () :
- self.namemaps [-1][s] = {}
- for f in self.oofiles :
- self.trees [f].append (d.read (f).getroot ())
+ for d in self.docs:
+ self.namemaps.append({})
+ for s in self.ref_attrs.values():
+ self.namemaps[-1][s] = {}
+ for f in self.oofiles:
+ self.trees[f].append(d.read(f).getroot())
# append a pagebreak style, will be optimized away if duplicate
pbs = Addpagebreak_Style (transformer = self.transformer)
pbs.apply (self.trees ['content.xml'][0])
@@ -879,17 +863,12 @@ class Concatenate (_Body_Concat) :
if max < pos :
max = pos
self.insert_tabs (sub, max)
- # end def apply_tab_correction
-
- def _attr_rename (self, idx) :
- r = sum \
- ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v])
- for k,v in self.ref_attrs.iteritems ()
- ]
- , []
- )
- return Attribute_Access (r, transformer = self.transformer)
- # end def _attr_rename
+
+ def _attr_rename(self, idx):
+ r = sum(
+ [set_attributes_from_dict(None, k, self.namemaps [idx][v])
+ for k, v in self.ref_attrs.items()], [])
+ return Attribute_Access(r, transformer=self.transformer)
def body_concat (self) :
count = {}
@@ -974,30 +953,28 @@ class Concatenate (_Body_Concat) :
)
# end def insert_tabs
- def merge_defaultstyle (self, default_style, node) :
+ def merge_defaultstyle(self, default_style, node):
assert default_style is not None
assert node is not None
proppath = './' + self.properties_tag
defprops = default_style.find (proppath)
- props = node.find (proppath)
- sn = self.oootag ('style', 'name')
- if props is None :
- props = Element (self.properties_tag)
- for k, v in defprops.attrib.iteritems () :
- if self.default_properties.get (k) != v and not props.get (k) :
- if k == self.oootag ('style', 'tab-stop-distance') :
+ props = node.find(proppath)
+ sn = self.oootag('style', 'name')
+ if props is None:
+ props = Element(self.properties_tag)
+ for k, v in defprops.attrib.items():
+ if self.default_properties.get(k) != v and not props.get(k):
+ if k == self.oootag('style', 'tab-stop-distance'):
self.tab_correct = v
- self.tab_depend = {node.get (sn) : 1}
- stps = SubElement \
- (props, self.oootag ('style', 'tab-stops'))
- self.insert_tabs (stps)
+ self.tab_depend = {node.get(sn): 1}
+ stps = SubElement(props, self.oootag('style', 'tab-stops'))
+ self.insert_tabs(stps)
else :
- props.set (k,v)
- if len (props) or props.attrib :
- node.append (props)
- # end def merge_defaultstyle
+ props.set(k, v)
+ if len(props) or props.attrib:
+ node.append(props)
- def _newname (self, key, oldname) :
+ def _newname(self, key, oldname):
stylenum = 0
if (key, oldname) not in self.stylenames :
self.stylenames [(key, oldname)] = 1
@@ -1212,17 +1189,18 @@ def renumber_all (mimetype) :
# the info retrieved from the OOo document: We use the attribute name in
# the meta-information to store (and later retrieve) the information.
-def get_meta (mimetype) :
- """ Factory function for Attribute_Access to get all interesting
- meta-data
+
+def get_meta(mimetype):
+ """
+ Factory function for Attribute_Access to get all interesting meta-data
"""
get_attr = []
for attr in meta_counts :
- a = OOo_Tag ('meta', attr, mimetype)
- t = OOo_Tag ('meta', 'document-statistic', mimetype)
+ a = OOo_Tag('meta', attr, mimetype)
+ t = OOo_Tag('meta', 'document-statistic', mimetype)
get_attr.append (Get_Attribute (t, a, attr))
- return Attribute_Access (get_attr, prio = 20, filename = 'meta.xml')
-# end def get_meta
+ return Attribute_Access (get_attr, prio= 20, filename='meta.xml')
+
def set_meta (mimetype) :
""" Factory function for Attribute_Access to set all interesting
diff --git a/ooopy/Version.py b/ooopy/Version.py
index 495ca242a..3c6f39aef 100644
--- a/ooopy/Version.py
+++ b/ooopy/Version.py
@@ -1 +1 @@
-VERSION="1.11"
+VERSION = "1.11-python3"