summary refs log tree commit diff
path: root/ooopy
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2019-05-01 13:51:01 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2019-06-17 13:21:28 +0200
commit 6e09fe95f07ea2c0a827beda5fc2f2a63751db7f (patch)
tree d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy
parent ce4b7db76f21559b94943229bbeebd9c37c43f49 (diff)
download Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.tar.bz2
Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.zip
Embed ooopy (last version: 1.11)
Diffstat (limited to 'ooopy')
-rw-r--r-- ooopy/OOoPy.py 317
-rw-r--r-- ooopy/Transformer.py 1397
-rw-r--r-- ooopy/Transforms.py 1237
-rw-r--r-- ooopy/Version.py 1
-rw-r--r-- ooopy/__init__.py 0
5 files changed, 2952 insertions, 0 deletions
diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py
new file mode 100644
index 000000000..87e0b8110
--- /dev/null
+++ b/ooopy/OOoPy.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
+# Reichergasse 131, A-3411 Weidling.
+# Web: http://www.runtux.com Email: office@runtux.com
+# All rights reserved
+# ****************************************************************************
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# ****************************************************************************
+
+from __future__ import absolute_import
+
+from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
+try :
+ from StringIO import StringIO
+except ImportError :
+ from io import StringIO
+from datetime import datetime
+try :
+ from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map
+except ImportError :
+ from elementtree.ElementTree import ElementTree, fromstring, _namespace_map
+from tempfile import mkstemp
+from ooopy.Version import VERSION
+import os
+
+class _autosuper (type) :
+    """ Metaclass that stores an unbound ``super`` object on every class
+        it creates, under the attribute ``_<name>__super``. For class
+        names without leading underscores this is the name that
+        ``self.__super`` is mangled to inside the class body.
+    """
+    def __init__ (cls, name, bases, dict) :
+        super (_autosuper, cls).__init__ (name, bases, dict)
+        attr_name = "_%s__super" % name
+        setattr (cls, attr_name, super (cls))
+    # end def __init__
+# end class _autosuper
+
+class autosuper (_autosuper ('_autosuper_base', (object,), {})) :
+    """ Base class whose subclasses get a per-class ``__super``
+        attribute from the ``_autosuper`` metaclass.
+
+        The metaclass is applied by deriving from a helper base created
+        through a direct metaclass call. The Python-2-only
+        ``__metaclass__`` class attribute is ignored by Python 3, which
+        would leave ``self.__super`` undefined there; inheriting the
+        metaclass from a base works on both major versions.
+    """
+    def __init__ (self, *args, **kw) :
+        # Positional and keyword arguments are accepted but deliberately
+        # not forwarded to the next __init__ in the MRO.
+        self.__super.__init__ ()
+    # end def __init__
+# end class autosuper
+
+# Names of the XML members of an OOo document archive.
+files = \
+    [ 'content.xml'
+    , 'styles.xml'
+    , 'meta.xml'
+    , 'settings.xml'
+    , 'META-INF/manifest.xml'
+    ]
+
+# Supported document mimetypes: index 0 is the OOo 1.X writer format,
+# index 1 the OpenDocument text format.
+mimetypes = \
+    [ 'application/vnd.sun.xml.writer'
+    , 'application/vnd.oasis.opendocument.text'
+    ]
+# Per mimetype: symbolic namespace prefix -> namespace URI.
+namespace_by_name = \
+    { mimetypes [0] :
+        { 'chart'    : "http://openoffice.org/2000/chart"
+        , 'config'   : "http://openoffice.org/2001/config"
+        , 'dc'       : "http://purl.org/dc/elements/1.1/"
+        , 'dr3d'     : "http://openoffice.org/2000/dr3d"
+        , 'draw'     : "http://openoffice.org/2000/drawing"
+        , 'fo'       : "http://www.w3.org/1999/XSL/Format"
+        , 'form'     : "http://openoffice.org/2000/form"
+        , 'math'     : "http://www.w3.org/1998/Math/MathML"
+        , 'meta'     : "http://openoffice.org/2000/meta"
+        , 'number'   : "http://openoffice.org/2000/datastyle"
+        , 'office'   : "http://openoffice.org/2000/office"
+        , 'script'   : "http://openoffice.org/2000/script"
+        , 'style'    : "http://openoffice.org/2000/style"
+        , 'svg'      : "http://www.w3.org/2000/svg"
+        , 'table'    : "http://openoffice.org/2000/table"
+        , 'text'     : "http://openoffice.org/2000/text"
+        , 'xlink'    : "http://www.w3.org/1999/xlink"
+        , 'manifest' : "http://openoffice.org/2001/manifest"
+        }
+    , mimetypes [1] :
+        { 'chart'    : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
+        , 'config'   : "urn:oasis:names:tc:opendocument:xmlns:config:1.0"
+        , 'dc'       : "http://purl.org/dc/elements/1.1/"
+        , 'dr3d'     : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
+        , 'draw'     : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+        , 'fo'       : "urn:oasis:names:tc:opendocument:xmlns:"
+                       "xsl-fo-compatible:1.0"
+        , 'form'     : "urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+        , 'math'     : "http://www.w3.org/1998/Math/MathML"
+        , 'meta'     : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
+        , 'number'   : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
+        , 'office'   : "urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+        , 'officeooo': "http://openoffice.org/2009/office"
+        , 'script'   : "urn:oasis:names:tc:opendocument:xmlns:script:1.0"
+        , 'style'    : "urn:oasis:names:tc:opendocument:xmlns:style:1.0"
+        , 'svg'      : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
+        , 'table'    : "urn:oasis:names:tc:opendocument:xmlns:table:1.0"
+        , 'text'     : "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+        , 'xlink'    : "http://www.w3.org/1999/xlink"
+        , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
+        , 'tableooo' : "http://openoffice.org/2009/table"
+        , 'transformation' : "http://www.w3.org/2003/g/data-view#"
+        # OOo 1.X tags and some others:
+        , 'ooo'      : "http://openoffice.org/2004/office"
+        , 'ooow'     : "http://openoffice.org/2004/writer"
+        , 'oooc'     : "http://openoffice.org/2004/calc"
+        , 'o_dom'    : "http://www.w3.org/2001/xml-events"
+        , 'o_xforms' : "http://www.w3.org/2002/xforms"
+        , 'xs'       : "http://www.w3.org/2001/XMLSchema"
+        , 'xsi'      : "http://www.w3.org/2001/XMLSchema-instance"
+        # predefined xml namespace, see
+        # http://www.w3.org/TR/2006/REC-xml-names11-20060816/
+        # "It MAY, but need not, be declared, and MUST NOT be undeclared
+        # or bound to any other namespace name."
+        , 'xml'      : "http://www.w3.org/XML/1998/namespace"
+        }
+    }
+
+# Register every URI -> prefix pair in ElementTree's namespace map. A
+# URI that is already registered must carry the same prefix. The dict
+# methods values () / items () are used because they exist on both
+# Python 2 and Python 3, unlike itervalues () / iteritems ().
+for mimetype in namespace_by_name.values () :
+    for k, v in mimetype.items () :
+        if v in _namespace_map :
+            assert (_namespace_map [v] == k)
+        _namespace_map [v] = k
+
+class OOoElementTree (autosuper) :
+    """
+        ElementTree wrapper for one XML member of an OOo document.
+        Attribute access is forwarded to a real ElementTree instance;
+        only the write method differs: it takes no arguments and hands
+        the tree back to the owning OOoPy object, which stores it under
+        the archive member name it was read from.
+    """
+    def __init__ (self, ooopy, zname, root) :
+        self.tree  = ElementTree (root)
+        self.zname = zname
+        self.ooopy = ooopy
+    # end def __init__
+
+    def write (self) :
+        """ Write the wrapped tree via the owning OOoPy object. """
+        self.ooopy.write (self.zname, self.tree)
+    # end def write
+
+    def __getattr__ (self, name) :
+        """
+            Forward lookups of missing attributes to the wrapped
+            ElementTree and cache the result on this instance. Names
+            starting with a double underscore are never forwarded.
+        """
+        if name.startswith ('__') :
+            raise AttributeError (name)
+        value = getattr (self.tree, name)
+        setattr (self, name, value)
+        return value
+    # end def __getattr__
+
+# end class OOoElementTree
+
+class OOoPy (autosuper) :
+    """
+        Wrapper for OpenOffice.org zip files (all OOo documents are
+        really zip files internally).
+
+        from ooopy.OOoPy import OOoPy
+        >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw')
+        >>> o.mimetype
+        'application/vnd.sun.xml.writer'
+        >>> for f in files :
+        ...     e = o.read (f)
+        ...     e.write ()
+        ...
+        >>> o.close ()
+        >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt')
+        >>> o.mimetype
+        'application/vnd.oasis.opendocument.text'
+        >>> for f in files :
+        ...     e = o.read (f)
+        ...     e.write ()
+        ...
+        >>> o.append_file ('Pictures/empty', '')
+        >>> o.close ()
+        >>> o = OOoPy (infile = 'out2.odt')
+        >>> for f in o.izip.infolist () :
+        ...     print f.filename, f.create_system, f.compress_type
+        mimetype 0 8
+        content.xml 0 8
+        styles.xml 0 8
+        meta.xml 0 8
+        settings.xml 0 8
+        META-INF/manifest.xml 0 8
+        Pictures/empty 0 8
+        Configurations2/statusbar/ 0 0
+        Configurations2/accelerator/current.xml 0 8
+        Configurations2/floater/ 0 0
+        Configurations2/popupmenu/ 0 0
+        Configurations2/progressbar/ 0 0
+        Configurations2/menubar/ 0 0
+        Configurations2/toolbar/ 0 0
+        Configurations2/images/Bitmaps/ 0 0
+        Thumbnails/thumbnail.png 0 8
+    """
+    def __init__ \
+        ( self
+        , infile     = None
+        , outfile    = None
+        , write_mode = 'w'
+        , mimetype   = None
+        ) :
+        """
+            Open an OOo document, if no outfile is given, we open the
+            file read-only. Otherwise the outfile has to be different
+            from the infile -- the python ZipFile can't deal with
+            read-write access. In case an outfile is given, we open it
+            in "w" mode as a zip file, unless write_mode is specified
+            (the only allowed case would be "a" for appending to an
+            existing file, see pythons ZipFile documentation for
+            details). If no infile is given, the user is responsible for
+            providing all necessary files in the resulting output file.
+
+            It seems that OOo needs to have the mimetype as the first
+            archive member (at least with mimetype as the first member
+            it works, the order may not be arbitrary) to recognize a zip
+            archive as an OOo file. When copying from a given infile, we
+            use the same order of elements in the resulting output. When
+            creating new elements we make sure the mimetype is the first
+            in the resulting archive.
+
+            Note that both, infile and outfile can either be filenames
+            or file-like objects (e.g. StringIO).
+
+            The mimetype is automatically determined if an infile is
+            given. If only writing is desired, the mimetype should be
+            set.
+        """
+        # Set up the bookkeeping attributes before anything can fail, so
+        # that close () -- and therefore __del__ -- always finds them.
+        self.izip    = self.ozip = None
+        self.written = {}
+        assert (infile != outfile)
+        if infile :
+            self.izip = ZipFile (infile, 'r', ZIP_DEFLATED)
+        if outfile :
+            self.ozip = ZipFile (outfile, write_mode, ZIP_DEFLATED)
+        if mimetype :
+            self.mimetype = mimetype
+        elif self.izip :
+            mt = self.izip.read ('mimetype')
+            # On Python 3 the archive member is read as bytes, but the
+            # keys of namespace_by_name are native strings.
+            if not isinstance (mt, str) :
+                mt = mt.decode ('ascii')
+            self.mimetype = mt
+    # end def __init__
+
+    def read (self, zname) :
+        """
+            return an OOoElementTree object for the given OOo document
+            archive member name. Currently an OOo document contains the
+            following XML files::
+
+             * content.xml: the text of the OOo document
+             * styles.xml: style definitions
+             * meta.xml: meta-information (author, last changed, ...)
+             * settings.xml: settings in OOo
+             * META-INF/manifest.xml: contents of the archive
+
+            There is an additional file "mimetype" that always contains
+            the string "application/vnd.sun.xml.writer" for OOo 1.X files
+            and the string "application/vnd.oasis.opendocument.text" for
+            OOo 2.X files.
+        """
+        assert (self.izip)
+        return OOoElementTree (self, zname, fromstring (self.izip.read (zname)))
+    # end def read
+
+    def _write (self, zname, str) :
+        """ Store the data str as archive member zname in the output
+            archive (deflated, current UTC time as timestamp) and record
+            zname as written.
+        """
+        # ZipInfo is documented to take (year, month, day, hour, min,
+        # sec); drop the three extra struct_time fields.
+        now  = datetime.utcnow ().timetuple () [:6]
+        info = ZipInfo (zname, date_time = now)
+        info.create_system = 0 # pretend to be fat
+        info.compress_type = ZIP_DEFLATED
+        self.ozip.writestr (info, str)
+        self.written [zname] = 1
+    # end def _write
+
+    def write (self, zname, etree) :
+        """ Serialise etree and store it as archive member zname. The
+            mimetype member is written first if nothing has been
+            written to the output archive yet.
+        """
+        assert (self.ozip)
+        # assure mimetype is the first member in new archive
+        if 'mimetype' not in self.written :
+            self._write ('mimetype', self.mimetype)
+        # On Python 3 the module-level StringIO is io.StringIO, a text
+        # buffer, while ElementTree emits encoded bytes: use a byte
+        # buffer there. On Python 2 the original StringIO is kept.
+        import io
+        if StringIO is io.StringIO :
+            buf = io.BytesIO ()
+        else :
+            buf = StringIO ()
+        etree.write (buf)
+        self._write (zname, buf.getvalue ())
+    # end def write
+
+    def append_file (self, zname, str) :
+        """ Official interface to _write: Append a file to the end of
+            the archive. Does nothing if zname was already written.
+        """
+        if zname not in self.written :
+            self._write (zname, str)
+    # end def append_file
+
+    def close (self) :
+        """
+            Close the zip files. According to documentation of zipfile in
+            the standard python lib, this has to be done to be sure
+            everything is written. We copy over the not-yet written files
+            from izip before closing ozip. Both archives are closed even
+            if copying fails; calling close again is a no-op.
+        """
+        izip, ozip = self.izip, self.ozip
+        try :
+            if izip and ozip :
+                for f in izip.infolist () :
+                    if f.filename not in self.written :
+                        ozip.writestr (f, izip.read (f.filename))
+        finally :
+            self.izip = self.ozip = None
+            try :
+                if izip : izip.close ()
+            finally :
+                if ozip : ozip.close ()
+    # end def close
+
+    __del__ = close # auto-close on deletion of object
+# end class OOoPy
diff --git a/ooopy/Transformer.py b/ooopy/Transformer.py
new file mode 100644
index 000000000..dbbab125d
--- /dev/null
+++ b/ooopy/Transformer.py
@@ -0,0 +1,1397 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
+# Reichergasse 131, A-3411 Weidling.
+# Web: http://www.runtux.com Email: office@runtux.com
+# All rights reserved
+# ****************************************************************************
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# ****************************************************************************
+
+from __future__ import absolute_import
+
+import time
+import re
+try :
+ from xml.etree.ElementTree import dump, SubElement, Element, tostring
+ from xml.etree.ElementTree import _namespace_map
+except ImportError :
+ from elementtree.ElementTree import dump, SubElement, Element, tostring
+ from elementtree.ElementTree import _namespace_map
+from copy import deepcopy
+from ooopy.OOoPy import OOoPy, autosuper
+from ooopy.OOoPy import files, mimetypes, namespace_by_name
+from ooopy.Version import VERSION
+
+def OOo_Tag (namespace, name, mimetype) :
+    """Build the fully qualified ``{uri}name`` tag for a symbolic
+       namespace prefix, using the namespace table of the given mimetype.
+
+    >>> OOo_Tag ('xml', 'id', mimetypes [1])
+    '{http://www.w3.org/XML/1998/namespace}id'
+    >>> OOo_Tag ('text', 'list', mimetypes [1])
+    '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list'
+    """
+    uri = namespace_by_name [mimetype][namespace]
+    return "{%s}%s" % (uri, name)
+# end def OOo_Tag
+
+def split_tag (tag) :
+    """ Inverse of OOo_Tag: take a ``{uri}name`` tag apart and return
+        the pair (symbolic namespace prefix, name). The prefix is looked
+        up in ElementTree's namespace map.
+    """
+    uri, local = tag.split ('}')
+    prefix = _namespace_map [uri [1:]] # uri still carries the leading '{'
+    return (prefix, local)
+# end def split_tag
+
+class Transform (autosuper) :
+    """
+        Base class for individual transforms on OOo files. An individual
+        transform needs a filename variable for specifying the OOo file
+        the transform should be applied to and an optional prio.
+        Individual transforms are applied according to their prio
+        setting, higher prio means later application of a transform.
+
+        The filename variable must specify one of the XML files which are
+        part of the OOo document (see the files variable imported from
+        ooopy.OOoPy). As the names imply, content.xml contains the
+        contents of the document (text and ad-hoc style definitions),
+        styles.xml contains the style definitions, meta.xml contains
+        meta information like author, editing time, etc. and
+        settings.xml is used to store OOo's settings (menu
+        Tools->Configure).
+    """
+    prio = 100
+    # Per-mimetype element names; register () picks the entry that
+    # matches the transformer's mimetype.
+    textbody_names = \
+        { mimetypes [0] : 'body'
+        , mimetypes [1] : 'text'
+        }
+    paragraph_props = \
+        { mimetypes [0] : 'properties'
+        , mimetypes [1] : 'paragraph-properties'
+        }
+    font_decls = \
+        { mimetypes [0] : 'font-decls'
+        , mimetypes [1] : 'font-face-decls'
+        }
+
+    def __init__ (self, prio = None, transformer = None) :
+        """ Optionally override the class-level prio and register with
+            the given transformer.
+        """
+        if prio is not None :
+            self.prio = prio
+        self.transformer = None
+        if transformer :
+            self.register (transformer)
+    # end def __init__
+
+    def apply (self, root) :
+        """ Apply myself to the element given as root """
+        # Call form of raise: valid on Python 2 and Python 3, unlike
+        # the old "raise Class, message" statement.
+        raise NotImplementedError ('derived transforms must implement "apply"')
+    # end def apply
+
+    def apply_all (self, trees) :
+        """ Apply myself to all the files given in trees. The variable
+            trees contains a dictionary of ElementTree indexed by the
+            name of the OOo File.
+            The standard case is that only one file (namely
+            self.filename) is used.
+        """
+        # filename is not defined in this base class; derived
+        # transforms have to provide it.
+        assert (self.filename)
+        self.apply (trees [self.filename].getroot ())
+    # end def apply_all
+
+    def find_tbody (self, root) :
+        """ Find the node which really contains the text -- different
+            for different OOo versions.
+            Returns root itself if it already is the text body element,
+            otherwise the result of searching below root for it.
+        """
+        tbody = root
+        if tbody.tag != self.textbody_tag :
+            tbody = tbody.find ('.//' + self.textbody_tag)
+        return tbody
+    # end def find_tbody
+
+    def register (self, transformer) :
+        """ Registering with a transformer means being able to access
+            variables stored in the transformer by other transforms.
+
+            Also needed for tag-computation: The transformer knows which
+            version of OOo document we are processing.
+        """
+        self.transformer = transformer
+        mt = self.mimetype = transformer.mimetype
+        self.textbody_name = self.textbody_names [mt]
+        # Look the name up on the class: the instance attribute assigned
+        # here shadows the class-level dictionary, so going through self
+        # would index a string when register is called a second time.
+        self.paragraph_props = self.__class__.paragraph_props [mt]
+        self.properties_tag = self.oootag ('style', self.paragraph_props)
+        self.textbody_tag = self.oootag ('office', self.textbody_name)
+        self.font_decls_tag = self.oootag ('office', self.font_decls [mt])
+    # end def register
+
+    def oootag (self, namespace, name) :
+        """ Compute long tag version """
+        return OOo_Tag (namespace, name, self.mimetype)
+    # end def oootag
+
+    def set (self, variable, value) :
+        """ Set variable in our transformer using naming convention. """
+        self.transformer [self._varname (variable)] = value
+    # end def set
+
+    def _varname (self, name) :
+        """ For fulfilling the naming convention of the transformer
+            dictionary (every entry in this dictionary should be prefixed
+            with the class name of the transform) we have this
+            convenience method.
+            Returns variable name prefixed with own class name.
+        """
+        return ":".join ((self.__class__.__name__, name))
+    # end def _varname
+
+# end class Transform
+
+class Transformer (autosuper) :
+ """
+ Class for applying a set of transforms to a given ooopy object.
+ The transforms are applied to the specified file in priority
+ order. When applying transforms we have a mechanism for
+ communication of transforms. We give the transformer to the
+ individual transforms as a parameter. The transforms may use the
+ transformer like a dictionary for storing values and retrieving
+ values left by previous transforms.
+ As a naming convention each transform should use its class name
+ as a prefix for storing values in the dictionary.
+ >>> import Transforms
+ >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts
+ >>> try :
+ ... from io import StringIO, BytesIO
+ ... StringIO = BytesIO
+ ... except ImportError :
+ ... from StringIO import StringIO
+ >>> sio = BytesIO ()
+ >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
+ 'Standard'
+ >>> def cb (name) :
+ ... r = { 'street' : 'Beispielstrasse 42'
+ ... , 'firstname' : 'Hugo'
+ ... , 'salutation' : 'Frau'
+ ... }
+ ... if r.has_key (name) : return r [name]
+ ... return None
+ ...
+ >>> p = get_meta (m)
+ >>> t = Transformer (m, p)
+ >>> t ['a'] = 'a'
+ >>> t ['a']
+ 'a'
+ >>> t.transform (o)
+ >>> p.set ('a', 'b')
+ >>> t ['Attribute_Access:a']
+ 'b'
+ >>> t = Transformer (
+ ... m
+ ... , Transforms.Autoupdate ()
+ ... , Transforms.Editinfo ()
+ ... , Transforms.Field_Replace (prio = 99, replace = cb)
+ ... , Transforms.Field_Replace
+ ... ( replace =
+ ... { 'salutation' : ''
+ ... , 'firstname' : 'Erika'
+ ... , 'lastname' : 'Musterfrau'
+ ... , 'country' : 'D'
+ ... , 'postalcode' : '00815'
+ ... , 'city' : 'Niemandsdorf'
+ ... }
+ ... )
+ ... , Transforms.Addpagebreak_Style ()
+ ... , Transforms.Addpagebreak ()
+ ... )
+ >>> t.transform (o)
+ >>> o.close ()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("testout.sxw", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> c = o.read ('content.xml')
+ >>> m = o.mimetype
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+ >>> for node in body.findall (vset) :
+ ... name = node.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', node.text
+ salutation : None
+ firstname : Erika
+ lastname : Musterfrau
+ street : Beispielstrasse 42
+ country : D
+ postalcode : 00815
+ city : Niemandsdorf
+ salutation : None
+ firstname : Erika
+ lastname : Musterfrau
+ street : Beispielstrasse 42
+ country : D
+ postalcode : 00815
+ city : Niemandsdorf
+ >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
+ 'P2'
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
+ >>> c = o.read ('content.xml')
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Addpagebreak_Style ()
+ ... , Transforms.Mailmerge
+ ... ( iterator =
+ ... ( dict (firstname = 'Erika', lastname = 'Nobody')
+ ... , dict (firstname = 'Eric', lastname = 'Wizard')
+ ... , cb
+ ... )
+ ... )
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform (o)
+ >>> for i in meta_counts :
+ ... print i, t [':'.join (('Set_Attribute', i))]
+ character-count 951
+ image-count 0
+ object-count 0
+ page-count 3
+ paragraph-count 113
+ table-count 3
+ word-count 162
+ >>> name = t ['Addpagebreak_Style:stylename']
+ >>> name
+ 'P2'
+ >>> o.close ()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("testout2.sxw", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', m))
+ >>> for n in body.findall ('.//*') :
+ ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+ ... if zidx :
+ ... print ':'.join(split_tag (n.tag)), zidx
+ draw:text-box 0
+ draw:rect 1
+ draw:text-box 3
+ draw:rect 4
+ draw:text-box 6
+ draw:rect 7
+ draw:text-box 2
+ draw:text-box 5
+ draw:text-box 8
+ >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+ ... if n.get (OOo_Tag ('text', 'style-name', m)) == name :
+ ... print n.tag
+ {http://openoffice.org/2000/text}p
+ {http://openoffice.org/2000/text}p
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
+ >>> for n in body.findall (vset) :
+ ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
+ ... name = n.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', n.text
+ firstname : Erika
+ lastname : Nobody
+ firstname : Eric
+ lastname : Wizard
+ firstname : Hugo
+ lastname : Testman
+ firstname : Erika
+ lastname : Nobody
+ firstname : Eric
+ lastname : Wizard
+ firstname : Hugo
+ lastname : Testman
+ >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
+ ... print n.get (OOo_Tag ('draw', 'name', m)),
+ ... print n.get (OOo_Tag ('text', 'anchor-page-number', m))
+ Frame1 1
+ Frame2 2
+ Frame3 3
+ Frame4 None
+ Frame5 None
+ Frame6 None
+ >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+ ... print n.get (OOo_Tag ('text', 'name', m))
+ Section1
+ Section2
+ Section3
+ Section4
+ Section5
+ Section6
+ Section7
+ Section8
+ Section9
+ Section10
+ Section11
+ Section12
+ Section13
+ Section14
+ Section15
+ Section16
+ Section17
+ Section18
+ >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
+ ... print n.get (OOo_Tag ('table', 'name', m))
+ Table1
+ Table2
+ Table3
+ >>> r = o.read ('meta.xml')
+ >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
+ >>> for i in meta_counts :
+ ... print i, repr (meta.get (OOo_Tag ('meta', i, m)))
+ character-count '951'
+ image-count '0'
+ object-count '0'
+ page-count '3'
+ paragraph-count '113'
+ table-count '3'
+ word-count '162'
+ >>> o.close ()
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
+ >>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw')
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Concatenate (*tf)
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform (o)
+ >>> for i in meta_counts :
+ ... print i, repr (t [':'.join (('Set_Attribute', i))])
+ character-count '1131'
+ image-count '0'
+ object-count '0'
+ page-count '3'
+ paragraph-count '168'
+ table-count '2'
+ word-count '160'
+ >>> o.close ()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("testout3.sxw", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> s = o.read ('styles.xml')
+ >>> for n in c.findall ('./*/*') :
+ ... name = n.get (OOo_Tag ('style', 'name', m))
+ ... if name :
+ ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
+ ... print '"%s", "%s"' % (name, parent)
+ "Tahoma1", "None"
+ "Bitstream Vera Sans", "None"
+ "Tahoma", "None"
+ "Nimbus Roman No9 L", "None"
+ "Courier New", "None"
+ "Arial Black", "None"
+ "New Century Schoolbook", "None"
+ "Helvetica", "None"
+ "Table1", "None"
+ "Table1.A", "None"
+ "Table1.A1", "None"
+ "Table1.E1", "None"
+ "Table1.A2", "None"
+ "Table1.E2", "None"
+ "P1", "None"
+ "fr1", "Frame"
+ "fr2", "None"
+ "fr3", "Frame"
+ "Sect1", "None"
+ "gr1", "None"
+ "P2", "Standard"
+ "Standard_Concat", "None"
+ "Concat_P1", "Concat_Frame contents"
+ "Concat_P2", "Concat_Frame contents"
+ "P3", "Concat_Frame contents"
+ "P4", "Concat_Frame contents"
+ "P5", "Concat_Standard"
+ "P6", "Concat_Standard"
+ "P7", "Concat_Frame contents"
+ "P8", "Concat_Frame contents"
+ "P9", "Concat_Frame contents"
+ "P10", "Concat_Frame contents"
+ "P11", "Concat_Frame contents"
+ "P12", "Concat_Frame contents"
+ "P13", "Concat_Frame contents"
+ "P15", "Concat_Standard"
+ "P16", "Concat_Standard"
+ "P17", "Concat_Standard"
+ "P18", "Concat_Standard"
+ "P19", "Concat_Standard"
+ "P20", "Concat_Standard"
+ "P21", "Concat_Standard"
+ "P22", "Concat_Standard"
+ "P23", "Concat_Standard"
+ "T1", "None"
+ "Concat_fr1", "Concat_Frame"
+ "Concat_fr2", "Concat_Frame"
+ "Concat_fr3", "Concat_Frame"
+ "fr4", "Concat_Frame"
+ "fr5", "Concat_Frame"
+ "fr6", "Concat_Frame"
+ "Concat_Sect1", "None"
+ "N0", "None"
+ "N2", "None"
+ "P15_Concat", "Concat_Standard"
+ >>> for n in s.findall ('./*/*') :
+ ... name = n.get (OOo_Tag ('style', 'name', m))
+ ... if name :
+ ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
+ ... print '"%s", "%s"' % (name, parent)
+ "Tahoma1", "None"
+ "Bitstream Vera Sans", "None"
+ "Tahoma", "None"
+ "Nimbus Roman No9 L", "None"
+ "Courier New", "None"
+ "Arial Black", "None"
+ "New Century Schoolbook", "None"
+ "Helvetica", "None"
+ "Standard", "None"
+ "Text body", "Standard"
+ "List", "Text body"
+ "Table Contents", "Text body"
+ "Table Heading", "Table Contents"
+ "Caption", "Standard"
+ "Frame contents", "Text body"
+ "Index", "Standard"
+ "Frame", "None"
+ "OLE", "None"
+ "Concat_Standard", "None"
+ "Concat_Text body", "Concat_Standard"
+ "Concat_List", "Concat_Text body"
+ "Concat_Caption", "Concat_Standard"
+ "Concat_Frame contents", "Concat_Text body"
+ "Concat_Index", "Concat_Standard"
+ "Horizontal Line", "Concat_Standard"
+ "Internet link", "None"
+ "Visited Internet Link", "None"
+ "Concat_Frame", "None"
+ "Concat_OLE", "None"
+ "pm1", "None"
+ "Concat_pm1", "None"
+ "Standard", "None"
+ "Concat_Standard", "None"
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
+ ... name = n.get (OOo_Tag ('text', 'name', m))
+ ... print name
+ salutation
+ firstname
+ lastname
+ street
+ country
+ postalcode
+ city
+ date
+ invoice.invoice_no
+ invoice.abo.aboprice.abotype.description
+ address.salutation
+ address.title
+ address.firstname
+ address.lastname
+ address.function
+ address.street
+ address.country
+ address.postalcode
+ address.city
+ invoice.subscriber.salutation
+ invoice.subscriber.title
+ invoice.subscriber.firstname
+ invoice.subscriber.lastname
+ invoice.subscriber.function
+ invoice.subscriber.street
+ invoice.subscriber.country
+ invoice.subscriber.postalcode
+ invoice.subscriber.city
+ invoice.period_start
+ invoice.period_end
+ invoice.currency.name
+ invoice.amount
+ invoice.subscriber.initial
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
+ ... name = n.get (OOo_Tag ('text', 'name', m))
+ ... print name
+ Illustration
+ Table
+ Text
+ Drawing
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+ ... name = n.get (OOo_Tag ('text', 'style-name', m))
+ ... if not name or name.startswith ('Concat') :
+ ... print ">%s<" % name
+ >Concat_P1<
+ >Concat_P2<
+ >Concat_Frame contents<
+ >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
+ ... attrs = 'name', 'style-name', 'z-index'
+ ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+ ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+ ... print attrs
+ ['Frame1', 'fr1', '0', '1']
+ ['Frame2', 'fr1', '3', '2']
+ ['Frame3', 'Concat_fr1', '6', '3']
+ ['Frame4', 'Concat_fr2', '7', '3']
+ ['Frame5', 'Concat_fr3', '8', '3']
+ ['Frame6', 'Concat_fr1', '9', '3']
+ ['Frame7', 'fr4', '10', '3']
+ ['Frame8', 'fr4', '11', '3']
+ ['Frame9', 'fr4', '12', '3']
+ ['Frame10', 'fr4', '13', '3']
+ ['Frame11', 'fr4', '14', '3']
+ ['Frame12', 'fr4', '15', '3']
+ ['Frame13', 'fr5', '16', '3']
+ ['Frame14', 'fr4', '18', '3']
+ ['Frame15', 'fr4', '19', '3']
+ ['Frame16', 'fr4', '20', '3']
+ ['Frame17', 'fr6', '17', '3']
+ ['Frame18', 'fr4', '23', '3']
+ ['Frame19', 'fr3', '2', None]
+ ['Frame20', 'fr3', '5', None]
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+ ... attrs = 'name', 'style-name'
+ ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
+ ... print attrs
+ ['Section1', 'Sect1']
+ ['Section2', 'Sect1']
+ ['Section3', 'Sect1']
+ ['Section4', 'Sect1']
+ ['Section5', 'Sect1']
+ ['Section6', 'Sect1']
+ ['Section7', 'Concat_Sect1']
+ ['Section8', 'Concat_Sect1']
+ ['Section9', 'Concat_Sect1']
+ ['Section10', 'Concat_Sect1']
+ ['Section11', 'Concat_Sect1']
+ ['Section12', 'Concat_Sect1']
+ ['Section13', 'Concat_Sect1']
+ ['Section14', 'Concat_Sect1']
+ ['Section15', 'Concat_Sect1']
+ ['Section16', 'Concat_Sect1']
+ ['Section17', 'Concat_Sect1']
+ ['Section18', 'Concat_Sect1']
+ ['Section19', 'Concat_Sect1']
+ ['Section20', 'Concat_Sect1']
+ ['Section21', 'Concat_Sect1']
+ ['Section22', 'Concat_Sect1']
+ ['Section23', 'Concat_Sect1']
+ ['Section24', 'Concat_Sect1']
+ ['Section25', 'Concat_Sect1']
+ ['Section26', 'Concat_Sect1']
+ ['Section27', 'Concat_Sect1']
+ ['Section28', 'Sect1']
+ ['Section29', 'Sect1']
+ ['Section30', 'Sect1']
+ ['Section31', 'Sect1']
+ ['Section32', 'Sect1']
+ ['Section33', 'Sect1']
+ >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
+ ... attrs = 'style-name', 'text-style-name', 'z-index'
+ ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+ ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+ ... print attrs
+ ['gr1', 'P1', '1', '1']
+ ['gr1', 'P1', '4', '2']
+ >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
+ ... attrs = 'style-name', 'text-style-name', 'z-index'
+ ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+ ... print attrs
+ ['gr1', 'P1', '24']
+ ['gr1', 'P1', '22']
+ ['gr1', 'P1', '21']
+ >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
+ ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
+ ... attrs = 'name', 'class', 'family'
+ ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
+ ... print attrs
+ ... props = n.find ('./' + OOo_Tag ('style', 'properties', m))
+ ... if props is not None and len (props) :
+ ... props [0].tag
+ ['Concat_Standard', 'text', 'paragraph']
+ '{http://openoffice.org/2000/style}tab-stops'
+ ['Concat_Text body', 'text', 'paragraph']
+ ['Concat_List', 'list', 'paragraph']
+ ['Concat_Caption', 'extra', 'paragraph']
+ ['Concat_Frame contents', 'extra', 'paragraph']
+ ['Concat_Index', 'index', 'paragraph']
+ ['Concat_Frame', None, 'graphics']
+ ['Concat_OLE', None, 'graphics']
+ >>> for n in c.findall ('.//*') :
+ ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+ ... if zidx :
+ ... print ':'.join(split_tag (n.tag)), zidx
+ draw:text-box 0
+ draw:rect 1
+ draw:text-box 3
+ draw:rect 4
+ draw:text-box 6
+ draw:text-box 7
+ draw:text-box 8
+ draw:text-box 9
+ draw:text-box 10
+ draw:text-box 11
+ draw:text-box 12
+ draw:text-box 13
+ draw:text-box 14
+ draw:text-box 15
+ draw:text-box 16
+ draw:text-box 18
+ draw:text-box 19
+ draw:text-box 20
+ draw:text-box 17
+ draw:text-box 23
+ draw:line 24
+ draw:text-box 2
+ draw:text-box 5
+ draw:line 22
+ draw:line 21
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio)
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Addpagebreak_Style ()
+ ... , Transforms.Mailmerge
+ ... ( iterator =
+ ... ( dict
+ ... ( Spett = "Spettabile"
+ ... , contraente = "First person"
+ ... , indirizzo = "street? 1"
+ ... , tipo = "racc. A.C."
+ ... , luogo = "Varese"
+ ... , oggetto = "Saluti"
+ ... )
+ ... , dict
+ ... ( Spett = "Egregio"
+ ... , contraente = "Second Person"
+ ... , indirizzo = "street? 2"
+ ... , tipo = "Raccomandata"
+ ... , luogo = "Gavirate"
+ ... , oggetto = "Ossequi"
+ ... )
+ ... )
+ ... )
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform(o)
+ >>> o.close()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("carta-out.stw", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+ >>> for node in body.findall (vset) :
+ ... name = node.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', node.text
+ Spett : Spettabile
+ contraente : First person
+ indirizzo : street? 1
+ Spett : Egregio
+ contraente : Second Person
+ indirizzo : street? 2
+ tipo : racc. A.C.
+ luogo : Varese
+ oggetto : Saluti
+ tipo : Raccomandata
+ luogo : Gavirate
+ oggetto : Ossequi
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Addpagebreak_Style ()
+ ... , Transforms.Mailmerge
+ ... ( iterator =
+ ... ( dict (firstname = 'Erika', lastname = 'Nobody')
+ ... , dict (firstname = 'Eric', lastname = 'Wizard')
+ ... , cb
+ ... )
+ ... )
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform (o)
+ >>> for i in meta_counts :
+ ... print i, t [':'.join (('Set_Attribute', i))]
+ character-count 951
+ image-count 0
+ object-count 0
+ page-count 3
+ paragraph-count 53
+ table-count 3
+ word-count 162
+ >>> name = t ['Addpagebreak_Style:stylename']
+ >>> name
+ 'P2'
+ >>> o.close ()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("testout.odt", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', m))
+ >>> for n in body.findall ('.//*') :
+ ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+ ... if zidx :
+ ... print ':'.join(split_tag (n.tag)), zidx
+ draw:frame 0
+ draw:rect 1
+ draw:frame 3
+ draw:rect 4
+ draw:frame 6
+ draw:rect 7
+ draw:frame 2
+ draw:frame 5
+ draw:frame 8
+ >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+ ... if n.get (OOo_Tag ('text', 'style-name', m)) == name :
+ ... print n.tag
+ {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
+ {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
+ >>> for n in body.findall (vset) :
+ ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
+ ... name = n.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', n.text
+ firstname : Erika
+ lastname : Nobody
+ firstname : Eric
+ lastname : Wizard
+ firstname : Hugo
+ lastname : Testman
+ firstname : Erika
+ lastname : Nobody
+ firstname : Eric
+ lastname : Wizard
+ firstname : Hugo
+ lastname : Testman
+ >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
+ ... print n.get (OOo_Tag ('draw', 'name', m)),
+ ... print n.get (OOo_Tag ('text', 'anchor-page-number', m))
+ Frame1 1
+ Frame2 2
+ Frame3 3
+ Frame4 None
+ Frame5 None
+ Frame6 None
+ >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+ ... print n.get (OOo_Tag ('text', 'name', m))
+ Section1
+ Section2
+ Section3
+ Section4
+ Section5
+ Section6
+ Section7
+ Section8
+ Section9
+ Section10
+ Section11
+ Section12
+ Section13
+ Section14
+ Section15
+ Section16
+ Section17
+ Section18
+ >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
+ ... print n.get (OOo_Tag ('table', 'name', m))
+ Table1
+ Table2
+ Table3
+ >>> r = o.read ('meta.xml')
+ >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
+ >>> for i in meta_counts :
+ ... print i, repr (meta.get (OOo_Tag ('meta', i, m)))
+ character-count '951'
+ image-count '0'
+ object-count '0'
+ page-count '3'
+ paragraph-count '53'
+ table-count '3'
+ word-count '162'
+ >>> o.close ()
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio)
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Addpagebreak_Style ()
+ ... , Transforms.Mailmerge
+ ... ( iterator =
+ ... ( dict
+ ... ( Spett = "Spettabile"
+ ... , contraente = "First person"
+ ... , indirizzo = "street? 1"
+ ... , tipo = "racc. A.C."
+ ... , luogo = "Varese"
+ ... , oggetto = "Saluti"
+ ... )
+ ... , dict
+ ... ( Spett = "Egregio"
+ ... , contraente = "Second Person"
+ ... , indirizzo = "street? 2"
+ ... , tipo = "Raccomandata"
+ ... , luogo = "Gavirate"
+ ... , oggetto = "Ossequi"
+ ... )
+ ... )
+ ... )
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform(o)
+ >>> o.close()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("carta-out.odt", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+ >>> for node in body.findall (vset) :
+ ... name = node.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', node.text
+ Spett : Spettabile
+ contraente : First person
+ indirizzo : street? 1
+ Spett : Egregio
+ contraente : Second Person
+ indirizzo : street? 2
+ tipo : racc. A.C.
+ luogo : Varese
+ oggetto : Saluti
+ tipo : Raccomandata
+ luogo : Gavirate
+ oggetto : Ossequi
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
+ >>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt')
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Concatenate (*tf)
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform (o)
+ >>> for i in meta_counts :
+ ... print i, repr (t [':'.join (('Set_Attribute', i))])
+ character-count '1131'
+ image-count '0'
+ object-count '0'
+ page-count '3'
+ paragraph-count '80'
+ table-count '2'
+ word-count '159'
+ >>> o.close ()
+ >>> ov = sio.getvalue ()
+ >>> f = open ("testout3.odt", "wb")
+ >>> f.write (ov)
+ >>> f.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> s = o.read ('styles.xml')
+ >>> for n in c.findall ('./*/*') :
+ ... name = n.get (OOo_Tag ('style', 'name', m))
+ ... if name :
+ ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
+ ... print '"%s", "%s"' % (name, parent)
+ "Tahoma1", "None"
+ "Bitstream Vera Sans", "None"
+ "Tahoma", "None"
+ "Nimbus Roman No9 L", "None"
+ "Courier New", "None"
+ "Arial Black", "None"
+ "New Century Schoolbook", "None"
+ "Times New Roman", "None"
+ "Arial", "None"
+ "Helvetica", "None"
+ "Table1", "None"
+ "Table1.A", "None"
+ "Table1.A1", "None"
+ "Table1.E1", "None"
+ "Table1.A2", "None"
+ "Table1.E2", "None"
+ "P1", "None"
+ "fr1", "Frame"
+ "fr2", "Frame"
+ "Sect1", "None"
+ "gr1", "None"
+ "P2", "Standard"
+ "Standard_Concat", "None"
+ "Concat_P1", "Concat_Frame_20_contents"
+ "Concat_P2", "Concat_Frame_20_contents"
+ "P3", "Concat_Frame_20_contents"
+ "P4", "Concat_Standard"
+ "P5", "Concat_Standard"
+ "P6", "Concat_Frame_20_contents"
+ "P7", "Concat_Frame_20_contents"
+ "P8", "Concat_Frame_20_contents"
+ "P9", "Concat_Frame_20_contents"
+ "P10", "Concat_Frame_20_contents"
+ "P11", "Concat_Frame_20_contents"
+ "P12", "Concat_Frame_20_contents"
+ "P14", "Concat_Standard"
+ "P15", "Concat_Standard"
+ "P16", "Concat_Standard"
+ "P17", "Concat_Standard"
+ "P18", "Concat_Standard"
+ "P19", "Concat_Standard"
+ "P20", "Concat_Standard"
+ "P21", "Concat_Standard"
+ "P22", "Concat_Standard"
+ "P23", "Concat_Standard"
+ "Concat_fr1", "Frame"
+ "Concat_fr2", "Frame"
+ "fr3", "Frame"
+ "fr4", "Frame"
+ "fr5", "Frame"
+ "fr6", "Frame"
+ "Concat_gr1", "None"
+ "N0", "None"
+ "N2", "None"
+ "P14_Concat", "Concat_Standard"
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
+ ... name = n.get (OOo_Tag ('text', 'name', m))
+ ... print name
+ salutation
+ firstname
+ lastname
+ street
+ country
+ postalcode
+ city
+ date
+ invoice.invoice_no
+ invoice.abo.aboprice.abotype.description
+ address.salutation
+ address.title
+ address.firstname
+ address.lastname
+ address.function
+ address.street
+ address.country
+ address.postalcode
+ address.city
+ invoice.subscriber.salutation
+ invoice.subscriber.title
+ invoice.subscriber.firstname
+ invoice.subscriber.lastname
+ invoice.subscriber.function
+ invoice.subscriber.street
+ invoice.subscriber.country
+ invoice.subscriber.postalcode
+ invoice.subscriber.city
+ invoice.period_start
+ invoice.period_end
+ invoice.currency.name
+ invoice.amount
+ invoice.subscriber.initial
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
+ ... name = n.get (OOo_Tag ('text', 'name', m))
+ ... print name
+ Illustration
+ Table
+ Text
+ Drawing
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
+ ... name = n.get (OOo_Tag ('text', 'style-name', m))
+ ... if not name or name.startswith ('Concat') :
+ ... print ':'.join(split_tag (n.tag)), ">%s<" % name
+ text:p >None<
+ text:p >None<
+ text:p >Concat_P1<
+ text:p >Concat_P1<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_P2<
+ text:p >Concat_Frame_20_contents<
+ text:p >None<
+ text:p >None<
+ text:p >None<
+ >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
+ ... attrs = 'name', 'style-name', 'z-index'
+ ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+ ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+ ... print attrs
+ ['Frame1', 'fr1', '0', '1']
+ ['Frame2', 'fr1', '3', '2']
+ ['Frame3', 'Concat_fr1', '6', '3']
+ ['Frame4', 'Concat_fr2', '7', '3']
+ ['Frame5', 'fr3', '8', '3']
+ ['Frame6', 'Concat_fr1', '9', '3']
+ ['Frame7', 'fr4', '10', '3']
+ ['Frame8', 'fr4', '11', '3']
+ ['Frame9', 'fr4', '12', '3']
+ ['Frame10', 'fr4', '13', '3']
+ ['Frame11', 'fr4', '14', '3']
+ ['Frame12', 'fr4', '15', '3']
+ ['Frame13', 'fr5', '16', '3']
+ ['Frame14', 'fr4', '18', '3']
+ ['Frame15', 'fr4', '19', '3']
+ ['Frame16', 'fr4', '20', '3']
+ ['Frame17', 'fr6', '17', '3']
+ ['Frame18', 'fr4', '23', '3']
+ ['Frame19', 'fr2', '2', None]
+ ['Frame20', 'fr2', '5', None]
+ >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
+ ... attrs = 'name', 'style-name'
+ ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
+ ... print attrs
+ ['Section1', 'Sect1']
+ ['Section2', 'Sect1']
+ ['Section3', 'Sect1']
+ ['Section4', 'Sect1']
+ ['Section5', 'Sect1']
+ ['Section6', 'Sect1']
+ ['Section7', 'Sect1']
+ ['Section8', 'Sect1']
+ ['Section9', 'Sect1']
+ ['Section10', 'Sect1']
+ ['Section11', 'Sect1']
+ ['Section12', 'Sect1']
+ ['Section13', 'Sect1']
+ ['Section14', 'Sect1']
+ ['Section15', 'Sect1']
+ ['Section16', 'Sect1']
+ ['Section17', 'Sect1']
+ ['Section18', 'Sect1']
+ ['Section19', 'Sect1']
+ ['Section20', 'Sect1']
+ ['Section21', 'Sect1']
+ ['Section22', 'Sect1']
+ ['Section23', 'Sect1']
+ ['Section24', 'Sect1']
+ ['Section25', 'Sect1']
+ ['Section26', 'Sect1']
+ ['Section27', 'Sect1']
+ ['Section28', 'Sect1']
+ ['Section29', 'Sect1']
+ ['Section30', 'Sect1']
+ ['Section31', 'Sect1']
+ ['Section32', 'Sect1']
+ ['Section33', 'Sect1']
+ >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
+ ... attrs = 'style-name', 'text-style-name', 'z-index'
+ ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+ ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
+ ... print attrs
+ ['gr1', 'P1', '1', '1']
+ ['gr1', 'P1', '4', '2']
+ >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
+ ... attrs = 'style-name', 'text-style-name', 'z-index'
+ ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
+ ... print attrs
+ ['Concat_gr1', 'P1', '24']
+ ['Concat_gr1', 'P1', '22']
+ ['Concat_gr1', 'P1', '21']
+ >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
+ ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
+ ... attrs = 'name', 'display-name', 'class', 'family'
+ ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
+ ... print attrs
+ ... props = n.find ('./' + OOo_Tag ('style', 'properties', m))
+ ... if props is not None and len (props) :
+ ... props [0].tag
+ ['Concat_Standard', None, 'text', 'paragraph']
+ ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph']
+ ['Concat_List', None, 'list', 'paragraph']
+ ['Concat_Caption', None, 'extra', 'paragraph']
+ ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph']
+ ['Concat_Index', None, 'index', 'paragraph']
+ >>> for n in c.findall ('.//*') :
+ ... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
+ ... if zidx :
+ ... print ':'.join(split_tag (n.tag)), zidx
+ draw:frame 0
+ draw:rect 1
+ draw:frame 3
+ draw:rect 4
+ draw:frame 6
+ draw:frame 7
+ draw:frame 8
+ draw:frame 9
+ draw:frame 10
+ draw:frame 11
+ draw:frame 12
+ draw:frame 13
+ draw:frame 14
+ draw:frame 15
+ draw:frame 16
+ draw:frame 18
+ draw:frame 19
+ draw:frame 20
+ draw:frame 17
+ draw:frame 23
+ draw:line 24
+ draw:frame 2
+ draw:frame 5
+ draw:line 22
+ draw:line 21
+ >>> from os import system
+ >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt '
+ ... '-o testout.odt '
+ ... 'salutation=Frau firstname=Erika lastname=Musterfrau '
+ ... 'country=D postalcode=00815 city=Niemandsdorf '
+ ... 'street="Beispielstrasse 42"')
+ 0
+ >>> o = OOoPy (infile = 'testout.odt')
+ >>> c = o.read ('content.xml')
+ >>> m = o.mimetype
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+ >>> for node in body.findall (vset) :
+ ... name = node.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', node.text
+ salutation : Frau
+ firstname : Erika
+ lastname : Musterfrau
+ street : Beispielstrasse 42
+ country : D
+ postalcode : 00815
+ city : Niemandsdorf
+ salutation : Frau
+ firstname : Erika
+ lastname : Musterfrau
+ street : Beispielstrasse 42
+ country : D
+ postalcode : 00815
+ city : Niemandsdorf
+ >>> o.close ()
+ >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' "
+ ... "testfiles/carta.odt testfiles/x.csv")
+ 0
+ >>> o = OOoPy (infile = 'testout.odt')
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
+ >>> for node in body.findall (vset) :
+ ... name = node.get (OOo_Tag ('text', 'name', m))
+ ... print name, ':', node.text
+ Spett : Spettabile
+ contraente : First person
+ indirizzo : street? 1
+ Spett : Egregio
+ contraente : Second Person
+ indirizzo : street? 2
+ tipo : racc. A.C.
+ luogo : Varese
+ oggetto : Saluti
+ tipo : Raccomandata
+ luogo : Gavirate
+ oggetto : Ossequi
+ >>> o.close ()
+ >>> infile = 'testfiles/testenum.odt'
+ >>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt')
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Addpagebreak_Style ()
+ ... , Transforms.Mailmerge
+ ... ( iterator =
+ ... ( dict (firstname = 'Erika', lastname = 'Nobody')
+ ... , dict (firstname = 'Eric', lastname = 'Wizard')
+ ... , cb
+ ... )
+ ... )
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... )
+ >>> t.transform (o)
+ >>> o.close ()
+ >>> o = OOoPy (infile = 'xyzzy.odt')
+ >>> m = o.mimetype
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> textlist = './/' + OOo_Tag ('text', 'list', m)
+ >>> for node in body.findall (textlist) :
+ ... id = node.get (OOo_Tag ('xml', 'id', m))
+ ... print 'xml:id', ':', id
+ xml:id : list1
+ xml:id : list2
+ xml:id : list3
+ >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt')
+ >>> m = o.mimetype
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Concatenate ('testfiles/page2.odt')
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... , Transforms.Manifest_Append ()
+ ... )
+ >>> t.transform (o)
+ >>> o.close ()
+ >>> o = OOoPy (infile = 'xyzzy.odt')
+ >>> c = o.read ('META-INF/manifest.xml')
+ >>> for node in c.getroot () :
+ ... fe = node.get (OOo_Tag ('manifest', 'full-path', m))
+ ... print fe
+ /
+ Pictures/10000000000000C80000007941B1A419.jpg
+ Pictures/10000000000000DC000000B02E191635.jpg
+ Pictures/10000000000000DC000000A337377AAA.jpg
+ meta.xml
+ settings.xml
+ content.xml
+ Thumbnails/thumbnail.png
+ layout-cache
+ manifest.rdf
+ Configurations2/accelerator/current.xml
+ Configurations2/
+ styles.xml
+ >>> for f in o.izip.infolist () :
+ ... print f.filename
+ mimetype
+ settings.xml
+ META-INF/manifest.xml
+ content.xml
+ meta.xml
+ styles.xml
+ Pictures/10000000000000C80000007941B1A419.jpg
+ Pictures/10000000000000DC000000B02E191635.jpg
+ Pictures/10000000000000DC000000A337377AAA.jpg
+ Thumbnails/thumbnail.png
+ layout-cache
+ manifest.rdf
+ Configurations2/images/Bitmaps/
+ Configurations2/accelerator/current.xml
+ >>> sio = StringIO ()
+ >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio)
+ >>> m = o.mimetype
+ >>> t = Transformer (
+ ... o.mimetype
+ ... , get_meta (o.mimetype)
+ ... , Transforms.Concatenate ('testfiles/tbl_second.odt')
+ ... , renumber_all (o.mimetype)
+ ... , set_meta (o.mimetype)
+ ... , Transforms.Fix_OOo_Tag ()
+ ... , Transforms.Manifest_Append ()
+ ... )
+ >>> t.transform (o)
+ >>> o.close ()
+ >>> o = OOoPy (infile = sio)
+ >>> c = o.read ('content.xml')
+ >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
+ >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m)
+ >>> for table in body.findall (tbls) :
+ ... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m))
+ ... if name :
+ ... print name
+ ... for t in table.findall ('.//') :
+ ... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m))
+ ... if name :
+ ... print name
+ Tabella1
+ Tabella1.A
+ Tabella1.A1
+ Tabella1.B1
+ Tabella1.A2
+ Tabella1.B2
+ Tabella1
+ Tabella1.A
+ Tabella1.A1
+ Tabella1.B1
+ Tabella1.A2
+ Tabella1.B2
+ """
+
def __init__ (self, mimetype, *tf) :
    """ Set up a transformer for documents of the given mimetype and
        insert all transforms given in tf.

        mimetype must be one of the known mimetypes (checked with an
        assertion). Every item of tf is handed to insert in the order
        given.
    """
    assert (mimetype in mimetypes)
    self.mimetype     = mimetype
    self.transforms   = {}
    # The value store and the list of files to append have to exist
    # before the first transform is inserted: insert calls the
    # register hook of the transform and such a hook may already
    # store a value in the transformer via __setitem__. Creating the
    # store only after the inserts would also throw away whatever
    # was stored during registration.
    self.dictionary   = {}
    # 2-tuples of filename, content
    self.appendfiles  = []
    # dict.__contains__ performs the same membership test as the
    # has_key method of a dict but exists on every dict
    # implementation.
    self.has_key      = self.dictionary.__contains__
    # NOTE(review): an instance attribute named __contains__ is only
    # reachable by calling it explicitly; whether the "in" operator
    # honours it depends on the kind of class this is -- confirm.
    self.__contains__ = self.has_key
    for t in tf :
        self.insert (t)
# end def __init__
+
def insert (self, transform) :
    """ Insert a new transform: it is filed behind the transforms
        already known for its priority (attribute prio), afterwards
        the transform gets the chance to register itself with this
        transformer.
    """
    same_prio = self.transforms.setdefault (transform.prio, [])
    same_prio.append (transform)
    transform.register (self)
# end def insert
+
def transform (self, ooopy) :
    """
        Apply all the transforms in priority order.
        Priority order is global over all transforms, transforms
        sharing a priority run in the order they were inserted.

        Every file listed in files is read from ooopy first, the
        resulting trees are handed to each transform and written
        back afterwards. Finally the files collected in appendfiles
        are appended.
    """
    self.trees = {}
    for part in files :
        self.trees [part] = ooopy.read (part)
    # self.dictionary is deliberately left alone here, so values
    # stored by an earlier run are still present.
    for prio in sorted (self.transforms) :
        for tf in self.transforms [prio] :
            tf.apply_all (self.trees)
    for tree in self.trees.values () :
        tree.write ()
    # The files are appended via the ooopy attribute of the tree that
    # was written last.
    for name, content in self.appendfiles :
        tree.ooopy.append_file (name, content)
# end def transform
+
def __getitem__ (self, key) :
    """ Return the value stored under key in the dictionary of this
        transformer, a missing key raises the KeyError of the
        underlying dict.
    """
    store = self.dictionary
    return store [key]
# end def __getitem__
+
def __setitem__ (self, key, value) :
    """ Store value under key in the dictionary of this transformer,
        an existing entry is overwritten.
    """
    store = self.dictionary
    store [key] = value
# end def __setitem__
+# end class Transformer
diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py
new file mode 100644
index 000000000..50a6c0db8
--- /dev/null
+++ b/ooopy/Transforms.py
@@ -0,0 +1,1237 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
+# Reichergasse 131, A-3411 Weidling.
+# Web: http://www.runtux.com Email: office@runtux.com
+# All rights reserved
+# ****************************************************************************
+#
+# This library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# ****************************************************************************
+
+from __future__ import absolute_import
+
+import time
+import re
+try :
+ from xml.etree.ElementTree import dump, SubElement, Element, tostring
+except ImportError :
+ from elementtree.ElementTree import dump, SubElement, Element, tostring
+from copy import deepcopy
+from ooopy.OOoPy import OOoPy, autosuper
+from ooopy.Transformer import files, split_tag, OOo_Tag, Transform
+from ooopy.Transformer import mimetypes, namespace_by_name
+from ooopy.Version import VERSION
+
# names of the counter attributes found in meta.xml
meta_counts = tuple \
    ( '%s-count' % kind
      for kind in
        ( 'character', 'image', 'object', 'page'
        , 'paragraph', 'table', 'word'
        )
    )
+
class Access_Attribute (autosuper) :
    """ Common base of all attribute accessors.

        For performance reasons there is no separate transform for
        each single read or change of an attribute. Every access is
        described by an object following the attribute access api
        instead, and a single Attribute_Access applies all of these
        objects in one go.

        If a key is given it is stored as "prefix:key", the prefix
        defaults to the name of the class of the object.
    """

    def __init__ (self, key = None, prefix = None, ** kw) :
        self.__super.__init__ (key = key, prefix = prefix, **kw)
        if key :
            # qualify the key, by default with the name of our class
            key = ':'.join ((prefix or self.__class__.__name__, key))
        self.key = key
    # end def __init__

    def register (self, transformer) :
        """ Remember the transformer we are used with. """
        self.transformer = transformer
    # end def register

    def use_value (self, oldval = None) :
        """ Hook called with the current value of the attribute.
            The returned value replaces the attribute, returning
            None or oldval leaves the attribute unchanged.
            Has to be implemented by derived classes.
        """
        raise NotImplementedError \
            ("use_value must be defined in derived class")
    # end def use_value

# end class Access_Attribute
+
class Get_Attribute (Access_Attribute) :
    """ Accessor that never changes the attribute: the value seen is
        only stored in the transformer under the key of this object.
    """

    def __init__ (self, tag, attr, key, transform = None, ** kw) :
        self.__super.__init__ (key = key, **kw)
        # transform is only stored here, this class does not use it
        self.tag, self.attribute, self.transform = tag, attr, transform
    # end def __init__

    def use_value (self, oldval = None) :
        """ Store oldval in the transformer. Nothing is returned, so
            the attribute is left as it is.
        """
        self.transformer [self.key] = oldval
    # end def use_value

# end class Get_Attribute
+
class Get_Max (Access_Attribute) :
    """ Accessor that keeps the largest value seen for an attribute
        in the transformer, stored under the key of this object. The
        attribute itself is never changed.
    """

    def __init__ (self, tag, attr, key, transform = None, ** kw) :
        self.__super.__init__ (key = key, **kw)
        # transform is only stored here, this class does not use it
        self.tag, self.attribute, self.transform = tag, attr, transform
    # end def __init__

    def register (self, transformer) :
        """ Besides remembering the transformer, seed the stored
            maximum with -1.
        """
        self.__super.register (transformer)
        transformer [self.key] = -1
    # end def register

    def use_value (self, oldval = None) :
        """ Replace the stored maximum if oldval is larger. """
        # NOTE(review): the values are compared just as they are
        # handed in; should these be attribute strings, the
        # comparison is not a numeric one -- confirm this is intended.
        best_so_far = self.transformer [self.key]
        if best_so_far < oldval :
            self.transformer [self.key] = oldval
    # end def use_value

# end class Get_Max
+
class Renumber (Access_Attribute) :
    """ Describes a renumbering. OOo uses a 'name' attribute on
        several different tags, e.g., tables, frames, sections etc.
        and these names have to be unique in the whole document. OOo
        itself achieves this by appending a unique number to a
        basename for each element, e.g., sections are called
        'Section1', 'Section2', ...
        A Renumber restores a proper numbering after operations that
        destroy it, e.g., after a mailmerge where the same document
        is appended over and over.

        The basename defaults to the local part of the tag with the
        first letter in upper case, numbering begins with start.
        With force set the new name is also produced for elements
        where the attribute in question does not exist.
    """

    def __init__ \
        (self, tag, name = None, attr = None, start = 1, force = False) :
        self.__super.__init__ ()
        namespace, local = split_tag (tag)
        if not name :
            name = local [0].upper () + local [1:]
        self.tag_ns    = namespace
        self.tag       = tag
        self.name      = name
        self.num       = start
        self.force     = force
        self.attribute = attr
    # end def __init__

    def register (self, transformer) :
        """ Without an explicit attribute the 'name' attribute in
            the namespace of our tag is used; it can only be
            computed now because the mimetype of the transformer is
            needed for it.
        """
        self.__super.register (transformer)
        if self.attribute :
            return
        self.attribute = OOo_Tag (self.tag_ns, 'name', transformer.mimetype)
    # end def register

    def use_value (self, oldval = None) :
        """ Return the next name in sequence. A missing attribute
            (oldval is None) is skipped unless force is set, nothing
            is returned and no number is used up in that case.
        """
        if self.force or oldval is not None :
            renamed   = "%s%d" % (self.name, self.num)
            self.num += 1
            return renamed
        return None
    # end def use_value

# end class Renumber
+
class Set_Attribute (Access_Attribute) :
    """
        Like the renumbering this assigns new values to some
        attributes. The new value is either looked up in the
        Transformer dict under our key or, if nothing is stored
        there, taken from the value given on construction.
        With oldvalue given only attributes that currently have this
        value are replaced.
    """

    def __init__ \
        ( self
        , tag
        , attr
        , key       = None
        , transform = None
        , value     = None
        , oldvalue  = None
        , ** kw
        ) :
        self.__super.__init__ (key = key, ** kw)
        self.tag, self.attribute = tag, attr
        # transform is only stored here, this class does not use it
        self.transform = transform
        self.value, self.oldvalue = value, oldvalue
    # end def __init__

    def use_value (self, oldval) :
        """ Return the replacement for oldval or None if the
            attribute should stay as it is. A missing attribute
            (oldval is None) is never created.
        """
        if oldval is None :
            return None
        expected = self.oldvalue
        if expected and oldval != expected :
            return None
        # a value stored in the transformer wins over the fixed value
        if self.key and self.transformer.has_key (self.key) :
            return str (self.transformer [self.key])
        return self.value
    # end def use_value

# end class Set_Attribute
+
def set_attributes_from_dict (tag, attr, d) :
    """ Convenience function: build one Set_Attribute for each item
        of the dictionary d. The key of an item is the old value of
        the attribute attr of tag, the value of the item is the
        replacement. The list of these objects is returned.
    """
    changers = []
    for old, new in d.items () :
        changers.append \
            (Set_Attribute (tag, attr, oldvalue = old, value = new))
    return changers
# end def set_attributes_from_dict
+
class Reanchor (Access_Attribute) :
    """
        Like the renumbering this assigns new values to some
        attributes. Here the aim is to relocate objects that are
        anchored to a page: the given offset is added to the number
        found in the attribute.
    """

    def __init__ (self, offset, tag, attr = None) :
        self.__super.__init__ ()
        self.tag       = tag
        self.attribute = attr
        self.offset    = int (offset)
    # end def __init__

    def register (self, transformer) :
        """ Without an explicit attribute text:anchor-page-number is
            used; it can only be computed now because the mimetype
            of the transformer is needed for it.
        """
        self.__super.register (transformer)
        if self.attribute :
            return
        self.attribute = \
            OOo_Tag ('text', 'anchor-page-number', transformer.mimetype)
    # end def register

    def use_value (self, oldval) :
        """ Return oldval shifted by our offset, formatted as a
            decimal string. A missing attribute (oldval is None)
            stays missing.
        """
        if oldval is not None :
            return "%d" % (int (oldval) + self.offset)
        return None
    # end def use_value

# end class Reanchor
+
+#
+# general transforms applicable to several .xml files
+#
+
class Attribute_Access (Transform) :
    """
        Read or change attributes in an OOo document, e.g., for
        renumbering or for moving anchored objects.
        Gets a list of attribute changer objects on construction,
        these have to follow the (very simple) attribute changer API:

        - The member function "use_value" is called with the current
          value of the attribute and returns the new value. If it
          returns None the attribute is left unchanged.
        - The attribute "tag" is the tag of the elements the changer
          is interested in.
        - The attribute "attribute" is the name of the attribute to
          read or change.
        - The member function "register" is called with the
          transformer.
        Renumber and Reanchor above are examples of this API.
    """
    filename = 'content.xml'
    prio     = 110

    def __init__ (self, attrchangers, filename = None, ** kw) :
        if not filename :
            filename = self.filename
        self.filename     = filename
        # Changers by tag, there may be several for a single tag.
        # The entry for None is applied to every element.
        self.attrchangers = {None : []}
        self.changers     = attrchangers
        self.__super.__init__ (** kw)
    # end def __init__

    def register (self, transformer) :
        """ Sort the changers by tag and register the transformer
            with each of them.
        """
        self.__super.register (transformer)
        for changer in self.changers :
            self.attrchangers.setdefault (changer.tag, []).append (changer)
            changer.register (transformer)
    # end def register

    def apply (self, root) :
        """ Visit root and all elements below it and hand the
            attributes in question to the changers of each element.
        """
        nodes = [root]
        nodes.extend (root.findall ('.//*'))
        for node in nodes :
            for_all = self.attrchangers [None]
            for_tag = self.attrchangers.get (node.tag, [])
            for changer in for_all + for_tag :
                new_value = changer.use_value (node.get (changer.attribute))
                if new_value is None :
                    continue
                node.set (changer.attribute, new_value)
    # end def apply

# end class Attribute_Access
+
+#
+# META-INF/manifest.xml transforms
+#
+
class Manifest_Append (Transform) :
    """
        The Transformer keeps a list of files (with their contents)
        to append. These files are added to the archive later on but
        they have to show up in the manifest, too.
        The list in the Transformer currently has no media type
        (which is one of the parameters in the manifest). So far this
        transform is used for adding pictures -- these had no media
        type in the files that were checked.
        So for now an empty media type is written.
    """
    filename = 'META-INF/manifest.xml'
    prio     = 1000

    def apply (self, root) :
        """ Insert a file-entry for each file to append, directly
            behind the entry for '/'. Every existing child up to
            that entry is asserted to be a file-entry with a
            full-path, an assertion also fails if there is no entry
            for '/'.
        """
        for idx, entry in enumerate (root) :
            assert entry.tag == self.oootag ('manifest', 'file-entry')
            full_path = entry.get (self.oootag ('manifest', 'full-path'))
            assert (full_path)
            if full_path == '/' :
                break
        else :
            assert (not "The manifest needs a '/' entry")
        # idx is the position of the '/' entry, the new entries keep
        # the order of appendfiles
        for offset, (fname, _) in enumerate (self.transformer.appendfiles) :
            new_entry = Element (self.oootag ('manifest', 'file-entry'))
            new_entry.set (self.oootag ('manifest', 'full-path'),  fname)
            new_entry.set (self.oootag ('manifest', 'media-type'), '')
            root.insert (idx + 1 + offset, new_entry)
    # end def apply

# end class Manifest_Append
+
+#
+# meta.xml transforms
+#
+
class Editinfo (Transform) :
    """
        Example of modifying the OOo meta info (edit information,
        author, etc). Some of the items are set: the program that
        generated the OOo file, the modification time, the number of
        editing cycles and the overall editing duration. To set
        other items derive from this transform and replace the
        "replace" variable (pun intended) in the derived class.
    """
    filename = 'meta.xml'
    prio     = 20
    # NOTE(review): this dict is built once when the class body is
    # executed, so the date is the time of that moment, not the time
    # a document is transformed -- confirm this is intended.
    repl     = \
        { ('meta', 'generator')        : 'OOoPy field replacement'
        , ('dc',   'date')             : time.strftime ('%Y-%m-%dT%H:%M:%S')
        , ('meta', 'editing-cycles')   : '0'
        , ('meta', 'editing-duration') : 'PT0M0S'
        }
    # The same replacements keyed by the full tag. The tag is
    # computed for every known mimetype, so the table works for all
    # known mimetypes of OOo documents.
    replace  = {}
    for m in mimetypes :
        for params, value in repl.items () :
            replace [OOo_Tag (mimetype = m, *params)] = value

    def apply (self, root) :
        """ Set the text of every child of office:meta for which
            the replace table has an entry.
        """
        children_of_meta = self.oootag ('office', 'meta') + '/*'
        for node in root.findall (children_of_meta) :
            if node.tag not in self.replace :
                continue
            node.text = self.replace [node.tag]
    # end def apply
# end class Editinfo
+
+#
+# settings.xml transforms
+#
+
+class Autoupdate (Transform) :
+ """
+ This is an example of modifying OOo settings. We set some of the
+ AutoUpdate configuration items in OOo to true. We also specify
+ that links should be updated when reading.
+
+ This was originally intended to make OOo correctly display fields
+ if they were changed with the Field_Replace below
+ (similar to pressing F9 after loading the generated document in
+ OOo). In particular I usually make spaces depend on field
+ contents so that I don't have spurious spaces if a field is
+ empty. Now it would be nice if OOo displayed the spaces correctly
+ after loading a document (It does update the fields before
+ printing, so this is only a cosmetic problem :-). This apparently
+ does not work. If anybody knows how to achieve this, please let
+ me know: mailto:rsc@runtux.com
+ """
+ filename = 'settings.xml'
+ prio = 20
+
+ def apply (self, root) :
+ config = None
+ for config in root.findall \
+ ( self.oootag ('office', 'settings')
+ + '/'
+ + self.oootag ('config', 'config-item-set')
+ ) :
+ name = config.get (self.oootag ('config', 'name'))
+ if name == 'configuration-settings' :
+ break
+ for node in config.findall (self.oootag ('config', 'config-item')) :
+ name = node.get (self.oootag ('config', 'name'))
+ if name == 'LinkUpdateMode' : # update when reading
+ node.text = '2'
+ # update fields when reading
+ if name == 'FieldAutoUpdate' or name == 'ChartAutoUpdate' :
+ node.text = 'true'
+ # end def apply
+# end class Autoupdate
+
+#
+# content.xml transforms
+#
+
+class Field_Replace (Transform) :
+ """
+ Takes a dict of replacement key-value pairs. The key is the name
+ of a variable in OOo. Additional replacement key-value pairs may
+ be specified in ** kw. Alternatively a callback mechanism for
+ variable name lookups is provided. The callback function is
+ given the name of a variable in OOo and is expected to return
+ the replacement value or None if the variable value should not
+ be replaced.
+ """
+ filename = 'content.xml'
+ prio = 100
+
+ def __init__ (self, prio = None, replace = None, ** kw) :
+ """ replace is something behaving like a dict or something
+ callable for name lookups
+ """
+ self.__super.__init__ (prio, ** kw)
+ self.replace = replace or {}
+ self.dict = kw
+ # end def __init__
+
+ def apply (self, root) :
+ tbody = self.find_tbody (root)
+ for tag in 'variable-set', 'variable-get', 'variable-input' :
+ for node in tbody.findall ('.//' + self.oootag ('text', tag)) :
+ attr = 'name'
+ if tag == 'text-input' :
+ attr = 'description'
+ name = node.get (self.oootag ('text', attr))
+ if callable (self.replace) :
+ replace = self.replace (name)
+ if replace :
+ node.text = replace
+ elif name in self.replace :
+ node.text = self.replace [name]
+ elif name in self.dict :
+ node.text = self.dict [name]
+ # end def apply
+# end class Field_Replace
+
+class Addpagebreak_Style (Transform) :
+ """
+ This transformation adds a new ad-hoc paragraph style to the
+ content part of the OOo document. This is needed to be able to
+ add new page breaks to an OOo document. Adding a new page break
+ is then a matter of adding an empty paragraph with the given page
+ break style.
+
+ We first look through all defined paragraph styles for
+ determining a new paragraph style number. Convention is P<num>
+ for paragraph styles. We search the highest number and use this
+ incremented by one for the new style to insert. Then we insert
+ the new style and store the resulting style name in the
+ transformer under the key class_name:stylename where class_name
+ is our own class name.
+ """
+ filename = 'content.xml'
+ prio = 30
+ para = re.compile (r'P([0-9]+)')
+
+ def apply (self, root) :
+ max_style = 0
+ styles = root.find (self.oootag ('office', 'automatic-styles'))
+ for s in styles.findall ('./' + self.oootag ('style', 'style')) :
+ m = self.para.match (s.get (self.oootag ('style', 'name'), ''))
+ if m :
+ num = int (m.group (1))
+ if num > max_style :
+ max_style = num
+ stylename = 'P%d' % (max_style + 1)
+ new = SubElement \
+ ( styles
+ , self.oootag ('style', 'style')
+ , { self.oootag ('style', 'name') : stylename
+ , self.oootag ('style', 'family') : 'paragraph'
+ , self.oootag ('style', 'parent-style-name') : 'Standard'
+ }
+ )
+ SubElement \
+ ( new
+ , self.properties_tag
+ , { self.oootag ('fo', 'break-after') : 'page' }
+ )
+ self.set ('stylename', stylename)
+ # end def apply
+# end class Addpagebreak_Style
+
+class Addpagebreak (Transform) :
+ """
+ This transformation adds a page break to the last page of the OOo
+ text. This is needed, e.g., when doing mail-merge: We append a
+ page break to the tbody and then append the next page. This
+ transform needs the name of the paragraph style specifying the
+ page break style. Default is to use
+ 'Addpagebreak_Style:stylename' as the key for
+ retrieving the page style. Alternatively the page style or the
+ page style key can be specified in the constructor.
+ """
+ filename = 'content.xml'
+ prio = 50
+
+ def __init__ (self, stylename = None, stylekey = None, ** kw) :
+ self.__super.__init__ (** kw)
+ self.stylename = stylename
+ self.stylekey = stylekey or 'Addpagebreak_Style:stylename'
+ # end def __init__
+
+ def apply (self, root) :
+ """append to tbody e.g., <text:p text:style-name="P4"/>"""
+ tbody = self.find_tbody (root)
+ stylename = self.stylename or self.transformer [self.stylekey]
+ SubElement \
+ ( tbody
+ , self.oootag ('text', 'p')
+ , { self.oootag ('text', 'style-name') : stylename }
+ )
+ # end def apply
+# end class Addpagebreak
+
+class Fix_OOo_Tag (Transform) :
+ """
+ OOo writer conditions are attributes where the *value* is
+ prefixed by an XML namespace. If the ooow namespace declaration
+ is not in scope, all conditions will evaluate to false. I
+ consider this a bug (a violation of the ideas of XML) of OOo.
+ Nevertheless to make conditions work, we insert the ooow
+ namespace declaration into the top-level element.
+ """
+ filename = 'content.xml'
+ prio = 10000
+
+ def apply (self, root) :
+ if self.mimetype == mimetypes [1] :
+ root.set ('xmlns:ooow', namespace_by_name [self.mimetype]['ooow'])
+ # end def apply
+# end class Fix_OOo_Tag
+
+class _Body_Concat (Transform) :
+ """ Various methods for modifying the tbody split into various pieces
+ that have to keep sequence in order to not confuse OOo.
+ """
+ ooo_sections = {}
+ for m in mimetypes :
+ ooo_sections [m] = \
+ [ { OOo_Tag ('text', 'variable-decls', m) : 1
+ , OOo_Tag ('text', 'sequence-decls', m) : 1
+ , OOo_Tag ('text', 'user-field-decls', m) : 1
+ , OOo_Tag ('office', 'forms', m) : 1
+ }
+ , { OOo_Tag ('draw', 'frame', m) : 1
+ , OOo_Tag ('draw', 'rect', m) : 1
+ , OOo_Tag ('draw', 'text-box', m) : 1
+ }
+ ]
+
+ def _textbody (self) :
+ """
+ We use the office:body (OOo 1.X)/office:text (OOo 1.X)
+ element as a container for various transforms...
+ """
+ return Element (self.textbody_tag)
+ # end def _textbody
+
+ def _divide (self, textbody) :
+ """ Divide self.copy into parts that must keep their sequence.
+ We use another textbody tag for storing the parts...
+ Side-effect of setting self.copyparts is intended.
+ """
+ self.copyparts = self._textbody ()
+ self.copyparts.append (self._textbody ())
+ l = len (self.ooo_sections [self.mimetype])
+ idx = 0
+ for e in textbody :
+ while idx < l :
+ if e.tag in self.ooo_sections [self.mimetype][idx] :
+ break
+ else :
+ self.copyparts.append (self._textbody ())
+ idx += 1
+ self.copyparts [-1].append (e)
+ declarations = self.copyparts [0]
+ del self.copyparts [0]
+ return declarations
+ # end def _divide
+
+ def divide_body (self, root) :
+ cont = root
+ if cont.tag != self.oootag ('office', 'document-content') :
+ cont = root.find (self.oootag ('office', 'document-content'))
+ tbody = cont.find (self.oootag ('office', 'body'))
+ # OOo 2.X has an office:text inside office:body that contains
+ # the real text contents:
+ if self.mimetype == mimetypes [1] :
+ cont = tbody
+ tbody = cont.find (self.oootag ('office', 'text'))
+ idx = cont [:].index (tbody)
+ self.tbody = cont [idx] = self._textbody ()
+ self.declarations = self._divide (tbody)
+ self.bodyparts = self.copyparts
+ # end def divide_body
+
+ def append_declarations (self) :
+ for e in self.declarations :
+ self.tbody.append (e)
+ # end def append_declarations
+
+ def append_to_body (self, cp) :
+ for i in range (len (self.bodyparts)) :
+ for j in cp [i] :
+ self.bodyparts [i].append (j)
+ # end def append_to_body
+
+ def assemble_body (self) :
+ for p in self.bodyparts :
+ for e in p :
+ self.tbody.append (e)
+ # end def assemble_body
+
+ def _get_meta (self, var, classname = 'Get_Attribute', prefix = "") :
+ """ get page- and paragraph-count etc. meta-info """
+ return int (self.transformer [':'.join ((classname, prefix + var))])
+ # end def _get_meta
+
+ def _set_meta (self, var, value, classname = 'Set_Attribute', prefix = "") :
+ """ set page- and paragraph-count etc. meta-info """
+ self.transformer [':'.join ((classname, prefix + var))] = str (value)
+ # end def _set_meta
+# end class _Body_Concat
+
+ class Mailmerge (_Body_Concat) :
+ """
+ This transformation is used to create a mailmerge document using
+ the current document as the template. In the constructor we get
+ an iterator that provides a data set for each item in the
+ iteration. Elements the iterator has to provide are either
+ something that follows the Mapping Type interface (it looks like
+ a dict) or something that is callable and can be used for
+ name-value lookups.
+
+ A precondition for this transform is the application of the
+ Addpagebreak_Style to guarantee that we know the style
+ for adding a page break to the current document. Alternatively
+ the stylename (or the stylekey if a different name should be used
+ for lookup in the current transformer) can be given in the
+ constructor.
+ """
+ filename = 'content.xml'
+ prio = 60
+
+ # iterator: yields one data set (mapping or callable) per merged copy;
+ # stylename / stylekey are handed on unchanged to Addpagebreak in apply.
+ def __init__ \
+ (self, iterator, stylename = None, stylekey = None, ** kw) :
+ self.__super.__init__ (** kw)
+ self.iterator = iterator
+ self.stylename = stylename
+ self.stylekey = stylekey
+ # end def __init__
+
+ def apply (self, root) :
+ """
+ Copy old tbody, create new empty one and repeatedly append the
+ new tbody.
+ """
+ # pb is applied between two copies (see loop below)
+ pb = Addpagebreak \
+ ( stylename = self.stylename
+ , stylekey = self.stylekey
+ , transformer = self.transformer
+ )
+ # the result of this access is read back below via
+ # _get_meta ('z-index', classname = 'Get_Max')
+ zi = Attribute_Access \
+ ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),)
+ , transformer = self.transformer
+ )
+ zi.apply (root)
+
+ # page-count is read from the transformer key 'Get_Attribute:page-count'
+ pagecount = self._get_meta ('page-count')
+ z_index = self._get_meta ('z-index', classname = 'Get_Max') + 1
+ ra = Attribute_Access \
+ ( ( Reanchor (pagecount, self.oootag ('draw', 'text-box'))
+ , Reanchor (pagecount, self.oootag ('draw', 'rect'))
+ , Reanchor (pagecount, self.oootag ('draw', 'frame'))
+ , Reanchor (z_index, None, self.oootag ('draw', 'z-index'))
+ )
+ , transformer = self.transformer # transformer added
+ )
+ # divide_body sets self.tbody (new, empty), self.declarations and
+ # self.copyparts; bodyparts gets one fresh container per part.
+ self.divide_body (root)
+ self.bodyparts = [self._textbody () for i in self.copyparts]
+
+ count = 0
+ for i in self.iterator :
+ count += 1
+ fr = Field_Replace (replace = i, transformer = self.transformer)
+ # add page break only to non-empty tbody
+ # reanchor only after the first mailmerge
+ if len (self.tbody) : # tbody non-empty (but existing!)
+ pb.apply (self.bodyparts [-1])
+ ra.apply (self.copyparts)
+ else :
+ self.append_declarations ()
+ # fields are replaced in a copy, the template parts stay reusable
+ cp = deepcopy (self.copyparts)
+ fr.apply (cp)
+ self.append_to_body (cp)
+ # new page-count:
+ for i in meta_counts :
+ self._set_meta (i, count * self._get_meta (i))
+ # we have added count-1 paragraphs, because each page-break is a
+ # paragraph.
+ p = 'paragraph-count'
+ self._set_meta \
+ (p, self._get_meta (p, classname = 'Set_Attribute') + (count - 1))
+ self.assemble_body ()
+ # end def apply
+ # end class Mailmerge
+
+def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) :
+ """ Serialise a style-element of an OOo document (e.g., a
+ style:font-decl, style:default-style, etc declaration).
+ We remove the name of the style and return something that is a
+ representation of the style element which can be used as a
+ dictionary key.
+ The serialisation format is a tuple containing the tag as the
+ first item, the attributes (as key,value pairs returned by
+ items()) as the second item and the following items are
+ serialisations of children.
+ """
+ attr = dict (element.attrib)
+ stylename = OOo_Tag ('style', 'name', mimetype)
+ if stylename in attr : del attr [stylename]
+ attr = attr.items ()
+ attr.sort ()
+ attr = tuple (attr)
+ serial = [prefix + element.tag, attr]
+ for e in element :
+ serial.append (tree_serialise (e, prefix, mimetype))
+ return tuple (serial)
+# end def tree_serialise
+
+ class Concatenate (_Body_Concat) :
+ """
+ This transformation is used to create a new document from a
+ concatenation of several documents. In the constructor we get a
+ list of documents to append to the master document.
+ """
+ prio = 80
+ # style_containers: tags of the top-level elements whose children are
+ # treated as styles by style_merge (filled for all mimetypes).
+ style_containers = {}
+ # ref_attrs: referencing attribute -> tag of the referenced element
+ # (filled for all mimetypes, see below).
+ ref_attrs = {}
+ for m in mimetypes :
+ style_containers.update \
+ ({ OOo_Tag ('office', 'font-decls', m) : 1
+ , OOo_Tag ('office', 'font-face-decls', m) : 1
+ , OOo_Tag ('office', 'styles', m) : 1
+ , OOo_Tag ('office', 'automatic-styles', m) : 1
+ , OOo_Tag ('office', 'master-styles', m) : 1
+ })
+ # Cross-references in OOo document:
+ # 'attribute' references another element with 'tag'.
+ # If attribute names change, we must replace references, too.
+ # attribute :
+ # tag
+ ref_attrs.update \
+ ({ OOo_Tag ('style', 'parent-style-name', m) :
+ OOo_Tag ('style', 'style', m)
+ , OOo_Tag ('style', 'master-page-name', m) :
+ OOo_Tag ('style', 'master-page', m)
+ , OOo_Tag ('style', 'page-layout-name', m) : # OOo 2.X
+ OOo_Tag ('style', 'page-layout', m)
+ , OOo_Tag ('style', 'page-master-name', m) :
+ OOo_Tag ('style', 'page-master', m)
+ , OOo_Tag ('table', 'style-name', m) :
+ OOo_Tag ('style', 'style', m)
+ , OOo_Tag ('text', 'style-name', m) :
+ OOo_Tag ('style', 'style', m)
+ , OOo_Tag ('draw', 'style-name', m) :
+ OOo_Tag ('style', 'style', m)
+ , OOo_Tag ('draw', 'text-style-name', m) :
+ OOo_Tag ('style', 'style', m)
+ })
+ # stylefiles: files for which apply_all records the top-level sections;
+ # oofiles: files whose trees are collected from the master document
+ # and from every appended document.
+ stylefiles = ['styles.xml', 'content.xml']
+ oofiles = stylefiles + ['meta.xml']
+
+ # names (without the plural 's') of the declaration sections that
+ # body_decl merges
+ body_decl_sections = ['variable-decl', 'sequence-decl']
+
+ def __init__ (self, * docs, ** kw) :
+ self.__super.__init__ (** kw)
+ self.docs = []
+ for doc in docs :
+ self.docs.append (OOoPy (infile = doc))
+ assert (self.docs [-1].mimetype == self.docs [0].mimetype)
+ # end def __init__
+
+ # Entry point: trees maps file names to parsed trees of the master
+ # document (trees [f].getroot () is used for every f in self.oofiles).
+ # Sets up the bookkeeping attributes, then runs set_pagestyle,
+ # style_merge (styles.xml, content.xml), body_concat and append_pictures.
+ def apply_all (self, trees) :
+ assert (self.docs [0].mimetype == self.transformer.mimetype)
+ # serialised: style serialisation -> style name (see style_merge)
+ self.serialised = {}
+ # stylenames: (key, name) pairs already taken (see _newname)
+ self.stylenames = {}
+ # namemaps: one dict per document, index 0 is the master document
+ self.namemaps = [{}]
+ self.tab_depend = {}
+ for s in self.ref_attrs.itervalues () :
+ self.namemaps [0][s] = {}
+ self.body_decls = {}
+ for s in self.body_decl_sections :
+ self.body_decls [s] = {}
+ # trees [f]: list of roots, index 0 is the master document
+ self.trees = {}
+ for f in self.oofiles :
+ self.trees [f] = [trees [f].getroot ()]
+ # sections [f]: tag -> top-level node of the master document
+ self.sections = {}
+ for f in self.stylefiles :
+ self.sections [f] = {}
+ for node in self.trees [f][0] :
+ self.sections [f][node.tag] = node
+ for d in self.docs :
+ self.namemaps.append ({})
+ for s in self.ref_attrs.itervalues () :
+ self.namemaps [-1][s] = {}
+ for f in self.oofiles :
+ self.trees [f].append (d.read (f).getroot ())
+ # append a pagebreak style, will be optimized away if duplicate
+ pbs = Addpagebreak_Style (transformer = self.transformer)
+ pbs.apply (self.trees ['content.xml'][0])
+ get_attr = []
+ for attr in meta_counts :
+ a = self.oootag ('meta', attr)
+ t = self.oootag ('meta', 'document-statistic')
+ get_attr.append (Get_Attribute (t, a, 'concat-' + attr))
+ zi = Attribute_Access \
+ ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),)
+ , transformer = self.transformer
+ )
+ zi.apply (self.trees ['content.xml'][0])
+ # self.zi and self.getmeta are applied per appended document in
+ # body_concat; their results use the 'concat-' prefix.
+ self.zi = Attribute_Access \
+ ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'concat-z-index')
+ ,
+ )
+ , transformer = self.transformer
+ )
+ self.getmeta = Attribute_Access \
+ (get_attr, filename = 'meta.xml', transformer = self.transformer)
+ self.pbname = self.transformer \
+ [':'.join (('Addpagebreak_Style', 'stylename'))]
+ # NOTE(review): default_style is only bound if styles.xml of the
+ # master document contains a style:default-style with family
+ # 'paragraph'; otherwise the lookup below fails with a NameError.
+ for s in self.trees ['styles.xml'][0].findall \
+ ('.//' + self.oootag ('style', 'default-style')) :
+ if s.get (self.oootag ('style', 'family')) == 'paragraph' :
+ default_style = s
+ break
+ self.default_properties = default_style.find \
+ ('./' + self.properties_tag)
+ self.set_pagestyle ()
+ for f in 'styles.xml', 'content.xml' :
+ self.style_merge (f)
+ self.body_concat ()
+ self.append_pictures ()
+ # end def apply_all
+
+ def apply_tab_correction (self, node) :
+ """ Check if node depends on a style which has corrected tabs
+ if yes, insert all the default tabs *after* the maximum tab
+ position in that style.
+ """
+ tab_stops = self.oootag ('style', 'tab-stops')
+ tab_stop = self.oootag ('style', 'tab-stop')
+ tab_pos = self.oootag ('style', 'position')
+ parent = node.get (self.oootag ('style', 'parent-style-name'))
+ # self.tab_depend is (re)set in apply_all and merge_defaultstyle
+ if parent in self.tab_depend :
+ for prop in node :
+ if prop.tag != self.properties_tag :
+ continue
+ for sub in prop :
+ if sub.tag == tab_stops :
+ self.tab_depend [parent] = 1
+ # NOTE: the local name max shadows the builtin in this method
+ max = 0
+ for ts in sub :
+ assert (ts.tag == tab_stop)
+ # the last two characters of the position are cut off
+ # before conversion -- presumably a two-letter unit
+ # such as 'cm'; TODO confirm for other units
+ pos = float (ts.get (tab_pos) [:-2])
+ if max < pos :
+ max = pos
+ self.insert_tabs (sub, max)
+ # end def apply_tab_correction
+
+ def _attr_rename (self, idx) :
+ r = sum \
+ ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v])
+ for k,v in self.ref_attrs.iteritems ()
+ ]
+ , []
+ )
+ return Attribute_Access (r, transformer = self.transformer)
+ # end def _attr_rename
+
+ # Append the text bodies of all documents in self.docs to the body of
+ # the master document (index 0 of self.trees) and write the summed
+ # meta counts back via _set_meta.
+ def body_concat (self) :
+ # running totals, starting with the values of the master document
+ count = {}
+ for i in meta_counts :
+ count [i] = self._get_meta (i)
+ count ['z-index'] = self._get_meta \
+ ('z-index', classname = 'Get_Max') + 1
+ pb = Addpagebreak \
+ (stylename = self.pbname, transformer = self.transformer)
+ self.divide_body (self.trees ['content.xml'][0])
+ # append = 0: only register the declaration names of the master
+ self.body_decl (self.declarations, append = 0)
+ for idx in range (1, len (self.docs) + 1) :
+ meta = self.trees ['meta.xml'][idx]
+ content = self.trees ['content.xml'][idx]
+ tbody = self.find_tbody (content)
+ # results are read back below with prefix = 'concat-'
+ self.getmeta.apply (meta)
+ self.zi.apply (tbody)
+
+ # ra is built from the totals *before* this document is added
+ ra = Attribute_Access \
+ ( ( Reanchor
+ (count ['page-count'], self.oootag ('draw', 'text-box'))
+ , Reanchor
+ (count ['page-count'], self.oootag ('draw', 'rect'))
+ , Reanchor
+ (count ['page-count'], self.oootag ('draw', 'frame'))
+ , Reanchor
+ (count ['z-index'], None, self.oootag ('draw', 'z-index'))
+ )
+ , transformer = self.transformer # transformer added
+ )
+ for i in meta_counts :
+ count [i] += self._get_meta (i, prefix = 'concat-')
+ # one more paragraph for the page break appended below
+ count ['paragraph-count'] += 1
+ count ['z-index'] += self._get_meta \
+ ('z-index', classname = 'Get_Max', prefix = 'concat-') + 1
+ # NOTE(review): namemap is assigned but not used in this method
+ namemap = self.namemaps [idx][self.oootag ('style', 'style')]
+ tr = self._attr_rename (idx)
+ pb.apply (self.bodyparts [-1])
+ tr.apply (content)
+ ra.apply (content)
+ # _divide replaces self.copyparts by the parts of this document
+ declarations = self._divide (tbody)
+ self.body_decl (declarations)
+ self.append_to_body (self.copyparts)
+ self.append_declarations ()
+ self.assemble_body ()
+ for i in meta_counts :
+ self._set_meta (i, count [i])
+ # end def body_concat
+
+ def body_decl (self, decl_section, append = 1) :
+ for sect in self.body_decl_sections :
+ s = self.declarations.find \
+ ('.//' + self.oootag ('text', sect + 's'))
+ d = self.body_decls [sect]
+ t = self.oootag ('text', sect)
+ for n in decl_section.findall ('.//' + t) :
+ name = n.get (self.oootag ('text', 'name'))
+ if name not in d :
+ if append and s is not None :
+ s.append (n)
+ d [name] = 1
+ # end def body_decl
+
+ def insert_tabs (self, element, max = 0) :
+ """ Insert tab stops into the current element. Optionally after
+ max = the current maximum tab-position
+ """
+ dist_tag = self.oootag ('style', 'tab-stop-distance')
+ for k in range (1, len (self.tab_correct)) :
+ if self.tab_correct [-k].isdigit() :
+ break
+ l = float (self.tab_correct [:-k])
+ unit = self.tab_correct [-k:]
+ for ts in range (35) :
+ pos = l * (ts + 1)
+ if pos > max :
+ SubElement \
+ ( element
+ , self.oootag ('style', 'tab-stop')
+ , { self.oootag ('style', 'position') : '%s%s' % (pos, unit)
+ }
+ )
+ # end def insert_tabs
+
+ # Copy those properties of default_style that differ from
+ # self.default_properties (set in apply_all) and that node does not
+ # set itself into the properties of node. A differing
+ # tab-stop-distance is not copied but turned into explicit tab stops.
+ def merge_defaultstyle (self, default_style, node) :
+ assert default_style is not None
+ assert node is not None
+ proppath = './' + self.properties_tag
+ defprops = default_style.find (proppath)
+ props = node.find (proppath)
+ sn = self.oootag ('style', 'name')
+ if props is None :
+ props = Element (self.properties_tag)
+ for k, v in defprops.attrib.iteritems () :
+ if self.default_properties.get (k) != v and not props.get (k) :
+ if k == self.oootag ('style', 'tab-stop-distance') :
+ # remembered for insert_tabs and apply_tab_correction
+ self.tab_correct = v
+ self.tab_depend = {node.get (sn) : 1}
+ stps = SubElement \
+ (props, self.oootag ('style', 'tab-stops'))
+ self.insert_tabs (stps)
+ else :
+ props.set (k,v)
+ # NOTE(review): if props was found in node above it is already a
+ # child of node; appending it here would presumably add it a second
+ # time -- confirm against the original (indented) source.
+ if len (props) or props.attrib :
+ node.append (props)
+ # end def merge_defaultstyle
+
+ def _newname (self, key, oldname) :
+ stylenum = 0
+ if (key, oldname) not in self.stylenames :
+ self.stylenames [(key, oldname)] = 1
+ return oldname
+ newname = basename = 'Concat_%s' % oldname
+ while (key, newname) in self.stylenames :
+ stylenum += 1
+ newname = '%s%d' % (basename, stylenum)
+ self.stylenames [(key, newname)] = 1
+ return newname
+ # end def _newname
+
+ def set_pagestyle (self) :
+ """ For all documents: search for the first paragraph of the tbody
+ and get its style. Modify this style to include a reference
+ to the default page-style if it doesn't contain a reference
+ to a page style. Insert the new style into the list of
+ styles and modify the first paragraph to use the new page
+ style.
+ This procedure is necessary to make appended documents use
+ their page style instead of the master page style of the
+ first document.
+ FIXME: We should search the style hierarchy backwards for
+ the style of the first paragraph to check if there is a
+ reference to a page-style somewhere and not override the
+ page-style in this case. Otherwise appending complex
+ documents that use a different page-style for the first page
+ will not work if the page style is referenced in a style
+ from which the first paragraph style derives.
+ """
+ # index 0 is the master document, only appended documents are handled
+ for idx in range (1, len (self.docs) + 1) :
+ croot = self.trees ['content.xml'][idx]
+ sroot = self.trees ['styles.xml'] [idx]
+ tbody = self.find_tbody (croot)
+ para = tbody.find ('./' + self.oootag ('text', 'p'))
+ if para is None :
+ para = tbody.find ('./' + self.oootag ('text', 'list'))
+ tsn = self.oootag ('text', 'style-name')
+ # NOTE(review): para is still None if the body has neither a
+ # text:p nor a text:list child; para.get would then fail.
+ sname = para.get (tsn)
+ styles = croot.find (self.oootag ('office', 'automatic-styles'))
+ ost = sroot.find (self.oootag ('office', 'styles'))
+ mst = sroot.find (self.oootag ('office', 'master-styles'))
+ assert mst is not None and len (mst)
+ assert mst [0].tag == self.oootag ('style', 'master-page')
+ sntag = self.oootag ('style', 'name')
+ # name of the first master page of the appended document
+ master = mst [0].get (sntag)
+ mpn = self.oootag ('style', 'master-page-name')
+ stytag = self.oootag ('style', 'style')
+ style = None
+ # look for the paragraph's style in the automatic styles first ...
+ for s in styles :
+ if s.tag == stytag :
+ # Explicit references to default style converted to
+ # explicit references to new page style.
+ if s.get (mpn) == '' :
+ s.set (mpn, master)
+ if s.get (sntag) == sname :
+ style = s
+ # ... then in the office:styles of styles.xml
+ if style is None :
+ for s in ost :
+ if s.tag == stytag and s.get (sntag) == sname :
+ style = s
+ break
+ if style is not None and not style.get (mpn) :
+ newstyle = deepcopy (style)
+ # Don't register with newname: will be rewritten later
+ # when appending. We assume that an original doc does
+ # not already contain a style with _Concat suffix.
+ newname = sname + '_Concat'
+ para.set (tsn, newname)
+ newstyle.set (self.oootag ('style', 'name'), newname)
+ newstyle.set (mpn, master)
+ styles.append (newstyle)
+ # end def set_pagestyle
+
+ def style_merge (self, oofile) :
+ """ Loop over all the docs in our document list and look up the
+ styles there. If a style matches an existing style in the
+ original document, register the style name for later
+ transformation if the style name in the original document
+ does not match the style name in the appended document. If
+ no match is found, append style to master document and add
+ to serialisation. If the style name already exists in the
+ master document, a new style name is created. Names of
+ parent styles are changed when appending -- this means that
+ parent style names already have to be defined earlier in the
+ document.
+
+ If there is a reference to a parent style that is not yet
+ defined, and the parent style is defined later, it is
+ already too late, so an assertion is raised in this case.
+ OOo seems to ensure declaration order of dependent styles,
+ so this should not be a problem.
+ """
+ # index 0 is the master document, the appended documents follow
+ for idx in range (len (self.trees [oofile])) :
+ namemap = self.namemaps [idx]
+ root = self.trees [oofile][idx]
+ # NOTE: delnode stays empty because the only append to it is
+ # commented out below; the deletion loop at the end is a no-op.
+ delnode = []
+ for nodeidx, node in enumerate (root) :
+ if node.tag not in self.style_containers :
+ continue
+ prefix = ''
+ # font_decls may have same name in styles.xml and content.xml
+ if node.tag == self.font_decls_tag :
+ prefix = oofile
+ default_style = None
+ for n in node :
+ if ( n.tag == self.oootag ('style', 'default-style')
+ and ( n.get (self.oootag ('style', 'family'))
+ == 'paragraph'
+ )
+ ) :
+ default_style = n
+ name = n.get (self.oootag ('style', 'name'), None)
+ if not name : continue
+ if ( idx != 0
+ and name == 'Standard'
+ and n.get (self.oootag ('style', 'class')) == 'text'
+ and ( n.get (self.oootag ('style', 'family'))
+ == 'paragraph'
+ )
+ ) :
+ self.merge_defaultstyle (default_style, n)
+ self.apply_tab_correction (n)
+ key = prefix + n.tag
+ if key not in namemap : namemap [key] = {}
+ # rename the references in n before n is serialised
+ tr = self._attr_rename (idx)
+ tr.apply (n)
+ sn = tree_serialise (n, prefix, self.mimetype)
+ # an identical style is already known: only map the name
+ if sn in self.serialised :
+ newname = self.serialised [sn]
+ if name != newname :
+ assert \
+ ( name not in namemap [key]
+ or namemap [key][name] == newname
+ )
+ namemap [key][name] = newname
+ # optimize original doc: remove duplicate styles
+ if not idx and node.tag != self.font_decls_tag :
+ pass
+ #delnode.append (nodeidx)
+ else :
+ # unknown style: register it, with a new name if needed
+ newname = self._newname (key, name)
+ self.serialised [sn] = newname
+ if newname != name :
+ n.set (self.oootag ('style', 'name'), newname)
+ dn = self.oootag ('style', 'display-name')
+ disp_name = n.get (dn)
+ if disp_name :
+ n.set (dn, 'Concat ' + disp_name)
+ namemap [key][name] = newname
+ if idx != 0 :
+ self.sections [oofile][node.tag].append (n)
+ assert not delnode or not idx
+ delnode.reverse ()
+ for i in delnode :
+ del node [i]
+ # end style_merge
+
+ def append_pictures (self) :
+ for doc in self.docs :
+ for f in doc.izip.infolist () :
+ if f.filename.startswith ('Pictures/') :
+ self.transformer.appendfiles.append \
+ ((f.filename, doc.izip.read (f.filename)))
+ # end def append_pictures
+
+# end class Concatenate
+
+def renumber_frames (mimetype) :
+ return \
+ [ Renumber (OOo_Tag ('draw', 'text-box', mimetype), 'Frame') # OOo 1.X
+ , Renumber (OOo_Tag ('draw', 'frame', mimetype), 'Frame') # OOo 2.X
+ ]
+# end def renumber_frames
+
+def renumber_sections (mimetype) :
+ return [Renumber (OOo_Tag ('text', 'section', mimetype))]
+# end def renumber_sections
+
+def renumber_tables (mimetype) :
+ return [Renumber (OOo_Tag ('table', 'table', mimetype))]
+# end def renumber_tables
+
+def renumber_images (mimetype) :
+ return [Renumber (OOo_Tag ('draw', 'image', mimetype))]
+# end def renumber_images
+
+def renumber_xml_id (mimetype) :
+ if mimetype == mimetypes [0] :
+ return []
+ xmlid = OOo_Tag ('xml', 'id', mimetype)
+ return [Renumber (OOo_Tag ('text', 'list', mimetype), 'list', xmlid)]
+# end def renumber_xml_id
+
+def renumber_all (mimetype) :
+ """ Factory function for all renumberings parameterized with
+ mimetype
+ """
+ return Attribute_Access \
+ ( renumber_frames (mimetype)
+ + renumber_sections (mimetype)
+ + renumber_tables (mimetype)
+ + renumber_images (mimetype)
+ + renumber_xml_id (mimetype)
+ )
+# end def renumber_all
+
+# used to have a separate Pagecount transform -- generalized to get
+# some of the meta information using an Attribute_Access transform
+# and set the same information later after possibly being updated by
+# other transforms. We use another naming convention here for storing
+# the info retrieved from the OOo document: We use the attribute name in
+# the meta-information to store (and later retrieve) the information.
+
+def get_meta (mimetype) :
+ """ Factory function for Attribute_Access to get all interesting
+ meta-data
+ """
+ get_attr = []
+ for attr in meta_counts :
+ a = OOo_Tag ('meta', attr, mimetype)
+ t = OOo_Tag ('meta', 'document-statistic', mimetype)
+ get_attr.append (Get_Attribute (t, a, attr))
+ return Attribute_Access (get_attr, prio = 20, filename = 'meta.xml')
+# end def get_meta
+
+def set_meta (mimetype) :
+ """ Factory function for Attribute_Access to set all interesting
+ meta-data
+ """
+ set_attr = []
+ for attr in meta_counts :
+ a = OOo_Tag ('meta', attr, mimetype)
+ t = OOo_Tag ('meta', 'document-statistic', mimetype)
+ set_attr.append (Set_Attribute (t, a, attr))
+ return Attribute_Access (set_attr, prio = 120, filename = 'meta.xml')
+# end def set_meta
diff --git a/ooopy/Version.py b/ooopy/Version.py
new file mode 100644
index 000000000..495ca242a
--- /dev/null
+++ b/ooopy/Version.py
@@ -0,0 +1 @@
+VERSION="1.11"
diff --git a/ooopy/__init__.py b/ooopy/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/ooopy/__init__.py