diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-05-01 13:51:01 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-06-17 13:21:28 +0200 |
commit | 6e09fe95f07ea2c0a827beda5fc2f2a63751db7f (patch) | |
tree | d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy | |
parent | ce4b7db76f21559b94943229bbeebd9c37c43f49 (diff) | |
download | Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.tar.bz2 Ishtar-6e09fe95f07ea2c0a827beda5fc2f2a63751db7f.zip |
Embed ooopy (last version: 1.11)
Diffstat (limited to 'ooopy')
-rw-r--r-- | ooopy/OOoPy.py | 317 | ||||
-rw-r--r-- | ooopy/Transformer.py | 1397 | ||||
-rw-r--r-- | ooopy/Transforms.py | 1237 | ||||
-rw-r--r-- | ooopy/Version.py | 1 | ||||
-rw-r--r-- | ooopy/__init__.py | 0 |
5 files changed, 2952 insertions, 0 deletions
diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py new file mode 100644 index 000000000..87e0b8110 --- /dev/null +++ b/ooopy/OOoPy.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. +# Reichergasse 131, A-3411 Weidling. +# Web: http://www.runtux.com Email: office@runtux.com +# All rights reserved +# **************************************************************************** +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
class _autosuper (type) :
    """
        Metaclass which stores ``super (cls)`` on every class it creates
        under the name ``_<classname>__super``.

        Inside a method of class ``X`` the expression ``self.__super``
        is name-mangled by the compiler to ``self._X__super``, so a
        method can write ``self.__super.method ()`` without repeating
        its own class name.
    """
    def __init__ (cls, name, bases, dict) :
        super (_autosuper, cls).__init__ (name, bases, dict)
        # Unbound super object: it is a descriptor, so reading it through
        # an instance yields the equivalent of ``super (cls, instance)``.
        setattr (cls, "_%s__super" % name, super (cls))
    # end def __init__
# end class _autosuper

# A ``__metaclass__`` attribute in the class body is honoured by
# Python 2 only; Python 3 ignores it silently, the ``__super`` attribute
# is then never created and autosuper.__init__ fails with AttributeError.
# Creating an intermediate base class by *calling* the metaclass works
# with both major versions.
_autosuper_base = _autosuper ('_autosuper_base', (object,), {})

class autosuper (_autosuper_base) :
    """
        Base class for everything that wants to use ``self.__super``.
        Accepts and ignores arbitrary constructor arguments and calls
        the next ``__init__`` in the method resolution order without
        arguments.
    """
    def __init__ (self, *args, **kw) :
        self.__super.__init__ ()
    # end def __init__
# end class autosuper
"http://www.w3.org/1999/xlink" + , 'manifest' : "http://openoffice.org/2001/manifest" + } + , mimetypes [1] : + { 'chart' : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" + , 'config' : "urn:oasis:names:tc:opendocument:xmlns:config:1.0" + , 'dc' : "http://purl.org/dc/elements/1.1/" + , 'dr3d' : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" + , 'draw' : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" + , 'fo' : "urn:oasis:names:tc:opendocument:xmlns:" + "xsl-fo-compatible:1.0" + , 'form' : "urn:oasis:names:tc:opendocument:xmlns:form:1.0" + , 'math' : "http://www.w3.org/1998/Math/MathML" + , 'meta' : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" + , 'number' : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" + , 'office' : "urn:oasis:names:tc:opendocument:xmlns:office:1.0" + , 'officeooo': "http://openoffice.org/2009/office" + , 'script' : "urn:oasis:names:tc:opendocument:xmlns:script:1.0" + , 'style' : "urn:oasis:names:tc:opendocument:xmlns:style:1.0" + , 'svg' : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" + , 'table' : "urn:oasis:names:tc:opendocument:xmlns:table:1.0" + , 'text' : "urn:oasis:names:tc:opendocument:xmlns:text:1.0" + , 'xlink' : "http://www.w3.org/1999/xlink" + , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" + , 'tableooo' : "http://openoffice.org/2009/table" + , 'transformation' : "http://www.w3.org/2003/g/data-view#" + # OOo 1.X tags and some others: + , 'ooo' : "http://openoffice.org/2004/office" + , 'ooow' : "http://openoffice.org/2004/writer" + , 'oooc' : "http://openoffice.org/2004/calc" + , 'o_dom' : "http://www.w3.org/2001/xml-events" + , 'o_xforms' : "http://www.w3.org/2002/xforms" + , 'xs' : "http://www.w3.org/2001/XMLSchema" + , 'xsi' : "http://www.w3.org/2001/XMLSchema-instance" + # predefined xml namespace, see + # http://www.w3.org/TR/2006/REC-xml-names11-20060816/ + # "It MAY, but need not, be declared, and MUST NOT be undeclared + # or bound to any other namespace name." 
# Make every symbolic prefix known to ElementTree by entering
# "namespace URI -> prefix" into its _namespace_map. A URI which is
# already present must carry the same prefix we use.
# .values () / .items () exist in Python 2 and 3, the iter* variants
# only in Python 2.
for mimetype in namespace_by_name.values () :
    for k, v in mimetype.items () :
        if v in _namespace_map :
            assert (_namespace_map [v] == k)
        _namespace_map [v] = k

class OOoElementTree (autosuper) :
    """
        An ElementTree for OOo document XML members. Behaves like the
        original ElementTree (in fact it delegates almost everything to
        a real instance of ElementTree) except for the write method,
        that writes itself back to the OOo XML file in the OOo zip
        archive it came from.
    """
    def __init__ (self, ooopy, zname, root) :
        """ ooopy: the OOoPy archive wrapper we write back to
            zname: archive member name of this XML file
            root:  root element of the parsed member
        """
        self.ooopy = ooopy
        self.zname = zname
        self.tree  = ElementTree (root)
    # end def __init__

    def write (self) :
        """ Write the tree back into the output archive of our OOoPy
            object under the member name it was read from.
        """
        self.ooopy.write (self.zname, self.tree)
    # end def write

    def __getattr__ (self, name) :
        """
            Delegate everything to our ElementTree attribute. The
            looked-up value is cached on the instance, so __getattr__
            is only called once per name. Names starting with a double
            underscore are never delegated.
        """
        # 'tree' itself must not be delegated: if it is missing (e.g.
        # while the object is being copied or unpickled) evaluating
        # self.tree below would call __getattr__ again without end.
        if name.startswith ('__') or name == 'tree' :
            raise AttributeError (name)
        result = getattr (self.tree, name)
        setattr (self, name, result)
        return result
    # end def __getattr__

# end class OOoElementTree

class OOoPy (autosuper) :
    """
        Wrapper for OpenOffice.org zip files (all OOo documents are
        really zip files internally).

        >>> from ooopy.OOoPy import OOoPy
        >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw')
        >>> o.mimetype
        'application/vnd.sun.xml.writer'
        >>> for f in files :
        ...     e = o.read (f)
        ...     e.write ()
        ...
        >>> o.close ()
        >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt')
        >>> o.mimetype
        'application/vnd.oasis.opendocument.text'
        >>> for f in files :
        ...     e = o.read (f)
        ...     e.write ()
        ...
        >>> o.append_file ('Pictures/empty', '')
        >>> o.close ()
        >>> o = OOoPy (infile = 'out2.odt')
        >>> for f in o.izip.infolist () :
        ...     print ("%s %s %s"
        ...         % (f.filename, f.create_system, f.compress_type))
        mimetype 0 8
        content.xml 0 8
        styles.xml 0 8
        meta.xml 0 8
        settings.xml 0 8
        META-INF/manifest.xml 0 8
        Pictures/empty 0 8
        Configurations2/statusbar/ 0 0
        Configurations2/accelerator/current.xml 0 8
        Configurations2/floater/ 0 0
        Configurations2/popupmenu/ 0 0
        Configurations2/progressbar/ 0 0
        Configurations2/menubar/ 0 0
        Configurations2/toolbar/ 0 0
        Configurations2/images/Bitmaps/ 0 0
        Thumbnails/thumbnail.png 0 8
    """
    def __init__ \
        ( self
        , infile     = None
        , outfile    = None
        , write_mode = 'w'
        , mimetype   = None
        ) :
        """
            Open an OOo document, if no outfile is given, we open the
            file read-only. Otherwise the outfile has to be different
            from the infile -- the python ZipFile can't deal with
            read-write access. In case an outfile is given, we open it
            in "w" mode as a zip file, unless write_mode is specified
            (the only allowed case would be "a" for appending to an
            existing file, see Python's ZipFile documentation for
            details). If no infile is given, the user is responsible for
            providing all necessary files in the resulting output file.

            It seems that OOo needs to have the mimetype as the first
            archive member (at least with mimetype as the first member
            it works, the order may not be arbitrary) to recognize a zip
            archive as an OOo file. When copying from a given infile, we
            use the same order of elements in the resulting output. When
            creating new elements we make sure the mimetype is the first
            in the resulting archive.

            Note that both infile and outfile can either be filenames
            or file-like objects (e.g. StringIO).

            The mimetype is automatically determined if an infile is
            given. If only writing is desired, the mimetype should be
            set. A mimetype read from the archive is always stored as
            a native string so it can be used as a key of
            namespace_by_name.
        """
        # Initialise before anything can fail: __del__ is an alias for
        # close and also runs for an object whose constructor raised.
        self.izip = self.ozip = None
        assert (infile != outfile)
        if infile :
            self.izip = ZipFile (infile, 'r', ZIP_DEFLATED)
        if outfile :
            self.ozip = ZipFile (outfile, write_mode, ZIP_DEFLATED)
            self.written = {}
        if mimetype :
            self.mimetype = mimetype
        elif self.izip :
            mt = self.izip.read ('mimetype')
            # ZipFile.read returns bytes under Python 3; all mimetype
            # tables in this package are keyed by native strings.
            if not isinstance (mt, str) :
                mt = mt.decode ('ascii')
            self.mimetype = mt
    # end def __init__

    def read (self, zname) :
        """
            return an OOoElementTree object for the given OOo document
            archive member name. Currently an OOo document contains the
            following XML files::

             * content.xml: the text of the OOo document
             * styles.xml: style definitions
             * meta.xml: meta-information (author, last changed, ...)
             * settings.xml: settings in OOo
             * META-INF/manifest.xml: contents of the archive

            There is an additional file "mimetype" that always contains
            the string "application/vnd.sun.xml.writer" for OOo 1.X files
            and the string "application/vnd.oasis.opendocument.text" for
            OOo 2.X files.
        """
        assert (self.izip)
        return OOoElementTree (self, zname, fromstring (self.izip.read (zname)))
    # end def read

    def _write (self, zname, str) :
        """ Write the data in str as deflated member zname to the
            output archive and remember that zname was written.
        """
        # ZipInfo wants a (year, month, day, hour, minute, second)
        # tuple, timetuple () returns nine fields.
        now  = datetime.utcnow ().timetuple () [:6]
        info = ZipInfo (zname, date_time = now)
        info.create_system = 0 # pretend to be fat
        info.compress_type = ZIP_DEFLATED
        self.ozip.writestr (info, str)
        self.written [zname] = 1
    # end def _write

    def write (self, zname, etree) :
        """ Serialise etree and store it as member zname in the output
            archive. The mimetype member is written first if it is not
            yet part of the output.
        """
        import io
        assert (self.ozip)
        # assure mimetype is the first member in new archive
        if 'mimetype' not in self.written :
            self._write ('mimetype', self.mimetype)
        # ElementTree.write emits encoded bytes. The StringIO imported
        # at module level is the Python 2 class where available (which
        # accepts them); on Python 3 it is io.StringIO which rejects
        # bytes, so a BytesIO buffer is needed there.
        if StringIO is io.StringIO :
            buf = io.BytesIO ()
        else :
            buf = StringIO ()
        etree.write (buf)
        self._write (zname, buf.getvalue ())
    # end def write

    def append_file (self, zname, str) :
        """ Official interface to _write: Append a file to the end of
            the archive. Nothing happens if a member of that name was
            already written.
        """
        if zname not in self.written :
            self._write (zname, str)
    # end def append_file

    def close (self) :
        """
            Close the zip files. According to documentation of zipfile in
            the standard python lib, this has to be done to be sure
            everything is written. We copy over the not-yet written files
            from izip before closing ozip. Both archives are closed even
            if copying a member fails; calling close again is harmless.
        """
        izip = getattr (self, 'izip', None)
        ozip = getattr (self, 'ozip', None)
        try :
            if izip and ozip :
                for f in izip.infolist () :
                    if f.filename not in self.written :
                        ozip.writestr (f, izip.read (f.filename))
        finally :
            for i in izip, ozip :
                if i : i.close ()
            self.izip = self.ozip = None
    # end def close

    __del__ = close # auto-close on deletion of object
# end class OOoPy
def OOo_Tag (namespace, name, mimetype) :
    """Return combined XML tag

       namespace is the symbolic prefix (e.g. 'text'), name the local
       tag or attribute name and mimetype selects the namespace table
       of the document format. Raises KeyError for an unknown mimetype
       or prefix.

    >>> OOo_Tag ('xml', 'id', mimetypes [1])
    '{http://www.w3.org/XML/1998/namespace}id'
    >>> OOo_Tag ('text', 'list', mimetypes [1])
    '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list'
    """
    return "{%s}%s" % (namespace_by_name [mimetype][namespace], name)
# end def OOo_Tag

def split_tag (tag) :
    """ Split tag into symbolic namespace and name part -- inverse
        operation of OOo_Tag.

        Returns a (prefix, name) tuple. Raises ValueError if tag is not
        of the form '{uri}name' and KeyError if the uri is not
        registered in ElementTree's _namespace_map.
    """
    ns, t = tag.split ('}')
    # ns still carries the leading '{'
    return (_namespace_map [ns [1:]], t)
# end def split_tag

class Transform (autosuper) :
    """
        Base class for individual transforms on OOo files. An individual
        transform needs a filename variable for specifying the OOo file
        the transform should be applied to and an optional prio.
        Individual transforms are applied according to their prio
        setting, higher prio means later application of a transform.

        The filename variable must specify one of the XML files which are
        part of the OOo document (see the files variable imported from
        ooopy.OOoPy). As the names imply, content.xml contains the
        contents of the document (text and ad-hoc style definitions),
        styles.xml contains the style definitions, meta.xml contains
        meta information like author, editing time, etc. and
        settings.xml is used to store OOo's settings (menu
        Tools->Configure).
    """
    prio = 100
    # Names which differ between the two supported document formats,
    # indexed by mimetype.
    textbody_names = \
        { mimetypes [0] : 'body'
        , mimetypes [1] : 'text'
        }
    paragraph_props = \
        { mimetypes [0] : 'properties'
        , mimetypes [1] : 'paragraph-properties'
        }
    font_decls = \
        { mimetypes [0] : 'font-decls'
        , mimetypes [1] : 'font-face-decls'
        }

    def __init__ (self, prio = None, transformer = None) :
        """ prio overrides the class-level priority if given. If a
            transformer is given we register with it immediately.
        """
        if prio is not None :
            self.prio = prio
        self.transformer = None
        if transformer :
            self.register (transformer)
    # end def __init__

    def apply (self, root) :
        """ Apply myself to the element given as root """
        # Call syntax instead of "raise E, msg": the latter is a syntax
        # error in Python 3 and keeps the whole module from importing.
        raise NotImplementedError ('derived transforms must implement "apply"')
    # end def apply

    def apply_all (self, trees) :
        """ Apply myself to all the files given in trees. The variable
            trees contains a dictionary of ElementTree indexed by the
            name of the OOo File.
            The standard case is that only one file (namely
            self.filename) is used.
        """
        assert (self.filename)
        self.apply (trees [self.filename].getroot ())
    # end def apply_all

    def find_tbody (self, root) :
        """ Find the node which really contains the text -- different
            for different OOo versions. Returns root itself if it is
            the text body element, otherwise the first matching
            descendant (None if there is none).
        """
        tbody = root
        if tbody.tag != self.textbody_tag :
            tbody = tbody.find ('.//' + self.textbody_tag)
        return tbody
    # end def find_tbody

    def register (self, transformer) :
        """ Registering with a transformer means being able to access
            variables stored in the transformer by other transforms.

            Also needed for tag-computation: The transformer knows which
            version of OOo document we are processing.
        """
        self.transformer = transformer
        mt = self.mimetype = transformer.mimetype
        self.textbody_name = self.textbody_names [mt]
        # The instance attribute paragraph_props set below hides the
        # class-level table of the same name. When registering a second
        # time go back to the table of the class, otherwise we would
        # try to index the string stored by the first registration.
        pprops = self.paragraph_props
        if not isinstance (pprops, dict) :
            pprops = self.__class__.paragraph_props
        self.paragraph_props = pprops [mt]
        self.properties_tag  = self.oootag ('style', self.paragraph_props)
        self.textbody_tag    = self.oootag ('office', self.textbody_name)
        self.font_decls_tag  = self.oootag ('office', self.font_decls [mt])
    # end def register

    def oootag (self, namespace, name) :
        """ Compute long tag version for the mimetype of the
            transformer we are registered with.
        """
        return OOo_Tag (namespace, name, self.mimetype)
    # end def oootag

    def set (self, variable, value) :
        """ Set variable in our transformer using naming convention. """
        self.transformer [self._varname (variable)] = value
    # end def set

    def _varname (self, name) :
        """ For fulfilling the naming convention of the transformer
            dictionary (every entry in this dictionary should be prefixed
            with the class name of the transform) we have this
            convenience method.
            Returns variable name prefixed with own class name.
        """
        return ":".join ((self.__class__.__name__, name))
    # end def _varname

# end class Transform
from StringIO import StringIO + >>> sio = BytesIO () + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) + 'Standard' + >>> def cb (name) : + ... r = { 'street' : 'Beispielstrasse 42' + ... , 'firstname' : 'Hugo' + ... , 'salutation' : 'Frau' + ... } + ... if r.has_key (name) : return r [name] + ... return None + ... + >>> p = get_meta (m) + >>> t = Transformer (m, p) + >>> t ['a'] = 'a' + >>> t ['a'] + 'a' + >>> t.transform (o) + >>> p.set ('a', 'b') + >>> t ['Attribute_Access:a'] + 'b' + >>> t = Transformer ( + ... m + ... , Transforms.Autoupdate () + ... , Transforms.Editinfo () + ... , Transforms.Field_Replace (prio = 99, replace = cb) + ... , Transforms.Field_Replace + ... ( replace = + ... { 'salutation' : '' + ... , 'firstname' : 'Erika' + ... , 'lastname' : 'Musterfrau' + ... , 'country' : 'D' + ... , 'postalcode' : '00815' + ... , 'city' : 'Niemandsdorf' + ... } + ... ) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Addpagebreak () + ... ) + >>> t.transform (o) + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout.sxw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> c = o.read ('content.xml') + >>> m = o.mimetype + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... 
print name, ':', node.text + salutation : None + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + salutation : None + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) + 'P2' + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) + >>> c = o.read ('content.xml') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict (firstname = 'Erika', lastname = 'Nobody') + ... , dict (firstname = 'Eric', lastname = 'Wizard') + ... , cb + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, t [':'.join (('Set_Attribute', i))] + character-count 951 + image-count 0 + object-count 0 + page-count 3 + paragraph-count 113 + table-count 3 + word-count 162 + >>> name = t ['Addpagebreak_Style:stylename'] + >>> name + 'P2' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout2.sxw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', m)) + >>> for n in body.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:text-box 0 + draw:rect 1 + draw:text-box 3 + draw:rect 4 + draw:text-box 6 + draw:rect 7 + draw:text-box 2 + draw:text-box 5 + draw:text-box 8 + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : + ... 
print n.tag + {http://openoffice.org/2000/text}p + {http://openoffice.org/2000/text}p + >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) + >>> for n in body.findall (vset) : + ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', n.text + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : + ... print n.get (OOo_Tag ('draw', 'name', m)), + ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) + Frame1 1 + Frame2 2 + Frame3 3 + Frame4 None + Frame5 None + Frame6 None + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... print n.get (OOo_Tag ('text', 'name', m)) + Section1 + Section2 + Section3 + Section4 + Section5 + Section6 + Section7 + Section8 + Section9 + Section10 + Section11 + Section12 + Section13 + Section14 + Section15 + Section16 + Section17 + Section18 + >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : + ... print n.get (OOo_Tag ('table', 'name', m)) + Table1 + Table2 + Table3 + >>> r = o.read ('meta.xml') + >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) + >>> for i in meta_counts : + ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) + character-count '951' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '113' + table-count '3' + word-count '162' + >>> o.close () + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) + >>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate (*tf) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... 
) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, repr (t [':'.join (('Set_Attribute', i))]) + character-count '1131' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '168' + table-count '2' + word-count '160' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout3.sxw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> s = o.read ('styles.xml') + >>> for n in c.findall ('./*/*') : + ... name = n.get (OOo_Tag ('style', 'name', m)) + ... if name : + ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) + ... print '"%s", "%s"' % (name, parent) + "Tahoma1", "None" + "Bitstream Vera Sans", "None" + "Tahoma", "None" + "Nimbus Roman No9 L", "None" + "Courier New", "None" + "Arial Black", "None" + "New Century Schoolbook", "None" + "Helvetica", "None" + "Table1", "None" + "Table1.A", "None" + "Table1.A1", "None" + "Table1.E1", "None" + "Table1.A2", "None" + "Table1.E2", "None" + "P1", "None" + "fr1", "Frame" + "fr2", "None" + "fr3", "Frame" + "Sect1", "None" + "gr1", "None" + "P2", "Standard" + "Standard_Concat", "None" + "Concat_P1", "Concat_Frame contents" + "Concat_P2", "Concat_Frame contents" + "P3", "Concat_Frame contents" + "P4", "Concat_Frame contents" + "P5", "Concat_Standard" + "P6", "Concat_Standard" + "P7", "Concat_Frame contents" + "P8", "Concat_Frame contents" + "P9", "Concat_Frame contents" + "P10", "Concat_Frame contents" + "P11", "Concat_Frame contents" + "P12", "Concat_Frame contents" + "P13", "Concat_Frame contents" + "P15", "Concat_Standard" + "P16", "Concat_Standard" + "P17", "Concat_Standard" + "P18", "Concat_Standard" + "P19", "Concat_Standard" + "P20", "Concat_Standard" + "P21", "Concat_Standard" + "P22", "Concat_Standard" + "P23", "Concat_Standard" + "T1", "None" + "Concat_fr1", "Concat_Frame" + "Concat_fr2", "Concat_Frame" + "Concat_fr3", "Concat_Frame" + "fr4", "Concat_Frame" + "fr5", "Concat_Frame" + 
"fr6", "Concat_Frame" + "Concat_Sect1", "None" + "N0", "None" + "N2", "None" + "P15_Concat", "Concat_Standard" + >>> for n in s.findall ('./*/*') : + ... name = n.get (OOo_Tag ('style', 'name', m)) + ... if name : + ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) + ... print '"%s", "%s"' % (name, parent) + "Tahoma1", "None" + "Bitstream Vera Sans", "None" + "Tahoma", "None" + "Nimbus Roman No9 L", "None" + "Courier New", "None" + "Arial Black", "None" + "New Century Schoolbook", "None" + "Helvetica", "None" + "Standard", "None" + "Text body", "Standard" + "List", "Text body" + "Table Contents", "Text body" + "Table Heading", "Table Contents" + "Caption", "Standard" + "Frame contents", "Text body" + "Index", "Standard" + "Frame", "None" + "OLE", "None" + "Concat_Standard", "None" + "Concat_Text body", "Concat_Standard" + "Concat_List", "Concat_Text body" + "Concat_Caption", "Concat_Standard" + "Concat_Frame contents", "Concat_Text body" + "Concat_Index", "Concat_Standard" + "Horizontal Line", "Concat_Standard" + "Internet link", "None" + "Visited Internet Link", "None" + "Concat_Frame", "None" + "Concat_OLE", "None" + "pm1", "None" + "Concat_pm1", "None" + "Standard", "None" + "Concat_Standard", "None" + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... 
print name + salutation + firstname + lastname + street + country + postalcode + city + date + invoice.invoice_no + invoice.abo.aboprice.abotype.description + address.salutation + address.title + address.firstname + address.lastname + address.function + address.street + address.country + address.postalcode + address.city + invoice.subscriber.salutation + invoice.subscriber.title + invoice.subscriber.firstname + invoice.subscriber.lastname + invoice.subscriber.function + invoice.subscriber.street + invoice.subscriber.country + invoice.subscriber.postalcode + invoice.subscriber.city + invoice.period_start + invoice.period_end + invoice.currency.name + invoice.amount + invoice.subscriber.initial + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name + Illustration + Table + Text + Drawing + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... name = n.get (OOo_Tag ('text', 'style-name', m)) + ... if not name or name.startswith ('Concat') : + ... print ">%s<" % name + >Concat_P1< + >Concat_P2< + >Concat_Frame contents< + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : + ... attrs = 'name', 'style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... 
print attrs + ['Frame1', 'fr1', '0', '1'] + ['Frame2', 'fr1', '3', '2'] + ['Frame3', 'Concat_fr1', '6', '3'] + ['Frame4', 'Concat_fr2', '7', '3'] + ['Frame5', 'Concat_fr3', '8', '3'] + ['Frame6', 'Concat_fr1', '9', '3'] + ['Frame7', 'fr4', '10', '3'] + ['Frame8', 'fr4', '11', '3'] + ['Frame9', 'fr4', '12', '3'] + ['Frame10', 'fr4', '13', '3'] + ['Frame11', 'fr4', '14', '3'] + ['Frame12', 'fr4', '15', '3'] + ['Frame13', 'fr5', '16', '3'] + ['Frame14', 'fr4', '18', '3'] + ['Frame15', 'fr4', '19', '3'] + ['Frame16', 'fr4', '20', '3'] + ['Frame17', 'fr6', '17', '3'] + ['Frame18', 'fr4', '23', '3'] + ['Frame19', 'fr3', '2', None] + ['Frame20', 'fr3', '5', None] + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... attrs = 'name', 'style-name' + ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] + ... print attrs + ['Section1', 'Sect1'] + ['Section2', 'Sect1'] + ['Section3', 'Sect1'] + ['Section4', 'Sect1'] + ['Section5', 'Sect1'] + ['Section6', 'Sect1'] + ['Section7', 'Concat_Sect1'] + ['Section8', 'Concat_Sect1'] + ['Section9', 'Concat_Sect1'] + ['Section10', 'Concat_Sect1'] + ['Section11', 'Concat_Sect1'] + ['Section12', 'Concat_Sect1'] + ['Section13', 'Concat_Sect1'] + ['Section14', 'Concat_Sect1'] + ['Section15', 'Concat_Sect1'] + ['Section16', 'Concat_Sect1'] + ['Section17', 'Concat_Sect1'] + ['Section18', 'Concat_Sect1'] + ['Section19', 'Concat_Sect1'] + ['Section20', 'Concat_Sect1'] + ['Section21', 'Concat_Sect1'] + ['Section22', 'Concat_Sect1'] + ['Section23', 'Concat_Sect1'] + ['Section24', 'Concat_Sect1'] + ['Section25', 'Concat_Sect1'] + ['Section26', 'Concat_Sect1'] + ['Section27', 'Concat_Sect1'] + ['Section28', 'Sect1'] + ['Section29', 'Sect1'] + ['Section30', 'Sect1'] + ['Section31', 'Sect1'] + ['Section32', 'Sect1'] + ['Section33', 'Sect1'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... 
attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... print attrs + ['gr1', 'P1', '1', '1'] + ['gr1', 'P1', '4', '2'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... print attrs + ['gr1', 'P1', '24'] + ['gr1', 'P1', '22'] + ['gr1', 'P1', '21'] + >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : + ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : + ... attrs = 'name', 'class', 'family' + ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] + ... print attrs + ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) + ... if props is not None and len (props) : + ... props [0].tag + ['Concat_Standard', 'text', 'paragraph'] + '{http://openoffice.org/2000/style}tab-stops' + ['Concat_Text body', 'text', 'paragraph'] + ['Concat_List', 'list', 'paragraph'] + ['Concat_Caption', 'extra', 'paragraph'] + ['Concat_Frame contents', 'extra', 'paragraph'] + ['Concat_Index', 'index', 'paragraph'] + ['Concat_Frame', None, 'graphics'] + ['Concat_OLE', None, 'graphics'] + >>> for n in c.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:text-box 0 + draw:rect 1 + draw:text-box 3 + draw:rect 4 + draw:text-box 6 + draw:text-box 7 + draw:text-box 8 + draw:text-box 9 + draw:text-box 10 + draw:text-box 11 + draw:text-box 12 + draw:text-box 13 + draw:text-box 14 + draw:text-box 15 + draw:text-box 16 + draw:text-box 18 + draw:text-box 19 + draw:text-box 20 + draw:text-box 17 + draw:text-box 23 + draw:line 24 + draw:text-box 2 + draw:text-box 5 + draw:line 22 + draw:line 21 + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio) + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... 
, Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict + ... ( Spett = "Spettabile" + ... , contraente = "First person" + ... , indirizzo = "street? 1" + ... , tipo = "racc. A.C." + ... , luogo = "Varese" + ... , oggetto = "Saluti" + ... ) + ... , dict + ... ( Spett = "Egregio" + ... , contraente = "Second Person" + ... , indirizzo = "street? 2" + ... , tipo = "Raccomandata" + ... , luogo = "Gavirate" + ... , oggetto = "Ossequi" + ... ) + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform(o) + >>> o.close() + >>> ov = sio.getvalue () + >>> f = open ("carta-out.stw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + Spett : Spettabile + contraente : First person + indirizzo : street? 1 + Spett : Egregio + contraente : Second Person + indirizzo : street? 2 + tipo : racc. A.C. + luogo : Varese + oggetto : Saluti + tipo : Raccomandata + luogo : Gavirate + oggetto : Ossequi + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict (firstname = 'Erika', lastname = 'Nobody') + ... , dict (firstname = 'Eric', lastname = 'Wizard') + ... , cb + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... 
print i, t [':'.join (('Set_Attribute', i))] + character-count 951 + image-count 0 + object-count 0 + page-count 3 + paragraph-count 53 + table-count 3 + word-count 162 + >>> name = t ['Addpagebreak_Style:stylename'] + >>> name + 'P2' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout.odt", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', m)) + >>> for n in body.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:frame 0 + draw:rect 1 + draw:frame 3 + draw:rect 4 + draw:frame 6 + draw:rect 7 + draw:frame 2 + draw:frame 5 + draw:frame 8 + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : + ... print n.tag + {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p + {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p + >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) + >>> for n in body.findall (vset) : + ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', n.text + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : + ... print n.get (OOo_Tag ('draw', 'name', m)), + ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) + Frame1 1 + Frame2 2 + Frame3 3 + Frame4 None + Frame5 None + Frame6 None + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... 
print n.get (OOo_Tag ('text', 'name', m)) + Section1 + Section2 + Section3 + Section4 + Section5 + Section6 + Section7 + Section8 + Section9 + Section10 + Section11 + Section12 + Section13 + Section14 + Section15 + Section16 + Section17 + Section18 + >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : + ... print n.get (OOo_Tag ('table', 'name', m)) + Table1 + Table2 + Table3 + >>> r = o.read ('meta.xml') + >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) + >>> for i in meta_counts : + ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) + character-count '951' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '53' + table-count '3' + word-count '162' + >>> o.close () + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio) + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict + ... ( Spett = "Spettabile" + ... , contraente = "First person" + ... , indirizzo = "street? 1" + ... , tipo = "racc. A.C." + ... , luogo = "Varese" + ... , oggetto = "Saluti" + ... ) + ... , dict + ... ( Spett = "Egregio" + ... , contraente = "Second Person" + ... , indirizzo = "street? 2" + ... , tipo = "Raccomandata" + ... , luogo = "Gavirate" + ... , oggetto = "Ossequi" + ... ) + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform(o) + >>> o.close() + >>> ov = sio.getvalue () + >>> f = open ("carta-out.odt", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... 
print name, ':', node.text + Spett : Spettabile + contraente : First person + indirizzo : street? 1 + Spett : Egregio + contraente : Second Person + indirizzo : street? 2 + tipo : racc. A.C. + luogo : Varese + oggetto : Saluti + tipo : Raccomandata + luogo : Gavirate + oggetto : Ossequi + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) + >>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate (*tf) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, repr (t [':'.join (('Set_Attribute', i))]) + character-count '1131' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '80' + table-count '2' + word-count '159' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout3.odt", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> s = o.read ('styles.xml') + >>> for n in c.findall ('./*/*') : + ... name = n.get (OOo_Tag ('style', 'name', m)) + ... if name : + ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) + ... 
print '"%s", "%s"' % (name, parent) + "Tahoma1", "None" + "Bitstream Vera Sans", "None" + "Tahoma", "None" + "Nimbus Roman No9 L", "None" + "Courier New", "None" + "Arial Black", "None" + "New Century Schoolbook", "None" + "Times New Roman", "None" + "Arial", "None" + "Helvetica", "None" + "Table1", "None" + "Table1.A", "None" + "Table1.A1", "None" + "Table1.E1", "None" + "Table1.A2", "None" + "Table1.E2", "None" + "P1", "None" + "fr1", "Frame" + "fr2", "Frame" + "Sect1", "None" + "gr1", "None" + "P2", "Standard" + "Standard_Concat", "None" + "Concat_P1", "Concat_Frame_20_contents" + "Concat_P2", "Concat_Frame_20_contents" + "P3", "Concat_Frame_20_contents" + "P4", "Concat_Standard" + "P5", "Concat_Standard" + "P6", "Concat_Frame_20_contents" + "P7", "Concat_Frame_20_contents" + "P8", "Concat_Frame_20_contents" + "P9", "Concat_Frame_20_contents" + "P10", "Concat_Frame_20_contents" + "P11", "Concat_Frame_20_contents" + "P12", "Concat_Frame_20_contents" + "P14", "Concat_Standard" + "P15", "Concat_Standard" + "P16", "Concat_Standard" + "P17", "Concat_Standard" + "P18", "Concat_Standard" + "P19", "Concat_Standard" + "P20", "Concat_Standard" + "P21", "Concat_Standard" + "P22", "Concat_Standard" + "P23", "Concat_Standard" + "Concat_fr1", "Frame" + "Concat_fr2", "Frame" + "fr3", "Frame" + "fr4", "Frame" + "fr5", "Frame" + "fr6", "Frame" + "Concat_gr1", "None" + "N0", "None" + "N2", "None" + "P14_Concat", "Concat_Standard" + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... 
print name + salutation + firstname + lastname + street + country + postalcode + city + date + invoice.invoice_no + invoice.abo.aboprice.abotype.description + address.salutation + address.title + address.firstname + address.lastname + address.function + address.street + address.country + address.postalcode + address.city + invoice.subscriber.salutation + invoice.subscriber.title + invoice.subscriber.firstname + invoice.subscriber.lastname + invoice.subscriber.function + invoice.subscriber.street + invoice.subscriber.country + invoice.subscriber.postalcode + invoice.subscriber.city + invoice.period_start + invoice.period_end + invoice.currency.name + invoice.amount + invoice.subscriber.initial + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name + Illustration + Table + Text + Drawing + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... name = n.get (OOo_Tag ('text', 'style-name', m)) + ... if not name or name.startswith ('Concat') : + ... print ':'.join(split_tag (n.tag)), ">%s<" % name + text:p >None< + text:p >None< + text:p >Concat_P1< + text:p >Concat_P1< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_Frame_20_contents< + text:p >None< + text:p >None< + text:p >None< + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : + ... attrs = 'name', 'style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... 
print attrs + ['Frame1', 'fr1', '0', '1'] + ['Frame2', 'fr1', '3', '2'] + ['Frame3', 'Concat_fr1', '6', '3'] + ['Frame4', 'Concat_fr2', '7', '3'] + ['Frame5', 'fr3', '8', '3'] + ['Frame6', 'Concat_fr1', '9', '3'] + ['Frame7', 'fr4', '10', '3'] + ['Frame8', 'fr4', '11', '3'] + ['Frame9', 'fr4', '12', '3'] + ['Frame10', 'fr4', '13', '3'] + ['Frame11', 'fr4', '14', '3'] + ['Frame12', 'fr4', '15', '3'] + ['Frame13', 'fr5', '16', '3'] + ['Frame14', 'fr4', '18', '3'] + ['Frame15', 'fr4', '19', '3'] + ['Frame16', 'fr4', '20', '3'] + ['Frame17', 'fr6', '17', '3'] + ['Frame18', 'fr4', '23', '3'] + ['Frame19', 'fr2', '2', None] + ['Frame20', 'fr2', '5', None] + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... attrs = 'name', 'style-name' + ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] + ... print attrs + ['Section1', 'Sect1'] + ['Section2', 'Sect1'] + ['Section3', 'Sect1'] + ['Section4', 'Sect1'] + ['Section5', 'Sect1'] + ['Section6', 'Sect1'] + ['Section7', 'Sect1'] + ['Section8', 'Sect1'] + ['Section9', 'Sect1'] + ['Section10', 'Sect1'] + ['Section11', 'Sect1'] + ['Section12', 'Sect1'] + ['Section13', 'Sect1'] + ['Section14', 'Sect1'] + ['Section15', 'Sect1'] + ['Section16', 'Sect1'] + ['Section17', 'Sect1'] + ['Section18', 'Sect1'] + ['Section19', 'Sect1'] + ['Section20', 'Sect1'] + ['Section21', 'Sect1'] + ['Section22', 'Sect1'] + ['Section23', 'Sect1'] + ['Section24', 'Sect1'] + ['Section25', 'Sect1'] + ['Section26', 'Sect1'] + ['Section27', 'Sect1'] + ['Section28', 'Sect1'] + ['Section29', 'Sect1'] + ['Section30', 'Sect1'] + ['Section31', 'Sect1'] + ['Section32', 'Sect1'] + ['Section33', 'Sect1'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... 
print attrs + ['gr1', 'P1', '1', '1'] + ['gr1', 'P1', '4', '2'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... print attrs + ['Concat_gr1', 'P1', '24'] + ['Concat_gr1', 'P1', '22'] + ['Concat_gr1', 'P1', '21'] + >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : + ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : + ... attrs = 'name', 'display-name', 'class', 'family' + ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] + ... print attrs + ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) + ... if props is not None and len (props) : + ... props [0].tag + ['Concat_Standard', None, 'text', 'paragraph'] + ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph'] + ['Concat_List', None, 'list', 'paragraph'] + ['Concat_Caption', None, 'extra', 'paragraph'] + ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph'] + ['Concat_Index', None, 'index', 'paragraph'] + >>> for n in c.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:frame 0 + draw:rect 1 + draw:frame 3 + draw:rect 4 + draw:frame 6 + draw:frame 7 + draw:frame 8 + draw:frame 9 + draw:frame 10 + draw:frame 11 + draw:frame 12 + draw:frame 13 + draw:frame 14 + draw:frame 15 + draw:frame 16 + draw:frame 18 + draw:frame 19 + draw:frame 20 + draw:frame 17 + draw:frame 23 + draw:line 24 + draw:frame 2 + draw:frame 5 + draw:line 22 + draw:line 21 + >>> from os import system + >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt ' + ... '-o testout.odt ' + ... 'salutation=Frau firstname=Erika lastname=Musterfrau ' + ... 'country=D postalcode=00815 city=Niemandsdorf ' + ... 
'street="Beispielstrasse 42"') + 0 + >>> o = OOoPy (infile = 'testout.odt') + >>> c = o.read ('content.xml') + >>> m = o.mimetype + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + salutation : Frau + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + salutation : Frau + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + >>> o.close () + >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' " + ... "testfiles/carta.odt testfiles/x.csv") + 0 + >>> o = OOoPy (infile = 'testout.odt') + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + Spett : Spettabile + contraente : First person + indirizzo : street? 1 + Spett : Egregio + contraente : Second Person + indirizzo : street? 2 + tipo : racc. A.C. + luogo : Varese + oggetto : Saluti + tipo : Raccomandata + luogo : Gavirate + oggetto : Ossequi + >>> o.close () + >>> infile = 'testfiles/testenum.odt' + >>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict (firstname = 'Erika', lastname = 'Nobody') + ... , dict (firstname = 'Eric', lastname = 'Wizard') + ... , cb + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... 
) + >>> t.transform (o) + >>> o.close () + >>> o = OOoPy (infile = 'xyzzy.odt') + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> textlist = './/' + OOo_Tag ('text', 'list', m) + >>> for node in body.findall (textlist) : + ... id = node.get (OOo_Tag ('xml', 'id', m)) + ... print 'xml:id', ':', id + xml:id : list1 + xml:id : list2 + xml:id : list3 + >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt') + >>> m = o.mimetype + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate ('testfiles/page2.odt') + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... , Transforms.Manifest_Append () + ... ) + >>> t.transform (o) + >>> o.close () + >>> o = OOoPy (infile = 'xyzzy.odt') + >>> c = o.read ('META-INF/manifest.xml') + >>> for node in c.getroot () : + ... fe = node.get (OOo_Tag ('manifest', 'full-path', m)) + ... print fe + / + Pictures/10000000000000C80000007941B1A419.jpg + Pictures/10000000000000DC000000B02E191635.jpg + Pictures/10000000000000DC000000A337377AAA.jpg + meta.xml + settings.xml + content.xml + Thumbnails/thumbnail.png + layout-cache + manifest.rdf + Configurations2/accelerator/current.xml + Configurations2/ + styles.xml + >>> for f in o.izip.infolist () : + ... print f.filename + mimetype + settings.xml + META-INF/manifest.xml + content.xml + meta.xml + styles.xml + Pictures/10000000000000C80000007941B1A419.jpg + Pictures/10000000000000DC000000B02E191635.jpg + Pictures/10000000000000DC000000A337377AAA.jpg + Thumbnails/thumbnail.png + layout-cache + manifest.rdf + Configurations2/images/Bitmaps/ + Configurations2/accelerator/current.xml + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio) + >>> m = o.mimetype + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... 
, Transforms.Concatenate ('testfiles/tbl_second.odt') + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... , Transforms.Manifest_Append () + ... ) + >>> t.transform (o) + >>> o.close () + >>> o = OOoPy (infile = sio) + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m) + >>> for table in body.findall (tbls) : + ... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m)) + ... if name : + ... print name + ... for t in table.findall ('.//') : + ... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m)) + ... if name : + ... print name + Tabella1 + Tabella1.A + Tabella1.A1 + Tabella1.B1 + Tabella1.A2 + Tabella1.B2 + Tabella1 + Tabella1.A + Tabella1.A1 + Tabella1.B1 + Tabella1.A2 + Tabella1.B2 + """ + + def __init__ (self, mimetype, *tf) : + assert (mimetype in mimetypes) + self.mimetype = mimetype + self.transforms = {} + for t in tf : + self.insert (t) + self.dictionary = {} + self.has_key = self.dictionary.has_key + self.__contains__ = self.has_key + # 2-tuples of filename, content + self.appendfiles = [] + # end def __init__ + + def insert (self, transform) : + """Insert a new transform""" + t = transform + if t.prio not in self.transforms : + self.transforms [t.prio] = [] + self.transforms [t.prio].append (t) + t.register (self) + # end def append + + def transform (self, ooopy) : + """ + Apply all the transforms in priority order. + Priority order is global over all transforms. 
+ """ + self.trees = {} + for f in files : + self.trees [f] = ooopy.read (f) + #self.dictionary = {} # clear dict when transforming another ooopy + prios = self.transforms.keys () + prios.sort () + for p in prios : + for t in self.transforms [p] : + t.apply_all (self.trees) + for e in self.trees.itervalues () : + e.write () + for fname, fcontent in self.appendfiles : + e.ooopy.append_file (fname, fcontent) + # end def transform + + def __getitem__ (self, key) : + return self.dictionary [key] + # end def __getitem__ + + def __setitem__ (self, key, value) : + self.dictionary [key] = value + # end def __setitem__ +# end class Transformer diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py new file mode 100644 index 000000000..50a6c0db8 --- /dev/null +++ b/ooopy/Transforms.py @@ -0,0 +1,1237 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. +# Reichergasse 131, A-3411 Weidling. +# Web: http://www.runtux.com Email: office@runtux.com +# All rights reserved +# **************************************************************************** +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+# **************************************************************************** + +from __future__ import absolute_import + +import time +import re +try : + from xml.etree.ElementTree import dump, SubElement, Element, tostring +except ImportError : + from elementtree.ElementTree import dump, SubElement, Element, tostring +from copy import deepcopy +from ooopy.OOoPy import OOoPy, autosuper +from ooopy.Transformer import files, split_tag, OOo_Tag, Transform +from ooopy.Transformer import mimetypes, namespace_by_name +from ooopy.Version import VERSION + +# counts in meta.xml +meta_counts = \ + ( 'character-count', 'image-count', 'object-count', 'page-count' + , 'paragraph-count', 'table-count', 'word-count' + ) + +class Access_Attribute (autosuper) : + """ For performance reasons we do not specify a separate transform + for each attribute-read or -change operation. Instead we define + all the attribute accesses we want to perform as objects that + follow the attribute access api and apply them all using an + Attribute_Access in one go. + """ + + def __init__ (self, key = None, prefix = None, ** kw) : + self.__super.__init__ (key = key, prefix = prefix, **kw) + self.key = key + if key : + if not prefix : + prefix = self.__class__.__name__ + self.key = ':'.join ((prefix, key)) + # end def __init__ + + def register (self, transformer) : + self.transformer = transformer + # end def register + + def use_value (self, oldval = None) : + """ Can change the given value by returning the new value. If + returning None or oldval the attribute stays unchanged. 
+ """ + raise NotImplementedError, "use_value must be defined in derived class" + # end def use_value + +# end class Access_Attribute + +class Get_Attribute (Access_Attribute) : + """ An example of not changing an attribute but only storing the + value in the transformer + """ + + def __init__ (self, tag, attr, key, transform = None, ** kw) : + self.__super.__init__ (key = key, **kw) + self.tag = tag + self.attribute = attr + self.transform = transform + # end def __init__ + + def use_value (self, oldval = None) : + self.transformer [self.key] = oldval + return None + # end def use_value + +# end def Get_Attribute + +class Get_Max (Access_Attribute) : + """ Get the maximum value of an attribute """ + + def __init__ (self, tag, attr, key, transform = None, ** kw) : + self.__super.__init__ (key = key, **kw) + self.tag = tag + self.attribute = attr + self.transform = transform + # end def __init__ + + def register (self, transformer) : + self.__super.register (transformer) + self.transformer [self.key] = -1 + # end def register + + def use_value (self, oldval = None) : + if self.transformer [self.key] < oldval : + self.transformer [self.key] = oldval + return None + # end def use_value + +# end def Get_Max + +class Renumber (Access_Attribute) : + """ Specifies a renumbering transform. OOo has a 'name' attribute + for several different tags, e.g., tables, frames, sections etc. + These names must be unique in the whole document. OOo itself + solves this by appending a unique number to a basename for each + element, e.g., sections are named 'Section1', 'Section2', ... + Renumber transforms can be applied to correct the numbering + after operations that destroy the unique numbering, e.g., after + a mailmerge where the same document is repeatedly appended. + + The force parameter specifies if the new renumbered name should + be inserted even if the attribute in question does not exist. 
+ """ + + def __init__ \ + (self, tag, name = None, attr = None, start = 1, force = False) : + self.__super.__init__ () + tag_ns, tag_name = split_tag (tag) + self.tag_ns = tag_ns + self.tag = tag + self.name = name or tag_name [0].upper () + tag_name [1:] + self.num = start + self.force = force + self.attribute = attr + # end def __init__ + + def register (self, transformer) : + self.__super.register (transformer) + if not self.attribute : + self.attribute = OOo_Tag (self.tag_ns, 'name', transformer.mimetype) + # end def register + + def use_value (self, oldval = None) : + if oldval is None and not self.force : + return + name = "%s%d" % (self.name, self.num) + self.num += 1 + return name + # end def use_value + +# end class Renumber + +class Set_Attribute (Access_Attribute) : + """ + Similar to the renumbering transform in that we are assigning + new values to some attributes. But in this case we give keys + into the Transformer dict to replace some tag attributes. + """ + + def __init__ \ + ( self + , tag + , attr + , key = None + , transform = None + , value = None + , oldvalue = None + , ** kw + ) : + self.__super.__init__ (key = key, ** kw) + self.tag = tag + self.attribute = attr + self.transform = transform + self.value = value + self.oldvalue = oldvalue + # end def __init__ + + def use_value (self, oldval) : + if oldval is None : + return None + if self.oldvalue and oldval != self.oldvalue : + return None + if self.key and self.transformer.has_key (self.key) : + return str (self.transformer [self.key]) + return self.value + # end def use_value + +# end class Set_Attribute + +def set_attributes_from_dict (tag, attr, d) : + """ Convenience function: iterate over a dict and return a list of + Set_Attribute objects specifying replacement of attributes in + the dictionary + """ + return [Set_Attribute (tag, attr, oldvalue = k, value = v) + for k,v in d.iteritems () + ] +# end def set_attributes_from_dict + +class Reanchor (Access_Attribute) : + """ + Similar to 
the renumbering transform in that we are assigning + new values to some attributes. But in this case we want to + relocate objects that are anchored to a page. + """ + + def __init__ (self, offset, tag, attr = None) : + self.__super.__init__ () + self.offset = int (offset) + self.tag = tag + self.attribute = attr + # end def __init__ + + def register (self, transformer) : + self.__super.register (transformer) + if not self.attribute : + self.attribute = \ + OOo_Tag ('text', 'anchor-page-number', transformer.mimetype) + # end def register + + def use_value (self, oldval) : + if oldval is None : + return oldval + return "%d" % (int (oldval) + self.offset) + # end def use_value + +# end class Reanchor + +# +# general transforms applicable to several .xml files +# + +class Attribute_Access (Transform) : + """ + Read or Change attributes in an OOo document. + Can be used for renumbering, moving anchored objects, etc. + Expects a list of attribute changer objects that follow the + attribute changer API. This API is very simple: + + - Member function "use_value" returns the new value of an + attribute, or if unchanged the old value + - The attribute "tag" gives the tag for an element we are + searching + - The attribute "attribute" gives the name of the attribute we + want to read or change. + For examples of the attribute changer API, see Renumber and + Reanchor above. + """ + filename = 'content.xml' + prio = 110 + + def __init__ (self, attrchangers, filename = None, ** kw) : + self.filename = filename or self.filename + self.attrchangers = {} + # allow several changers for a single tag + self.attrchangers [None] = [] + self.changers = attrchangers + self.__super.__init__ (** kw) + # end def __init__ + + def register (self, transformer) : + """ Register transformer with all attrchangers. 
""" + self.__super.register (transformer) + for r in self.changers : + if r.tag not in self.attrchangers : + self.attrchangers [r.tag] = [] + self.attrchangers [r.tag].append (r) + r.register (transformer) + # end def register + + def apply (self, root) : + """ Search for all tags for which we renumber and replace name """ + for n in [root] + root.findall ('.//*') : + changers = \ + self.attrchangers [None] + self.attrchangers.get (n.tag, []) + for r in changers : + nval = r.use_value (n.get (r.attribute)) + if nval is not None : + n.set (r.attribute, nval) + # end def apply + +# end class Attribute_Access + +# +# META-INF/manifest.xml transforms +# + +class Manifest_Append (Transform) : + """ + The Transformer stores a list of files (and contents) to append. + These files are added to the archive later but need to be + present in the manifest, too. + The file list in the Transformer currently doesn't store a media + type (which is one of the parameters in the manifest), the + current application of this transform is to add pictures -- + these don't have a media type in the files that were checked. + So for now we add an empty media type. + """ + filename = 'META-INF/manifest.xml' + prio = 1000 + + def apply (self, root) : + for n, node in enumerate (root) : + assert node.tag == self.oootag ('manifest', 'file-entry') + path = node.get (self.oootag ('manifest', 'full-path')) + assert (path) + if path == '/' : + break + else : + assert (not "The manifest needs a '/' entry") + for f, _ in self.transformer.appendfiles : + e = Element (self.oootag ('manifest', 'file-entry')) + e.attrib [self.oootag ('manifest', 'full-path')] = f + e.attrib [self.oootag ('manifest', 'media-type')] = '' + root.insert (n + 1, e) + n += 1 + # end def apply + +# end class Manifest_Append + +# +# meta.xml transforms +# + +class Editinfo (Transform) : + """ + This is an example of modifying OOo meta info (edit information, + author, etc). 
We set some of the items (program that generated + the OOo file, modification time, number of edit cyles and overall + edit duration). It's easy to subclass this transform and replace + the "replace" variable (pun intended) in the derived class. + """ + filename = 'meta.xml' + prio = 20 + repl = \ + { ('meta', 'generator') : 'OOoPy field replacement' + , ('dc', 'date') : time.strftime ('%Y-%m-%dT%H:%M:%S') + , ('meta', 'editing-cycles') : '0' + , ('meta', 'editing-duration') : 'PT0M0S' + } + replace = {} + # iterate over all mimetypes, so this works for all known mimetypes + # of OOo documents. + for m in mimetypes : + for params, value in repl.iteritems () : + replace [OOo_Tag (mimetype = m, *params)] = value + + def apply (self, root) : + for node in root.findall (self.oootag ('office', 'meta') + '/*') : + if self.replace.has_key (node.tag) : + node.text = self.replace [node.tag] + # end def apply +# end class Editinfo + +# +# settings.xml transforms +# + +class Autoupdate (Transform) : + """ + This is an example of modifying OOo settings. We set some of the + AutoUpdate configuration items in OOo to true. We also specify + that links should be updated when reading. + + This was originally intended to make OOo correctly display fields + if they were changed with the Field_Replace below + (similar to pressing F9 after loading the generated document in + OOo). In particular I usually make spaces depend on field + contents so that I don't have spurious spaces if a field is + empty. Now it would be nice if OOo displayed the spaces correctly + after loading a document (It does update the fields before + printing, so this is only a cosmetic problem :-). This apparently + does not work. 
def apply(self, root):
    """ Enable link and field auto-update in the settings tree.

        Looks for the config-item-set named 'configuration-settings'
        below office:settings and, inside it, sets LinkUpdateMode to
        '2' and FieldAutoUpdate / ChartAutoUpdate to 'true'.
        Does nothing if no such config-item-set exists.
    """
    name_attr = self.oootag('config', 'name')
    set_path = '/'.join(
        (self.oootag('office', 'settings'),
         self.oootag('config', 'config-item-set')))
    for config in root.findall(set_path):
        if config.get(name_attr) == 'configuration-settings':
            break
    else:
        # Either there is no config-item-set at all (previously an
        # AttributeError on None) or none carries the expected name
        # (previously the last, unrelated set was modified).
        return
    for node in config.findall(self.oootag('config', 'config-item')):
        name = node.get(name_attr)
        if name == 'LinkUpdateMode':  # update when reading
            node.text = '2'
        # update fields when reading
        if name in ('FieldAutoUpdate', 'ChartAutoUpdate'):
            node.text = 'true'
# end def apply
def apply(self, root):
    """ Replace the text of variable fields in the text body.

        Handles text:variable-set, text:variable-get and
        text:variable-input elements. The field's text:name is looked
        up as follows:
        - if self.replace is callable it is called with the name; any
          result other than None becomes the new text (so an empty
          string blanks the field),
        - otherwise the name is looked up in self.replace and, failing
          that, in self.dict.
        Fields without a replacement keep their text.
    """
    tbody = self.find_tbody(root)
    name_attr = self.oootag('text', 'name')
    # NOTE(review): the original code switched to the 'description'
    # attribute for 'text-input', but that tag was never part of this
    # list, so the branch could not run. text-input fields remain
    # unhandled here -- confirm whether they should be added.
    for tag in 'variable-set', 'variable-get', 'variable-input':
        for node in tbody.findall('.//' + self.oootag('text', tag)):
            name = node.get(name_attr)
            if callable(self.replace):
                replacement = self.replace(name)
                # Only None means "leave unchanged"; a falsy value
                # such as '' is a legitimate replacement.
                if replacement is not None:
                    node.text = replacement
            elif name in self.replace:
                node.text = self.replace[name]
            elif name in self.dict:
                node.text = self.dict[name]
# end def apply
def apply(self, root):
    """ Append a page-break paragraph style to office:automatic-styles.

        The new style is named P<n+1>, where n is the highest number
        found among existing style names matching self.para (0 if
        there is none). It derives from 'Standard', belongs to the
        'paragraph' family and carries a properties child with
        fo:break-after='page'. The chosen name is stored via
        self.set under 'stylename'.
    """
    styles = root.find(self.oootag('office', 'automatic-styles'))
    style_tag = self.oootag('style', 'style')
    name_attr = self.oootag('style', 'name')
    matches = [
        self.para.match(style.get(name_attr, ''))
        for style in styles.findall('./' + style_tag)
    ]
    numbers = [int(found.group(1)) for found in matches if found]
    stylename = 'P%d' % (max([0] + numbers) + 1)
    attributes = {
        name_attr: stylename,
        self.oootag('style', 'family'): 'paragraph',
        self.oootag('style', 'parent-style-name'): 'Standard',
    }
    page_break = SubElement(styles, style_tag, attributes)
    SubElement(
        page_break,
        self.properties_tag,
        {self.oootag('fo', 'break-after'): 'page'},
    )
    self.set('stylename', stylename)
# end def apply
def apply(self, root):
    """ Append an empty text:p with the page-break style to the body.

        The style name is self.stylename when that is set, otherwise
        it is read from self.transformer under self.stylekey.
        Result is e.g. <text:p text:style-name="P4"/> at the end of
        the element returned by self.find_tbody.
    """
    tbody = self.find_tbody(root)
    if self.stylename:
        stylename = self.stylename
    else:
        stylename = self.transformer[self.stylekey]
    paragraph_tag = self.oootag('text', 'p')
    attributes = {self.oootag('text', 'style-name'): stylename}
    SubElement(tbody, paragraph_tag, attributes)
# end def apply
def append_to_body(self, cp):
    """ Append the items of cp [i] to self.bodyparts [i] for every
        index i of self.bodyparts, keeping the order of the items.
    """
    for index, target in enumerate(self.bodyparts):
        for item in cp[index]:
            target.append(item)
# end def append_to_body
meta-info """ + return int (self.transformer [':'.join ((classname, prefix + var))]) + # end def _get_meta + + def _set_meta (self, var, value, classname = 'Set_Attribute', prefix = "") : + """ set page- and paragraph-count etc. meta-info """ + self.transformer [':'.join ((classname, prefix + var))] = str (value) + # end def _set_meta +# end class _Body_Concat + +class Mailmerge (_Body_Concat) : + """ + This transformation is used to create a mailmerge document using + the current document as the template. In the constructor we get + an iterator that provides a data set for each item in the + iteration. Elements the iterator has to provide are either + something that follows the Mapping Type interface (it looks like + a dict) or something that is callable and can be used for + name-value lookups. + + A precondition for this transform is the application of the + Addpagebreak_Style to guarantee that we know the style + for adding a page break to the current document. Alternatively + the stylename (or the stylekey if a different name should be used + for lookup in the current transformer) can be given in the + constructor. + """ + filename = 'content.xml' + prio = 60 + + def __init__ \ + (self, iterator, stylename = None, stylekey = None, ** kw) : + self.__super.__init__ (** kw) + self.iterator = iterator + self.stylename = stylename + self.stylekey = stylekey + # end def __init__ + + def apply (self, root) : + """ + Copy old tbody, create new empty one and repeatedly append the + new tbody. 
def tree_serialise(element, prefix='', mimetype=mimetypes[1]):
    """ Serialise a style-element of an OOo document (e.g., a
        style:font-decl, style:default-style, etc declaration).
        The style:name attribute is dropped, so two styles that differ
        only in name serialise identically; the result is hashable and
        can be used as a dictionary key.

        The serialisation format is a tuple containing
        - prefix + tag as the first item,
        - the remaining attributes as a sorted tuple of (key, value)
          pairs as the second item,
        - the serialisations of the children (same prefix and
          mimetype) as the following items.
    """
    attr = dict(element.attrib)
    attr.pop(OOo_Tag('style', 'name', mimetype), None)
    # sorted () instead of items ().sort (): dict views have no sort
    # method on Python 3, the result is the same on Python 2.
    serial = [prefix + element.tag, tuple(sorted(attr.items()))]
    serial.extend(
        tree_serialise(child, prefix, mimetype) for child in element)
    return tuple(serial)
# end def tree_serialise
+ # attribute : + # tag + ref_attrs.update \ + ({ OOo_Tag ('style', 'parent-style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('style', 'master-page-name', m) : + OOo_Tag ('style', 'master-page', m) + , OOo_Tag ('style', 'page-layout-name', m) : # OOo 2.X + OOo_Tag ('style', 'page-layout', m) + , OOo_Tag ('style', 'page-master-name', m) : + OOo_Tag ('style', 'page-master', m) + , OOo_Tag ('table', 'style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('text', 'style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('draw', 'style-name', m) : + OOo_Tag ('style', 'style', m) + , OOo_Tag ('draw', 'text-style-name', m) : + OOo_Tag ('style', 'style', m) + }) + stylefiles = ['styles.xml', 'content.xml'] + oofiles = stylefiles + ['meta.xml'] + + body_decl_sections = ['variable-decl', 'sequence-decl'] + + def __init__ (self, * docs, ** kw) : + self.__super.__init__ (** kw) + self.docs = [] + for doc in docs : + self.docs.append (OOoPy (infile = doc)) + assert (self.docs [-1].mimetype == self.docs [0].mimetype) + # end def __init__ + + def apply_all (self, trees) : + assert (self.docs [0].mimetype == self.transformer.mimetype) + self.serialised = {} + self.stylenames = {} + self.namemaps = [{}] + self.tab_depend = {} + for s in self.ref_attrs.itervalues () : + self.namemaps [0][s] = {} + self.body_decls = {} + for s in self.body_decl_sections : + self.body_decls [s] = {} + self.trees = {} + for f in self.oofiles : + self.trees [f] = [trees [f].getroot ()] + self.sections = {} + for f in self.stylefiles : + self.sections [f] = {} + for node in self.trees [f][0] : + self.sections [f][node.tag] = node + for d in self.docs : + self.namemaps.append ({}) + for s in self.ref_attrs.itervalues () : + self.namemaps [-1][s] = {} + for f in self.oofiles : + self.trees [f].append (d.read (f).getroot ()) + # append a pagebreak style, will be optimized away if duplicate + pbs = Addpagebreak_Style (transformer = self.transformer) + pbs.apply (self.trees 
def apply_tab_correction(self, node):
    """ Extend the tab stops of a style that derives from a corrected
        style.

        If the parent-style-name of node is a key of self.tab_depend,
        every style:tab-stops element found inside a properties child
        of node is passed to self.insert_tabs together with the
        largest tab position it already contains (0 if it is empty).
        Positions are parsed by stripping the last two characters of
        the style:position value.
    """
    stops_tag = self.oootag('style', 'tab-stops')
    stop_tag = self.oootag('style', 'tab-stop')
    position_attr = self.oootag('style', 'position')
    parent = node.get(self.oootag('style', 'parent-style-name'))
    if parent not in self.tab_depend:
        return
    for prop in node:
        if prop.tag != self.properties_tag:
            continue
        for sub in prop:
            if sub.tag != stops_tag:
                continue
            self.tab_depend[parent] = 1
            highest = 0
            for stop in sub:
                assert (stop.tag == stop_tag)
                position = float(stop.get(position_attr)[:-2])
                if highest < position:
                    highest = position
            self.insert_tabs(sub, highest)
# end def apply_tab_correction
def insert_tabs(self, element, max=0):
    """ Insert tab stops into the current element. Optionally after
        max = the current maximum tab-position.

        self.tab_correct holds the tab distance as a number followed
        by a unit (e.g. '1.25cm'). Up to 35 style:tab-stop children
        are appended to element at multiples of that distance; only
        positions greater than max are added. Positions are written
        as '<number><unit>'.
    """
    distance_spec = self.tab_correct
    # Split after the last digit: everything up to and including it
    # is the number, the rest is the unit. The previous slicing cut
    # one character too early for multi-digit values ('1.25cm' was
    # read as 1.2 with unit '5cm').
    split = len(distance_spec)
    while split and not distance_spec[split - 1].isdigit():
        split -= 1
    distance = float(distance_spec[:split])
    unit = distance_spec[split:]
    stop_tag = self.oootag('style', 'tab-stop')
    position_attr = self.oootag('style', 'position')
    for count in range(1, 36):
        pos = distance * count
        if pos > max:
            SubElement(
                element, stop_tag, {position_attr: '%s%s' % (pos, unit)})
# end def insert_tabs
{node.get (sn) : 1} + stps = SubElement \ + (props, self.oootag ('style', 'tab-stops')) + self.insert_tabs (stps) + else : + props.set (k,v) + if len (props) or props.attrib : + node.append (props) + # end def merge_defaultstyle + + def _newname (self, key, oldname) : + stylenum = 0 + if (key, oldname) not in self.stylenames : + self.stylenames [(key, oldname)] = 1 + return oldname + newname = basename = 'Concat_%s' % oldname + while (key, newname) in self.stylenames : + stylenum += 1 + newname = '%s%d' % (basename, stylenum) + self.stylenames [(key, newname)] = 1 + return newname + # end def _newname + + def set_pagestyle (self) : + """ For all documents: search for the first paragraph of the tbody + and get its style. Modify this style to include a reference + to the default page-style if it doesn't contain a reference + to a page style. Insert the new style into the list of + styles and modify the first paragraph to use the new page + style. + This procedure is necessary to make appended documents use + their page style instead of the master page style of the + first document. + FIXME: We should search the style hierarchy backwards for + the style of the first paragraph to check if there is a + reference to a page-style somewhere and not override the + page-style in this case. Otherwise appending complex + documents that use a different page-style for the first page + will not work if the page style is referenced in a style + from which the first paragraph style derives. 
+ """ + for idx in range (1, len (self.docs) + 1) : + croot = self.trees ['content.xml'][idx] + sroot = self.trees ['styles.xml'] [idx] + tbody = self.find_tbody (croot) + para = tbody.find ('./' + self.oootag ('text', 'p')) + if para is None : + para = tbody.find ('./' + self.oootag ('text', 'list')) + tsn = self.oootag ('text', 'style-name') + sname = para.get (tsn) + styles = croot.find (self.oootag ('office', 'automatic-styles')) + ost = sroot.find (self.oootag ('office', 'styles')) + mst = sroot.find (self.oootag ('office', 'master-styles')) + assert mst is not None and len (mst) + assert mst [0].tag == self.oootag ('style', 'master-page') + sntag = self.oootag ('style', 'name') + master = mst [0].get (sntag) + mpn = self.oootag ('style', 'master-page-name') + stytag = self.oootag ('style', 'style') + style = None + for s in styles : + if s.tag == stytag : + # Explicit references to default style converted to + # explicit references to new page style. + if s.get (mpn) == '' : + s.set (mpn, master) + if s.get (sntag) == sname : + style = s + if style is None : + for s in ost : + if s.tag == stytag and s.get (sntag) == sname : + style = s + break + if style is not None and not style.get (mpn) : + newstyle = deepcopy (style) + # Don't register with newname: will be rewritten later + # when appending. We assume that an original doc does + # not already contain a style with _Concat suffix. + newname = sname + '_Concat' + para.set (tsn, newname) + newstyle.set (self.oootag ('style', 'name'), newname) + newstyle.set (mpn, master) + styles.append (newstyle) + # end def set_pagestyle + + def style_merge (self, oofile) : + """ Loop over all the docs in our document list and look up the + styles there. If a style matches an existing style in the + original document, register the style name for later + transformation if the style name in the original document + does not match the style name in the appended document. 
If + no match is found, append style to master document and add + to serialisation. If the style name already exists in the + master document, a new style name is created. Names of + parent styles are changed when appending -- this means that + parent style names already have to be defined earlier in the + document. + + If there is a reference to a parent style that is not yet + defined, and the parent style is defined later, it is + already too late, so an assertion is raised in this case. + OOo seems to ensure declaration order of dependent styles, + so this should not be a problem. + """ + for idx in range (len (self.trees [oofile])) : + namemap = self.namemaps [idx] + root = self.trees [oofile][idx] + delnode = [] + for nodeidx, node in enumerate (root) : + if node.tag not in self.style_containers : + continue + prefix = '' + # font_decls may have same name in styles.xml and content.xml + if node.tag == self.font_decls_tag : + prefix = oofile + default_style = None + for n in node : + if ( n.tag == self.oootag ('style', 'default-style') + and ( n.get (self.oootag ('style', 'family')) + == 'paragraph' + ) + ) : + default_style = n + name = n.get (self.oootag ('style', 'name'), None) + if not name : continue + if ( idx != 0 + and name == 'Standard' + and n.get (self.oootag ('style', 'class')) == 'text' + and ( n.get (self.oootag ('style', 'family')) + == 'paragraph' + ) + ) : + self.merge_defaultstyle (default_style, n) + self.apply_tab_correction (n) + key = prefix + n.tag + if key not in namemap : namemap [key] = {} + tr = self._attr_rename (idx) + tr.apply (n) + sn = tree_serialise (n, prefix, self.mimetype) + if sn in self.serialised : + newname = self.serialised [sn] + if name != newname : + assert \ + ( name not in namemap [key] + or namemap [key][name] == newname + ) + namemap [key][name] = newname + # optimize original doc: remove duplicate styles + if not idx and node.tag != self.font_decls_tag : + pass + #delnode.append (nodeidx) + else : + newname = 
def renumber_all(mimetype):
    """ Factory function for all renumberings parameterized with
        mimetype: returns one Attribute_Access holding the changers
        for frames, sections, tables, images and xml ids, in that
        order.
    """
    factories = (
        renumber_frames,
        renumber_sections,
        renumber_tables,
        renumber_images,
        renumber_xml_id,
    )
    changers = []
    for factory in factories:
        changers += factory(mimetype)
    return Attribute_Access(changers)
# end def renumber_all
def get_meta(mimetype):
    """ Factory function for Attribute_Access to get all interesting
        meta-data: one Get_Attribute per name in meta_counts, reading
        that attribute of the meta:document-statistic element and
        using the attribute name as the storage key. The resulting
        transform runs on meta.xml with priority 20.
    """
    getters = [
        Get_Attribute(
            OOo_Tag('meta', 'document-statistic', mimetype),
            OOo_Tag('meta', attr, mimetype),
            attr,
        )
        for attr in meta_counts
    ]
    return Attribute_Access(getters, prio=20, filename='meta.xml')
# end def get_meta