diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-05-01 13:54:20 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-06-17 13:21:28 +0200 |
commit | edbbd133068b0d5d0d101a9b9eff1658fbcc0384 (patch) | |
tree | 3fac64755afa9cd223626d5dd432ab1bcec83513 /ooopy | |
parent | a18087eff29bb316af47bb5fe53a59c43edc57f0 (diff) | |
download | Ishtar-edbbd133068b0d5d0d101a9b9eff1658fbcc0384.tar.bz2 Ishtar-edbbd133068b0d5d0d101a9b9eff1658fbcc0384.zip |
Quick adaptation of ooopy for python3
Diffstat (limited to 'ooopy')
-rw-r--r-- | ooopy/OOoPy.py | 430 | ||||
-rw-r--r-- | ooopy/Transformer.py | 1462 | ||||
-rw-r--r-- | ooopy/Transforms.py | 366 | ||||
-rw-r--r-- | ooopy/Version.py | 2 |
4 files changed, 487 insertions, 1773 deletions
diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py index 87e0b8110..aaa152606 100644 --- a/ooopy/OOoPy.py +++ b/ooopy/OOoPy.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. # Reichergasse 131, A-3411 Weidling. # Web: http://www.runtux.com Email: office@runtux.com @@ -21,297 +21,229 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # **************************************************************************** -from __future__ import absolute_import -from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo -try : - from StringIO import StringIO -except ImportError : - from io import StringIO -from datetime import datetime -try : - from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map -except ImportError : - from elementtree.ElementTree import ElementTree, fromstring, _namespace_map -from tempfile import mkstemp -from ooopy.Version import VERSION -import os +from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo +from io import BytesIO +from datetime import datetime +from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map + + +class _autosuper (type): + def __init__(cls, name, bases, dict): + super(_autosuper, cls).__init__(name, bases, dict) + setattr(cls, "_%s__super" % name, super(cls)) + + +class autosuper(metaclass=_autosuper): + def __init__(self, *args, **kw): + self.__super.__init__() + + +files = [ + 'content.xml', 'styles.xml', 'meta.xml', 'settings.xml', + 'META-INF/manifest.xml' +] + +mimetypes = ['application/vnd.sun.xml.writer', + 'application/vnd.oasis.opendocument.text'] + +namespace_by_name = { + mimetypes [0]: { + 'chart': "http://openoffice.org/2000/chart", + 'config': "http://openoffice.org/2001/config", + 'dc': "http://purl.org/dc/elements/1.1/", + 'dr3d': "http://openoffice.org/2000/dr3d", + 'draw': "http://openoffice.org/2000/drawing", + 'fo': "http://www.w3.org/1999/XSL/Format", + 'form': "http://openoffice.org/2000/form", + 'math': "http://www.w3.org/1998/Math/MathML", + 'meta': "http://openoffice.org/2000/meta", + 'number': "http://openoffice.org/2000/datastyle", + 'office': "http://openoffice.org/2000/office", + 'script': "http://openoffice.org/2000/script", + 'style': "http://openoffice.org/2000/style", + 'svg': "http://www.w3.org/2000/svg", + 'table': "http://openoffice.org/2000/table", + 'text': "http://openoffice.org/2000/text", + 'xlink': "http://www.w3.org/1999/xlink", + 'manifest': "http://openoffice.org/2001/manifest"}, + mimetypes[1]: { + 'chart': "urn:oasis:names:tc:opendocument:xmlns:chart:1.0", + 'config': "urn:oasis:names:tc:opendocument:xmlns:config:1.0", + 'dc': "http://purl.org/dc/elements/1.1/", + 'dr3d': "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0", + 'draw': "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0", + 'fo': "urn:oasis:names:tc:opendocument:xmlns:" "xsl-fo-compatible:1.0", + 'form': "urn:oasis:names:tc:opendocument:xmlns:form:1.0", + 'math': "http://www.w3.org/1998/Math/MathML", + 'meta': "urn:oasis:names:tc:opendocument:xmlns:meta:1.0", + 'number': "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0", + 'office': "urn:oasis:names:tc:opendocument:xmlns:office:1.0", + 'officeooo': "http://openoffice.org/2009/office", + 'script': "urn:oasis:names:tc:opendocument:xmlns:script:1.0", + 'style': "urn:oasis:names:tc:opendocument:xmlns:style:1.0", + 'svg': "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0", + 'table': "urn:oasis:names:tc:opendocument:xmlns:table:1.0", + 'text': "urn:oasis:names:tc:opendocument:xmlns:text:1.0", + 'xlink': "http://www.w3.org/1999/xlink", + 'manifest': "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0", + 'tableooo': "http://openoffice.org/2009/table", + 'transformation': "http://www.w3.org/2003/g/data-view#", + # OOo 1.X tags and some others: + 'ooo': "http://openoffice.org/2004/office", + 'ooow': "http://openoffice.org/2004/writer", + 'oooc': "http://openoffice.org/2004/calc", + 'o_dom': "http://www.w3.org/2001/xml-events", + 'o_xforms': "http://www.w3.org/2002/xforms", + 'xs': "http://www.w3.org/2001/XMLSchema", + 'xsi': "http://www.w3.org/2001/XMLSchema-instance", + # predefined xml namespace, see + # http://www.w3.org/TR/2006/REC-xml-names11-20060816/ + # "It MAY, but need not, be declared, and MUST NOT be undeclared + # or bound to any other namespace name." + 'xml': "http://www.w3.org/XML/1998/namespace" + } +} + +for mimetype in namespace_by_name.values(): + for k, v in mimetype.items(): + if v in _namespace_map: + assert _namespace_map[v] == k + _namespace_map[v] = k -class _autosuper (type) : - def __init__ (cls, name, bases, dict) : - super (_autosuper, cls).__init__ (name, bases, dict) - setattr (cls, "_%s__super" % name, super (cls)) - # end def __init__ -# end class _autosuper - -class autosuper (object) : - __metaclass__ = _autosuper - def __init__ (self, *args, **kw) : - self.__super.__init__ () - # end def __init__ -# end class autosuper - -files = \ - [ 'content.xml' - , 'styles.xml' - , 'meta.xml' - , 'settings.xml' - , 'META-INF/manifest.xml' - ] - -mimetypes = \ - [ 'application/vnd.sun.xml.writer' - , 'application/vnd.oasis.opendocument.text' - ] -namespace_by_name = \ - { mimetypes [0] : - { 'chart' : "http://openoffice.org/2000/chart" - , 'config' : "http://openoffice.org/2001/config" - , 'dc' : "http://purl.org/dc/elements/1.1/" - , 'dr3d' : "http://openoffice.org/2000/dr3d" - , 'draw' : "http://openoffice.org/2000/drawing" - , 'fo' : "http://www.w3.org/1999/XSL/Format" - , 'form' : "http://openoffice.org/2000/form" - , 'math' : "http://www.w3.org/1998/Math/MathML" - , 'meta' : "http://openoffice.org/2000/meta" - , 'number' : "http://openoffice.org/2000/datastyle" - , 'office' : "http://openoffice.org/2000/office" - , 'script' : "http://openoffice.org/2000/script" - , 'style' : "http://openoffice.org/2000/style" - , 'svg' : "http://www.w3.org/2000/svg" - , 'table' : "http://openoffice.org/2000/table" - , 'text' : "http://openoffice.org/2000/text" - , 'xlink' : "http://www.w3.org/1999/xlink" - , 'manifest' : "http://openoffice.org/2001/manifest" - } - , mimetypes [1] : - { 'chart' : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" - , 'config' : "urn:oasis:names:tc:opendocument:xmlns:config:1.0" - , 'dc' : "http://purl.org/dc/elements/1.1/" - , 'dr3d' : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" - , 'draw' : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" - , 'fo' : "urn:oasis:names:tc:opendocument:xmlns:" - "xsl-fo-compatible:1.0" - , 'form' : "urn:oasis:names:tc:opendocument:xmlns:form:1.0" - , 'math' : "http://www.w3.org/1998/Math/MathML" - , 'meta' : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" - , 'number' : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" - , 'office' : "urn:oasis:names:tc:opendocument:xmlns:office:1.0" - , 'officeooo': "http://openoffice.org/2009/office" - , 'script' : "urn:oasis:names:tc:opendocument:xmlns:script:1.0" - , 'style' : "urn:oasis:names:tc:opendocument:xmlns:style:1.0" - , 'svg' : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" - , 'table' : "urn:oasis:names:tc:opendocument:xmlns:table:1.0" - , 'text' : "urn:oasis:names:tc:opendocument:xmlns:text:1.0" - , 'xlink' : "http://www.w3.org/1999/xlink" - , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" - , 'tableooo' : "http://openoffice.org/2009/table" - , 'transformation' : "http://www.w3.org/2003/g/data-view#" - # OOo 1.X tags and some others: - , 'ooo' : "http://openoffice.org/2004/office" - , 'ooow' : "http://openoffice.org/2004/writer" - , 'oooc' : "http://openoffice.org/2004/calc" - , 'o_dom' : "http://www.w3.org/2001/xml-events" - , 'o_xforms' : "http://www.w3.org/2002/xforms" - , 'xs' : "http://www.w3.org/2001/XMLSchema" - , 'xsi' : "http://www.w3.org/2001/XMLSchema-instance" - # predefined xml namespace, see - # http://www.w3.org/TR/2006/REC-xml-names11-20060816/ - # "It MAY, but need not, be declared, and MUST NOT be undeclared - # or bound to any other namespace name." - , 'xml' : "http://www.w3.org/XML/1998/namespace" - } - } - -for mimetype in namespace_by_name.itervalues () : - for k, v in mimetype.iteritems () : - if v in _namespace_map : - assert (_namespace_map [v] == k) - _namespace_map [v] = k class OOoElementTree (autosuper) : """ - An ElementTree for OOo document XML members. Behaves like the - orginal ElementTree (in fact it delegates almost everything to a - real instance of ElementTree) except for the write method, that - writes itself back to the OOo XML file in the OOo zip archive it - came from. + An ElementTree for OOo document XML members. Behaves like the + orginal ElementTree (in fact it delegates almost everything to a + real instance of ElementTree) except for the write method, that + writes itself back to the OOo XML file in the OOo zip archive it + came from. """ - def __init__ (self, ooopy, zname, root) : + def __init__(self, ooopy, zname, root): self.ooopy = ooopy self.zname = zname - self.tree = ElementTree (root) - # end def __init__ + self.tree = ElementTree(root) - def write (self) : + def write(self): self.ooopy.write (self.zname, self.tree) - # end def write - def __getattr__ (self, name) : + def __getattr__(self, name) : """ - Delegate everything to our ElementTree attribute. + Delegate everything to our ElementTree attribute. """ if not name.startswith ('__') : result = getattr (self.tree, name) setattr (self, name, result) return result raise AttributeError (name) - # end def __getattr__ -# end class OOoElementTree -class OOoPy (autosuper) : +class OOoPy(autosuper): """ - Wrapper for OpenOffice.org zip files (all OOo documents are - really zip files internally). - - from ooopy.OOoPy import OOoPy - >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw') - >>> o.mimetype - 'application/vnd.sun.xml.writer' - >>> for f in files : - ... e = o.read (f) - ... e.write () - ... - >>> o.close () - >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt') - >>> o.mimetype - 'application/vnd.oasis.opendocument.text' - >>> for f in files : - ... e = o.read (f) - ... e.write () - ... - >>> o.append_file ('Pictures/empty', '') - >>> o.close () - >>> o = OOoPy (infile = 'out2.odt') - >>> for f in o.izip.infolist () : - ... print f.filename, f.create_system, f.compress_type - mimetype 0 8 - content.xml 0 8 - styles.xml 0 8 - meta.xml 0 8 - settings.xml 0 8 - META-INF/manifest.xml 0 8 - Pictures/empty 0 8 - Configurations2/statusbar/ 0 0 - Configurations2/accelerator/current.xml 0 8 - Configurations2/floater/ 0 0 - Configurations2/popupmenu/ 0 0 - Configurations2/progressbar/ 0 0 - Configurations2/menubar/ 0 0 - Configurations2/toolbar/ 0 0 - Configurations2/images/Bitmaps/ 0 0 - Thumbnails/thumbnail.png 0 8 + Wrapper for OpenOffice.org zip files (all OOo documents are + really zip files internally). """ - def __init__ \ - ( self - , infile = None - , outfile = None - , write_mode = 'w' - , mimetype = None - ) : + def __init__(self, infile=None, outfile=None, write_mode='w', + mimetype=None): """ - Open an OOo document, if no outfile is given, we open the - file read-only. Otherwise the outfile has to be different - from the infile -- the python ZipFile can't deal with - read-write access. In case an outfile is given, we open it - in "w" mode as a zip file, unless write_mode is specified - (the only allowed case would be "a" for appending to an - existing file, see pythons ZipFile documentation for - details). If no infile is given, the user is responsible for - providing all necessary files in the resulting output file. - - It seems that OOo needs to have the mimetype as the first - archive member (at least with mimetype as the first member - it works, the order may not be arbitrary) to recognize a zip - archive as an OOo file. When copying from a given infile, we - use the same order of elements in the resulting output. When - creating new elements we make sure the mimetype is the first - in the resulting archive. - - Note that both, infile and outfile can either be filenames - or file-like objects (e.g. StringIO). - - The mimetype is automatically determined if an infile is - given. If only writing is desired, the mimetype should be - set. + Open an OOo document, if no outfile is given, we open the + file read-only. Otherwise the outfile has to be different + from the infile -- the python ZipFile can't deal with + read-write access. In case an outfile is given, we open it + in "w" mode as a zip file, unless write_mode is specified + (the only allowed case would be "a" for appending to an + existing file, see pythons ZipFile documentation for + details). If no infile is given, the user is responsible for + providing all necessary files in the resulting output file. + + It seems that OOo needs to have the mimetype as the first + archive member (at least with mimetype as the first member + it works, the order may not be arbitrary) to recognize a zip + archive as an OOo file. When copying from a given infile, we + use the same order of elements in the resulting output. When + creating new elements we make sure the mimetype is the first + in the resulting archive. + + Note that both, infile and outfile can either be filenames + or file-like objects (e.g. StringIO). + + The mimetype is automatically determined if an infile is + given. If only writing is desired, the mimetype should be + set. """ assert (infile != outfile) self.izip = self.ozip = None - if infile : - self.izip = ZipFile (infile, 'r', ZIP_DEFLATED) + if infile: + self.izip = ZipFile(infile, 'r', ZIP_DEFLATED) if outfile : - self.ozip = ZipFile (outfile, write_mode, ZIP_DEFLATED) + self.ozip = ZipFile(outfile, write_mode, ZIP_DEFLATED) self.written = {} - if mimetype : + if mimetype: self.mimetype = mimetype - elif self.izip : - self.mimetype = self.izip.read ('mimetype') - # end def __init__ + elif self.izip: + self.mimetype = self.izip.read('mimetype') + if isinstance(self.mimetype, bytes): + self.mimetype = self.mimetype.decode() - def read (self, zname) : + def read(self, zname): """ - return an OOoElementTree object for the given OOo document - archive member name. Currently an OOo document contains the - following XML files:: - - * content.xml: the text of the OOo document - * styles.xml: style definitions - * meta.xml: meta-information (author, last changed, ...) - * settings.xml: settings in OOo - * META-INF/manifest.xml: contents of the archive - - There is an additional file "mimetype" that always contains - the string "application/vnd.sun.xml.writer" for OOo 1.X files - and the string "application/vnd.oasis.opendocument.text" for - OOo 2.X files. + return an OOoElementTree object for the given OOo document + archive member name. Currently an OOo document contains the + following XML files:: + + * content.xml: the text of the OOo document + * styles.xml: style definitions + * meta.xml: meta-information (author, last changed, ...) + * settings.xml: settings in OOo + * META-INF/manifest.xml: contents of the archive + + There is an additional file "mimetype" that always contains + the string "application/vnd.sun.xml.writer" for OOo 1.X files + and the string "application/vnd.oasis.opendocument.text" for + OOo 2.X files. """ - assert (self.izip) + assert self.izip return OOoElementTree (self, zname, fromstring (self.izip.read (zname))) - # end def read - def _write (self, zname, str) : - now = datetime.utcnow ().timetuple () - info = ZipInfo (zname, date_time = now) - info.create_system = 0 # pretend to be fat + def _write(self, zname, str): + now = datetime.utcnow().timetuple() + info = ZipInfo(zname, date_time=now) + info.create_system = 0 # pretend to be fat info.compress_type = ZIP_DEFLATED - self.ozip.writestr (info, str) + self.ozip.writestr(info, str) self.written [zname] = 1 - # end def _write - def write (self, zname, etree) : - assert (self.ozip) + def write(self, zname, etree): + assert self.ozip # assure mimetype is the first member in new archive - if 'mimetype' not in self.written : - self._write ('mimetype', self.mimetype) - str = StringIO () - etree.write (str) - self._write (zname, str.getvalue ()) - # end def write + if 'mimetype' not in self.written: + self._write('mimetype', self.mimetype) + str = BytesIO() + etree.write(str) + self._write(zname, str.getvalue()) - def append_file (self, zname, str) : - """ Official interface to _write: Append a file to the end of - the archive. + def append_file (self, zname, str): + """ + Official interface to _write: Append a file to the end of the archive. """ - if zname not in self.written : + if zname not in self.written: self._write (zname, str) - # end def append_file - def close (self) : + def close(self): """ - Close the zip files. According to documentation of zipfile in - the standard python lib, this has to be done to be sure - everything is written. We copy over the not-yet written files - from izip before closing ozip. + Close the zip files. According to documentation of zipfile in + the standard python lib, this has to be done to be sure + everything is written. We copy over the not-yet written files + from izip before closing ozip. """ - if self.izip and self.ozip : - for f in self.izip.infolist () : + if self.izip and self.ozip: + for f in self.izip.infolist(): if f.filename not in self.written : - self.ozip.writestr (f, self.izip.read (f.filename)) - for i in self.izip, self.ozip : - if i : i.close () + self.ozip.writestr(f, self.izip.read(f.filename)) + for i in self.izip, self.ozip: + if i: + i.close() self.izip = self.ozip = None - # end def close - __del__ = close # auto-close on deletion of object -# end class OOoPy + __del__ = close # auto-close on deletion of object diff --git a/ooopy/Transformer.py b/ooopy/Transformer.py index dbbab125d..4e21bb331 100644 --- a/ooopy/Transformer.py +++ b/ooopy/Transformer.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. # Reichergasse 131, A-3411 Weidling. # Web: http://www.runtux.com Email: office@runtux.com @@ -21,1377 +21,181 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # **************************************************************************** -from __future__ import absolute_import +# import time +# import re +# from xml.etree.ElementTree import dump, SubElement, Element, tostring +from xml.etree.ElementTree import _namespace_map +# from copy import deepcopy +from ooopy.OOoPy import autosuper # , OOoPy +from ooopy.OOoPy import files, mimetypes, namespace_by_name +# from ooopy.Version import VERSION -import time -import re -try : - from xml.etree.ElementTree import dump, SubElement, Element, tostring - from xml.etree.ElementTree import _namespace_map -except ImportError : - from elementtree.ElementTree import dump, SubElement, Element, tostring - from elementtree.ElementTree import _namespace_map -from copy import deepcopy -from ooopy.OOoPy import OOoPy, autosuper -from ooopy.OOoPy import files, mimetypes, namespace_by_name -from ooopy.Version import VERSION -def OOo_Tag (namespace, name, mimetype) : - """Return combined XML tag - - >>> OOo_Tag ('xml', 'id', mimetypes [1]) - '{http://www.w3.org/XML/1998/namespace}id' - >>> OOo_Tag ('text', 'list', mimetypes [1]) - '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list' +def OOo_Tag(namespace, name, mimetype): """ - return "{%s}%s" % (namespace_by_name [mimetype][namespace], name) -# end def OOo_Tag + Return combined XML tag + """ + return "{%s}%s" % (namespace_by_name[mimetype][namespace], name) + -def split_tag (tag) : - """ Split tag into symbolic namespace and name part -- inverse - operation of OOo_Tag. +def split_tag(tag): + """ + Split tag into symbolic namespace and name part -- inverse + operation of OOo_Tag. """ ns, t = tag.split ('}') - return (_namespace_map [ns [1:]], t) -# end def split_tag + return _namespace_map [ns [1:]], t -class Transform (autosuper) : - """ - Base class for individual transforms on OOo files. An individual - transform needs a filename variable for specifying the OOo file - the transform should be applied to and an optional prio. - Individual transforms are applied according to their prio - setting, higher prio means later application of a transform. - The filename variable must specify one of the XML files which are - part of the OOo document (see files variable above). As - the names imply, content.xml contains the contents of the - document (text and ad-hoc style definitions), styles.xml contains - the style definitions, meta.xml contains meta information like - author, editing time, etc. and settings.xml is used to store - OOo's settings (menu Tools->Configure). +class Transform(autosuper): + """ + Base class for individual transforms on OOo files. An individual + transform needs a filename variable for specifying the OOo file + the transform should be applied to and an optional prio. + Individual transforms are applied according to their prio + setting, higher prio means later application of a transform. + + The filename variable must specify one of the XML files which are + part of the OOo document (see files variable above). As + the names imply, content.xml contains the contents of the + document (text and ad-hoc style definitions), styles.xml contains + the style definitions, meta.xml contains meta information like + author, editing time, etc. and settings.xml is used to store + OOo's settings (menu Tools->Configure). """ prio = 100 - textbody_names = \ - { mimetypes [0] : 'body' - , mimetypes [1] : 'text' - } - paragraph_props = \ - { mimetypes [0] : 'properties' - , mimetypes [1] : 'paragraph-properties' - } - font_decls = \ - { mimetypes [0] : 'font-decls' - , mimetypes [1] : 'font-face-decls' - } - - def __init__ (self, prio = None, transformer = None) : - if prio is not None : - self.prio = prio + textbody_names = { mimetypes[0]: 'body', mimetypes[1]: 'text'} + paragraph_props = { + mimetypes[0]: 'properties', + mimetypes[1]: 'paragraph-properties' + } + font_decls = { + mimetypes[0]: 'font-decls', + mimetypes[1]: 'font-face-decls' + } + + def __init__(self, prio=None, transformer=None): + if prio is not None: + self.prio = prio self.transformer = None - if transformer : - self.register (transformer) - # end def __init__ + if transformer: + self.register(transformer) def apply (self, root) : """ Apply myself to the element given as root """ - raise NotImplementedError, 'derived transforms must implement "apply"' - # end def apply + raise NotImplementedError('derived transforms must implement "apply"') def apply_all (self, trees) : - """ Apply myself to all the files given in trees. The variable - trees contains a dictionary of ElementTree indexed by the - name of the OOo File. - The standard case is that only one file (namely - self.filename) is used. """ - assert (self.filename) - self.apply (trees [self.filename].getroot ()) - # end def apply_all + Apply myself to all the files given in trees. The variable + trees contains a dictionary of ElementTree indexed by the + name of the OOo File. + The standard case is that only one file (namely + self.filename) is used. + """ + assert self.filename + self.apply(trees[self.filename].getroot()) - def find_tbody (self, root) : - """ Find the node which really contains the text -- different - for different OOo versions. + def find_tbody(self, root) : + """ + Find the node which really contains the text -- different + for different OOo versions. """ tbody = root - if tbody.tag != self.textbody_tag : - tbody = tbody.find ('.//' + self.textbody_tag) + if tbody.tag != self.textbody_tag: + tbody = tbody.find('.//' + self.textbody_tag) return tbody - # end def find_tbody - - def register (self, transformer) : - """ Registering with a transformer means being able to access - variables stored in the tranformer by other transforms. - Also needed for tag-computation: The transformer knows which - version of OOo document we are processing. + def register(self, transformer) : """ - self.transformer = transformer - mt = self.mimetype = transformer.mimetype - self.textbody_name = self.textbody_names [mt] + Registering with a transformer means being able to access + variables stored in the tranformer by other transforms. + Also needed for tag-computation: The transformer knows which + version of OOo document we are processing. + """ + self.transformer = transformer + mt = self.mimetype = transformer.mimetype + self.textbody_name = self.textbody_names [mt] self.paragraph_props = self.paragraph_props [mt] - self.properties_tag = self.oootag ('style', self.paragraph_props) - self.textbody_tag = self.oootag ('office', self.textbody_name) - self.font_decls_tag = self.oootag ('office', self.font_decls [mt]) - # end def register + self.properties_tag = self.oootag('style', self.paragraph_props) + self.textbody_tag = self.oootag('office', self.textbody_name) + self.font_decls_tag = self.oootag('office', self.font_decls [mt]) - def oootag (self, namespace, name) : + def oootag(self, namespace, name): """ Compute long tag version """ - return OOo_Tag (namespace, name, self.mimetype) - # end def oootag + return OOo_Tag(namespace, name, self.mimetype) - def set (self, variable, value) : + def set(self, variable, value) : """ Set variable in our transformer using naming convention. """ self.transformer [self._varname (variable)] = value - # end def set def _varname (self, name) : - """ For fulfilling the naming convention of the transformer - dictionary (every entry in this dictionary should be prefixed - with the class name of the transform) we have this - convenience method. - Returns variable name prefixed with own class name. """ - return ":".join ((self.__class__.__name__, name)) - # end def _varname + For fulfilling the naming convention of the transformer + dictionary (every entry in this dictionary should be prefixed + with the class name of the transform) we have this + convenience method. + Returns variable name prefixed with own class name. + """ + return ":".join((self.__class__.__name__, name)) -# end class Transform -class Transformer (autosuper) : +class Transformer(autosuper): """ - Class for applying a set of transforms to a given ooopy object. - The transforms are applied to the specified file in priority - order. When applying transforms we have a mechanism for - communication of transforms. We give the transformer to the - individual transforms as a parameter. The transforms may use the - transformer like a dictionary for storing values and retrieving - values left by previous transforms. - As a naming convention each transform should use its class name - as a prefix for storing values in the dictionary. - >>> import Transforms - >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts - >>> try : - ... from io import StringIO, BytesIO - ... StringIO = BytesIO - ... except ImportError : - ... from StringIO import StringIO - >>> sio = BytesIO () - >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) - 'Standard' - >>> def cb (name) : - ... r = { 'street' : 'Beispielstrasse 42' - ... , 'firstname' : 'Hugo' - ... , 'salutation' : 'Frau' - ... } - ... if r.has_key (name) : return r [name] - ... return None - ... - >>> p = get_meta (m) - >>> t = Transformer (m, p) - >>> t ['a'] = 'a' - >>> t ['a'] - 'a' - >>> t.transform (o) - >>> p.set ('a', 'b') - >>> t ['Attribute_Access:a'] - 'b' - >>> t = Transformer ( - ... m - ... , Transforms.Autoupdate () - ... , Transforms.Editinfo () - ... , Transforms.Field_Replace (prio = 99, replace = cb) - ... , Transforms.Field_Replace - ... ( replace = - ... { 'salutation' : '' - ... , 'firstname' : 'Erika' - ... , 'lastname' : 'Musterfrau' - ... , 'country' : 'D' - ... , 'postalcode' : '00815' - ... , 'city' : 'Niemandsdorf' - ... } - ... ) - ... , Transforms.Addpagebreak_Style () - ... , Transforms.Addpagebreak () - ... ) - >>> t.transform (o) - >>> o.close () - >>> ov = sio.getvalue () - >>> f = open ("testout.sxw", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> c = o.read ('content.xml') - >>> m = o.mimetype - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) - >>> for node in body.findall (vset) : - ... name = node.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', node.text - salutation : None - firstname : Erika - lastname : Musterfrau - street : Beispielstrasse 42 - country : D - postalcode : 00815 - city : Niemandsdorf - salutation : None - firstname : Erika - lastname : Musterfrau - street : Beispielstrasse 42 - country : D - postalcode : 00815 - city : Niemandsdorf - >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) - 'P2' - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) - >>> c = o.read ('content.xml') - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Addpagebreak_Style () - ... , Transforms.Mailmerge - ... ( iterator = - ... ( dict (firstname = 'Erika', lastname = 'Nobody') - ... , dict (firstname = 'Eric', lastname = 'Wizard') - ... , cb - ... ) - ... ) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform (o) - >>> for i in meta_counts : - ... print i, t [':'.join (('Set_Attribute', i))] - character-count 951 - image-count 0 - object-count 0 - page-count 3 - paragraph-count 113 - table-count 3 - word-count 162 - >>> name = t ['Addpagebreak_Style:stylename'] - >>> name - 'P2' - >>> o.close () - >>> ov = sio.getvalue () - >>> f = open ("testout2.sxw", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', m)) - >>> for n in body.findall ('.//*') : - ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) - ... if zidx : - ... print ':'.join(split_tag (n.tag)), zidx - draw:text-box 0 - draw:rect 1 - draw:text-box 3 - draw:rect 4 - draw:text-box 6 - draw:rect 7 - draw:text-box 2 - draw:text-box 5 - draw:text-box 8 - >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : - ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : - ... print n.tag - {http://openoffice.org/2000/text}p - {http://openoffice.org/2000/text}p - >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) - >>> for n in body.findall (vset) : - ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : - ... name = n.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', n.text - firstname : Erika - lastname : Nobody - firstname : Eric - lastname : Wizard - firstname : Hugo - lastname : Testman - firstname : Erika - lastname : Nobody - firstname : Eric - lastname : Wizard - firstname : Hugo - lastname : Testman - >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : - ... print n.get (OOo_Tag ('draw', 'name', m)), - ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) - Frame1 1 - Frame2 2 - Frame3 3 - Frame4 None - Frame5 None - Frame6 None - >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : - ... print n.get (OOo_Tag ('text', 'name', m)) - Section1 - Section2 - Section3 - Section4 - Section5 - Section6 - Section7 - Section8 - Section9 - Section10 - Section11 - Section12 - Section13 - Section14 - Section15 - Section16 - Section17 - Section18 - >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : - ... print n.get (OOo_Tag ('table', 'name', m)) - Table1 - Table2 - Table3 - >>> r = o.read ('meta.xml') - >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) - >>> for i in meta_counts : - ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) - character-count '951' - image-count '0' - object-count '0' - page-count '3' - paragraph-count '113' - table-count '3' - word-count '162' - >>> o.close () - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) - >>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw') - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Concatenate (*tf) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform (o) - >>> for i in meta_counts : - ... print i, repr (t [':'.join (('Set_Attribute', i))]) - character-count '1131' - image-count '0' - object-count '0' - page-count '3' - paragraph-count '168' - table-count '2' - word-count '160' - >>> o.close () - >>> ov = sio.getvalue () - >>> f = open ("testout3.sxw", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> s = o.read ('styles.xml') - >>> for n in c.findall ('./*/*') : - ... name = n.get (OOo_Tag ('style', 'name', m)) - ... if name : - ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) - ... print '"%s", "%s"' % (name, parent) - "Tahoma1", "None" - "Bitstream Vera Sans", "None" - "Tahoma", "None" - "Nimbus Roman No9 L", "None" - "Courier New", "None" - "Arial Black", "None" - "New Century Schoolbook", "None" - "Helvetica", "None" - "Table1", "None" - "Table1.A", "None" - "Table1.A1", "None" - "Table1.E1", "None" - "Table1.A2", "None" - "Table1.E2", "None" - "P1", "None" - "fr1", "Frame" - "fr2", "None" - "fr3", "Frame" - "Sect1", "None" - "gr1", "None" - "P2", "Standard" - "Standard_Concat", "None" - "Concat_P1", "Concat_Frame contents" - "Concat_P2", "Concat_Frame contents" - "P3", "Concat_Frame contents" - "P4", "Concat_Frame contents" - "P5", "Concat_Standard" - "P6", "Concat_Standard" - "P7", "Concat_Frame contents" - "P8", "Concat_Frame contents" - "P9", "Concat_Frame contents" - "P10", "Concat_Frame contents" - "P11", "Concat_Frame contents" - "P12", "Concat_Frame contents" - "P13", "Concat_Frame contents" - "P15", "Concat_Standard" - "P16", "Concat_Standard" - "P17", "Concat_Standard" - "P18", "Concat_Standard" - "P19", "Concat_Standard" - "P20", "Concat_Standard" - "P21", "Concat_Standard" - "P22", "Concat_Standard" - "P23", "Concat_Standard" - "T1", "None" - "Concat_fr1", "Concat_Frame" - "Concat_fr2", "Concat_Frame" - "Concat_fr3", "Concat_Frame" - "fr4", "Concat_Frame" - "fr5", "Concat_Frame" - "fr6", "Concat_Frame" - "Concat_Sect1", "None" - "N0", "None" - "N2", "None" - "P15_Concat", "Concat_Standard" - >>> for n in s.findall ('./*/*') : - ... name = n.get (OOo_Tag ('style', 'name', m)) - ... if name : - ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) - ... print '"%s", "%s"' % (name, parent) - "Tahoma1", "None" - "Bitstream Vera Sans", "None" - "Tahoma", "None" - "Nimbus Roman No9 L", "None" - "Courier New", "None" - "Arial Black", "None" - "New Century Schoolbook", "None" - "Helvetica", "None" - "Standard", "None" - "Text body", "Standard" - "List", "Text body" - "Table Contents", "Text body" - "Table Heading", "Table Contents" - "Caption", "Standard" - "Frame contents", "Text body" - "Index", "Standard" - "Frame", "None" - "OLE", "None" - "Concat_Standard", "None" - "Concat_Text body", "Concat_Standard" - "Concat_List", "Concat_Text body" - "Concat_Caption", "Concat_Standard" - "Concat_Frame contents", "Concat_Text body" - "Concat_Index", "Concat_Standard" - "Horizontal Line", "Concat_Standard" - "Internet link", "None" - "Visited Internet Link", "None" - "Concat_Frame", "None" - "Concat_OLE", "None" - "pm1", "None" - "Concat_pm1", "None" - "Standard", "None" - "Concat_Standard", "None" - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : - ... name = n.get (OOo_Tag ('text', 'name', m)) - ... print name - salutation - firstname - lastname - street - country - postalcode - city - date - invoice.invoice_no - invoice.abo.aboprice.abotype.description - address.salutation - address.title - address.firstname - address.lastname - address.function - address.street - address.country - address.postalcode - address.city - invoice.subscriber.salutation - invoice.subscriber.title - invoice.subscriber.firstname - invoice.subscriber.lastname - invoice.subscriber.function - invoice.subscriber.street - invoice.subscriber.country - invoice.subscriber.postalcode - invoice.subscriber.city - invoice.period_start - invoice.period_end - invoice.currency.name - invoice.amount - invoice.subscriber.initial - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : - ... name = n.get (OOo_Tag ('text', 'name', m)) - ... print name - Illustration - Table - Text - Drawing - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : - ... name = n.get (OOo_Tag ('text', 'style-name', m)) - ... if not name or name.startswith ('Concat') : - ... print ">%s<" % name - >Concat_P1< - >Concat_P2< - >Concat_Frame contents< - >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : - ... attrs = 'name', 'style-name', 'z-index' - ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] - ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) - ... print attrs - ['Frame1', 'fr1', '0', '1'] - ['Frame2', 'fr1', '3', '2'] - ['Frame3', 'Concat_fr1', '6', '3'] - ['Frame4', 'Concat_fr2', '7', '3'] - ['Frame5', 'Concat_fr3', '8', '3'] - ['Frame6', 'Concat_fr1', '9', '3'] - ['Frame7', 'fr4', '10', '3'] - ['Frame8', 'fr4', '11', '3'] - ['Frame9', 'fr4', '12', '3'] - ['Frame10', 'fr4', '13', '3'] - ['Frame11', 'fr4', '14', '3'] - ['Frame12', 'fr4', '15', '3'] - ['Frame13', 'fr5', '16', '3'] - ['Frame14', 'fr4', '18', '3'] - ['Frame15', 'fr4', '19', '3'] - ['Frame16', 'fr4', '20', '3'] - ['Frame17', 'fr6', '17', '3'] - ['Frame18', 'fr4', '23', '3'] - ['Frame19', 'fr3', '2', None] - ['Frame20', 'fr3', '5', None] - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : - ... attrs = 'name', 'style-name' - ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] - ... print attrs - ['Section1', 'Sect1'] - ['Section2', 'Sect1'] - ['Section3', 'Sect1'] - ['Section4', 'Sect1'] - ['Section5', 'Sect1'] - ['Section6', 'Sect1'] - ['Section7', 'Concat_Sect1'] - ['Section8', 'Concat_Sect1'] - ['Section9', 'Concat_Sect1'] - ['Section10', 'Concat_Sect1'] - ['Section11', 'Concat_Sect1'] - ['Section12', 'Concat_Sect1'] - ['Section13', 'Concat_Sect1'] - ['Section14', 'Concat_Sect1'] - ['Section15', 'Concat_Sect1'] - ['Section16', 'Concat_Sect1'] - ['Section17', 'Concat_Sect1'] - ['Section18', 'Concat_Sect1'] - ['Section19', 'Concat_Sect1'] - ['Section20', 'Concat_Sect1'] - ['Section21', 'Concat_Sect1'] - ['Section22', 'Concat_Sect1'] - ['Section23', 'Concat_Sect1'] - ['Section24', 'Concat_Sect1'] - ['Section25', 'Concat_Sect1'] - ['Section26', 'Concat_Sect1'] - ['Section27', 'Concat_Sect1'] - ['Section28', 'Sect1'] - ['Section29', 'Sect1'] - ['Section30', 'Sect1'] - ['Section31', 'Sect1'] - ['Section32', 'Sect1'] - ['Section33', 'Sect1'] - >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : - ... attrs = 'style-name', 'text-style-name', 'z-index' - ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] - ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) - ... print attrs - ['gr1', 'P1', '1', '1'] - ['gr1', 'P1', '4', '2'] - >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : - ... attrs = 'style-name', 'text-style-name', 'z-index' - ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] - ... print attrs - ['gr1', 'P1', '24'] - ['gr1', 'P1', '22'] - ['gr1', 'P1', '21'] - >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : - ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : - ... attrs = 'name', 'class', 'family' - ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] - ... print attrs - ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) - ... if props is not None and len (props) : - ... props [0].tag - ['Concat_Standard', 'text', 'paragraph'] - '{http://openoffice.org/2000/style}tab-stops' - ['Concat_Text body', 'text', 'paragraph'] - ['Concat_List', 'list', 'paragraph'] - ['Concat_Caption', 'extra', 'paragraph'] - ['Concat_Frame contents', 'extra', 'paragraph'] - ['Concat_Index', 'index', 'paragraph'] - ['Concat_Frame', None, 'graphics'] - ['Concat_OLE', None, 'graphics'] - >>> for n in c.findall ('.//*') : - ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) - ... if zidx : - ... print ':'.join(split_tag (n.tag)), zidx - draw:text-box 0 - draw:rect 1 - draw:text-box 3 - draw:rect 4 - draw:text-box 6 - draw:text-box 7 - draw:text-box 8 - draw:text-box 9 - draw:text-box 10 - draw:text-box 11 - draw:text-box 12 - draw:text-box 13 - draw:text-box 14 - draw:text-box 15 - draw:text-box 16 - draw:text-box 18 - draw:text-box 19 - draw:text-box 20 - draw:text-box 17 - draw:text-box 23 - draw:line 24 - draw:text-box 2 - draw:text-box 5 - draw:line 22 - draw:line 21 - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio) - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Addpagebreak_Style () - ... , Transforms.Mailmerge - ... ( iterator = - ... ( dict - ... ( Spett = "Spettabile" - ... , contraente = "First person" - ... , indirizzo = "street? 1" - ... , tipo = "racc. A.C." - ... , luogo = "Varese" - ... , oggetto = "Saluti" - ... ) - ... , dict - ... ( Spett = "Egregio" - ... , contraente = "Second Person" - ... , indirizzo = "street? 2" - ... , tipo = "Raccomandata" - ... , luogo = "Gavirate" - ... , oggetto = "Ossequi" - ... ) - ... ) - ... ) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform(o) - >>> o.close() - >>> ov = sio.getvalue () - >>> f = open ("carta-out.stw", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) - >>> for node in body.findall (vset) : - ... name = node.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', node.text - Spett : Spettabile - contraente : First person - indirizzo : street? 1 - Spett : Egregio - contraente : Second Person - indirizzo : street? 2 - tipo : racc. A.C. - luogo : Varese - oggetto : Saluti - tipo : Raccomandata - luogo : Gavirate - oggetto : Ossequi - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Addpagebreak_Style () - ... , Transforms.Mailmerge - ... ( iterator = - ... ( dict (firstname = 'Erika', lastname = 'Nobody') - ... , dict (firstname = 'Eric', lastname = 'Wizard') - ... , cb - ... ) - ... ) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform (o) - >>> for i in meta_counts : - ... print i, t [':'.join (('Set_Attribute', i))] - character-count 951 - image-count 0 - object-count 0 - page-count 3 - paragraph-count 53 - table-count 3 - word-count 162 - >>> name = t ['Addpagebreak_Style:stylename'] - >>> name - 'P2' - >>> o.close () - >>> ov = sio.getvalue () - >>> f = open ("testout.odt", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', m)) - >>> for n in body.findall ('.//*') : - ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) - ... if zidx : - ... print ':'.join(split_tag (n.tag)), zidx - draw:frame 0 - draw:rect 1 - draw:frame 3 - draw:rect 4 - draw:frame 6 - draw:rect 7 - draw:frame 2 - draw:frame 5 - draw:frame 8 - >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : - ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : - ... print n.tag - {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p - {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p - >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) - >>> for n in body.findall (vset) : - ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : - ... name = n.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', n.text - firstname : Erika - lastname : Nobody - firstname : Eric - lastname : Wizard - firstname : Hugo - lastname : Testman - firstname : Erika - lastname : Nobody - firstname : Eric - lastname : Wizard - firstname : Hugo - lastname : Testman - >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : - ... print n.get (OOo_Tag ('draw', 'name', m)), - ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) - Frame1 1 - Frame2 2 - Frame3 3 - Frame4 None - Frame5 None - Frame6 None - >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : - ... print n.get (OOo_Tag ('text', 'name', m)) - Section1 - Section2 - Section3 - Section4 - Section5 - Section6 - Section7 - Section8 - Section9 - Section10 - Section11 - Section12 - Section13 - Section14 - Section15 - Section16 - Section17 - Section18 - >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : - ... print n.get (OOo_Tag ('table', 'name', m)) - Table1 - Table2 - Table3 - >>> r = o.read ('meta.xml') - >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) - >>> for i in meta_counts : - ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) - character-count '951' - image-count '0' - object-count '0' - page-count '3' - paragraph-count '53' - table-count '3' - word-count '162' - >>> o.close () - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio) - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Addpagebreak_Style () - ... , Transforms.Mailmerge - ... ( iterator = - ... ( dict - ... ( Spett = "Spettabile" - ... , contraente = "First person" - ... , indirizzo = "street? 1" - ... , tipo = "racc. A.C." - ... , luogo = "Varese" - ... , oggetto = "Saluti" - ... ) - ... , dict - ... ( Spett = "Egregio" - ... , contraente = "Second Person" - ... , indirizzo = "street? 2" - ... , tipo = "Raccomandata" - ... , luogo = "Gavirate" - ... , oggetto = "Ossequi" - ... ) - ... ) - ... ) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform(o) - >>> o.close() - >>> ov = sio.getvalue () - >>> f = open ("carta-out.odt", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) - >>> for node in body.findall (vset) : - ... name = node.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', node.text - Spett : Spettabile - contraente : First person - indirizzo : street? 1 - Spett : Egregio - contraente : Second Person - indirizzo : street? 2 - tipo : racc. A.C. - luogo : Varese - oggetto : Saluti - tipo : Raccomandata - luogo : Gavirate - oggetto : Ossequi - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) - >>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt') - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Concatenate (*tf) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform (o) - >>> for i in meta_counts : - ... print i, repr (t [':'.join (('Set_Attribute', i))]) - character-count '1131' - image-count '0' - object-count '0' - page-count '3' - paragraph-count '80' - table-count '2' - word-count '159' - >>> o.close () - >>> ov = sio.getvalue () - >>> f = open ("testout3.odt", "wb") - >>> f.write (ov) - >>> f.close () - >>> o = OOoPy (infile = sio) - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> s = o.read ('styles.xml') - >>> for n in c.findall ('./*/*') : - ... name = n.get (OOo_Tag ('style', 'name', m)) - ... if name : - ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) - ... print '"%s", "%s"' % (name, parent) - "Tahoma1", "None" - "Bitstream Vera Sans", "None" - "Tahoma", "None" - "Nimbus Roman No9 L", "None" - "Courier New", "None" - "Arial Black", "None" - "New Century Schoolbook", "None" - "Times New Roman", "None" - "Arial", "None" - "Helvetica", "None" - "Table1", "None" - "Table1.A", "None" - "Table1.A1", "None" - "Table1.E1", "None" - "Table1.A2", "None" - "Table1.E2", "None" - "P1", "None" - "fr1", "Frame" - "fr2", "Frame" - "Sect1", "None" - "gr1", "None" - "P2", "Standard" - "Standard_Concat", "None" - "Concat_P1", "Concat_Frame_20_contents" - "Concat_P2", "Concat_Frame_20_contents" - "P3", "Concat_Frame_20_contents" - "P4", "Concat_Standard" - "P5", "Concat_Standard" - "P6", "Concat_Frame_20_contents" - "P7", "Concat_Frame_20_contents" - "P8", "Concat_Frame_20_contents" - "P9", "Concat_Frame_20_contents" - "P10", "Concat_Frame_20_contents" - "P11", "Concat_Frame_20_contents" - "P12", "Concat_Frame_20_contents" - "P14", "Concat_Standard" - "P15", "Concat_Standard" - "P16", "Concat_Standard" - "P17", "Concat_Standard" - "P18", "Concat_Standard" - "P19", "Concat_Standard" - "P20", "Concat_Standard" - "P21", "Concat_Standard" - "P22", "Concat_Standard" - "P23", "Concat_Standard" - "Concat_fr1", "Frame" - "Concat_fr2", "Frame" - "fr3", "Frame" - "fr4", "Frame" - "fr5", "Frame" - "fr6", "Frame" - "Concat_gr1", "None" - "N0", "None" - "N2", "None" - "P14_Concat", "Concat_Standard" - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : - ... name = n.get (OOo_Tag ('text', 'name', m)) - ... print name - salutation - firstname - lastname - street - country - postalcode - city - date - invoice.invoice_no - invoice.abo.aboprice.abotype.description - address.salutation - address.title - address.firstname - address.lastname - address.function - address.street - address.country - address.postalcode - address.city - invoice.subscriber.salutation - invoice.subscriber.title - invoice.subscriber.firstname - invoice.subscriber.lastname - invoice.subscriber.function - invoice.subscriber.street - invoice.subscriber.country - invoice.subscriber.postalcode - invoice.subscriber.city - invoice.period_start - invoice.period_end - invoice.currency.name - invoice.amount - invoice.subscriber.initial - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : - ... name = n.get (OOo_Tag ('text', 'name', m)) - ... print name - Illustration - Table - Text - Drawing - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : - ... name = n.get (OOo_Tag ('text', 'style-name', m)) - ... if not name or name.startswith ('Concat') : - ... print ':'.join(split_tag (n.tag)), ">%s<" % name - text:p >None< - text:p >None< - text:p >Concat_P1< - text:p >Concat_P1< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_P2< - text:p >Concat_Frame_20_contents< - text:p >None< - text:p >None< - text:p >None< - >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : - ... attrs = 'name', 'style-name', 'z-index' - ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] - ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) - ... print attrs - ['Frame1', 'fr1', '0', '1'] - ['Frame2', 'fr1', '3', '2'] - ['Frame3', 'Concat_fr1', '6', '3'] - ['Frame4', 'Concat_fr2', '7', '3'] - ['Frame5', 'fr3', '8', '3'] - ['Frame6', 'Concat_fr1', '9', '3'] - ['Frame7', 'fr4', '10', '3'] - ['Frame8', 'fr4', '11', '3'] - ['Frame9', 'fr4', '12', '3'] - ['Frame10', 'fr4', '13', '3'] - ['Frame11', 'fr4', '14', '3'] - ['Frame12', 'fr4', '15', '3'] - ['Frame13', 'fr5', '16', '3'] - ['Frame14', 'fr4', '18', '3'] - ['Frame15', 'fr4', '19', '3'] - ['Frame16', 'fr4', '20', '3'] - ['Frame17', 'fr6', '17', '3'] - ['Frame18', 'fr4', '23', '3'] - ['Frame19', 'fr2', '2', None] - ['Frame20', 'fr2', '5', None] - >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : - ... attrs = 'name', 'style-name' - ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] - ... print attrs - ['Section1', 'Sect1'] - ['Section2', 'Sect1'] - ['Section3', 'Sect1'] - ['Section4', 'Sect1'] - ['Section5', 'Sect1'] - ['Section6', 'Sect1'] - ['Section7', 'Sect1'] - ['Section8', 'Sect1'] - ['Section9', 'Sect1'] - ['Section10', 'Sect1'] - ['Section11', 'Sect1'] - ['Section12', 'Sect1'] - ['Section13', 'Sect1'] - ['Section14', 'Sect1'] - ['Section15', 'Sect1'] - ['Section16', 'Sect1'] - ['Section17', 'Sect1'] - ['Section18', 'Sect1'] - ['Section19', 'Sect1'] - ['Section20', 'Sect1'] - ['Section21', 'Sect1'] - ['Section22', 'Sect1'] - ['Section23', 'Sect1'] - ['Section24', 'Sect1'] - ['Section25', 'Sect1'] - ['Section26', 'Sect1'] - ['Section27', 'Sect1'] - ['Section28', 'Sect1'] - ['Section29', 'Sect1'] - ['Section30', 'Sect1'] - ['Section31', 'Sect1'] - ['Section32', 'Sect1'] - ['Section33', 'Sect1'] - >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : - ... attrs = 'style-name', 'text-style-name', 'z-index' - ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] - ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) - ... print attrs - ['gr1', 'P1', '1', '1'] - ['gr1', 'P1', '4', '2'] - >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : - ... attrs = 'style-name', 'text-style-name', 'z-index' - ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] - ... print attrs - ['Concat_gr1', 'P1', '24'] - ['Concat_gr1', 'P1', '22'] - ['Concat_gr1', 'P1', '21'] - >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : - ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : - ... attrs = 'name', 'display-name', 'class', 'family' - ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] - ... print attrs - ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) - ... if props is not None and len (props) : - ... props [0].tag - ['Concat_Standard', None, 'text', 'paragraph'] - ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph'] - ['Concat_List', None, 'list', 'paragraph'] - ['Concat_Caption', None, 'extra', 'paragraph'] - ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph'] - ['Concat_Index', None, 'index', 'paragraph'] - >>> for n in c.findall ('.//*') : - ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) - ... if zidx : - ... print ':'.join(split_tag (n.tag)), zidx - draw:frame 0 - draw:rect 1 - draw:frame 3 - draw:rect 4 - draw:frame 6 - draw:frame 7 - draw:frame 8 - draw:frame 9 - draw:frame 10 - draw:frame 11 - draw:frame 12 - draw:frame 13 - draw:frame 14 - draw:frame 15 - draw:frame 16 - draw:frame 18 - draw:frame 19 - draw:frame 20 - draw:frame 17 - draw:frame 23 - draw:line 24 - draw:frame 2 - draw:frame 5 - draw:line 22 - draw:line 21 - >>> from os import system - >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt ' - ... '-o testout.odt ' - ... 'salutation=Frau firstname=Erika lastname=Musterfrau ' - ... 'country=D postalcode=00815 city=Niemandsdorf ' - ... 'street="Beispielstrasse 42"') - 0 - >>> o = OOoPy (infile = 'testout.odt') - >>> c = o.read ('content.xml') - >>> m = o.mimetype - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) - >>> for node in body.findall (vset) : - ... name = node.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', node.text - salutation : Frau - firstname : Erika - lastname : Musterfrau - street : Beispielstrasse 42 - country : D - postalcode : 00815 - city : Niemandsdorf - salutation : Frau - firstname : Erika - lastname : Musterfrau - street : Beispielstrasse 42 - country : D - postalcode : 00815 - city : Niemandsdorf - >>> o.close () - >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' " - ... "testfiles/carta.odt testfiles/x.csv") - 0 - >>> o = OOoPy (infile = 'testout.odt') - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) - >>> for node in body.findall (vset) : - ... name = node.get (OOo_Tag ('text', 'name', m)) - ... print name, ':', node.text - Spett : Spettabile - contraente : First person - indirizzo : street? 1 - Spett : Egregio - contraente : Second Person - indirizzo : street? 2 - tipo : racc. A.C. - luogo : Varese - oggetto : Saluti - tipo : Raccomandata - luogo : Gavirate - oggetto : Ossequi - >>> o.close () - >>> infile = 'testfiles/testenum.odt' - >>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt') - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Addpagebreak_Style () - ... , Transforms.Mailmerge - ... ( iterator = - ... ( dict (firstname = 'Erika', lastname = 'Nobody') - ... , dict (firstname = 'Eric', lastname = 'Wizard') - ... , cb - ... ) - ... ) - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... ) - >>> t.transform (o) - >>> o.close () - >>> o = OOoPy (infile = 'xyzzy.odt') - >>> m = o.mimetype - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> textlist = './/' + OOo_Tag ('text', 'list', m) - >>> for node in body.findall (textlist) : - ... id = node.get (OOo_Tag ('xml', 'id', m)) - ... print 'xml:id', ':', id - xml:id : list1 - xml:id : list2 - xml:id : list3 - >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt') - >>> m = o.mimetype - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Concatenate ('testfiles/page2.odt') - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... , Transforms.Manifest_Append () - ... ) - >>> t.transform (o) - >>> o.close () - >>> o = OOoPy (infile = 'xyzzy.odt') - >>> c = o.read ('META-INF/manifest.xml') - >>> for node in c.getroot () : - ... fe = node.get (OOo_Tag ('manifest', 'full-path', m)) - ... print fe - / - Pictures/10000000000000C80000007941B1A419.jpg - Pictures/10000000000000DC000000B02E191635.jpg - Pictures/10000000000000DC000000A337377AAA.jpg - meta.xml - settings.xml - content.xml - Thumbnails/thumbnail.png - layout-cache - manifest.rdf - Configurations2/accelerator/current.xml - Configurations2/ - styles.xml - >>> for f in o.izip.infolist () : - ... print f.filename - mimetype - settings.xml - META-INF/manifest.xml - content.xml - meta.xml - styles.xml - Pictures/10000000000000C80000007941B1A419.jpg - Pictures/10000000000000DC000000B02E191635.jpg - Pictures/10000000000000DC000000A337377AAA.jpg - Thumbnails/thumbnail.png - layout-cache - manifest.rdf - Configurations2/images/Bitmaps/ - Configurations2/accelerator/current.xml - >>> sio = StringIO () - >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio) - >>> m = o.mimetype - >>> t = Transformer ( - ... o.mimetype - ... , get_meta (o.mimetype) - ... , Transforms.Concatenate ('testfiles/tbl_second.odt') - ... , renumber_all (o.mimetype) - ... , set_meta (o.mimetype) - ... , Transforms.Fix_OOo_Tag () - ... , Transforms.Manifest_Append () - ... ) - >>> t.transform (o) - >>> o.close () - >>> o = OOoPy (infile = sio) - >>> c = o.read ('content.xml') - >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) - >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m) - >>> for table in body.findall (tbls) : - ... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m)) - ... if name : - ... print name - ... for t in table.findall ('.//') : - ... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m)) - ... if name : - ... print name - Tabella1 - Tabella1.A - Tabella1.A1 - Tabella1.B1 - Tabella1.A2 - Tabella1.B2 - Tabella1 - Tabella1.A - Tabella1.A1 - Tabella1.B1 - Tabella1.A2 - Tabella1.B2 + Class for applying a set of transforms to a given ooopy object. + The transforms are applied to the specified file in priority + order. When applying transforms we have a mechanism for + communication of transforms. We give the transformer to the + individual transforms as a parameter. The transforms may use the + transformer like a dictionary for storing values and retrieving + values left by previous transforms. + As a naming convention each transform should use its class name + as a prefix for storing values in the dictionary. """ - def __init__ (self, mimetype, *tf) : + def __init__(self, mimetype, *tf): assert (mimetype in mimetypes) - self.mimetype = mimetype - self.transforms = {} - for t in tf : - self.insert (t) - self.dictionary = {} - self.has_key = self.dictionary.has_key + self.mimetype = mimetype + self.transforms = {} + for t in tf: + self.insert(t) + self.dictionary = {} self.__contains__ = self.has_key # 2-tuples of filename, content - self.appendfiles = [] - # end def __init__ + self.appendfiles = [] - def insert (self, transform) : + def has_key(self, key): + return key in self.dictionary.keys() + + def insert(self, transform): """Insert a new transform""" t = transform - if t.prio not in self.transforms : - self.transforms [t.prio] = [] - self.transforms [t.prio].append (t) - t.register (self) - # end def append + if t.prio not in self.transforms: + self.transforms[t.prio] = [] + self.transforms[t.prio].append(t) + t.register(self) - def transform (self, ooopy) : + def transform(self, ooopy): """ - Apply all the transforms in priority order. - Priority order is global over all transforms. + Apply all the transforms in priority order. + Priority order is global over all transforms. """ self.trees = {} - for f in files : - self.trees [f] = ooopy.read (f) - #self.dictionary = {} # clear dict when transforming another ooopy - prios = self.transforms.keys () - prios.sort () - for p in prios : - for t in self.transforms [p] : - t.apply_all (self.trees) - for e in self.trees.itervalues () : - e.write () - for fname, fcontent in self.appendfiles : - e.ooopy.append_file (fname, fcontent) - # end def transform - - def __getitem__ (self, key) : - return self.dictionary [key] - # end def __getitem__ - - def __setitem__ (self, key, value) : - self.dictionary [key] = value - # end def __setitem__ -# end class Transformer + for f in files: + self.trees[f] = ooopy.read(f) + # self.dictionary = {} # clear dict when transforming another ooopy + prios = list(self.transforms.keys()) + prios.sort() + for p in prios: + for t in self.transforms[p]: + t.apply_all(self.trees) + for e in self.trees.values(): + e.write() + for fname, fcontent in self.appendfiles: + e.ooopy.append_file(fname, fcontent) + + def __getitem__(self, key): + return self.dictionary[key] + + def __setitem__(self, key, value): + self.dictionary[key] = value diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py index 50a6c0db8..37e7179e8 100644 --- a/ooopy/Transforms.py +++ b/ooopy/Transforms.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. # Reichergasse 131, A-3411 Weidling. # Web: http://www.runtux.com Email: office@runtux.com @@ -21,114 +21,102 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # **************************************************************************** -from __future__ import absolute_import - import time import re -try : - from xml.etree.ElementTree import dump, SubElement, Element, tostring -except ImportError : - from elementtree.ElementTree import dump, SubElement, Element, tostring -from copy import deepcopy -from ooopy.OOoPy import OOoPy, autosuper -from ooopy.Transformer import files, split_tag, OOo_Tag, Transform -from ooopy.Transformer import mimetypes, namespace_by_name -from ooopy.Version import VERSION +from xml.etree.ElementTree import SubElement, Element # , dump , tostring +from copy import deepcopy +from ooopy.OOoPy import OOoPy, autosuper +from ooopy.Transformer import split_tag, OOo_Tag, Transform , mimetypes, \ + namespace_by_name # , # files +# from ooopy.Version import VERSION # counts in meta.xml -meta_counts = \ - ( 'character-count', 'image-count', 'object-count', 'page-count' - , 'paragraph-count', 'table-count', 'word-count' - ) - -class Access_Attribute (autosuper) : - """ For performance reasons we do not specify a separate transform - for each attribute-read or -change operation. Instead we define - all the attribute accesses we want to perform as objects that - follow the attribute access api and apply them all using an - Attribute_Access in one go. +meta_counts = ('character-count', 'image-count', 'object-count', 'page-count', + 'paragraph-count', 'table-count', 'word-count') + + +class Access_Attribute(autosuper): + """ + For performance reasons we do not specify a separate transform + for each attribute-read or -change operation. Instead we define + all the attribute accesses we want to perform as objects that + follow the attribute access api and apply them all using an + Attribute_Access in one go. """ - def __init__ (self, key = None, prefix = None, ** kw) : - self.__super.__init__ (key = key, prefix = prefix, **kw) + def __init__(self, key=None, prefix=None, **kw): + self.__super.__init__(key=key, prefix=prefix, **kw) self.key = key - if key : - if not prefix : - prefix = self.__class__.__name__ - self.key = ':'.join ((prefix, key)) - # end def __init__ + if key: + if not prefix: + prefix = self.__class__.__name__ + self.key = ':'.join((prefix, key)) - def register (self, transformer) : + def register (self, transformer): self.transformer = transformer - # end def register - def use_value (self, oldval = None) : - """ Can change the given value by returning the new value. If - returning None or oldval the attribute stays unchanged. + def use_value (self, oldval=None): """ - raise NotImplementedError, "use_value must be defined in derived class" - # end def use_value + Can change the given value by returning the new value. If + returning None or oldval the attribute stays unchanged. + """ + raise NotImplementedError("use_value must be defined in derived class") -# end class Access_Attribute -class Get_Attribute (Access_Attribute) : - """ An example of not changing an attribute but only storing the - value in the transformer +class Get_Attribute(Access_Attribute): + """ + An example of not changing an attribute but only storing the + value in the transformer """ - def __init__ (self, tag, attr, key, transform = None, ** kw) : - self.__super.__init__ (key = key, **kw) - self.tag = tag - self.attribute = attr - self.transform = transform - # end def __init__ + def __init__(self, tag, attr, key, transform=None, ** kw): + self.__super.__init__(key=key, **kw) + self.tag = tag + self.attribute = attr + self.transform = transform - def use_value (self, oldval = None) : - self.transformer [self.key] = oldval + def use_value(self, oldval=None): + self.transformer[self.key] = oldval return None - # end def use_value -# end def Get_Attribute -class Get_Max (Access_Attribute) : +class Get_Max(Access_Attribute): """ Get the maximum value of an attribute """ - def __init__ (self, tag, attr, key, transform = None, ** kw) : - self.__super.__init__ (key = key, **kw) - self.tag = tag - self.attribute = attr - self.transform = transform - # end def __init__ - - def register (self, transformer) : - self.__super.register (transformer) - self.transformer [self.key] = -1 - # end def register - - def use_value (self, oldval = None) : - if self.transformer [self.key] < oldval : - self.transformer [self.key] = oldval + def __init__(self, tag, attr, key, transform=None, ** kw): + self.__super.__init__(key=key, **kw) + self.tag = tag + self.attribute = attr + self.transform = transform + + def register(self, transformer): + self.__super.register(transformer) + self.transformer[self.key] = -1 + + def use_value(self, oldval=None): + if oldval: + oldval = int(oldval) + if (self.transformer[self.key] or 0) < (oldval or 0): + self.transformer[self.key] = oldval return None - # end def use_value - -# end def Get_Max -class Renumber (Access_Attribute) : - """ Specifies a renumbering transform. OOo has a 'name' attribute - for several different tags, e.g., tables, frames, sections etc. - These names must be unique in the whole document. OOo itself - solves this by appending a unique number to a basename for each - element, e.g., sections are named 'Section1', 'Section2', ... - Renumber transforms can be applied to correct the numbering - after operations that destroy the unique numbering, e.g., after - a mailmerge where the same document is repeatedly appended. - The force parameter specifies if the new renumbered name should - be inserted even if the attribute in question does not exist. +class Renumber (Access_Attribute): + """ + Specifies a renumbering transform. OOo has a 'name' attribute + for several different tags, e.g., tables, frames, sections etc. + These names must be unique in the whole document. OOo itself + solves this by appending a unique number to a basename for each + element, e.g., sections are named 'Section1', 'Section2', ... + Renumber transforms can be applied to correct the numbering + after operations that destroy the unique numbering, e.g., after + a mailmerge where the same document is repeatedly appended. + + The force parameter specifies if the new renumbered name should + be inserted even if the attribute in question does not exist. """ - def __init__ \ - (self, tag, name = None, attr = None, start = 1, force = False) : + def __init__(self, tag, name=None, attr=None, start=1, force=False): self.__super.__init__ () tag_ns, tag_name = split_tag (tag) self.tag_ns = tag_ns @@ -178,29 +166,26 @@ class Set_Attribute (Access_Attribute) : self.transform = transform self.value = value self.oldvalue = oldvalue - # end def __init__ - def use_value (self, oldval) : - if oldval is None : + def use_value(self, oldval): + if oldval is None: return None - if self.oldvalue and oldval != self.oldvalue : + if self.oldvalue and oldval != self.oldvalue: return None - if self.key and self.transformer.has_key (self.key) : - return str (self.transformer [self.key]) + if self.key and self.transformer.has_key(self.key): + return str(self.transformer[self.key]) return self.value - # end def use_value -# end class Set_Attribute def set_attributes_from_dict (tag, attr, d) : """ Convenience function: iterate over a dict and return a list of Set_Attribute objects specifying replacement of attributes in the dictionary """ - return [Set_Attribute (tag, attr, oldvalue = k, value = v) - for k,v in d.iteritems () - ] -# end def set_attributes_from_dict + return [ + Set_Attribute(tag, attr, oldvalue=k, value=v) + for k, v in d.items() + ] class Reanchor (Access_Attribute) : """ @@ -327,41 +312,37 @@ class Manifest_Append (Transform) : # meta.xml transforms # + class Editinfo (Transform) : """ - This is an example of modifying OOo meta info (edit information, - author, etc). We set some of the items (program that generated - the OOo file, modification time, number of edit cyles and overall - edit duration). It's easy to subclass this transform and replace - the "replace" variable (pun intended) in the derived class. + This is an example of modifying OOo meta info (edit information, + author, etc). We set some of the items (program that generated + the OOo file, modification time, number of edit cyles and overall + edit duration). It's easy to subclass this transform and replace + the "replace" variable (pun intended) in the derived class. """ filename = 'meta.xml' - prio = 20 - repl = \ - { ('meta', 'generator') : 'OOoPy field replacement' - , ('dc', 'date') : time.strftime ('%Y-%m-%dT%H:%M:%S') - , ('meta', 'editing-cycles') : '0' - , ('meta', 'editing-duration') : 'PT0M0S' - } - replace = {} + prio = 20 + repl = { + ('meta', 'generator'): 'OOoPy field replacement', + ('dc', 'date'): time.strftime ('%Y-%m-%dT%H:%M:%S'), + ('meta', 'editing-cycles'): '0', + ('meta', 'editing-duration'): 'PT0M0S' + } + replace = {} # iterate over all mimetypes, so this works for all known mimetypes # of OOo documents. - for m in mimetypes : - for params, value in repl.iteritems () : - replace [OOo_Tag (mimetype = m, *params)] = value + for m in mimetypes: + for params, value in repl.items(): + replace [OOo_Tag (mimetype=m, *params)] = value - def apply (self, root) : - for node in root.findall (self.oootag ('office', 'meta') + '/*') : - if self.replace.has_key (node.tag) : - node.text = self.replace [node.tag] - # end def apply -# end class Editinfo + def apply(self, root) : + for node in root.findall (self.oootag ('office', 'meta') + '/*'): + if node.tag in self.replace.keys(): + node.text = self.replace[node.tag] -# -# settings.xml transforms -# -class Autoupdate (Transform) : +class Autoupdate (Transform): """ This is an example of modifying OOo settings. We set some of the AutoUpdate configuration items in OOo to true. We also specify @@ -718,28 +699,31 @@ class Mailmerge (_Body_Concat) : # end def apply # end class Mailmerge + def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) : - """ Serialise a style-element of an OOo document (e.g., a - style:font-decl, style:default-style, etc declaration). - We remove the name of the style and return something that is a - representation of the style element which can be used as a - dictionary key. - The serialisation format is a tuple containing the tag as the - first item, the attributes (as key,value pairs returned by - items()) as the second item and the following items are - serialisations of children. """ - attr = dict (element.attrib) - stylename = OOo_Tag ('style', 'name', mimetype) - if stylename in attr : del attr [stylename] - attr = attr.items () - attr.sort () - attr = tuple (attr) + Serialise a style-element of an OOo document (e.g., a + style:font-decl, style:default-style, etc declaration). + We remove the name of the style and return something that is a + representation of the style element which can be used as a + dictionary key. + The serialisation format is a tuple containing the tag as the + first item, the attributes (as key,value pairs returned by + items()) as the second item and the following items are + serialisations of children. + """ + attr = dict(element.attrib) + stylename = OOo_Tag('style', 'name', mimetype) + if stylename in attr: + del attr[stylename] + attr = list(attr.items()) + attr.sort() + attr = tuple(attr) serial = [prefix + element.tag, attr] - for e in element : - serial.append (tree_serialise (e, prefix, mimetype)) - return tuple (serial) -# end def tree_serialise + for e in element: + serial.append(tree_serialise (e, prefix, mimetype)) + return tuple(serial) + class Concatenate (_Body_Concat) : """ @@ -794,31 +778,31 @@ class Concatenate (_Body_Concat) : assert (self.docs [-1].mimetype == self.docs [0].mimetype) # end def __init__ - def apply_all (self, trees) : - assert (self.docs [0].mimetype == self.transformer.mimetype) + def apply_all(self, trees): + assert (self.docs[0].mimetype == self.transformer.mimetype) self.serialised = {} self.stylenames = {} - self.namemaps = [{}] + self.namemaps = [{}] self.tab_depend = {} - for s in self.ref_attrs.itervalues () : - self.namemaps [0][s] = {} + for s in self.ref_attrs.values(): + self.namemaps[0][s] = {} self.body_decls = {} - for s in self.body_decl_sections : - self.body_decls [s] = {} - self.trees = {} - for f in self.oofiles : - self.trees [f] = [trees [f].getroot ()] - self.sections = {} + for s in self.body_decl_sections: + self.body_decls[s] = {} + self.trees = {} + for f in self.oofiles: + self.trees[f] = [trees[f].getroot()] + self.sections = {} for f in self.stylefiles : - self.sections [f] = {} + self.sections[f] = {} for node in self.trees [f][0] : self.sections [f][node.tag] = node - for d in self.docs : - self.namemaps.append ({}) - for s in self.ref_attrs.itervalues () : - self.namemaps [-1][s] = {} - for f in self.oofiles : - self.trees [f].append (d.read (f).getroot ()) + for d in self.docs: + self.namemaps.append({}) + for s in self.ref_attrs.values(): + self.namemaps[-1][s] = {} + for f in self.oofiles: + self.trees[f].append(d.read(f).getroot()) # append a pagebreak style, will be optimized away if duplicate pbs = Addpagebreak_Style (transformer = self.transformer) pbs.apply (self.trees ['content.xml'][0]) @@ -879,17 +863,12 @@ class Concatenate (_Body_Concat) : if max < pos : max = pos self.insert_tabs (sub, max) - # end def apply_tab_correction - - def _attr_rename (self, idx) : - r = sum \ - ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v]) - for k,v in self.ref_attrs.iteritems () - ] - , [] - ) - return Attribute_Access (r, transformer = self.transformer) - # end def _attr_rename + + def _attr_rename(self, idx): + r = sum( + [set_attributes_from_dict(None, k, self.namemaps [idx][v]) + for k, v in self.ref_attrs.items()], []) + return Attribute_Access(r, transformer=self.transformer) def body_concat (self) : count = {} @@ -974,30 +953,28 @@ class Concatenate (_Body_Concat) : ) # end def insert_tabs - def merge_defaultstyle (self, default_style, node) : + def merge_defaultstyle(self, default_style, node): assert default_style is not None assert node is not None proppath = './' + self.properties_tag defprops = default_style.find (proppath) - props = node.find (proppath) - sn = self.oootag ('style', 'name') - if props is None : - props = Element (self.properties_tag) - for k, v in defprops.attrib.iteritems () : - if self.default_properties.get (k) != v and not props.get (k) : - if k == self.oootag ('style', 'tab-stop-distance') : + props = node.find(proppath) + sn = self.oootag('style', 'name') + if props is None: + props = Element(self.properties_tag) + for k, v in defprops.attrib.items(): + if self.default_properties.get(k) != v and not props.get(k): + if k == self.oootag('style', 'tab-stop-distance'): self.tab_correct = v - self.tab_depend = {node.get (sn) : 1} - stps = SubElement \ - (props, self.oootag ('style', 'tab-stops')) - self.insert_tabs (stps) + self.tab_depend = {node.get(sn): 1} + stps = SubElement(props, self.oootag('style', 'tab-stops')) + self.insert_tabs(stps) else : - props.set (k,v) - if len (props) or props.attrib : - node.append (props) - # end def merge_defaultstyle + props.set(k, v) + if len(props) or props.attrib: + node.append(props) - def _newname (self, key, oldname) : + def _newname(self, key, oldname): stylenum = 0 if (key, oldname) not in self.stylenames : self.stylenames [(key, oldname)] = 1 @@ -1212,17 +1189,18 @@ def renumber_all (mimetype) : # the info retrieved from the OOo document: We use the attribute name in # the meta-information to store (and later retrieve) the information. -def get_meta (mimetype) : - """ Factory function for Attribute_Access to get all interesting - meta-data + +def get_meta(mimetype): + """ + Factory function for Attribute_Access to get all interesting meta-data """ get_attr = [] for attr in meta_counts : - a = OOo_Tag ('meta', attr, mimetype) - t = OOo_Tag ('meta', 'document-statistic', mimetype) + a = OOo_Tag('meta', attr, mimetype) + t = OOo_Tag('meta', 'document-statistic', mimetype) get_attr.append (Get_Attribute (t, a, attr)) - return Attribute_Access (get_attr, prio = 20, filename = 'meta.xml') -# end def get_meta + return Attribute_Access (get_attr, prio= 20, filename='meta.xml') + def set_meta (mimetype) : """ Factory function for Attribute_Access to set all interesting diff --git a/ooopy/Version.py b/ooopy/Version.py index 495ca242a..3c6f39aef 100644 --- a/ooopy/Version.py +++ b/ooopy/Version.py @@ -1 +1 @@ -VERSION="1.11" +VERSION = "1.11-python3" |