diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-05-01 13:51:01 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-06-17 13:21:28 +0200 |
commit | a18087eff29bb316af47bb5fe53a59c43edc57f0 (patch) | |
tree | d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy/OOoPy.py | |
parent | ef5d92489c4516db6cd0611c4e5671db62837f63 (diff) | |
download | Ishtar-a18087eff29bb316af47bb5fe53a59c43edc57f0.tar.bz2 Ishtar-a18087eff29bb316af47bb5fe53a59c43edc57f0.zip |
Embed ooopy (last version: 1.11)
Diffstat (limited to 'ooopy/OOoPy.py')
-rw-r--r-- | ooopy/OOoPy.py | 317 |
1 files changed, 317 insertions, 0 deletions
diff --git a/ooopy/OOoPy.py b/ooopy/OOoPy.py new file mode 100644 index 000000000..87e0b8110 --- /dev/null +++ b/ooopy/OOoPy.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. +# Reichergasse 131, A-3411 Weidling. +# Web: http://www.runtux.com Email: office@runtux.com +# All rights reserved +# **************************************************************************** +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# **************************************************************************** + +from __future__ import absolute_import + +from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo +try : + from StringIO import StringIO +except ImportError : + from io import StringIO +from datetime import datetime +try : + from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map +except ImportError : + from elementtree.ElementTree import ElementTree, fromstring, _namespace_map +from tempfile import mkstemp +from ooopy.Version import VERSION +import os + +class _autosuper (type) : + def __init__ (cls, name, bases, dict) : + super (_autosuper, cls).__init__ (name, bases, dict) + setattr (cls, "_%s__super" % name, super (cls)) + # end def __init__ +# end class _autosuper + +class autosuper (object) : + __metaclass__ = _autosuper + def __init__ (self, *args, **kw) : + self.__super.__init__ () + # end def __init__ +# end class autosuper + +files = \ + [ 'content.xml' + , 'styles.xml' + , 'meta.xml' + , 'settings.xml' + , 'META-INF/manifest.xml' + ] + +mimetypes = \ + [ 'application/vnd.sun.xml.writer' + , 'application/vnd.oasis.opendocument.text' + ] +namespace_by_name = \ + { mimetypes [0] : + { 'chart' : "http://openoffice.org/2000/chart" + , 'config' : "http://openoffice.org/2001/config" + , 'dc' : "http://purl.org/dc/elements/1.1/" + , 'dr3d' : "http://openoffice.org/2000/dr3d" + , 'draw' : "http://openoffice.org/2000/drawing" + , 'fo' : "http://www.w3.org/1999/XSL/Format" + , 'form' : "http://openoffice.org/2000/form" + , 'math' : "http://www.w3.org/1998/Math/MathML" + , 'meta' : "http://openoffice.org/2000/meta" + , 'number' : "http://openoffice.org/2000/datastyle" + , 'office' : "http://openoffice.org/2000/office" + , 'script' : "http://openoffice.org/2000/script" + , 'style' : "http://openoffice.org/2000/style" + , 'svg' : "http://www.w3.org/2000/svg" + , 'table' : "http://openoffice.org/2000/table" + , 'text' : "http://openoffice.org/2000/text" + , 'xlink' : "http://www.w3.org/1999/xlink" + , 'manifest' : "http://openoffice.org/2001/manifest" + } + , mimetypes [1] : + { 'chart' : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" + , 'config' : "urn:oasis:names:tc:opendocument:xmlns:config:1.0" + , 'dc' : "http://purl.org/dc/elements/1.1/" + , 'dr3d' : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" + , 'draw' : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" + , 'fo' : "urn:oasis:names:tc:opendocument:xmlns:" + "xsl-fo-compatible:1.0" + , 'form' : "urn:oasis:names:tc:opendocument:xmlns:form:1.0" + , 'math' : "http://www.w3.org/1998/Math/MathML" + , 'meta' : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" + , 'number' : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" + , 'office' : "urn:oasis:names:tc:opendocument:xmlns:office:1.0" + , 'officeooo': "http://openoffice.org/2009/office" + , 'script' : "urn:oasis:names:tc:opendocument:xmlns:script:1.0" + , 'style' : "urn:oasis:names:tc:opendocument:xmlns:style:1.0" + , 'svg' : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" + , 'table' : "urn:oasis:names:tc:opendocument:xmlns:table:1.0" + , 'text' : "urn:oasis:names:tc:opendocument:xmlns:text:1.0" + , 'xlink' : "http://www.w3.org/1999/xlink" + , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" + , 'tableooo' : "http://openoffice.org/2009/table" + , 'transformation' : "http://www.w3.org/2003/g/data-view#" + # OOo 1.X tags and some others: + , 'ooo' : "http://openoffice.org/2004/office" + , 'ooow' : "http://openoffice.org/2004/writer" + , 'oooc' : "http://openoffice.org/2004/calc" + , 'o_dom' : "http://www.w3.org/2001/xml-events" + , 'o_xforms' : "http://www.w3.org/2002/xforms" + , 'xs' : "http://www.w3.org/2001/XMLSchema" + , 'xsi' : "http://www.w3.org/2001/XMLSchema-instance" + # predefined xml namespace, see + # http://www.w3.org/TR/2006/REC-xml-names11-20060816/ + # "It MAY, but need not, be declared, and MUST NOT be undeclared + # or bound to any other namespace name." + , 'xml' : "http://www.w3.org/XML/1998/namespace" + } + } + +for mimetype in namespace_by_name.itervalues () : + for k, v in mimetype.iteritems () : + if v in _namespace_map : + assert (_namespace_map [v] == k) + _namespace_map [v] = k + +class OOoElementTree (autosuper) : + """ + An ElementTree for OOo document XML members. Behaves like the + orginal ElementTree (in fact it delegates almost everything to a + real instance of ElementTree) except for the write method, that + writes itself back to the OOo XML file in the OOo zip archive it + came from. + """ + def __init__ (self, ooopy, zname, root) : + self.ooopy = ooopy + self.zname = zname + self.tree = ElementTree (root) + # end def __init__ + + def write (self) : + self.ooopy.write (self.zname, self.tree) + # end def write + + def __getattr__ (self, name) : + """ + Delegate everything to our ElementTree attribute. + """ + if not name.startswith ('__') : + result = getattr (self.tree, name) + setattr (self, name, result) + return result + raise AttributeError (name) + # end def __getattr__ + +# end class OOoElementTree + +class OOoPy (autosuper) : + """ + Wrapper for OpenOffice.org zip files (all OOo documents are + really zip files internally). + + from ooopy.OOoPy import OOoPy + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw') + >>> o.mimetype + 'application/vnd.sun.xml.writer' + >>> for f in files : + ... e = o.read (f) + ... e.write () + ... + >>> o.close () + >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt') + >>> o.mimetype + 'application/vnd.oasis.opendocument.text' + >>> for f in files : + ... e = o.read (f) + ... e.write () + ... + >>> o.append_file ('Pictures/empty', '') + >>> o.close () + >>> o = OOoPy (infile = 'out2.odt') + >>> for f in o.izip.infolist () : + ... print f.filename, f.create_system, f.compress_type + mimetype 0 8 + content.xml 0 8 + styles.xml 0 8 + meta.xml 0 8 + settings.xml 0 8 + META-INF/manifest.xml 0 8 + Pictures/empty 0 8 + Configurations2/statusbar/ 0 0 + Configurations2/accelerator/current.xml 0 8 + Configurations2/floater/ 0 0 + Configurations2/popupmenu/ 0 0 + Configurations2/progressbar/ 0 0 + Configurations2/menubar/ 0 0 + Configurations2/toolbar/ 0 0 + Configurations2/images/Bitmaps/ 0 0 + Thumbnails/thumbnail.png 0 8 + """ + def __init__ \ + ( self + , infile = None + , outfile = None + , write_mode = 'w' + , mimetype = None + ) : + """ + Open an OOo document, if no outfile is given, we open the + file read-only. Otherwise the outfile has to be different + from the infile -- the python ZipFile can't deal with + read-write access. In case an outfile is given, we open it + in "w" mode as a zip file, unless write_mode is specified + (the only allowed case would be "a" for appending to an + existing file, see pythons ZipFile documentation for + details). If no infile is given, the user is responsible for + providing all necessary files in the resulting output file. + + It seems that OOo needs to have the mimetype as the first + archive member (at least with mimetype as the first member + it works, the order may not be arbitrary) to recognize a zip + archive as an OOo file. When copying from a given infile, we + use the same order of elements in the resulting output. When + creating new elements we make sure the mimetype is the first + in the resulting archive. + + Note that both, infile and outfile can either be filenames + or file-like objects (e.g. StringIO). + + The mimetype is automatically determined if an infile is + given. If only writing is desired, the mimetype should be + set. + """ + assert (infile != outfile) + self.izip = self.ozip = None + if infile : + self.izip = ZipFile (infile, 'r', ZIP_DEFLATED) + if outfile : + self.ozip = ZipFile (outfile, write_mode, ZIP_DEFLATED) + self.written = {} + if mimetype : + self.mimetype = mimetype + elif self.izip : + self.mimetype = self.izip.read ('mimetype') + # end def __init__ + + def read (self, zname) : + """ + return an OOoElementTree object for the given OOo document + archive member name. Currently an OOo document contains the + following XML files:: + + * content.xml: the text of the OOo document + * styles.xml: style definitions + * meta.xml: meta-information (author, last changed, ...) + * settings.xml: settings in OOo + * META-INF/manifest.xml: contents of the archive + + There is an additional file "mimetype" that always contains + the string "application/vnd.sun.xml.writer" for OOo 1.X files + and the string "application/vnd.oasis.opendocument.text" for + OOo 2.X files. + """ + assert (self.izip) + return OOoElementTree (self, zname, fromstring (self.izip.read (zname))) + # end def read + + def _write (self, zname, str) : + now = datetime.utcnow ().timetuple () + info = ZipInfo (zname, date_time = now) + info.create_system = 0 # pretend to be fat + info.compress_type = ZIP_DEFLATED + self.ozip.writestr (info, str) + self.written [zname] = 1 + # end def _write + + def write (self, zname, etree) : + assert (self.ozip) + # assure mimetype is the first member in new archive + if 'mimetype' not in self.written : + self._write ('mimetype', self.mimetype) + str = StringIO () + etree.write (str) + self._write (zname, str.getvalue ()) + # end def write + + def append_file (self, zname, str) : + """ Official interface to _write: Append a file to the end of + the archive. + """ + if zname not in self.written : + self._write (zname, str) + # end def append_file + + def close (self) : + """ + Close the zip files. According to documentation of zipfile in + the standard python lib, this has to be done to be sure + everything is written. We copy over the not-yet written files + from izip before closing ozip. + """ + if self.izip and self.ozip : + for f in self.izip.infolist () : + if f.filename not in self.written : + self.ozip.writestr (f, self.izip.read (f.filename)) + for i in self.izip, self.ozip : + if i : i.close () + self.izip = self.ozip = None + # end def close + + __del__ = close # auto-close on deletion of object +# end class OOoPy |