| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
 | #!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
# Reichergasse 131, A-3411 Weidling.
# Web: http://www.runtux.com Email: office@runtux.com
# All rights reserved
# ****************************************************************************
#
# This library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
from io import BytesIO
from datetime import datetime
from xml.etree.ElementTree   import ElementTree, fromstring, _namespace_map
class _autosuper(type):
    """
    Metaclass that equips every class it creates with a private
    ``__super`` attribute holding ``super(cls)``.

    Inside a method of such a class, ``self.__super.method(...)`` calls
    the next implementation without having to spell out the class name.
    """
    def __init__(cls, name, bases, dict):
        super(_autosuper, cls).__init__(name, bases, dict)
        # ``self.__super`` written inside a class body is name-mangled by
        # the compiler, which removes leading underscores from the class
        # name first (class ``_Foo`` -> ``_Foo__super``). Build the same
        # name here, otherwise underscore-prefixed classes never find
        # their attribute.
        stripped = name.lstrip('_')
        # A class name consisting only of underscores is not mangled at all.
        attr = "_%s__super" % stripped if stripped else "__super"
        setattr(cls, attr, super(cls))


class autosuper(metaclass=_autosuper):
    """
    Base class for everything that wants the ``self.__super`` shortcut
    provided by the ``_autosuper`` metaclass.
    """
    def __init__(self, *args, **kw):
        # Arguments are accepted for the benefit of subclasses and
        # deliberately not passed on to the next ``__init__``.
        self.__super.__init__()
# XML members of an OOo document archive.
files = [
    'content.xml',
    'styles.xml',
    'meta.xml',
    'settings.xml',
    'META-INF/manifest.xml',
]
# Known document mimetypes: index 0 is the OOo 1.X writer format,
# index 1 the OpenDocument text format.
mimetypes = [
    'application/vnd.sun.xml.writer',
    'application/vnd.oasis.opendocument.text',
]
# Namespace prefix -> namespace URI, one table per supported mimetype.
namespace_by_name = {
    # OOo 1.X documents
    mimetypes[0]: {
        'chart': "http://openoffice.org/2000/chart",
        'config': "http://openoffice.org/2001/config",
        'dc': "http://purl.org/dc/elements/1.1/",
        'dr3d': "http://openoffice.org/2000/dr3d",
        'draw': "http://openoffice.org/2000/drawing",
        'fo': "http://www.w3.org/1999/XSL/Format",
        'form': "http://openoffice.org/2000/form",
        'math': "http://www.w3.org/1998/Math/MathML",
        'meta': "http://openoffice.org/2000/meta",
        'number': "http://openoffice.org/2000/datastyle",
        'office': "http://openoffice.org/2000/office",
        'script': "http://openoffice.org/2000/script",
        'style': "http://openoffice.org/2000/style",
        'svg': "http://www.w3.org/2000/svg",
        'table': "http://openoffice.org/2000/table",
        'text': "http://openoffice.org/2000/text",
        'xlink': "http://www.w3.org/1999/xlink",
        'manifest': "http://openoffice.org/2001/manifest",
    },
    # OpenDocument documents
    mimetypes[1]: {
        'chart': "urn:oasis:names:tc:opendocument:xmlns:chart:1.0",
        'config': "urn:oasis:names:tc:opendocument:xmlns:config:1.0",
        'dc': "http://purl.org/dc/elements/1.1/",
        'dr3d': "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0",
        'draw': "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
        'fo': "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0",
        'form': "urn:oasis:names:tc:opendocument:xmlns:form:1.0",
        'math': "http://www.w3.org/1998/Math/MathML",
        'meta': "urn:oasis:names:tc:opendocument:xmlns:meta:1.0",
        'number': "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0",
        'office': "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
        'officeooo': "http://openoffice.org/2009/office",
        'script': "urn:oasis:names:tc:opendocument:xmlns:script:1.0",
        'style': "urn:oasis:names:tc:opendocument:xmlns:style:1.0",
        'svg': "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0",
        'table': "urn:oasis:names:tc:opendocument:xmlns:table:1.0",
        'text': "urn:oasis:names:tc:opendocument:xmlns:text:1.0",
        'xlink': "http://www.w3.org/1999/xlink",
        'manifest': "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0",
        'tableooo': "http://openoffice.org/2009/table",
        'transformation': "http://www.w3.org/2003/g/data-view#",
        # OOo 1.X tags and some others:
        'ooo': "http://openoffice.org/2004/office",
        'ooow': "http://openoffice.org/2004/writer",
        'oooc': "http://openoffice.org/2004/calc",
        'o_dom': "http://www.w3.org/2001/xml-events",
        'o_xforms': "http://www.w3.org/2002/xforms",
        'xs': "http://www.w3.org/2001/XMLSchema",
        'xsi': "http://www.w3.org/2001/XMLSchema-instance",
        # Predefined xml namespace, see
        # http://www.w3.org/TR/2006/REC-xml-names11-20060816/
        # "It MAY, but need not, be declared, and MUST NOT be undeclared
        # or bound to any other namespace name."
        'xml': "http://www.w3.org/XML/1998/namespace",
    },
}
# Enter every known prefix into ElementTree's module-level URI -> prefix
# table. A URI that is already present must be bound to the same prefix;
# anything else would be a contradiction in the tables above.
for mimetype in namespace_by_name.values():
    for k, v in mimetype.items():
        # For a URI not yet in the table get() falls back to k, so the
        # check is trivially true for new entries.
        assert _namespace_map.get(v, k) == k
        _namespace_map[v] = k
class OOoElementTree(autosuper):
    """
    An ElementTree for OOo document XML members. Behaves like the
    original ElementTree (in fact it delegates almost everything to a
    real instance of ElementTree) except for the write method, that
    writes itself back to the OOo XML file in the OOo zip archive it
    came from.
    """
    def __init__(self, ooopy, zname, root):
        """
        Wrap the element *root* in an ElementTree and remember the
        *ooopy* object and the archive member name *zname* that
        write() hands the tree back to.
        """
        self.ooopy = ooopy
        self.zname = zname
        self.tree = ElementTree(root)

    def write(self):
        """
        Write the tree back via the write method of the ooopy object
        given at construction time, under the remembered member name.
        """
        self.ooopy.write(self.zname, self.tree)

    def __getattr__(self, name):
        """
        Delegate everything to our ElementTree attribute.

        Only called for attributes not found the normal way. Names
        starting with a double underscore are never delegated. A
        delegated attribute is stored on this object, so the next
        access no longer comes through here.
        """
        # 'tree' only arrives here when it has not been set yet (e.g. a
        # subclass touching attributes before calling our __init__);
        # reading self.tree below would then re-enter this method without
        # end, so fail with a plain AttributeError instead.
        if name.startswith('__') or name == 'tree':
            raise AttributeError(name)
        result = getattr(self.tree, name)
        setattr(self, name, result)
        return result
class OOoPy(autosuper):
    """
    Wrapper for OpenOffice.org zip files (all OOo documents are
    really zip files internally).
    """
    def __init__(self, infile=None, outfile=None, write_mode='w',
                 mimetype=None):
        """
        Open an OOo document, if no outfile is given, we open the
        file read-only. Otherwise the outfile has to be different
        from the infile -- the python ZipFile can't deal with
        read-write access. In case an outfile is given, we open it
        in "w" mode as a zip file, unless write_mode is specified
        (the only allowed case would be "a" for appending to an
        existing file, see pythons ZipFile documentation for
        details). If no infile is given, the user is responsible for
        providing all necessary files in the resulting output file.
        It seems that OOo needs to have the mimetype as the first
        archive member (at least with mimetype as the first member
        it works, the order may not be arbitrary) to recognize a zip
        archive as an OOo file. When copying from a given infile, we
        use the same order of elements in the resulting output. When
        creating new elements we make sure the mimetype is the first
        in the resulting archive.
        Note that both, infile and outfile can either be filenames
        or binary file-like objects (e.g. io.BytesIO).
        The mimetype is automatically determined if an infile is
        given. If only writing is desired, the mimetype should be
        set; without it the mimetype attribute is None and write()
        refuses to create the mimetype member.
        """
        # Set everything close() looks at before the first statement that
        # can raise: close() doubles as __del__ and therefore also runs on
        # instances whose construction failed half-way.
        self.izip = self.ozip = None
        self.written = {}
        self.mimetype = None
        assert (infile != outfile)
        if infile:
            self.izip = ZipFile(infile, 'r', ZIP_DEFLATED)
        if outfile:
            self.ozip = ZipFile(outfile, write_mode, ZIP_DEFLATED)
        if mimetype:
            self.mimetype = mimetype
        elif self.izip:
            self.mimetype = self.izip.read('mimetype')
        # The archive member is read as bytes; keep the attribute a str.
        if isinstance(self.mimetype, bytes):
            self.mimetype = self.mimetype.decode()

    def read(self, zname):
        """
        return an OOoElementTree object for the given OOo document
        archive member name. Currently an OOo document contains the
        following XML files::
         * content.xml: the text of the OOo document
         * styles.xml: style definitions
         * meta.xml: meta-information (author, last changed, ...)
         * settings.xml: settings in OOo
         * META-INF/manifest.xml: contents of the archive
        There is an additional file "mimetype" that always contains
        the string "application/vnd.sun.xml.writer" for OOo 1.X files
        and the string "application/vnd.oasis.opendocument.text" for
        OOo 2.X files.
        """
        assert self.izip
        root = fromstring(self.izip.read(zname))
        return OOoElementTree(self, zname, root)

    def _write(self, zname, str):
        """
        Store the data *str* as archive member *zname* in the output
        zip, stamped with the current UTC time, and record the name
        as written.
        """
        # Local imports: these two names are not part of the module-level
        # import lines.
        from datetime import timezone
        from zipfile import ZIP_STORED
        # ZipInfo wants a 6-tuple (year, month, day, hour, minute, second).
        now = datetime.now(timezone.utc).timetuple()[:6]
        info = ZipInfo(zname, date_time=now)
        info.create_system = 0  # pretend to be fat
        # The OpenDocument package format wants the mimetype member
        # stored uncompressed; everything else is deflated.
        if zname == 'mimetype':
            info.compress_type = ZIP_STORED
        else:
            info.compress_type = ZIP_DEFLATED
        self.ozip.writestr(info, str)
        self.written[zname] = 1

    def write(self, zname, etree):
        """
        Serialize *etree* (anything with an ElementTree-like write
        method) and store it as archive member *zname*. The mimetype
        member is written first if it is not in the output yet.
        Raises ValueError if that is necessary but no mimetype is known.
        """
        assert self.ozip
        # assure mimetype is the first member in new archive
        if 'mimetype' not in self.written:
            if self.mimetype is None:
                raise ValueError(
                    "mimetype unknown: pass mimetype or infile to OOoPy")
            self._write('mimetype', self.mimetype)
        buf = BytesIO()
        etree.write(buf)
        self._write(zname, buf.getvalue())

    def append_file(self, zname, str):
        """
        Official interface to _write: Append a file to the end of the archive.
        A member name that was already written is silently skipped.
        """
        if zname not in self.written:
            self._write(zname, str)

    def close(self):
        """
        Close the zip files. According to documentation of zipfile in
        the standard python lib, this has to be done to be sure
        everything is written. We copy over the not-yet written files
        from izip before closing ozip.
        Calling close more than once is harmless.
        """
        # getattr: as __del__ this may run on an object whose __init__
        # never got to set the attributes.
        izip = getattr(self, 'izip', None)
        ozip = getattr(self, 'ozip', None)
        written = getattr(self, 'written', {})
        try:
            if izip is not None and ozip is not None:
                for member in izip.infolist():
                    if member.filename not in written:
                        ozip.writestr(member, izip.read(member.filename))
        finally:
            # Release both handles even if copying a member failed.
            for archive in (izip, ozip):
                if archive is not None:
                    archive.close()
            self.izip = self.ozip = None

    __del__ = close  # auto-close on deletion of object
 |