diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-05-01 13:51:01 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2019-06-17 13:21:28 +0200 |
commit | a18087eff29bb316af47bb5fe53a59c43edc57f0 (patch) | |
tree | d6452080600bd7fc377321d4dab58a7fc4333cb2 /ooopy/Transformer.py | |
parent | ef5d92489c4516db6cd0611c4e5671db62837f63 (diff) | |
download | Ishtar-a18087eff29bb316af47bb5fe53a59c43edc57f0.tar.bz2 Ishtar-a18087eff29bb316af47bb5fe53a59c43edc57f0.zip |
Embed ooopy (last version: 1.11)
Diffstat (limited to 'ooopy/Transformer.py')
-rw-r--r-- | ooopy/Transformer.py | 1397 |
1 files changed, 1397 insertions, 0 deletions
diff --git a/ooopy/Transformer.py b/ooopy/Transformer.py new file mode 100644 index 000000000..dbbab125d --- /dev/null +++ b/ooopy/Transformer.py @@ -0,0 +1,1397 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. +# Reichergasse 131, A-3411 Weidling. +# Web: http://www.runtux.com Email: office@runtux.com +# All rights reserved +# **************************************************************************** +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# **************************************************************************** + +from __future__ import absolute_import + +import time +import re +try : + from xml.etree.ElementTree import dump, SubElement, Element, tostring + from xml.etree.ElementTree import _namespace_map +except ImportError : + from elementtree.ElementTree import dump, SubElement, Element, tostring + from elementtree.ElementTree import _namespace_map +from copy import deepcopy +from ooopy.OOoPy import OOoPy, autosuper +from ooopy.OOoPy import files, mimetypes, namespace_by_name +from ooopy.Version import VERSION + +def OOo_Tag (namespace, name, mimetype) : + """Return combined XML tag + + >>> OOo_Tag ('xml', 'id', mimetypes [1]) + '{http://www.w3.org/XML/1998/namespace}id' + >>> OOo_Tag ('text', 'list', mimetypes [1]) + '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list' + """ + return "{%s}%s" % (namespace_by_name [mimetype][namespace], name) +# end def OOo_Tag + +def split_tag (tag) : + """ Split tag into symbolic namespace and name part -- inverse + operation of OOo_Tag. + """ + ns, t = tag.split ('}') + return (_namespace_map [ns [1:]], t) +# end def split_tag + +class Transform (autosuper) : + """ + Base class for individual transforms on OOo files. An individual + transform needs a filename variable for specifying the OOo file + the transform should be applied to and an optional prio. + Individual transforms are applied according to their prio + setting, higher prio means later application of a transform. + + The filename variable must specify one of the XML files which are + part of the OOo document (see files variable above). As + the names imply, content.xml contains the contents of the + document (text and ad-hoc style definitions), styles.xml contains + the style definitions, meta.xml contains meta information like + author, editing time, etc. and settings.xml is used to store + OOo's settings (menu Tools->Configure). + """ + prio = 100 + textbody_names = \ + { mimetypes [0] : 'body' + , mimetypes [1] : 'text' + } + paragraph_props = \ + { mimetypes [0] : 'properties' + , mimetypes [1] : 'paragraph-properties' + } + font_decls = \ + { mimetypes [0] : 'font-decls' + , mimetypes [1] : 'font-face-decls' + } + + def __init__ (self, prio = None, transformer = None) : + if prio is not None : + self.prio = prio + self.transformer = None + if transformer : + self.register (transformer) + # end def __init__ + + def apply (self, root) : + """ Apply myself to the element given as root """ + raise NotImplementedError, 'derived transforms must implement "apply"' + # end def apply + + def apply_all (self, trees) : + """ Apply myself to all the files given in trees. The variable + trees contains a dictionary of ElementTree indexed by the + name of the OOo File. + The standard case is that only one file (namely + self.filename) is used. + """ + assert (self.filename) + self.apply (trees [self.filename].getroot ()) + # end def apply_all + + def find_tbody (self, root) : + """ Find the node which really contains the text -- different + for different OOo versions. + """ + tbody = root + if tbody.tag != self.textbody_tag : + tbody = tbody.find ('.//' + self.textbody_tag) + return tbody + # end def find_tbody + + def register (self, transformer) : + """ Registering with a transformer means being able to access + variables stored in the tranformer by other transforms. + + Also needed for tag-computation: The transformer knows which + version of OOo document we are processing. + """ + self.transformer = transformer + mt = self.mimetype = transformer.mimetype + self.textbody_name = self.textbody_names [mt] + self.paragraph_props = self.paragraph_props [mt] + self.properties_tag = self.oootag ('style', self.paragraph_props) + self.textbody_tag = self.oootag ('office', self.textbody_name) + self.font_decls_tag = self.oootag ('office', self.font_decls [mt]) + # end def register + + def oootag (self, namespace, name) : + """ Compute long tag version """ + return OOo_Tag (namespace, name, self.mimetype) + # end def oootag + + def set (self, variable, value) : + """ Set variable in our transformer using naming convention. """ + self.transformer [self._varname (variable)] = value + # end def set + + def _varname (self, name) : + """ For fulfilling the naming convention of the transformer + dictionary (every entry in this dictionary should be prefixed + with the class name of the transform) we have this + convenience method. + Returns variable name prefixed with own class name. + """ + return ":".join ((self.__class__.__name__, name)) + # end def _varname + +# end class Transform + +class Transformer (autosuper) : + """ + Class for applying a set of transforms to a given ooopy object. + The transforms are applied to the specified file in priority + order. When applying transforms we have a mechanism for + communication of transforms. We give the transformer to the + individual transforms as a parameter. The transforms may use the + transformer like a dictionary for storing values and retrieving + values left by previous transforms. + As a naming convention each transform should use its class name + as a prefix for storing values in the dictionary. + >>> import Transforms + >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts + >>> try : + ... from io import StringIO, BytesIO + ... StringIO = BytesIO + ... except ImportError : + ... from StringIO import StringIO + >>> sio = BytesIO () + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) + 'Standard' + >>> def cb (name) : + ... r = { 'street' : 'Beispielstrasse 42' + ... , 'firstname' : 'Hugo' + ... , 'salutation' : 'Frau' + ... } + ... if r.has_key (name) : return r [name] + ... return None + ... + >>> p = get_meta (m) + >>> t = Transformer (m, p) + >>> t ['a'] = 'a' + >>> t ['a'] + 'a' + >>> t.transform (o) + >>> p.set ('a', 'b') + >>> t ['Attribute_Access:a'] + 'b' + >>> t = Transformer ( + ... m + ... , Transforms.Autoupdate () + ... , Transforms.Editinfo () + ... , Transforms.Field_Replace (prio = 99, replace = cb) + ... , Transforms.Field_Replace + ... ( replace = + ... { 'salutation' : '' + ... , 'firstname' : 'Erika' + ... , 'lastname' : 'Musterfrau' + ... , 'country' : 'D' + ... , 'postalcode' : '00815' + ... , 'city' : 'Niemandsdorf' + ... } + ... ) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Addpagebreak () + ... ) + >>> t.transform (o) + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout.sxw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> c = o.read ('content.xml') + >>> m = o.mimetype + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + salutation : None + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + salutation : None + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) + 'P2' + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) + >>> c = o.read ('content.xml') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict (firstname = 'Erika', lastname = 'Nobody') + ... , dict (firstname = 'Eric', lastname = 'Wizard') + ... , cb + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, t [':'.join (('Set_Attribute', i))] + character-count 951 + image-count 0 + object-count 0 + page-count 3 + paragraph-count 113 + table-count 3 + word-count 162 + >>> name = t ['Addpagebreak_Style:stylename'] + >>> name + 'P2' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout2.sxw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', m)) + >>> for n in body.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:text-box 0 + draw:rect 1 + draw:text-box 3 + draw:rect 4 + draw:text-box 6 + draw:rect 7 + draw:text-box 2 + draw:text-box 5 + draw:text-box 8 + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : + ... print n.tag + {http://openoffice.org/2000/text}p + {http://openoffice.org/2000/text}p + >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) + >>> for n in body.findall (vset) : + ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', n.text + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : + ... print n.get (OOo_Tag ('draw', 'name', m)), + ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) + Frame1 1 + Frame2 2 + Frame3 3 + Frame4 None + Frame5 None + Frame6 None + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... print n.get (OOo_Tag ('text', 'name', m)) + Section1 + Section2 + Section3 + Section4 + Section5 + Section6 + Section7 + Section8 + Section9 + Section10 + Section11 + Section12 + Section13 + Section14 + Section15 + Section16 + Section17 + Section18 + >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : + ... print n.get (OOo_Tag ('table', 'name', m)) + Table1 + Table2 + Table3 + >>> r = o.read ('meta.xml') + >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) + >>> for i in meta_counts : + ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) + character-count '951' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '113' + table-count '3' + word-count '162' + >>> o.close () + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) + >>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate (*tf) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, repr (t [':'.join (('Set_Attribute', i))]) + character-count '1131' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '168' + table-count '2' + word-count '160' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout3.sxw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> s = o.read ('styles.xml') + >>> for n in c.findall ('./*/*') : + ... name = n.get (OOo_Tag ('style', 'name', m)) + ... if name : + ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) + ... print '"%s", "%s"' % (name, parent) + "Tahoma1", "None" + "Bitstream Vera Sans", "None" + "Tahoma", "None" + "Nimbus Roman No9 L", "None" + "Courier New", "None" + "Arial Black", "None" + "New Century Schoolbook", "None" + "Helvetica", "None" + "Table1", "None" + "Table1.A", "None" + "Table1.A1", "None" + "Table1.E1", "None" + "Table1.A2", "None" + "Table1.E2", "None" + "P1", "None" + "fr1", "Frame" + "fr2", "None" + "fr3", "Frame" + "Sect1", "None" + "gr1", "None" + "P2", "Standard" + "Standard_Concat", "None" + "Concat_P1", "Concat_Frame contents" + "Concat_P2", "Concat_Frame contents" + "P3", "Concat_Frame contents" + "P4", "Concat_Frame contents" + "P5", "Concat_Standard" + "P6", "Concat_Standard" + "P7", "Concat_Frame contents" + "P8", "Concat_Frame contents" + "P9", "Concat_Frame contents" + "P10", "Concat_Frame contents" + "P11", "Concat_Frame contents" + "P12", "Concat_Frame contents" + "P13", "Concat_Frame contents" + "P15", "Concat_Standard" + "P16", "Concat_Standard" + "P17", "Concat_Standard" + "P18", "Concat_Standard" + "P19", "Concat_Standard" + "P20", "Concat_Standard" + "P21", "Concat_Standard" + "P22", "Concat_Standard" + "P23", "Concat_Standard" + "T1", "None" + "Concat_fr1", "Concat_Frame" + "Concat_fr2", "Concat_Frame" + "Concat_fr3", "Concat_Frame" + "fr4", "Concat_Frame" + "fr5", "Concat_Frame" + "fr6", "Concat_Frame" + "Concat_Sect1", "None" + "N0", "None" + "N2", "None" + "P15_Concat", "Concat_Standard" + >>> for n in s.findall ('./*/*') : + ... name = n.get (OOo_Tag ('style', 'name', m)) + ... if name : + ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) + ... print '"%s", "%s"' % (name, parent) + "Tahoma1", "None" + "Bitstream Vera Sans", "None" + "Tahoma", "None" + "Nimbus Roman No9 L", "None" + "Courier New", "None" + "Arial Black", "None" + "New Century Schoolbook", "None" + "Helvetica", "None" + "Standard", "None" + "Text body", "Standard" + "List", "Text body" + "Table Contents", "Text body" + "Table Heading", "Table Contents" + "Caption", "Standard" + "Frame contents", "Text body" + "Index", "Standard" + "Frame", "None" + "OLE", "None" + "Concat_Standard", "None" + "Concat_Text body", "Concat_Standard" + "Concat_List", "Concat_Text body" + "Concat_Caption", "Concat_Standard" + "Concat_Frame contents", "Concat_Text body" + "Concat_Index", "Concat_Standard" + "Horizontal Line", "Concat_Standard" + "Internet link", "None" + "Visited Internet Link", "None" + "Concat_Frame", "None" + "Concat_OLE", "None" + "pm1", "None" + "Concat_pm1", "None" + "Standard", "None" + "Concat_Standard", "None" + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name + salutation + firstname + lastname + street + country + postalcode + city + date + invoice.invoice_no + invoice.abo.aboprice.abotype.description + address.salutation + address.title + address.firstname + address.lastname + address.function + address.street + address.country + address.postalcode + address.city + invoice.subscriber.salutation + invoice.subscriber.title + invoice.subscriber.firstname + invoice.subscriber.lastname + invoice.subscriber.function + invoice.subscriber.street + invoice.subscriber.country + invoice.subscriber.postalcode + invoice.subscriber.city + invoice.period_start + invoice.period_end + invoice.currency.name + invoice.amount + invoice.subscriber.initial + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name + Illustration + Table + Text + Drawing + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... name = n.get (OOo_Tag ('text', 'style-name', m)) + ... if not name or name.startswith ('Concat') : + ... print ">%s<" % name + >Concat_P1< + >Concat_P2< + >Concat_Frame contents< + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : + ... attrs = 'name', 'style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... print attrs + ['Frame1', 'fr1', '0', '1'] + ['Frame2', 'fr1', '3', '2'] + ['Frame3', 'Concat_fr1', '6', '3'] + ['Frame4', 'Concat_fr2', '7', '3'] + ['Frame5', 'Concat_fr3', '8', '3'] + ['Frame6', 'Concat_fr1', '9', '3'] + ['Frame7', 'fr4', '10', '3'] + ['Frame8', 'fr4', '11', '3'] + ['Frame9', 'fr4', '12', '3'] + ['Frame10', 'fr4', '13', '3'] + ['Frame11', 'fr4', '14', '3'] + ['Frame12', 'fr4', '15', '3'] + ['Frame13', 'fr5', '16', '3'] + ['Frame14', 'fr4', '18', '3'] + ['Frame15', 'fr4', '19', '3'] + ['Frame16', 'fr4', '20', '3'] + ['Frame17', 'fr6', '17', '3'] + ['Frame18', 'fr4', '23', '3'] + ['Frame19', 'fr3', '2', None] + ['Frame20', 'fr3', '5', None] + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... attrs = 'name', 'style-name' + ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] + ... print attrs + ['Section1', 'Sect1'] + ['Section2', 'Sect1'] + ['Section3', 'Sect1'] + ['Section4', 'Sect1'] + ['Section5', 'Sect1'] + ['Section6', 'Sect1'] + ['Section7', 'Concat_Sect1'] + ['Section8', 'Concat_Sect1'] + ['Section9', 'Concat_Sect1'] + ['Section10', 'Concat_Sect1'] + ['Section11', 'Concat_Sect1'] + ['Section12', 'Concat_Sect1'] + ['Section13', 'Concat_Sect1'] + ['Section14', 'Concat_Sect1'] + ['Section15', 'Concat_Sect1'] + ['Section16', 'Concat_Sect1'] + ['Section17', 'Concat_Sect1'] + ['Section18', 'Concat_Sect1'] + ['Section19', 'Concat_Sect1'] + ['Section20', 'Concat_Sect1'] + ['Section21', 'Concat_Sect1'] + ['Section22', 'Concat_Sect1'] + ['Section23', 'Concat_Sect1'] + ['Section24', 'Concat_Sect1'] + ['Section25', 'Concat_Sect1'] + ['Section26', 'Concat_Sect1'] + ['Section27', 'Concat_Sect1'] + ['Section28', 'Sect1'] + ['Section29', 'Sect1'] + ['Section30', 'Sect1'] + ['Section31', 'Sect1'] + ['Section32', 'Sect1'] + ['Section33', 'Sect1'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... print attrs + ['gr1', 'P1', '1', '1'] + ['gr1', 'P1', '4', '2'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... print attrs + ['gr1', 'P1', '24'] + ['gr1', 'P1', '22'] + ['gr1', 'P1', '21'] + >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : + ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : + ... attrs = 'name', 'class', 'family' + ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] + ... print attrs + ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) + ... if props is not None and len (props) : + ... props [0].tag + ['Concat_Standard', 'text', 'paragraph'] + '{http://openoffice.org/2000/style}tab-stops' + ['Concat_Text body', 'text', 'paragraph'] + ['Concat_List', 'list', 'paragraph'] + ['Concat_Caption', 'extra', 'paragraph'] + ['Concat_Frame contents', 'extra', 'paragraph'] + ['Concat_Index', 'index', 'paragraph'] + ['Concat_Frame', None, 'graphics'] + ['Concat_OLE', None, 'graphics'] + >>> for n in c.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:text-box 0 + draw:rect 1 + draw:text-box 3 + draw:rect 4 + draw:text-box 6 + draw:text-box 7 + draw:text-box 8 + draw:text-box 9 + draw:text-box 10 + draw:text-box 11 + draw:text-box 12 + draw:text-box 13 + draw:text-box 14 + draw:text-box 15 + draw:text-box 16 + draw:text-box 18 + draw:text-box 19 + draw:text-box 20 + draw:text-box 17 + draw:text-box 23 + draw:line 24 + draw:text-box 2 + draw:text-box 5 + draw:line 22 + draw:line 21 + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio) + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict + ... ( Spett = "Spettabile" + ... , contraente = "First person" + ... , indirizzo = "street? 1" + ... , tipo = "racc. A.C." + ... , luogo = "Varese" + ... , oggetto = "Saluti" + ... ) + ... , dict + ... ( Spett = "Egregio" + ... , contraente = "Second Person" + ... , indirizzo = "street? 2" + ... , tipo = "Raccomandata" + ... , luogo = "Gavirate" + ... , oggetto = "Ossequi" + ... ) + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform(o) + >>> o.close() + >>> ov = sio.getvalue () + >>> f = open ("carta-out.stw", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + Spett : Spettabile + contraente : First person + indirizzo : street? 1 + Spett : Egregio + contraente : Second Person + indirizzo : street? 2 + tipo : racc. A.C. + luogo : Varese + oggetto : Saluti + tipo : Raccomandata + luogo : Gavirate + oggetto : Ossequi + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict (firstname = 'Erika', lastname = 'Nobody') + ... , dict (firstname = 'Eric', lastname = 'Wizard') + ... , cb + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, t [':'.join (('Set_Attribute', i))] + character-count 951 + image-count 0 + object-count 0 + page-count 3 + paragraph-count 53 + table-count 3 + word-count 162 + >>> name = t ['Addpagebreak_Style:stylename'] + >>> name + 'P2' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout.odt", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', m)) + >>> for n in body.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:frame 0 + draw:rect 1 + draw:frame 3 + draw:rect 4 + draw:frame 6 + draw:rect 7 + draw:frame 2 + draw:frame 5 + draw:frame 8 + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : + ... print n.tag + {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p + {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p + >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) + >>> for n in body.findall (vset) : + ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', n.text + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + firstname : Erika + lastname : Nobody + firstname : Eric + lastname : Wizard + firstname : Hugo + lastname : Testman + >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : + ... print n.get (OOo_Tag ('draw', 'name', m)), + ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) + Frame1 1 + Frame2 2 + Frame3 3 + Frame4 None + Frame5 None + Frame6 None + >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... print n.get (OOo_Tag ('text', 'name', m)) + Section1 + Section2 + Section3 + Section4 + Section5 + Section6 + Section7 + Section8 + Section9 + Section10 + Section11 + Section12 + Section13 + Section14 + Section15 + Section16 + Section17 + Section18 + >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : + ... print n.get (OOo_Tag ('table', 'name', m)) + Table1 + Table2 + Table3 + >>> r = o.read ('meta.xml') + >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) + >>> for i in meta_counts : + ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) + character-count '951' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '53' + table-count '3' + word-count '162' + >>> o.close () + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio) + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict + ... ( Spett = "Spettabile" + ... , contraente = "First person" + ... , indirizzo = "street? 1" + ... , tipo = "racc. A.C." + ... , luogo = "Varese" + ... , oggetto = "Saluti" + ... ) + ... , dict + ... ( Spett = "Egregio" + ... , contraente = "Second Person" + ... , indirizzo = "street? 2" + ... , tipo = "Raccomandata" + ... , luogo = "Gavirate" + ... , oggetto = "Ossequi" + ... ) + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform(o) + >>> o.close() + >>> ov = sio.getvalue () + >>> f = open ("carta-out.odt", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + Spett : Spettabile + contraente : First person + indirizzo : street? 1 + Spett : Egregio + contraente : Second Person + indirizzo : street? 2 + tipo : racc. A.C. + luogo : Varese + oggetto : Saluti + tipo : Raccomandata + luogo : Gavirate + oggetto : Ossequi + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) + >>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate (*tf) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> for i in meta_counts : + ... print i, repr (t [':'.join (('Set_Attribute', i))]) + character-count '1131' + image-count '0' + object-count '0' + page-count '3' + paragraph-count '80' + table-count '2' + word-count '159' + >>> o.close () + >>> ov = sio.getvalue () + >>> f = open ("testout3.odt", "wb") + >>> f.write (ov) + >>> f.close () + >>> o = OOoPy (infile = sio) + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> s = o.read ('styles.xml') + >>> for n in c.findall ('./*/*') : + ... name = n.get (OOo_Tag ('style', 'name', m)) + ... if name : + ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) + ... print '"%s", "%s"' % (name, parent) + "Tahoma1", "None" + "Bitstream Vera Sans", "None" + "Tahoma", "None" + "Nimbus Roman No9 L", "None" + "Courier New", "None" + "Arial Black", "None" + "New Century Schoolbook", "None" + "Times New Roman", "None" + "Arial", "None" + "Helvetica", "None" + "Table1", "None" + "Table1.A", "None" + "Table1.A1", "None" + "Table1.E1", "None" + "Table1.A2", "None" + "Table1.E2", "None" + "P1", "None" + "fr1", "Frame" + "fr2", "Frame" + "Sect1", "None" + "gr1", "None" + "P2", "Standard" + "Standard_Concat", "None" + "Concat_P1", "Concat_Frame_20_contents" + "Concat_P2", "Concat_Frame_20_contents" + "P3", "Concat_Frame_20_contents" + "P4", "Concat_Standard" + "P5", "Concat_Standard" + "P6", "Concat_Frame_20_contents" + "P7", "Concat_Frame_20_contents" + "P8", "Concat_Frame_20_contents" + "P9", "Concat_Frame_20_contents" + "P10", "Concat_Frame_20_contents" + "P11", "Concat_Frame_20_contents" + "P12", "Concat_Frame_20_contents" + "P14", "Concat_Standard" + "P15", "Concat_Standard" + "P16", "Concat_Standard" + "P17", "Concat_Standard" + "P18", "Concat_Standard" + "P19", "Concat_Standard" + "P20", "Concat_Standard" + "P21", "Concat_Standard" + "P22", "Concat_Standard" + "P23", "Concat_Standard" + "Concat_fr1", "Frame" + "Concat_fr2", "Frame" + "fr3", "Frame" + "fr4", "Frame" + "fr5", "Frame" + "fr6", "Frame" + "Concat_gr1", "None" + "N0", "None" + "N2", "None" + "P14_Concat", "Concat_Standard" + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name + salutation + firstname + lastname + street + country + postalcode + city + date + invoice.invoice_no + invoice.abo.aboprice.abotype.description + address.salutation + address.title + address.firstname + address.lastname + address.function + address.street + address.country + address.postalcode + address.city + invoice.subscriber.salutation + invoice.subscriber.title + invoice.subscriber.firstname + invoice.subscriber.lastname + invoice.subscriber.function + invoice.subscriber.street + invoice.subscriber.country + invoice.subscriber.postalcode + invoice.subscriber.city + invoice.period_start + invoice.period_end + invoice.currency.name + invoice.amount + invoice.subscriber.initial + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : + ... name = n.get (OOo_Tag ('text', 'name', m)) + ... print name + Illustration + Table + Text + Drawing + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : + ... name = n.get (OOo_Tag ('text', 'style-name', m)) + ... if not name or name.startswith ('Concat') : + ... print ':'.join(split_tag (n.tag)), ">%s<" % name + text:p >None< + text:p >None< + text:p >Concat_P1< + text:p >Concat_P1< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_P2< + text:p >Concat_Frame_20_contents< + text:p >None< + text:p >None< + text:p >None< + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : + ... attrs = 'name', 'style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... print attrs + ['Frame1', 'fr1', '0', '1'] + ['Frame2', 'fr1', '3', '2'] + ['Frame3', 'Concat_fr1', '6', '3'] + ['Frame4', 'Concat_fr2', '7', '3'] + ['Frame5', 'fr3', '8', '3'] + ['Frame6', 'Concat_fr1', '9', '3'] + ['Frame7', 'fr4', '10', '3'] + ['Frame8', 'fr4', '11', '3'] + ['Frame9', 'fr4', '12', '3'] + ['Frame10', 'fr4', '13', '3'] + ['Frame11', 'fr4', '14', '3'] + ['Frame12', 'fr4', '15', '3'] + ['Frame13', 'fr5', '16', '3'] + ['Frame14', 'fr4', '18', '3'] + ['Frame15', 'fr4', '19', '3'] + ['Frame16', 'fr4', '20', '3'] + ['Frame17', 'fr6', '17', '3'] + ['Frame18', 'fr4', '23', '3'] + ['Frame19', 'fr2', '2', None] + ['Frame20', 'fr2', '5', None] + >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : + ... attrs = 'name', 'style-name' + ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] + ... print attrs + ['Section1', 'Sect1'] + ['Section2', 'Sect1'] + ['Section3', 'Sect1'] + ['Section4', 'Sect1'] + ['Section5', 'Sect1'] + ['Section6', 'Sect1'] + ['Section7', 'Sect1'] + ['Section8', 'Sect1'] + ['Section9', 'Sect1'] + ['Section10', 'Sect1'] + ['Section11', 'Sect1'] + ['Section12', 'Sect1'] + ['Section13', 'Sect1'] + ['Section14', 'Sect1'] + ['Section15', 'Sect1'] + ['Section16', 'Sect1'] + ['Section17', 'Sect1'] + ['Section18', 'Sect1'] + ['Section19', 'Sect1'] + ['Section20', 'Sect1'] + ['Section21', 'Sect1'] + ['Section22', 'Sect1'] + ['Section23', 'Sect1'] + ['Section24', 'Sect1'] + ['Section25', 'Sect1'] + ['Section26', 'Sect1'] + ['Section27', 'Sect1'] + ['Section28', 'Sect1'] + ['Section29', 'Sect1'] + ['Section30', 'Sect1'] + ['Section31', 'Sect1'] + ['Section32', 'Sect1'] + ['Section33', 'Sect1'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) + ... print attrs + ['gr1', 'P1', '1', '1'] + ['gr1', 'P1', '4', '2'] + >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : + ... attrs = 'style-name', 'text-style-name', 'z-index' + ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] + ... print attrs + ['Concat_gr1', 'P1', '24'] + ['Concat_gr1', 'P1', '22'] + ['Concat_gr1', 'P1', '21'] + >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : + ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : + ... attrs = 'name', 'display-name', 'class', 'family' + ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] + ... print attrs + ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) + ... if props is not None and len (props) : + ... props [0].tag + ['Concat_Standard', None, 'text', 'paragraph'] + ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph'] + ['Concat_List', None, 'list', 'paragraph'] + ['Concat_Caption', None, 'extra', 'paragraph'] + ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph'] + ['Concat_Index', None, 'index', 'paragraph'] + >>> for n in c.findall ('.//*') : + ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) + ... if zidx : + ... print ':'.join(split_tag (n.tag)), zidx + draw:frame 0 + draw:rect 1 + draw:frame 3 + draw:rect 4 + draw:frame 6 + draw:frame 7 + draw:frame 8 + draw:frame 9 + draw:frame 10 + draw:frame 11 + draw:frame 12 + draw:frame 13 + draw:frame 14 + draw:frame 15 + draw:frame 16 + draw:frame 18 + draw:frame 19 + draw:frame 20 + draw:frame 17 + draw:frame 23 + draw:line 24 + draw:frame 2 + draw:frame 5 + draw:line 22 + draw:line 21 + >>> from os import system + >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt ' + ... '-o testout.odt ' + ... 'salutation=Frau firstname=Erika lastname=Musterfrau ' + ... 'country=D postalcode=00815 city=Niemandsdorf ' + ... 'street="Beispielstrasse 42"') + 0 + >>> o = OOoPy (infile = 'testout.odt') + >>> c = o.read ('content.xml') + >>> m = o.mimetype + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + salutation : Frau + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + salutation : Frau + firstname : Erika + lastname : Musterfrau + street : Beispielstrasse 42 + country : D + postalcode : 00815 + city : Niemandsdorf + >>> o.close () + >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' " + ... "testfiles/carta.odt testfiles/x.csv") + 0 + >>> o = OOoPy (infile = 'testout.odt') + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) + >>> for node in body.findall (vset) : + ... name = node.get (OOo_Tag ('text', 'name', m)) + ... print name, ':', node.text + Spett : Spettabile + contraente : First person + indirizzo : street? 1 + Spett : Egregio + contraente : Second Person + indirizzo : street? 2 + tipo : racc. A.C. + luogo : Varese + oggetto : Saluti + tipo : Raccomandata + luogo : Gavirate + oggetto : Ossequi + >>> o.close () + >>> infile = 'testfiles/testenum.odt' + >>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt') + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Addpagebreak_Style () + ... , Transforms.Mailmerge + ... ( iterator = + ... ( dict (firstname = 'Erika', lastname = 'Nobody') + ... , dict (firstname = 'Eric', lastname = 'Wizard') + ... , cb + ... ) + ... ) + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... ) + >>> t.transform (o) + >>> o.close () + >>> o = OOoPy (infile = 'xyzzy.odt') + >>> m = o.mimetype + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> textlist = './/' + OOo_Tag ('text', 'list', m) + >>> for node in body.findall (textlist) : + ... id = node.get (OOo_Tag ('xml', 'id', m)) + ... print 'xml:id', ':', id + xml:id : list1 + xml:id : list2 + xml:id : list3 + >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt') + >>> m = o.mimetype + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate ('testfiles/page2.odt') + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... , Transforms.Manifest_Append () + ... ) + >>> t.transform (o) + >>> o.close () + >>> o = OOoPy (infile = 'xyzzy.odt') + >>> c = o.read ('META-INF/manifest.xml') + >>> for node in c.getroot () : + ... fe = node.get (OOo_Tag ('manifest', 'full-path', m)) + ... print fe + / + Pictures/10000000000000C80000007941B1A419.jpg + Pictures/10000000000000DC000000B02E191635.jpg + Pictures/10000000000000DC000000A337377AAA.jpg + meta.xml + settings.xml + content.xml + Thumbnails/thumbnail.png + layout-cache + manifest.rdf + Configurations2/accelerator/current.xml + Configurations2/ + styles.xml + >>> for f in o.izip.infolist () : + ... print f.filename + mimetype + settings.xml + META-INF/manifest.xml + content.xml + meta.xml + styles.xml + Pictures/10000000000000C80000007941B1A419.jpg + Pictures/10000000000000DC000000B02E191635.jpg + Pictures/10000000000000DC000000A337377AAA.jpg + Thumbnails/thumbnail.png + layout-cache + manifest.rdf + Configurations2/images/Bitmaps/ + Configurations2/accelerator/current.xml + >>> sio = StringIO () + >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio) + >>> m = o.mimetype + >>> t = Transformer ( + ... o.mimetype + ... , get_meta (o.mimetype) + ... , Transforms.Concatenate ('testfiles/tbl_second.odt') + ... , renumber_all (o.mimetype) + ... , set_meta (o.mimetype) + ... , Transforms.Fix_OOo_Tag () + ... , Transforms.Manifest_Append () + ... ) + >>> t.transform (o) + >>> o.close () + >>> o = OOoPy (infile = sio) + >>> c = o.read ('content.xml') + >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) + >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m) + >>> for table in body.findall (tbls) : + ... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m)) + ... if name : + ... print name + ... for t in table.findall ('.//') : + ... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m)) + ... if name : + ... print name + Tabella1 + Tabella1.A + Tabella1.A1 + Tabella1.B1 + Tabella1.A2 + Tabella1.B2 + Tabella1 + Tabella1.A + Tabella1.A1 + Tabella1.B1 + Tabella1.A2 + Tabella1.B2 + """ + + def __init__ (self, mimetype, *tf) : + assert (mimetype in mimetypes) + self.mimetype = mimetype + self.transforms = {} + for t in tf : + self.insert (t) + self.dictionary = {} + self.has_key = self.dictionary.has_key + self.__contains__ = self.has_key + # 2-tuples of filename, content + self.appendfiles = [] + # end def __init__ + + def insert (self, transform) : + """Insert a new transform""" + t = transform + if t.prio not in self.transforms : + self.transforms [t.prio] = [] + self.transforms [t.prio].append (t) + t.register (self) + # end def append + + def transform (self, ooopy) : + """ + Apply all the transforms in priority order. + Priority order is global over all transforms. + """ + self.trees = {} + for f in files : + self.trees [f] = ooopy.read (f) + #self.dictionary = {} # clear dict when transforming another ooopy + prios = self.transforms.keys () + prios.sort () + for p in prios : + for t in self.transforms [p] : + t.apply_all (self.trees) + for e in self.trees.itervalues () : + e.write () + for fname, fcontent in self.appendfiles : + e.ooopy.append_file (fname, fcontent) + # end def transform + + def __getitem__ (self, key) : + return self.dictionary [key] + # end def __getitem__ + + def __setitem__ (self, key, value) : + self.dictionary [key] = value + # end def __setitem__ +# end class Transformer |