diff options
Diffstat (limited to 'ooopy/Transforms.py')
-rw-r--r-- | ooopy/Transforms.py | 366 |
1 files changed, 172 insertions, 194 deletions
diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py index 50a6c0db8..37e7179e8 100644 --- a/ooopy/Transforms.py +++ b/ooopy/Transforms.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. # Reichergasse 131, A-3411 Weidling. # Web: http://www.runtux.com Email: office@runtux.com @@ -21,114 +21,102 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # **************************************************************************** -from __future__ import absolute_import - import time import re -try : - from xml.etree.ElementTree import dump, SubElement, Element, tostring -except ImportError : - from elementtree.ElementTree import dump, SubElement, Element, tostring -from copy import deepcopy -from ooopy.OOoPy import OOoPy, autosuper -from ooopy.Transformer import files, split_tag, OOo_Tag, Transform -from ooopy.Transformer import mimetypes, namespace_by_name -from ooopy.Version import VERSION +from xml.etree.ElementTree import SubElement, Element # , dump , tostring +from copy import deepcopy +from ooopy.OOoPy import OOoPy, autosuper +from ooopy.Transformer import split_tag, OOo_Tag, Transform , mimetypes, \ + namespace_by_name # , # files +# from ooopy.Version import VERSION # counts in meta.xml -meta_counts = \ - ( 'character-count', 'image-count', 'object-count', 'page-count' - , 'paragraph-count', 'table-count', 'word-count' - ) - -class Access_Attribute (autosuper) : - """ For performance reasons we do not specify a separate transform - for each attribute-read or -change operation. Instead we define - all the attribute accesses we want to perform as objects that - follow the attribute access api and apply them all using an - Attribute_Access in one go. +meta_counts = ('character-count', 'image-count', 'object-count', 'page-count', + 'paragraph-count', 'table-count', 'word-count') + + +class Access_Attribute(autosuper): + """ + For performance reasons we do not specify a separate transform + for each attribute-read or -change operation. Instead we define + all the attribute accesses we want to perform as objects that + follow the attribute access api and apply them all using an + Attribute_Access in one go. """ - def __init__ (self, key = None, prefix = None, ** kw) : - self.__super.__init__ (key = key, prefix = prefix, **kw) + def __init__(self, key=None, prefix=None, **kw): + self.__super.__init__(key=key, prefix=prefix, **kw) self.key = key - if key : - if not prefix : - prefix = self.__class__.__name__ - self.key = ':'.join ((prefix, key)) - # end def __init__ + if key: + if not prefix: + prefix = self.__class__.__name__ + self.key = ':'.join((prefix, key)) - def register (self, transformer) : + def register (self, transformer): self.transformer = transformer - # end def register - def use_value (self, oldval = None) : - """ Can change the given value by returning the new value. If - returning None or oldval the attribute stays unchanged. + def use_value (self, oldval=None): """ - raise NotImplementedError, "use_value must be defined in derived class" - # end def use_value + Can change the given value by returning the new value. If + returning None or oldval the attribute stays unchanged. + """ + raise NotImplementedError("use_value must be defined in derived class") -# end class Access_Attribute -class Get_Attribute (Access_Attribute) : - """ An example of not changing an attribute but only storing the - value in the transformer +class Get_Attribute(Access_Attribute): + """ + An example of not changing an attribute but only storing the + value in the transformer """ - def __init__ (self, tag, attr, key, transform = None, ** kw) : - self.__super.__init__ (key = key, **kw) - self.tag = tag - self.attribute = attr - self.transform = transform - # end def __init__ + def __init__(self, tag, attr, key, transform=None, ** kw): + self.__super.__init__(key=key, **kw) + self.tag = tag + self.attribute = attr + self.transform = transform - def use_value (self, oldval = None) : - self.transformer [self.key] = oldval + def use_value(self, oldval=None): + self.transformer[self.key] = oldval return None - # end def use_value -# end def Get_Attribute -class Get_Max (Access_Attribute) : +class Get_Max(Access_Attribute): """ Get the maximum value of an attribute """ - def __init__ (self, tag, attr, key, transform = None, ** kw) : - self.__super.__init__ (key = key, **kw) - self.tag = tag - self.attribute = attr - self.transform = transform - # end def __init__ - - def register (self, transformer) : - self.__super.register (transformer) - self.transformer [self.key] = -1 - # end def register - - def use_value (self, oldval = None) : - if self.transformer [self.key] < oldval : - self.transformer [self.key] = oldval + def __init__(self, tag, attr, key, transform=None, ** kw): + self.__super.__init__(key=key, **kw) + self.tag = tag + self.attribute = attr + self.transform = transform + + def register(self, transformer): + self.__super.register(transformer) + self.transformer[self.key] = -1 + + def use_value(self, oldval=None): + if oldval: + oldval = int(oldval) + if (self.transformer[self.key] or 0) < (oldval or 0): + self.transformer[self.key] = oldval return None - # end def use_value - -# end def Get_Max -class Renumber (Access_Attribute) : - """ Specifies a renumbering transform. OOo has a 'name' attribute - for several different tags, e.g., tables, frames, sections etc. - These names must be unique in the whole document. OOo itself - solves this by appending a unique number to a basename for each - element, e.g., sections are named 'Section1', 'Section2', ... - Renumber transforms can be applied to correct the numbering - after operations that destroy the unique numbering, e.g., after - a mailmerge where the same document is repeatedly appended. - The force parameter specifies if the new renumbered name should - be inserted even if the attribute in question does not exist. +class Renumber (Access_Attribute): + """ + Specifies a renumbering transform. OOo has a 'name' attribute + for several different tags, e.g., tables, frames, sections etc. + These names must be unique in the whole document. OOo itself + solves this by appending a unique number to a basename for each + element, e.g., sections are named 'Section1', 'Section2', ... + Renumber transforms can be applied to correct the numbering + after operations that destroy the unique numbering, e.g., after + a mailmerge where the same document is repeatedly appended. + + The force parameter specifies if the new renumbered name should + be inserted even if the attribute in question does not exist. """ - def __init__ \ - (self, tag, name = None, attr = None, start = 1, force = False) : + def __init__(self, tag, name=None, attr=None, start=1, force=False): self.__super.__init__ () tag_ns, tag_name = split_tag (tag) self.tag_ns = tag_ns @@ -178,29 +166,26 @@ class Set_Attribute (Access_Attribute) : self.transform = transform self.value = value self.oldvalue = oldvalue - # end def __init__ - def use_value (self, oldval) : - if oldval is None : + def use_value(self, oldval): + if oldval is None: return None - if self.oldvalue and oldval != self.oldvalue : + if self.oldvalue and oldval != self.oldvalue: return None - if self.key and self.transformer.has_key (self.key) : - return str (self.transformer [self.key]) + if self.key and self.transformer.has_key(self.key): + return str(self.transformer[self.key]) return self.value - # end def use_value -# end class Set_Attribute def set_attributes_from_dict (tag, attr, d) : """ Convenience function: iterate over a dict and return a list of Set_Attribute objects specifying replacement of attributes in the dictionary """ - return [Set_Attribute (tag, attr, oldvalue = k, value = v) - for k,v in d.iteritems () - ] -# end def set_attributes_from_dict + return [ + Set_Attribute(tag, attr, oldvalue=k, value=v) + for k, v in d.items() + ] class Reanchor (Access_Attribute) : """ @@ -327,41 +312,37 @@ class Manifest_Append (Transform) : # meta.xml transforms # + class Editinfo (Transform) : """ - This is an example of modifying OOo meta info (edit information, - author, etc). We set some of the items (program that generated - the OOo file, modification time, number of edit cyles and overall - edit duration). It's easy to subclass this transform and replace - the "replace" variable (pun intended) in the derived class. + This is an example of modifying OOo meta info (edit information, + author, etc). We set some of the items (program that generated + the OOo file, modification time, number of edit cyles and overall + edit duration). It's easy to subclass this transform and replace + the "replace" variable (pun intended) in the derived class. """ filename = 'meta.xml' - prio = 20 - repl = \ - { ('meta', 'generator') : 'OOoPy field replacement' - , ('dc', 'date') : time.strftime ('%Y-%m-%dT%H:%M:%S') - , ('meta', 'editing-cycles') : '0' - , ('meta', 'editing-duration') : 'PT0M0S' - } - replace = {} + prio = 20 + repl = { + ('meta', 'generator'): 'OOoPy field replacement', + ('dc', 'date'): time.strftime ('%Y-%m-%dT%H:%M:%S'), + ('meta', 'editing-cycles'): '0', + ('meta', 'editing-duration'): 'PT0M0S' + } + replace = {} # iterate over all mimetypes, so this works for all known mimetypes # of OOo documents. - for m in mimetypes : - for params, value in repl.iteritems () : - replace [OOo_Tag (mimetype = m, *params)] = value + for m in mimetypes: + for params, value in repl.items(): + replace [OOo_Tag (mimetype=m, *params)] = value - def apply (self, root) : - for node in root.findall (self.oootag ('office', 'meta') + '/*') : - if self.replace.has_key (node.tag) : - node.text = self.replace [node.tag] - # end def apply -# end class Editinfo + def apply(self, root) : + for node in root.findall (self.oootag ('office', 'meta') + '/*'): + if node.tag in self.replace.keys(): + node.text = self.replace[node.tag] -# -# settings.xml transforms -# -class Autoupdate (Transform) : +class Autoupdate (Transform): """ This is an example of modifying OOo settings. We set some of the AutoUpdate configuration items in OOo to true. We also specify @@ -718,28 +699,31 @@ class Mailmerge (_Body_Concat) : # end def apply # end class Mailmerge + def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) : - """ Serialise a style-element of an OOo document (e.g., a - style:font-decl, style:default-style, etc declaration). - We remove the name of the style and return something that is a - representation of the style element which can be used as a - dictionary key. - The serialisation format is a tuple containing the tag as the - first item, the attributes (as key,value pairs returned by - items()) as the second item and the following items are - serialisations of children. """ - attr = dict (element.attrib) - stylename = OOo_Tag ('style', 'name', mimetype) - if stylename in attr : del attr [stylename] - attr = attr.items () - attr.sort () - attr = tuple (attr) + Serialise a style-element of an OOo document (e.g., a + style:font-decl, style:default-style, etc declaration). + We remove the name of the style and return something that is a + representation of the style element which can be used as a + dictionary key. + The serialisation format is a tuple containing the tag as the + first item, the attributes (as key,value pairs returned by + items()) as the second item and the following items are + serialisations of children. + """ + attr = dict(element.attrib) + stylename = OOo_Tag('style', 'name', mimetype) + if stylename in attr: + del attr[stylename] + attr = list(attr.items()) + attr.sort() + attr = tuple(attr) serial = [prefix + element.tag, attr] - for e in element : - serial.append (tree_serialise (e, prefix, mimetype)) - return tuple (serial) -# end def tree_serialise + for e in element: + serial.append(tree_serialise (e, prefix, mimetype)) + return tuple(serial) + class Concatenate (_Body_Concat) : """ @@ -794,31 +778,31 @@ class Concatenate (_Body_Concat) : assert (self.docs [-1].mimetype == self.docs [0].mimetype) # end def __init__ - def apply_all (self, trees) : - assert (self.docs [0].mimetype == self.transformer.mimetype) + def apply_all(self, trees): + assert (self.docs[0].mimetype == self.transformer.mimetype) self.serialised = {} self.stylenames = {} - self.namemaps = [{}] + self.namemaps = [{}] self.tab_depend = {} - for s in self.ref_attrs.itervalues () : - self.namemaps [0][s] = {} + for s in self.ref_attrs.values(): + self.namemaps[0][s] = {} self.body_decls = {} - for s in self.body_decl_sections : - self.body_decls [s] = {} - self.trees = {} - for f in self.oofiles : - self.trees [f] = [trees [f].getroot ()] - self.sections = {} + for s in self.body_decl_sections: + self.body_decls[s] = {} + self.trees = {} + for f in self.oofiles: + self.trees[f] = [trees[f].getroot()] + self.sections = {} for f in self.stylefiles : - self.sections [f] = {} + self.sections[f] = {} for node in self.trees [f][0] : self.sections [f][node.tag] = node - for d in self.docs : - self.namemaps.append ({}) - for s in self.ref_attrs.itervalues () : - self.namemaps [-1][s] = {} - for f in self.oofiles : - self.trees [f].append (d.read (f).getroot ()) + for d in self.docs: + self.namemaps.append({}) + for s in self.ref_attrs.values(): + self.namemaps[-1][s] = {} + for f in self.oofiles: + self.trees[f].append(d.read(f).getroot()) # append a pagebreak style, will be optimized away if duplicate pbs = Addpagebreak_Style (transformer = self.transformer) pbs.apply (self.trees ['content.xml'][0]) @@ -879,17 +863,12 @@ class Concatenate (_Body_Concat) : if max < pos : max = pos self.insert_tabs (sub, max) - # end def apply_tab_correction - - def _attr_rename (self, idx) : - r = sum \ - ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v]) - for k,v in self.ref_attrs.iteritems () - ] - , [] - ) - return Attribute_Access (r, transformer = self.transformer) - # end def _attr_rename + + def _attr_rename(self, idx): + r = sum( + [set_attributes_from_dict(None, k, self.namemaps [idx][v]) + for k, v in self.ref_attrs.items()], []) + return Attribute_Access(r, transformer=self.transformer) def body_concat (self) : count = {} @@ -974,30 +953,28 @@ class Concatenate (_Body_Concat) : ) # end def insert_tabs - def merge_defaultstyle (self, default_style, node) : + def merge_defaultstyle(self, default_style, node): assert default_style is not None assert node is not None proppath = './' + self.properties_tag defprops = default_style.find (proppath) - props = node.find (proppath) - sn = self.oootag ('style', 'name') - if props is None : - props = Element (self.properties_tag) - for k, v in defprops.attrib.iteritems () : - if self.default_properties.get (k) != v and not props.get (k) : - if k == self.oootag ('style', 'tab-stop-distance') : + props = node.find(proppath) + sn = self.oootag('style', 'name') + if props is None: + props = Element(self.properties_tag) + for k, v in defprops.attrib.items(): + if self.default_properties.get(k) != v and not props.get(k): + if k == self.oootag('style', 'tab-stop-distance'): self.tab_correct = v - self.tab_depend = {node.get (sn) : 1} - stps = SubElement \ - (props, self.oootag ('style', 'tab-stops')) - self.insert_tabs (stps) + self.tab_depend = {node.get(sn): 1} + stps = SubElement(props, self.oootag('style', 'tab-stops')) + self.insert_tabs(stps) else : - props.set (k,v) - if len (props) or props.attrib : - node.append (props) - # end def merge_defaultstyle + props.set(k, v) + if len(props) or props.attrib: + node.append(props) - def _newname (self, key, oldname) : + def _newname(self, key, oldname): stylenum = 0 if (key, oldname) not in self.stylenames : self.stylenames [(key, oldname)] = 1 @@ -1212,17 +1189,18 @@ def renumber_all (mimetype) : # the info retrieved from the OOo document: We use the attribute name in # the meta-information to store (and later retrieve) the information. -def get_meta (mimetype) : - """ Factory function for Attribute_Access to get all interesting - meta-data + +def get_meta(mimetype): + """ + Factory function for Attribute_Access to get all interesting meta-data """ get_attr = [] for attr in meta_counts : - a = OOo_Tag ('meta', attr, mimetype) - t = OOo_Tag ('meta', 'document-statistic', mimetype) + a = OOo_Tag('meta', attr, mimetype) + t = OOo_Tag('meta', 'document-statistic', mimetype) get_attr.append (Get_Attribute (t, a, attr)) - return Attribute_Access (get_attr, prio = 20, filename = 'meta.xml') -# end def get_meta + return Attribute_Access (get_attr, prio= 20, filename='meta.xml') + def set_meta (mimetype) : """ Factory function for Attribute_Access to set all interesting |