diff options
Diffstat (limited to 'ooopy/Transforms.py')
| -rw-r--r-- | ooopy/Transforms.py | 366 | 
1 files changed, 172 insertions, 194 deletions
| diff --git a/ooopy/Transforms.py b/ooopy/Transforms.py index 50a6c0db8..37e7179e8 100644 --- a/ooopy/Transforms.py +++ b/ooopy/Transforms.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python3 +# -*- coding: utf-8 -*-  # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.  # Reichergasse 131, A-3411 Weidling.  # Web: http://www.runtux.com Email: office@runtux.com @@ -21,114 +21,102 @@  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  # **************************************************************************** -from __future__              import absolute_import -  import time  import re -try : -    from xml.etree.ElementTree   import dump, SubElement, Element, tostring -except ImportError : -    from elementtree.ElementTree import dump, SubElement, Element, tostring -from copy                    import deepcopy -from ooopy.OOoPy             import OOoPy, autosuper -from ooopy.Transformer       import files, split_tag, OOo_Tag, Transform -from ooopy.Transformer       import mimetypes, namespace_by_name -from ooopy.Version           import VERSION +from xml.etree.ElementTree import SubElement, Element  # , dump , tostring +from copy import deepcopy +from ooopy.OOoPy import OOoPy, autosuper +from ooopy.Transformer import split_tag, OOo_Tag, Transform , mimetypes, \ +    namespace_by_name # , # files +# from ooopy.Version           import VERSION  # counts in meta.xml -meta_counts = \ -    ( 'character-count', 'image-count', 'object-count', 'page-count' -    , 'paragraph-count', 'table-count', 'word-count' -    ) - -class Access_Attribute (autosuper) : -    """ For performance reasons we do not specify a separate transform -        for each attribute-read or -change operation. Instead we define -        all the attribute accesses we want to perform as objects that -        follow the attribute access api and apply them all using an -        Attribute_Access in one go. +meta_counts = ('character-count', 'image-count', 'object-count', 'page-count', +               'paragraph-count', 'table-count', 'word-count') + + +class Access_Attribute(autosuper): +    """ +    For performance reasons we do not specify a separate transform +    for each attribute-read or -change operation. Instead we define +    all the attribute accesses we want to perform as objects that +    follow the attribute access api and apply them all using an +    Attribute_Access in one go.      """ -    def __init__ (self, key = None, prefix = None, ** kw) : -        self.__super.__init__ (key = key, prefix = prefix, **kw) +    def __init__(self, key=None, prefix=None, **kw): +        self.__super.__init__(key=key, prefix=prefix, **kw)          self.key = key -        if key : -            if not prefix : -                prefix   = self.__class__.__name__ -            self.key = ':'.join ((prefix, key)) -    # end def __init__ +        if key: +            if not prefix: +                prefix = self.__class__.__name__ +            self.key = ':'.join((prefix, key)) -    def register (self, transformer) : +    def register (self, transformer):          self.transformer = transformer -    # end def register -    def use_value (self, oldval = None) : -        """ Can change the given value by returning the new value. If -            returning None or oldval the attribute stays unchanged. +    def use_value (self, oldval=None):          """ -        raise NotImplementedError, "use_value must be defined in derived class" -    # end def use_value +        Can change the given value by returning the new value. If +        returning None or oldval the attribute stays unchanged. +        """ +        raise NotImplementedError("use_value must be defined in derived class") -# end class Access_Attribute -class Get_Attribute (Access_Attribute) : -    """ An example of not changing an attribute but only storing the -        value in the transformer +class Get_Attribute(Access_Attribute): +    """ +    An example of not changing an attribute but only storing the +    value in the transformer      """ -    def __init__ (self, tag, attr, key, transform = None, ** kw) : -        self.__super.__init__ (key = key, **kw) -        self.tag        = tag -        self.attribute  = attr -        self.transform  = transform -    # end def __init__ +    def __init__(self, tag, attr, key, transform=None, ** kw): +        self.__super.__init__(key=key, **kw) +        self.tag = tag +        self.attribute = attr +        self.transform = transform -    def use_value (self, oldval = None) : -        self.transformer [self.key] = oldval +    def use_value(self, oldval=None): +        self.transformer[self.key] = oldval          return None -    # end def use_value -# end def Get_Attribute -class Get_Max (Access_Attribute) : +class Get_Max(Access_Attribute):      """ Get the maximum value of an attribute """ -    def __init__ (self, tag, attr, key, transform = None, ** kw) : -        self.__super.__init__ (key = key, **kw) -        self.tag        = tag -        self.attribute  = attr -        self.transform  = transform -    # end def __init__ - -    def register (self, transformer) : -        self.__super.register (transformer) -        self.transformer [self.key] = -1 -    # end def register - -    def use_value (self, oldval = None) : -        if  self.transformer [self.key] < oldval : -            self.transformer [self.key] = oldval +    def __init__(self, tag, attr, key, transform=None, ** kw): +        self.__super.__init__(key=key, **kw) +        self.tag = tag +        self.attribute = attr +        self.transform = transform + +    def register(self, transformer): +        self.__super.register(transformer) +        self.transformer[self.key] = -1 + +    def use_value(self, oldval=None): +        if oldval: +            oldval = int(oldval) +        if (self.transformer[self.key] or 0) < (oldval or 0): +            self.transformer[self.key] = oldval          return None -    # end def use_value - -# end def Get_Max -class Renumber (Access_Attribute) : -    """ Specifies a renumbering transform. OOo has a 'name' attribute -        for several different tags, e.g., tables, frames, sections etc. -        These names must be unique in the whole document. OOo itself -        solves this by appending a unique number to a basename for each -        element, e.g., sections are named 'Section1', 'Section2', ... -        Renumber transforms can be applied to correct the numbering -        after operations that destroy the unique numbering, e.g., after -        a mailmerge where the same document is repeatedly appended. -        The force parameter specifies if the new renumbered name should -        be inserted even if the attribute in question does not exist. +class Renumber (Access_Attribute): +    """ +    Specifies a renumbering transform. OOo has a 'name' attribute +    for several different tags, e.g., tables, frames, sections etc. +    These names must be unique in the whole document. OOo itself +    solves this by appending a unique number to a basename for each +    element, e.g., sections are named 'Section1', 'Section2', ... +    Renumber transforms can be applied to correct the numbering +    after operations that destroy the unique numbering, e.g., after +    a mailmerge where the same document is repeatedly appended. + +    The force parameter specifies if the new renumbered name should +    be inserted even if the attribute in question does not exist.      """ -    def __init__ \ -        (self, tag, name = None, attr = None, start = 1, force = False) : +    def __init__(self, tag, name=None, attr=None, start=1, force=False):          self.__super.__init__ ()          tag_ns, tag_name = split_tag (tag)          self.tag_ns      = tag_ns @@ -178,29 +166,26 @@ class Set_Attribute (Access_Attribute) :          self.transform  = transform          self.value      = value          self.oldvalue   = oldvalue -    # end def __init__ -    def use_value (self, oldval) : -        if oldval is None : +    def use_value(self, oldval): +        if oldval is None:              return None -        if self.oldvalue and oldval != self.oldvalue : +        if self.oldvalue and oldval != self.oldvalue:              return None -        if self.key and self.transformer.has_key (self.key) : -            return str (self.transformer [self.key]) +        if self.key and self.transformer.has_key(self.key): +            return str(self.transformer[self.key])          return self.value -    # end def use_value -# end class Set_Attribute  def set_attributes_from_dict (tag, attr, d) :      """ Convenience function: iterate over a dict and return a list of          Set_Attribute objects specifying replacement of attributes in          the dictionary      """ -    return [Set_Attribute (tag, attr, oldvalue = k, value = v) -            for k,v in d.iteritems () -           ] -# end def set_attributes_from_dict +    return [ +        Set_Attribute(tag, attr, oldvalue=k, value=v) +        for k, v in d.items() +    ]  class Reanchor (Access_Attribute) :      """ @@ -327,41 +312,37 @@ class Manifest_Append (Transform) :  # meta.xml transforms  # +  class Editinfo (Transform) :      """ -        This is an example of modifying OOo meta info (edit information, -        author, etc). We set some of the items (program that generated -        the OOo file, modification time, number of edit cyles and overall -        edit duration).  It's easy to subclass this transform and replace -        the "replace" variable (pun intended) in the derived class. +    This is an example of modifying OOo meta info (edit information, +    author, etc). We set some of the items (program that generated +    the OOo file, modification time, number of edit cyles and overall +    edit duration).  It's easy to subclass this transform and replace +    the "replace" variable (pun intended) in the derived class.      """      filename = 'meta.xml' -    prio     = 20 -    repl     = \ -        { ('meta', 'generator')        : 'OOoPy field replacement' -        , ('dc',   'date')             : time.strftime ('%Y-%m-%dT%H:%M:%S') -        , ('meta', 'editing-cycles')   : '0' -        , ('meta', 'editing-duration') : 'PT0M0S' -        } -    replace  = {} +    prio = 20 +    repl = { +        ('meta', 'generator'): 'OOoPy field replacement', +        ('dc', 'date'): time.strftime ('%Y-%m-%dT%H:%M:%S'), +        ('meta', 'editing-cycles'): '0', +        ('meta', 'editing-duration'): 'PT0M0S' +    } +    replace = {}      # iterate over all mimetypes, so this works for all known mimetypes      # of OOo documents. -    for m in mimetypes : -        for params, value in repl.iteritems () : -            replace [OOo_Tag (mimetype = m, *params)] = value +    for m in mimetypes: +        for params, value in repl.items(): +            replace [OOo_Tag (mimetype=m, *params)] = value -    def apply (self, root) : -        for node in root.findall (self.oootag ('office', 'meta') + '/*') : -            if self.replace.has_key (node.tag) : -                node.text = self.replace [node.tag] -    # end def apply -# end class Editinfo +    def apply(self, root) : +        for node in root.findall (self.oootag ('office', 'meta') + '/*'): +            if node.tag in self.replace.keys(): +                node.text = self.replace[node.tag] -# -# settings.xml transforms -# -class Autoupdate (Transform) : +class Autoupdate (Transform):      """          This is an example of modifying OOo settings. We set some of the          AutoUpdate configuration items in OOo to true. We also specify @@ -718,28 +699,31 @@ class Mailmerge (_Body_Concat) :      # end def apply  # end class Mailmerge +  def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) : -    """ Serialise a style-element of an OOo document (e.g., a -        style:font-decl, style:default-style, etc declaration). -        We remove the name of the style and return something that is a -        representation of the style element which can be used as a -        dictionary key. -        The serialisation format is a tuple containing the tag as the -        first item, the attributes (as key,value pairs returned by -        items()) as the second item and the following items are -        serialisations of children.      """ -    attr = dict (element.attrib) -    stylename = OOo_Tag ('style', 'name', mimetype) -    if stylename in attr : del attr [stylename] -    attr = attr.items () -    attr.sort () -    attr = tuple (attr) +    Serialise a style-element of an OOo document (e.g., a +    style:font-decl, style:default-style, etc declaration). +    We remove the name of the style and return something that is a +    representation of the style element which can be used as a +    dictionary key. +    The serialisation format is a tuple containing the tag as the +    first item, the attributes (as key,value pairs returned by +    items()) as the second item and the following items are +    serialisations of children. +    """ +    attr = dict(element.attrib) +    stylename = OOo_Tag('style', 'name', mimetype) +    if stylename in attr: +        del attr[stylename] +    attr = list(attr.items()) +    attr.sort() +    attr = tuple(attr)      serial = [prefix + element.tag, attr] -    for e in element : -        serial.append (tree_serialise (e, prefix, mimetype)) -    return tuple (serial) -# end def tree_serialise +    for e in element: +        serial.append(tree_serialise (e, prefix, mimetype)) +    return tuple(serial) +  class Concatenate (_Body_Concat) :      """ @@ -794,31 +778,31 @@ class Concatenate (_Body_Concat) :              assert (self.docs [-1].mimetype == self.docs [0].mimetype)      # end def __init__ -    def apply_all (self, trees) : -        assert (self.docs [0].mimetype == self.transformer.mimetype) +    def apply_all(self, trees): +        assert (self.docs[0].mimetype == self.transformer.mimetype)          self.serialised = {}          self.stylenames = {} -        self.namemaps   = [{}] +        self.namemaps = [{}]          self.tab_depend = {} -        for s in self.ref_attrs.itervalues () : -            self.namemaps [0][s] = {} +        for s in self.ref_attrs.values(): +            self.namemaps[0][s] = {}          self.body_decls = {} -        for s in self.body_decl_sections : -            self.body_decls [s] = {} -        self.trees      = {} -        for f in self.oofiles : -            self.trees [f] = [trees [f].getroot ()] -        self.sections   = {} +        for s in self.body_decl_sections: +            self.body_decls[s] = {} +        self.trees = {} +        for f in self.oofiles: +            self.trees[f] = [trees[f].getroot()] +        self.sections = {}          for f in self.stylefiles : -            self.sections [f] = {} +            self.sections[f] = {}              for node in self.trees [f][0] :                  self.sections [f][node.tag] = node -        for d in self.docs : -            self.namemaps.append ({}) -            for s in self.ref_attrs.itervalues () : -                self.namemaps [-1][s] = {} -            for f in self.oofiles : -                self.trees [f].append (d.read (f).getroot ()) +        for d in self.docs: +            self.namemaps.append({}) +            for s in self.ref_attrs.values(): +                self.namemaps[-1][s] = {} +            for f in self.oofiles: +                self.trees[f].append(d.read(f).getroot())          # append a pagebreak style, will be optimized away if duplicate          pbs = Addpagebreak_Style (transformer = self.transformer)          pbs.apply (self.trees ['content.xml'][0]) @@ -879,17 +863,12 @@ class Concatenate (_Body_Concat) :                              if max < pos :                                  max = pos                          self.insert_tabs (sub, max) -    # end def apply_tab_correction - -    def _attr_rename (self, idx) : -        r = sum \ -            ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v]) -                for k,v in self.ref_attrs.iteritems () -              ] -            , [] -            ) -        return Attribute_Access (r, transformer = self.transformer) -    # end def _attr_rename + +    def _attr_rename(self, idx): +        r = sum( +            [set_attributes_from_dict(None, k, self.namemaps [idx][v]) +             for k, v in self.ref_attrs.items()], []) +        return Attribute_Access(r, transformer=self.transformer)      def body_concat (self) :          count = {} @@ -974,30 +953,28 @@ class Concatenate (_Body_Concat) :                      )      # end def insert_tabs -    def merge_defaultstyle (self, default_style, node) : +    def merge_defaultstyle(self, default_style, node):          assert default_style is not None          assert node is not None          proppath = './' + self.properties_tag          defprops = default_style.find (proppath) -        props    = node.find          (proppath) -        sn       = self.oootag ('style', 'name') -        if props is None : -            props = Element (self.properties_tag) -        for k, v in defprops.attrib.iteritems () : -            if self.default_properties.get (k) != v and not props.get (k) : -                if k == self.oootag ('style', 'tab-stop-distance') : +        props = node.find(proppath) +        sn = self.oootag('style', 'name') +        if props is None: +            props = Element(self.properties_tag) +        for k, v in defprops.attrib.items(): +            if self.default_properties.get(k) != v and not props.get(k): +                if k == self.oootag('style', 'tab-stop-distance'):                      self.tab_correct = v -                    self.tab_depend  = {node.get (sn) : 1} -                    stps = SubElement \ -                        (props, self.oootag ('style', 'tab-stops')) -                    self.insert_tabs (stps) +                    self.tab_depend = {node.get(sn): 1} +                    stps = SubElement(props, self.oootag('style', 'tab-stops')) +                    self.insert_tabs(stps)                  else : -                    props.set (k,v) -        if len (props) or props.attrib : -            node.append (props) -    # end def merge_defaultstyle +                    props.set(k, v) +        if len(props) or props.attrib: +            node.append(props) -    def _newname (self, key, oldname) : +    def _newname(self, key, oldname):          stylenum = 0          if (key, oldname) not in self.stylenames :              self.stylenames [(key, oldname)] = 1 @@ -1212,17 +1189,18 @@ def renumber_all (mimetype) :  # the info retrieved from the OOo document: We use the attribute name in  # the meta-information to store (and later retrieve) the information. -def get_meta (mimetype) : -    """ Factory function for Attribute_Access to get all interesting -        meta-data + +def get_meta(mimetype): +    """ +    Factory function for Attribute_Access to get all interesting meta-data      """      get_attr = []      for attr in meta_counts : -        a = OOo_Tag ('meta', attr, mimetype) -        t = OOo_Tag ('meta', 'document-statistic', mimetype) +        a = OOo_Tag('meta', attr, mimetype) +        t = OOo_Tag('meta', 'document-statistic', mimetype)          get_attr.append (Get_Attribute (t, a, attr)) -    return Attribute_Access (get_attr, prio =  20, filename = 'meta.xml') -# end def get_meta +    return Attribute_Access (get_attr, prio= 20, filename='meta.xml') +  def set_meta (mimetype) :      """ Factory function for Attribute_Access to set all interesting | 
