-rw-r--r--   chimere/tasks.py |  37
-rw-r--r--   chimere/utils.py | 370
2 files changed, 216 insertions, 191 deletions
diff --git a/chimere/tasks.py b/chimere/tasks.py
index 9c94f43..9eff7f5 100644
--- a/chimere/tasks.py
+++ b/chimere/tasks.py
@@ -60,19 +60,20 @@ else:
         return task_exc
 
 IMPORT_MESSAGES = {
-                   'import_pending':[_(u"Import pending")],
-                   'import_process':[_(u"Import processing")],
-                   'import_done':[_(u"Import successfuly done"),
+    'import_pending': [_(u"Import pending")],
+    'import_process': [_(u"Import processing")],
+    'import_done': [_(u"Import successfuly done"),
                     _(u" %(new)d new item(s), %(updated)d updated item(s)")],
-                   'import_failed':[_(u"Import failed"), "%s"],
-                   'import_cancel':[_(u"Import canceled")],
-                   'export_pending':[_(u"Export pending")],
-                   'export_process':[_(u"Export processing")],
-                   'export_done':[_(u"Export successfuly done"),
+    'import_failed': [_(u"Import failed"), "%s"],
+    'import_cancel': [_(u"Import canceled")],
+    'export_pending': [_(u"Export pending")],
+    'export_process': [_(u"Export processing")],
+    'export_done': [_(u"Export successfuly done"),
                     _(u" %(updated)d updated item(s)")],
-                   'export_failed':[_(u"Export failed"), "%s"],
-                   'export_cancel':[_(u"Export canceled")]
-                   }
+    'export_failed': [_(u"Export failed"), "%s"],
+    'export_cancel': [_(u"Export canceled")]
+}
+
 
 @task()
 def importing(importer_pk):
@@ -89,15 +90,16 @@ def importing(importer_pk):
     new_item, updated_item, error = importer.manager.get()
     importer.state = error + '\n' if error else ''
     importer.state += unicode(IMPORT_MESSAGES['import_done'][0])
-    importer.state += u" - " \
-         + unicode(IMPORT_MESSAGES['import_done'][1]) % {'new':new_item,
-                                                         'updated':updated_item}
+    importer.state += \
+        u" - " + unicode(IMPORT_MESSAGES['import_done'][1]) % {
+            'new': new_item, 'updated': updated_item}
     importer.state = importer.state
     importer.save()
     return True
+
 
 @task()
-@single_instance_task(60*10)
+@single_instance_task(60 * 10)
 def exporting(importer_pk, extra_args=[]):
     try:
         importer = Importer.objects.get(pk=importer_pk)
@@ -116,10 +118,11 @@ def exporting(importer_pk, extra_args=[]):
         pass
     if error:
         importer.state = unicode(IMPORT_MESSAGES['export_failed'][0]) \
-              + u" - " + unicode(IMPORT_MESSAGES['export_failed'][1]) % error
+            + u" - " + unicode(IMPORT_MESSAGES['export_failed'][1]) % error
         importer.save()
         return
     importer.state = unicode(IMPORT_MESSAGES['export_done'][0]) + u" - " \
-         + unicode(IMPORT_MESSAGES['export_done'][1]) % {'updated':updated_item}
+        + unicode(IMPORT_MESSAGES['export_done'][1]) % {
+            'updated': updated_item}
     importer.save()
     return True
diff --git a/chimere/utils.py b/chimere/utils.py
index c5c59e9..e017762 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -45,26 +45,30 @@ from django.utils.translation import ugettext_lazy as _
 from chimere import get_version
 from external_utils import OsmApi
+
 
 def unicode_normalize(string):
     if type(string) == str:
         string = unicode(string.decode('utf-8'))
     return ''.join(
         (c for c in unicodedata.normalize('NFD', string)
-        if unicodedata.category(c) != 'Mn'))
+         if unicodedata.category(c) != 'Mn'))
+
 
 class ImportManager(object):
     u"""
     Generic class for specific importers
     """
     default_source = None
+
     def __init__(self, importer_instance):
         self.importer_instance = importer_instance
         if self.importer_instance.default_name:
             self.default_name = self.importer_instance.default_name
         else:
-            self.default_name = " - ".join([cat.name
+            self.default_name = " - ".join([
+                cat.name
                 for cat in self.importer_instance.categories.order_by(
-                                                               'name').all()])
+                    'name').all()])
 
     def get(self):
         raise NotImplementedError
@@ -83,8 +87,8 @@ class ImportManager(object):
         item = None
         if import_key or pk:
             dct_import = {
-                'import_key__icontains':'%s:%s;' % (key, import_key),
-                'import_source':self.importer_instance.source}
+                'import_key__icontains': '%s:%s;' % (key, import_key),
+                'import_source': self.importer_instance.source}
             ref_item = cls.objects.filter(**dct_import)
             try:
                 item = None
@@ -121,12 +125,12 @@ class ImportManager(object):
             if not self.importer_instance.get_description and \
                self.importer_instance.default_description:
                 values['description'] = \
-                                     self.importer_instance.default_description
+                    self.importer_instance.default_description
             values.update({
-                'import_source':self.importer_instance.source})
+                'import_source': self.importer_instance.source})
             values['status'] = self.importer_instance.default_status
             if not self.importer_instance.associate_marker_to_way\
-              and cls.__name__ == 'Route':
+                    and cls.__name__ == 'Route':
                 values['has_associated_marker'] = False
             try:
                 item = cls.objects.create(**values)
@@ -158,8 +162,8 @@ class ImportManager(object):
             current_file_name = None
             for name in namelist:
                 if name.endswith(suffix) \
-                  or name.endswith(suffix.lower()) \
-                  or name.endswith(suffix.upper()):
+                        or name.endswith(suffix.lower()) \
+                        or name.endswith(suffix.upper()):
                     current_file_name = name
             filenames.append(current_file_name)
         files = []
@@ -180,7 +184,7 @@ class ImportManager(object):
         if not hasattr(source, 'read'):
             if not source:
                 source = self.importer_instance.source \
-                       if self.importer_instance.source else self.default_source
+                    if self.importer_instance.source else self.default_source
             try:
                 url = source
                 if extra_url:
@@ -207,6 +211,7 @@ class ImportManager(object):
             source = files[0] if len(suffixes) == 1 else files
         return (source, None)
+
 
 class KMLManager(ImportManager):
     u"""
     KML importer
@@ -215,6 +220,7 @@ class KMLManager(ImportManager):
     """
     XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
     DEFAULT_XPATH = '//kml:Placemark'
+
     def __init__(self, importer_instance, ns=''):
         super(KMLManager, self).__init__(importer_instance)
         self.ns = ns
@@ -249,9 +255,9 @@ class KMLManager(ImportManager):
         if not self.ns:
             self.ns = tree.getroot().nsmap[None]
         xpath = self.XPATH % self.importer_instance.filtr \
-                  if self.importer_instance.filtr else self.DEFAULT_XPATH
+            if self.importer_instance.filtr else self.DEFAULT_XPATH
         for placemark in tree.xpath(xpath,
-                                    namespaces={'kml':self.ns}):
+                                    namespaces={'kml': self.ns}):
             name, point, line = None, None, None
             pl_id = placemark.attrib.get('id')
             pl_key = 'kml-%d' % self.importer_instance.pk
@@ -279,10 +285,10 @@ class KMLManager(ImportManager):
                                                for p in points if p])
                             line = 'SRID=4326;LINESTRING(%s)' % points
             cls = None
-            dct = {'description':description,
-                   'name':name,
-                   'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license}
+            dct = {'description': description,
+                   'name': name,
+                   'origin': self.importer_instance.origin,
+                   'license': self.importer_instance.license}
             if point:
                 dct['point'] = point
                 cls = Marker
@@ -292,7 +298,7 @@ class KMLManager(ImportManager):
                 cls = Route
             if cls:
                 item, updated, created = self.create_or_update_item(
-                                                cls, dct, pl_id, key=pl_key)
+                    cls, dct, pl_id, key=pl_key)
                 if updated:
                     updated_item += 1
                 if created:
@@ -301,15 +307,17 @@ class KMLManager(ImportManager):
 
     @classmethod
     def export(cls, queryset):
-        dct = {'name':settings.PROJECT_NAME,
-               'description':unicode(datetime.date.today()),
-               'locations':queryset.all()
+        dct = {
+            'name': settings.PROJECT_NAME,
+            'description': unicode(datetime.date.today()),
+            'locations': queryset.all()
         }
-        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
                                      + '.kml')
         result = render_to_response('chimere/export.kml', dct)
         return filename, result
+
 
 class ShapefileManager(ImportManager):
     u"""
     Shapefile importer
@@ -351,7 +359,7 @@ class ShapefileManager(ImportManager):
                 srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
                 msg = _(u"SRID cannot be guessed. The default SRID (%s) has "
                         u"been used.") % srid
-                #If imported items are not well located "
+                # If imported items are not well located "
                 #        u"ask your data provider for the SRID to use.") % srid
         shapefilename = tmpdir + os.sep + sources[0]
         ds = DataSource(shapefilename)
@@ -374,7 +382,7 @@ class ShapefileManager(ImportManager):
                             u"is not managed by Chimère.") % lyr.geom_type)
         geom_key = 'point' if lyr.geom_type == 'Point' else 'route'
         geom_cls = Marker if lyr.geom_type == 'Point' else Route
-        indexes = []
+        # indexes = []
         for idx, feat in enumerate(lyr):
             name = unicode(idx)
             if lbl_name:
@@ -384,7 +392,7 @@ class ShapefileManager(ImportManager):
                 except UnicodeDecodeError:
                     try:
                         name = unicode(
-                               name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
+                            name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
                     except:
                         continue
             try:
@@ -393,15 +401,17 @@ class ShapefileManager(ImportManager):
                 return (0, 0, _(u"Bad Shapefile"))
             if feat.geom.geom_type == 'MultiLineString':
                 geoms = [geom.wkt for geom in feat.geom]
-            import_key = feat.get(id_name) if id_name and len(geoms) == 1 else ''
+            import_key = feat.get(id_name) if id_name and len(geoms) == 1 \
+                else ''
             for geom in geoms:
-                dct = {geom_key:'SRID=%s;%s' % (srid, geom),
-                   'name':name,
-                   'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license
-                  }
+                dct = {
+                    geom_key: 'SRID=%s;%s' % (srid, geom),
+                    'name': name,
+                    'origin': self.importer_instance.origin,
+                    'license': self.importer_instance.license
+                }
                 item, updated, created = self.create_or_update_item(
-                                    geom_cls, dct, import_key)
+                    geom_cls, dct, import_key)
                 if updated:
                     updated_item += 1
                 if created:
@@ -426,8 +436,9 @@ class ShapefileManager(ImportManager):
         tmp_name = tmp.name
 
         field_names = [field.name for field in queryset.model._meta.fields]
-        geo_field = getattr(queryset.model,
-                          'point' if 'point' in field_names else 'route')._field
+        geo_field = getattr(
+            queryset.model,
+            'point' if 'point' in field_names else 'route')._field
 
         dr = ogr.GetDriverByName('ESRI Shapefile')
         ds = dr.CreateDataSource(tmp_name)
@@ -453,7 +464,7 @@ class ShapefileManager(ImportManager):
                 feat = ogr.Feature(feature_def)
                 feat.SetField('name', str(unicode_normalize(item.name)[:80]))
                 feat.SetField('category',
-                                   str(unicode_normalize(category.name)[:80]))
+                              str(unicode_normalize(category.name)[:80]))
 
                 geom = getattr(item, geo_field.name)
                 if not geom:
@@ -479,6 +490,7 @@ class ShapefileManager(ImportManager):
         buff.close()
         return filename, zip_stream
+
 
 class CSVManager(ImportManager):
     u"""
     CSV importer
@@ -489,9 +501,8 @@ class CSVManager(ImportManager):
 
     # (label, getter, setter)
     COLS = [("Id", 'pk', 'pk'), (_(u"Name"), 'name', 'name'),
-            (_(u"Categories"), lambda obj:", ".join(
-                                [c.name for c in obj.categories.all()]),
-                                set_categories),
+            (_(u"Categories"), lambda obj: ", ".join(
+                [c.name for c in obj.categories.all()]), set_categories),
             (_(u"State"), 'status', lambda x: x),
             (_(u"Description"), 'description', 'description'),
             (_(u"Localisation"), 'geometry', 'geometry')]
@@ -514,11 +525,11 @@ class CSVManager(ImportManager):
         prop_cols = []
         for pm in Marker.all_properties():
             prop_cols.append((pm.name, pm.getAttrName(),
-                              pm.getAttrName()+'_set'))
+                              pm.getAttrName() + '_set'))
         cols = list(self.COLS) + prop_cols
-        datas = []
+        # datas = []
         for idx, row in enumerate(reader):
-            if not idx: # first row
+            if not idx:  # first row
                 try:
                     assert(len(row) >= len(cols))
                 except AssertionError:
@@ -526,16 +537,17 @@ class CSVManager(ImportManager):
                 continue
             if len(row) < len(cols):
                 continue
-            pk, name, cats, state = row[0], row[1], row[2], row[3]
+            # pk, name, cats, state = row[0], row[1], row[2], row[3]
+            pk, name = row[0], row[1]
             geom = row[5]
             description = ''
             if self.importer_instance.get_description:
                 description = row[4]
             COL_INDEX = 6
-            dct = {'description':description,
-                   'name':name,
-                   'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license}
+            dct = {'description': description,
+                   'name': name,
+                   'origin': self.importer_instance.origin,
+                   'license': self.importer_instance.license}
             cls = None
             if 'POINT' in geom:
                 cls = Marker
@@ -546,8 +558,8 @@ class CSVManager(ImportManager):
             else:
                 continue
            import_key = pk if pk else name.decode('utf-8')
-            item, updated, created = self.create_or_update_item(cls, dct,
-                                                             import_key, pk=pk)
+            item, updated, created = self.create_or_update_item(
+                cls, dct, import_key, pk=pk)
             if updated:
                 updated_item += 1
             if created:
@@ -555,17 +567,17 @@ class CSVManager(ImportManager):
             for idx, col in enumerate(cols[COL_INDEX:]):
                 name, getter, setter_val = col
                 setter = getattr(item, setter_val)
-                val = row[idx+COL_INDEX]
+                val = row[idx + COL_INDEX]
                 setter(item, val)
         return (new_item, updated_item, msg)
 
     @classmethod
     def export(cls, queryset):
-        dct = {'description':unicode(datetime.date.today()), 'data':[]}
-        cls_name = queryset.model.__name__.lower()
+        dct = {'description': unicode(datetime.date.today()), 'data': []}
+        # cls_name = queryset.model.__name__.lower()
         cols = list(cls.COLS)
         for pm in queryset.model.all_properties():
-            cols.append((pm.name, pm.getAttrName(), pm.getAttrName()+'_set'))
+            cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set'))
         header = [col[0] for col in cols]
         dct['data'].append(header)
         for item in queryset.all():
@@ -576,11 +588,12 @@ class CSVManager(ImportManager):
                 else:
                     data.append(getattr(item, attr))
             dct['data'].append(data)
-        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
                                      + '.csv')
         result = render_to_response('chimere/export.csv', dct)
         return filename, result
+
 
 class GeoRSSManager(ImportManager):
     u"""
     RSS importer.
@@ -596,19 +609,19 @@ class GeoRSSManager(ImportManager):
          - number of item updated ;
         - error detail on error
         """
-        from models import Marker
+        from models import Marker, Route
         new_item, updated_item, msg = 0, 0, ''
         feed = feedparser.parse(self.importer_instance.source)
-        if feed['bozo'] and not isinstance(feed['bozo_exception'],
-                                          feedparser.CharacterEncodingOverride):
+        if feed['bozo'] and not isinstance(
+                feed['bozo_exception'], feedparser.CharacterEncodingOverride):
             return (0, 0, _(u"RSS feed is not well formed"))
         for item in feed['items']:
             if "georss_point" not in item and 'georss_line' not in item \
               and not ("geo_lat" in item and "geo_long" in item):
                 continue
             cls = None
-            dct = {'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license}
+            dct = {'origin': self.importer_instance.origin,
+                   'license': self.importer_instance.license}
             if 'georss_point' in item or "geo_lat" in item:
                 cls = Marker
                 if 'georss_point' in item:
@@ -630,11 +643,11 @@ class GeoRSSManager(ImportManager):
                 points = item['georss_line'].split(' ')
                 reordered_points = []
                 # lat, lon -> x, y
-                for idx in xrange(len(points)/2):
-                    reordered_points.append("%s %s" % (points[idx*2+1],
-                                                       points[idx*2]))
+                for idx in xrange(len(points) / 2):
+                    reordered_points.append("%s %s" % (points[idx * 2 + 1],
+                                                       points[idx * 2]))
                 dct['route'] = 'SRID=4326;LINESTRING(%s)' % \
-                            ",".join(reordered_points)
+                    ",".join(reordered_points)
             dct['name'] = item['title']
             pl_id = item['id'] if 'id' in item else item['title']
 
@@ -649,6 +662,7 @@ RE_HOOK = re.compile('\[([^\]]*)\]')
 
 # TODO: manage deleted item from OSM
+
 
 class OSMManager(ImportManager):
     u"""
     OSM importer/exporter
@@ -666,8 +680,8 @@ class OSMManager(ImportManager):
         - updated items;
         - error detail on error.
         """
-        source, msg = self.get_source_file(['.osm'],
-                                         extra_url=self.importer_instance.filtr)
+        source, msg = self.get_source_file(
+            ['.osm'], extra_url=self.importer_instance.filtr)
         if not source:
             return (0, 0, msg)
 
@@ -680,8 +694,8 @@ class OSMManager(ImportManager):
         return 0, 0, _(u"Nothing to import")
 
     def import_ways(self, tree):
-        from chimere.models import Marker, Route
-        msg, items, new_item, updated_item = "", [], 0 , 0
+        from chimere.models import Route
+        msg, items, new_item, updated_item = "", [], 0, 0
         nodes = {}
         for node in tree.xpath('//node'):
             node_id = node.attrib.get('id')
@@ -703,17 +717,17 @@ class OSMManager(ImportManager):
                     points.append(item.get('ref'))
             if not points:
                 continue
-            wkt = 'SRID=4326;LINESTRING(%s)' % ",".join([nodes[point_id]
-                            for point_id in points if point_id in nodes])
-            dct = {'route':wkt,
-                   'name':name,
-                   'origin':self.importer_instance.origin \
-                            or u'OpenStreetMap.org',
-                   'license':self.importer_instance.license \
-                             or u'ODbL',
-                   'import_version':version}
+            wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(
+                [nodes[point_id] for point_id in points if point_id in nodes])
+            dct = {'route': wkt,
+                   'name': name,
+                   'origin': self.importer_instance.origin
+                   or u'OpenStreetMap.org',
+                   'license': self.importer_instance.license
+                   or u'ODbL',
+                   'import_version': version}
             item, updated, created = self.create_or_update_item(
-                                    Route, dct, node_id, version)
+                Route, dct, node_id, version)
             if updated:
                 updated_item += 1
             if created:
@@ -723,7 +737,7 @@ class OSMManager(ImportManager):
 
     def import_nodes(self, tree):
         from chimere.models import Marker
-        msg, items, new_item, updated_item = "", [], 0 , 0
+        msg, items, new_item, updated_item = "", [], 0, 0
         for node in tree.xpath('//node'):
             name = None
             node_id = node.attrib.get('id')
@@ -736,15 +750,15 @@ class OSMManager(ImportManager):
                     name = item.attrib.get('v')
             point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
                                                 node.get('lat'))
-            dct = {'point':point,
-                   'name':name,
-                   'origin':self.importer_instance.origin \
-                            or u'OpenStreetMap.org',
-                   'license':self.importer_instance.license \
-                             or u'ODbL',
-                   'import_version':version}
+            dct = {'point': point,
+                   'name': name,
+                   'origin': self.importer_instance.origin
+                   or u'OpenStreetMap.org',
+                   'license': self.importer_instance.license
+                   or u'ODbL',
+                   'import_version': version}
             item, updated, created = self.create_or_update_item(
-                                    Marker, dct, node_id, version)
+                Marker, dct, node_id, version)
             if updated:
                 updated_item += 1
             if created:
@@ -779,8 +793,8 @@ class OSMManager(ImportManager):
         username = username.encode('latin1')
         password = password.encode('latin1')
         api = OsmApi.OsmApi(api=api, username=username, password=password)
-        api.ChangesetCreate({u"comment": u"Import from Chimère %s" % \
-                                                            get_version()})
+        api.ChangesetCreate({u"comment": u"Import from Chimère %s" %
+                             get_version()})
         hooks = RE_HOOK.findall(self.importer_instance.filtr)
         if not hooks:
             hooks = RE_HOOK.findall(self.importer_instance.source)
@@ -794,28 +808,31 @@ class OSMManager(ImportManager):
                 continue
             if key == 'bbox':
                 x1, y1, x2, y2 = [float(val) for val in value.split(',')]
-                bbox =  GEOSGeometry(
+                bbox = GEOSGeometry(
                     'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % (
-                    x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
+                        x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
                 continue
             tags[key] = value
         if not tags:
             return 0, _(u"No non ambigious tag is defined in the XAPI request")
         if not bbox:
-            return 0, _(u"No bounding box is defined in the XAPI request."\
-            u"If you are sure to manage the entire planet set the bounding box"\
-            u" to -180,-90,180,90")
-        default_dct = {'tag':tags,
-                       'import_source':self.importer_instance.source}
+            return 0, _(
+                u"No bounding box is defined in the XAPI request."
+                u"If you are sure to manage the entire planet set the "
+                u"bounding box to -180,-90,180,90")
+        default_dct = {'tag': tags,
+                       'import_source': self.importer_instance.source}
         idx = -1
-        for idx, item in enumerate(Marker.objects.filter(status='A',
-                point__contained=bbox,
-                categories=self.importer_instance.categories.all(),
-                not_for_osm=False, modified_since_import=True,
-                route=None).all()):
+        for idx, item in enumerate(
+                Marker.objects.filter(
+                    status='A',
+                    point__contained=bbox,
+                    categories=self.importer_instance.categories.all(),
+                    not_for_osm=False, modified_since_import=True,
+                    route=None).all()):
             dct = default_dct.copy()
-            dct.update({'lon':item.point.x,
-                        'lat':item.point.y})
+            dct.update({'lon': item.point.x,
+                        'lat': item.point.y})
             dct['tag']['name'] = item.name
             node = None
             import_key = item.get_key('OSM')
@@ -830,7 +847,7 @@ class OSMManager(ImportManager):
                     if error.status == 404:
                         dct.pop('id')
                         dct.pop('version')
-                        pass # if the node doesn't exist it is created
+                        pass  # if the node doesn't exist it is created
                     else:
                         raise
             if not updated:
@@ -839,20 +856,23 @@ class OSMManager(ImportManager):
             item.import_version = node['version']
             item.save()
         api.ChangesetClose()
-        return idx+1, None
+        return idx + 1, None
+
 
-import urllib2, chardet, HTMLParser
+import chardet
+import HTMLParser
 from BeautifulSoup import BeautifulSoup
-from lxml import etree
+
 
 RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''),
              (re.compile(' ( )*'), ' '),
             (re.compile(r"""<a href=["'](?!https?)(.*)["']"""),
-                        '<a href="%(base_url)s\\1"'),
+              '<a href="%(base_url)s\\1"'),
             )
 
 from calendar import TimeEncoding, month_name
+
 
 def get_month_name(month_no, locale):
     with TimeEncoding(locale) as encoding:
         s = month_name[month_no]
@@ -860,62 +880,62 @@ def get_month_name(month_no, locale):
             s = s.decode(encoding)
         return s
 
-MONTH_NAMES = {locale:[get_month_name(no_month, locale+'.UTF-8')
-                        for no_month in xrange(1, 13)] for locale in ['fr_FR']}
+MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8')
+               for no_month in xrange(1, 13)] for locale in ['fr_FR']}
 try:
-    UNI_MONTH_NAMES = {locale:[m.decode('utf-8') for m in MONTH_NAMES[locale]]
-                                                 for locale in MONTH_NAMES}
+    UNI_MONTH_NAMES = {locale: [m.decode('utf-8') for m in MONTH_NAMES[locale]]
+                       for locale in MONTH_NAMES}
 except UnicodeEncodeError:
-    UNI_MONTH_NAMES = {locale:[m for m in MONTH_NAMES[locale]]
-                                         for locale in MONTH_NAMES}
-
-DATE_PARSINGS = {'fr_FR':[
-           re.compile(r'(?P<day1>\d{1,2}) '\
-                      r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
-                      r'(?P<year1>\d{4})?[^\d]*'\
-                      r'(?P<day2>\d{1,2}) '\
-                      r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-                      r'(?P<year2>\d{4})?.*'),
-           re.compile(r'(?P<day1>\d{1,2}) '\
-                      r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-                      r'(?P<year1>\d{4})?')
-                     ],
-                 'en':[
-           re.compile(r'(?P<year1>\d{4})-'\
-                      r'(?P<month1>\d{2})-'\
-                      r'(?P<day1>\d{2})'\
-                      r'(?:T'\
-                          r'(?P<hour1>\d{2})?:'\
-                          r'(?P<minut1>\d{2})?:'\
-                          r'(?P<second1>\d{2})'\
-                      r')?.*'\
-                      r'(?P<year2>\d{4})-'\
-                      r'(?P<month2>\d{2})-'\
-                      r'(?P<day2>\d{2})'\
-                      r'(?:T'\
-                          r'(?P<hour2>\d{2})?:'\
-                          r'(?P<minut2>\d{2})?:'\
-                          r'(?P<second2>\d{2})'\
-                      r')?.*'
-                      ),
-           re.compile(r'(?P<year1>\d{4})-'\
-                      r'(?P<month1>\d{2})-'\
-                      r'(?P<day1>\d{2})'\
-                      r'(?:T'\
-                          r'(?P<hour1>\d{2})?:'\
-                          r'(?P<minut1>\d{2})?:'\
-                          r'(?P<second1>\d{2})'\
-                      r')?'
-                      )
-                     ],
-            }
+    UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]]
+                       for locale in MONTH_NAMES}
+
+DATE_PARSINGS = {
+    'fr_FR': [
+        re.compile(r'(?P<day1>\d{1,2}) '
+                   r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') '
+                   r'(?P<year1>\d{4})?[^\d]*'
+                   r'(?P<day2>\d{1,2}) '
+                   r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
+                   r'(?P<year2>\d{4})?.*'),
+        re.compile(r'(?P<day1>\d{1,2}) '
+                   r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
+                   r'(?P<year1>\d{4})?')],
+    'en': [
+        re.compile(r'(?P<year1>\d{4})-'
+                   r'(?P<month1>\d{2})-'
+                   r'(?P<day1>\d{2})'
+                   r'(?:T'
+                   r'(?P<hour1>\d{2})?:'
+                   r'(?P<minut1>\d{2})?:'
+                   r'(?P<second1>\d{2})'
+                   r')?.*'
+                   r'(?P<year2>\d{4})-'
+                   r'(?P<month2>\d{2})-'
+                   r'(?P<day2>\d{2})'
+                   r'(?:T'
+                   r'(?P<hour2>\d{2})?:'
+                   r'(?P<minut2>\d{2})?:'
+                   r'(?P<second2>\d{2})'
+                   r')?.*'),
+        re.compile(r'(?P<year1>\d{4})-'
+                   r'(?P<month1>\d{2})-'
+                   r'(?P<day1>\d{2})'
+                   r'(?:T'
+                   r'(?P<hour1>\d{2})?:'
+                   r'(?P<minut1>\d{2})?:'
+                   r'(?P<second1>\d{2})'
+                   r')?')],
+}
+
 
 def clean_field(value):
     return value.strip()
+
 
 class HtmlXsltManager(ImportManager):
     PARSER = 'HTMLParser'
+
     def get(self):
         u"""
         Get data from the source
@@ -939,7 +959,7 @@ class HtmlXsltManager(ImportManager):
         soup = BeautifulSoup(data)
         main_page = soup.prettify()
         # convert it to valid XHTML
-        #doc, errors = tidy_document(main_page)
+        # doc, errors = tidy_document(main_page)
         doc = main_page
         dom = etree.HTML(doc, getattr(etree, self.PARSER)())
         try:
@@ -963,8 +983,8 @@ class HtmlXsltManager(ImportManager):
         base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
         base_url += u"/"
         for item in newdom.getroot():
-            c_item = {child.tag:clean_field(child.text)
-                        for child in item.getchildren() if child.text}
+            c_item = {child.tag: clean_field(child.text)
+                      for child in item.getchildren() if child.text}
             # try to have more information on the linked page
             if transform_child and 'link' in c_item:
                 # not an absolute address
@@ -985,8 +1005,8 @@ class HtmlXsltManager(ImportManager):
                 child_dom = etree.HTML(child_page, etree.HTMLParser())
                 extra_keys = transform_child(child_dom).getroot()
                 if len(extra_keys):
-                    c_item.update({extra.tag:etree.tostring(extra)
-                            for extra in extra_keys[0].getchildren()})
+                    c_item.update({extra.tag: etree.tostring(extra)
+                                   for extra in extra_keys[0].getchildren()})
             items.append(c_item)
         # change relative link to full link, simplify, unescape HTML entities
         html_unescape = HTMLParser.HTMLParser().unescape
@@ -994,7 +1014,7 @@ class HtmlXsltManager(ImportManager):
             for k in item:
                 val = item[k]
                 for r, replaced in RE_CLEANS:
-                    val = re.sub(r, replaced % {'base_url':base_url}, val)
+                    val = re.sub(r, replaced % {'base_url': base_url}, val)
                 item[k] = html_unescape(val)
         self.key_categories = self.importer_instance.get_key_category_dict()
         self.missing_cats = set()
@@ -1003,9 +1023,10 @@ class HtmlXsltManager(ImportManager):
             self.add_dct_item(item)
         msg = ''
         if self.missing_cats:
-            msg = _(u"Names \"%s\" doesn't match existing categories. "
-                u"Modify the import to match theses names with categories.") % (
-                    u'", "'.join(self.missing_cats))
+            msg = _(
+                u"Names \"%s\" doesn't match existing categories. "
+                u"Modify the import to match theses names with categories.") %\
+                (u'", "'.join(self.missing_cats))
         return (self.new_item, self.updated_item, msg)
 
     @classmethod
@@ -1042,18 +1063,18 @@ class HtmlXsltManager(ImportManager):
                 if not m:
                     continue
                 values = m.groupdict()
-                date = self._internal_parse_date(locale,
-                              'year1' in values and values['year1'],
-                              values['month1'], values['day1'])
+                date = self._internal_parse_date(
+                    locale, 'year1' in values and values['year1'],
+                    values['month1'], values['day1'])
                 if not date:
                     continue
                 dct['start_date'] = date
                 has_dates = True
                 if 'day2' not in values:
                     break
-                date = self._internal_parse_date(locale,
-                              'year2' in values and values['year2'],
-                              values['month2'], values['day2'])
+                date = self._internal_parse_date(
+                    locale, 'year2' in values and values['year2'],
+                    values['month2'], values['day2'])
                 if date:
                     dct['end_date'] = date
                 break
@@ -1061,14 +1082,14 @@ class HtmlXsltManager(ImportManager):
 
     def add_dct_item(self, item):
         if not self.importer_instance.default_localisation and \
-           not "point" in item and not ("lat" in item and item['lat']):
+                "point" not in item and not ("lat" in item and item['lat']):
             return
         cls = None
-        dct = {'origin':"<a href='%s' target='_blank'>%s</a>" % (
-                                    item.get('link') or '#',
-                                    self.importer_instance.origin),
-               'license':self.importer_instance.license,
-               'name':item['name']}
+        dct = {
+            'origin': "<a href='%s' target='_blank'>%s</a>" % (
+                item.get('link') or '#', self.importer_instance.origin),
+            'license': self.importer_instance.license,
+            'name': item['name']}
         category = None
         if 'category' in item and item['category']:
             if item['category'] in self.key_categories:
@@ -1095,5 +1116,6 @@ class HtmlXsltManager(ImportManager):
         if created:
             self.new_item += 1
+
 
 class XMLXsltManager(HtmlXsltManager):
     PARSER = 'XMLParser'
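
A note for readers of the tasks.py hunks: the exporting task keeps its @single_instance_task(60 * 10) guard, only the spacing around the argument changes. The decorator itself is defined elsewhere in Chimère and is not shown in this diff; a common way to build that kind of guard for a Celery task is a Django cache lock, sketched below purely as an illustration (the body and names are assumptions, not Chimère's actual implementation):

    # Hypothetical sketch only -- chimere's real single_instance_task is not
    # part of this diff. The usual recipe: take a cache lock before running
    # the task body, so concurrent runs of the same task are skipped.
    from functools import wraps

    from django.core.cache import cache


    def single_instance_task(timeout):
        def decorator(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                lock_id = 'single-instance-%s' % func.__name__
                # cache.add() only succeeds if the key is absent, which makes
                # it usable as a coarse lock with an expiry.
                if cache.add(lock_id, 'locked', timeout):
                    try:
                        return func(*args, **kwargs)
                    finally:
                        cache.delete(lock_id)
                # another run is already in progress: do nothing
            return wrapper
        return decorator

In such a scheme the timeout (60 * 10 seconds in the diff) bounds how long the lock can survive if a worker dies while holding it.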
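
The unicode_normalize() helper re-indented at the top of chimere/utils.py strips accents by NFD-decomposing the string and dropping non-spacing marks (Unicode category 'Mn'); it is what keeps the exported KML/CSV/Shapefile file names and field values ASCII-friendly. The same routine, copied out of the patched file so it can be tried on its own (Python 2, like the rest of the module):

    # -*- coding: utf-8 -*-
    # Copy of unicode_normalize() from chimere/utils.py for a quick check.
    import unicodedata


    def unicode_normalize(string):
        if type(string) == str:
            string = unicode(string.decode('utf-8'))
        return ''.join(
            (c for c in unicodedata.normalize('NFD', string)
             if unicodedata.category(c) != 'Mn'))


    print(unicode_normalize(u'Chimère 2012-05-16.kml'))
    # -> Chimere 2012-05-16.kml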
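
In the GeoRSSManager hunk only the spacing of the index arithmetic changes, but the swap it performs is worth spelling out: a georss_line lists coordinates as "lat lon" pairs, while the WKT LINESTRING that Chimère stores expects "lon lat". A worked example of that loop, with made-up coordinates:

    # Worked example of the georss_line handling in GeoRSSManager.get()
    # (sample coordinates, not real data).
    georss_line = "45.256 -110.45 46.46 -109.48 43.84 -109.86"

    points = georss_line.split(' ')
    reordered_points = []
    # lat, lon -> x, y (i.e. lon lat), exactly as in the diff above
    for idx in xrange(len(points) / 2):
        reordered_points.append("%s %s" % (points[idx * 2 + 1],
                                           points[idx * 2]))

    route = 'SRID=4326;LINESTRING(%s)' % ",".join(reordered_points)
    print(route)
    # SRID=4326;LINESTRING(-110.45 45.256,-109.48 46.46,-109.86 43.84)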
