author      Étienne Loks <etienne.loks@proxience.com>    2015-02-14 14:33:53 +0100
committer   Étienne Loks <etienne.loks@proxience.com>    2015-02-14 14:33:53 +0100
commit      06008ace42c6f972b96532e76ef69cf35b0b9eea (patch)
tree        d7860b8022fe36d85ab1fe2fcf8b69cc1d1a074c /chimere/utils.py
parent      72f4ae70dee56b5e532a579aeae7f5cc22f49813 (diff)
parent      afbcd9cf0578f70ac25afac0199446a43d317b52 (diff)
Merge branch 'v2.1'
Diffstat (limited to 'chimere/utils.py')
-rw-r--r--   chimere/utils.py   207
1 file changed, 134 insertions, 73 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
index 73e38ba..55fc45c 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2012-2013 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+# Copyright (C) 2012-2015 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as
@@ -870,17 +870,45 @@ except UnicodeEncodeError:
                    for locale in MONTH_NAMES}
 DATE_PARSINGS = {'fr_FR':[
-    re.compile(r'(?P<day1>\d{1,2}) '\
-        r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
-        r'(?P<year1>\d{4})?[^\d]*'\
-        r'(?P<day2>\d{1,2}) '\
-        r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-        r'(?P<year2>\d{4})?.*'),
-    re.compile(r'(?P<day1>\d{1,2}) '\
-        r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-        r'(?P<year1>\d{4})?')
-    ]
-  }
+        re.compile(r'(?P<day1>\d{1,2}) '\
+            r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
+            r'(?P<year1>\d{4})?[^\d]*'\
+            r'(?P<day2>\d{1,2}) '\
+            r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+            r'(?P<year2>\d{4})?.*'),
+        re.compile(r'(?P<day1>\d{1,2}) '\
+            r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+            r'(?P<year1>\d{4})?')
+        ],
+    'en':[
+        re.compile(r'(?P<year1>\d{4})-'\
+            r'(?P<month1>\d{2})-'\
+            r'(?P<day1>\d{2})'\
+            r'(?:T'\
+            r'(?P<hour1>\d{2})?:'\
+            r'(?P<minut1>\d{2})?:'\
+            r'(?P<second1>\d{2})'\
+            r')?.*'\
+            r'(?P<year2>\d{4})-'\
+            r'(?P<month2>\d{2})-'\
+            r'(?P<day2>\d{2})'\
+            r'(?:T'\
+            r'(?P<hour2>\d{2})?:'\
+            r'(?P<minut2>\d{2})?:'\
+            r'(?P<second2>\d{2})'\
+            r')?.*'
+        ),
+        re.compile(r'(?P<year1>\d{4})-'\
+            r'(?P<month1>\d{2})-'\
+            r'(?P<day1>\d{2})'\
+            r'(?:T'\
+            r'(?P<hour1>\d{2})?:'\
+            r'(?P<minut1>\d{2})?:'\
+            r'(?P<second1>\d{2})'\
+            r')?'
+        )
+        ],
+    }
 
 
 def clean_field(value):
     return value.strip()
@@ -897,6 +925,7 @@ class HtmlXsltManager(ImportManager):
         - error detail on error.
         """
         from models import Marker
+        self.marker_cls = Marker
         try:
             main_page = urllib2.urlopen(self.importer_instance.source)
             assert main_page.getcode() == 200
@@ -966,71 +995,103 @@ class HtmlXsltManager(ImportManager):
                 for r, replaced in RE_CLEANS:
                     val = re.sub(r, replaced % {'base_url':base_url}, val)
                 item[k] = html_unescape(val)
-        updated_item, new_item = 0, 0
-        key_categories = self.importer_instance.get_key_category_dict()
-        missing_cats = set()
+        self.key_categories = self.importer_instance.get_key_category_dict()
+        self.missing_cats = set()
+        self.updated_item, self.new_item = 0, 0
         for item in items:
-            if not self.importer_instance.default_localisation and \
-               not "point" in item and not ("lat" in item and item['lat']):
-                continue
-            cls = None
-            dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
-                                        self.importer_instance.origin),
-                   'license':self.importer_instance.license,
-                   'name':item['name']}
-            category = None
-            if 'category' in item and item['category']:
-                if item['category'] in key_categories:
-                    category = key_categories[item['category']]
-                else:
-                    missing_cats.add(item['category'])
-            cls = Marker
-            if 'point' in item:
-                x, y = item['point'].split(",")
-                dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
-            elif 'lat' in item and item['lat']:
-                dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
-                                                           item['lat'])
-            else:
-                dct['point'] = self.importer_instance.default_localisation
-            dct['description'] = item['description']
-            if 'date' in item:
-                has_dates = False
-                for locale in DATE_PARSINGS:
-                    if has_dates:
-                        break
-                    for r in DATE_PARSINGS[locale]:
-                        m = r.search(item['date'])
-                        if not m:
-                            continue
-                        has_dates = True
-                        values = m.groupdict()
-                        year1 = datetime.date.today().year if 'year1' not in values \
-                                else int(values['year1'])
-                        dct['start_date'] = datetime.date(year1,
-                            MONTH_NAMES[locale].index(values['month1'].encode('utf-8')) + 1,
-                            int(values['day1']))
-                        if 'day2' not in values:
-                            break
-                        year2 = datetime.date.today().year if 'year2' not in values \
-                                else int(values['year2'])
-                        dct['end_date'] = datetime.date(year2,
-                            MONTH_NAMES[locale].index(values['month2'].encode('utf-8')) + 1,
-                            int(values['day2']))
-                        break
-            key = item['key']
-            it, updated, created = self.create_or_update_item(cls, dct, key,
-                                                      category=category)
-            if updated:
-                updated_item += 1
-            if created:
-                new_item += 1
+            self.add_dct_item(item)
         msg = ''
-        if missing_cats:
+        if self.missing_cats:
             msg = _(u"Names \"%s\" doesn't match existing categories. "
                 u"Modify the import to match theses names with categories.") % (
-                u'", "'.join(missing_cats))
-        return (new_item, updated_item, msg)
+                u'", "'.join(self.missing_cats))
+        return (self.new_item, self.updated_item, msg)
+
+    @classmethod
+    def _internal_parse_date(cls, locale, year, month, day):
+        try:
+            year = datetime.date.today().year if not year else int(year)
+        except ValueError:
+            return
+        month = month.encode('utf-8')
+        if locale in MONTH_NAMES and month in MONTH_NAMES[locale]:
+            month = MONTH_NAMES[locale].index(month) + 1
+        else:
+            try:
+                month = int(month)
+            except ValueError:
+                return
+        try:
+            day = int(day)
+        except ValueError:
+            return
+        try:
+            return datetime.date(year, month, day)
+        except ValueError:
+            return
+
+    def parse_date(self, date):
+        dct = {}
+        has_dates = False
+        for locale in DATE_PARSINGS:
+            if has_dates:
+                break
+            for r in DATE_PARSINGS[locale]:
+                m = r.search(date)
+                if not m:
+                    continue
+                values = m.groupdict()
+                date = self._internal_parse_date(locale,
+                    'year1' in values and values['year1'],
+                    values['month1'], values['day1'])
+                if not date:
+                    continue
+                dct['start_date'] = date
+                has_dates = True
+                if 'day2' not in values:
+                    break
+                date = self._internal_parse_date(locale,
+                    'year2' in values and values['year2'],
+                    values['month2'], values['day2'])
+                if date:
+                    dct['end_date'] = date
+                break
+        return dct
+
+    def add_dct_item(self, item):
+        if not self.importer_instance.default_localisation and \
+           not "point" in item and not ("lat" in item and item['lat']):
+            return
+        cls = None
+        dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
+                                    self.importer_instance.origin),
+               'license':self.importer_instance.license,
+               'name':item['name']}
+        category = None
+        if 'category' in item and item['category']:
+            if item['category'] in self.key_categories:
+                category = self.key_categories[item['category']]
+            else:
+                self.missing_cats.add(item['category'])
+        cls = self.marker_cls
+        if 'point' in item:
+            x, y = item['point'].split(",")
+            dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+        elif 'lat' in item and item['lat']:
+            dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+                                                       item['lat'])
+        else:
+            dct['point'] = self.importer_instance.default_localisation
+        dct['description'] = item['description']
+        if 'date' in item:
+            dct.update(self.parse_date(item['date']))
+        key = item['key']
+        it, updated, created = self.create_or_update_item(cls, dct, key,
                                                  category=category)
+        if updated:
+            self.updated_item += 1
+        if created:
+            self.new_item += 1
 
 class XMLXsltManager(HtmlXsltManager):
     PARSER = 'XMLParser'