From 5d9f3d528e8cd3cd30471906783fae6d969c9d0f Mon Sep 17 00:00:00 2001
From: Étienne Loks <etienne.loks@proxience.com>
Date: Sat, 14 Feb 2015 13:47:10 +0100
Subject: Refactoring XSLT imports - fix tests

---
 chimere/tests.py |  18 +++++--
 chimere/utils.py | 155 +++++++++++++++++++++++++++++--------------------------
 2 files changed, 98 insertions(+), 75 deletions(-)
diff --git a/chimere/tests.py b/chimere/tests.py
index ef54d0b..3a3144e 100644
--- a/chimere/tests.py
+++ b/chimere/tests.py
@@ -137,8 +137,9 @@ class ImporterTest:
             nb, nb_updated, res = importer.manager.get()
             if awaited_nb == None:
                 continue
-            self.assertEqual(nb, awaited_nb, msg=u"%s: get test failed" %
-                                                    unicode(self.__class__))
+            self.assertEqual(nb, awaited_nb,
+                msg=u"%s: get test failed - got %d when %d was awaited" %
+                                    (unicode(self.__class__), nb, awaited_nb))
             self.assertEqual(nb_updated, 0)
             for cat in importer.categories.all():
                 if cat not in nb_by_cat:
@@ -311,6 +312,17 @@ class HtmlXsltImporterTest(TestCase, ImporterTest):
         importer1.categories.add(subcategories[0])
         self.marker_importers = [(importer1, 7),]
 
+class XmlXsltImporterTest(TestCase, ImporterTest):
+    """ImporterTest checks for an 'XXLT' importer fed with magny.xml."""
+    def setUp(self):
+        subcategories = subcategory_setup()
+        xslt = File(open(test_dir_path + 'tests/magny-xml.xslt'))
+        xml_importer = Importer.objects.create(importer_type='XXLT',
+            source_file=xslt, source='http://www.chymeres.net/test/magny.xml',
+            default_localisation='SRID=4326;POINT(-4.5 48.4)')
+        xml_importer.categories.add(subcategories[0])
+        self.marker_importers = [(xml_importer, 10)]
+
 class FeedsTest(TestCase):
     def setUp(self):
         self.areas = areas_setup()
@@ -442,7 +454,7 @@ class DynamicCategoryTest(TestCase):
         response = self.client.get(url)
         self.assertEqual(200, response.status_code)
         cats = json.loads(response.content)['categories']
-        self.assertEqual(len(cats), 2)
+        self.assertEqual(len(cats), 5)
 
 class NewsTest(TestCase):
     def setUp(self):
diff --git a/chimere/utils.py b/chimere/utils.py
index 73e38ba..ba5e558 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -870,17 +870,17 @@ except UnicodeEncodeError:
                                          for locale in MONTH_NAMES}
 
 DATE_PARSINGS = {'fr_FR':[
-               re.compile(r'(?P<day1>\d{1,2}) '\
-                          r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
-                          r'(?P<year1>\d{4})?[^\d]*'\
-                          r'(?P<day2>\d{1,2}) '\
-                          r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-                          r'(?P<year2>\d{4})?.*'),
-               re.compile(r'(?P<day1>\d{1,2}) '\
-                          r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-                          r'(?P<year1>\d{4})?')
-                         ]
-                }
+           re.compile(r'(?P<day1>\d{1,2}) '\
+                      r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
+                      r'(?P<year1>\d{4})?[^\d]*'\
+                      r'(?P<day2>\d{1,2}) '\
+                      r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+                      r'(?P<year2>\d{4})?.*'),  # range: two dates
+           re.compile(r'(?P<day1>\d{1,2}) '\
+                      r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+                      r'(?P<year1>\d{4})?')  # single date
+                     ]
+            }
 
 def clean_field(value):
     return value.strip()
@@ -897,6 +897,7 @@ class HtmlXsltManager(ImportManager):
         - error detail on error.
         """
         from models import Marker
+        self.marker_cls = Marker
         try:
             main_page = urllib2.urlopen(self.importer_instance.source)
             assert main_page.getcode() == 200
@@ -966,71 +967,81 @@ class HtmlXsltManager(ImportManager):
                 for r, replaced in RE_CLEANS:
                     val = re.sub(r, replaced % {'base_url':base_url}, val)
                 item[k] = html_unescape(val)
-        updated_item, new_item = 0, 0
-        key_categories = self.importer_instance.get_key_category_dict()
-        missing_cats = set()
+        self.key_categories = self.importer_instance.get_key_category_dict()
+        self.missing_cats = set()
+        self.updated_item, self.new_item = 0, 0
         for item in items:
-            if not self.importer_instance.default_localisation and \
-               not "point" in item and not ("lat" in item and item['lat']):
-                continue
-            cls = None
-            dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
-                                        self.importer_instance.origin),
-                   'license':self.importer_instance.license,
-                   'name':item['name']}
-            category = None
-            if 'category' in item and item['category']:
-                if item['category'] in key_categories:
-                    category = key_categories[item['category']]
-                else:
-                    missing_cats.add(item['category'])
-            cls = Marker
-            if 'point' in item:
-                x, y = item['point'].split(",")
-                dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
-            elif 'lat' in item and item['lat']:
-                dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
-                                                           item['lat'])
-            else:
-                dct['point'] = self.importer_instance.default_localisation
-            dct['description'] = item['description']
-            if 'date' in item:
-                has_dates = False
-                for locale in DATE_PARSINGS:
-                    if has_dates:
-                        break
-                    for r in DATE_PARSINGS[locale]:
-                        m = r.search(item['date'])
-                        if not m:
-                            continue
-                        has_dates = True
-                        values = m.groupdict()
-                        year1 = datetime.date.today().year if 'year1' not in values \
-                                else int(values['year1'])
-                        dct['start_date'] = datetime.date(year1,
-                              MONTH_NAMES[locale].index(values['month1'].encode('utf-8')) + 1,
-                              int(values['day1']))
-                        if 'day2' not in values:
-                            break
-                        year2 = datetime.date.today().year if 'year2' not in values \
-                                else int(values['year2'])
-                        dct['end_date'] = datetime.date(year2,
-                              MONTH_NAMES[locale].index(values['month2'].encode('utf-8')) + 1,
-                              int(values['day2']))
-                        break
-            key = item['key']
-            it, updated, created = self.create_or_update_item(cls, dct, key,
-                                                              category=category)
-            if updated:
-                updated_item += 1
-            if created:
-                new_item += 1
+            self.add_dct_item(item)
         msg = ''
-        if missing_cats:
+        if self.missing_cats:
             msg = _(u"Names \"%s\" doesn't match existing categories. "
                 u"Modify the import to match theses names with categories.") % (
-                    u'", "'.join(missing_cats))
-        return (new_item, updated_item, msg)
+                    u'", "'.join(self.missing_cats))
+        return (self.new_item, self.updated_item, msg)
+
+    def parse_date(self, date):
+        """
+        Extract a start date and, for a range, an end date from a text.
+
+        The patterns of each DATE_PARSINGS locale are tried in turn and the
+        first one matching is used; a date without year is set in the
+        current year. Return a dict with 'start_date' and, for ranges,
+        'end_date'; the dict is empty when no pattern matches.
+        """
+        this_year = datetime.date.today().year
+        for locale in DATE_PARSINGS:
+            for r in DATE_PARSINGS[locale]:
+                m = r.search(date)
+                if not m:
+                    continue
+                # unmatched optional groups are None in groupdict()
+                values = m.groupdict()
+                dct = {}
+                for idx, key in (('1', 'start_date'), ('2', 'end_date')):
+                    if not values.get('day' + idx):
+                        break
+                    month = values['month' + idx].encode('utf-8')
+                    dct[key] = datetime.date(
+                        int(values.get('year' + idx) or this_year),
+                        MONTH_NAMES[locale].index(month) + 1,
+                        int(values['day' + idx]))
+                return dct
+        return {}
+
+    def add_dct_item(self, item):
+        """
+        Create or update the marker described by item; item is skipped if
+        neither it nor the importer provides a position.
+        """
+        src = self.importer_instance
+        has_lat = 'lat' in item and item['lat']
+        if not (src.default_localisation or 'point' in item or has_lat):
+            return
+        origin = "<a href='%s'>%s</a>" % (item['link'], src.origin)
+        dct = {'origin': origin, 'license': src.license, 'name': item['name']}
+        category = None
+        category_key = 'category' in item and item['category']
+        if category_key and category_key in self.key_categories:
+            category = self.key_categories[category_key]
+        elif category_key:
+            self.missing_cats.add(category_key)  # unknown category key
+        marker_cls, wkt = self.marker_cls, 'SRID=4326;POINT(%s %s)'
+        if 'point' in item:
+            x, y = item['point'].split(",")
+            dct['point'] = wkt % (x, y)
+        elif has_lat:
+            dct['point'] = wkt % (item['lon'], item['lat'])
+        else:
+            dct['point'] = src.default_localisation
+        dct['description'] = item['description']
+        if 'date' in item:
+            dct.update(self.parse_date(item['date']))
+        _marker, updated, created = self.create_or_update_item(
+            marker_cls, dct, item['key'], category=category)
+        if updated:
+            self.updated_item += 1
+        if created:
+            self.new_item += 1
 
 class XMLXsltManager(HtmlXsltManager):
     PARSER = 'XMLParser'
-- 
cgit v1.2.3