From 5d9f3d528e8cd3cd30471906783fae6d969c9d0f Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Sat, 14 Feb 2015 13:47:10 +0100 Subject: Refactoring XSLT imports - fix tests --- chimere/tests.py | 18 +++++-- chimere/utils.py | 155 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 98 insertions(+), 75 deletions(-) diff --git a/chimere/tests.py b/chimere/tests.py index ef54d0b..3a3144e 100644 --- a/chimere/tests.py +++ b/chimere/tests.py @@ -137,8 +137,9 @@ class ImporterTest: nb, nb_updated, res = importer.manager.get() if awaited_nb == None: continue - self.assertEqual(nb, awaited_nb, msg=u"%s: get test failed" % - unicode(self.__class__)) + self.assertEqual(nb, awaited_nb, + msg=u"%s: get test failed - got %d when %d was awaited" % + (unicode(self.__class__), nb, awaited_nb)) self.assertEqual(nb_updated, 0) for cat in importer.categories.all(): if cat not in nb_by_cat: @@ -311,6 +312,17 @@ class HtmlXsltImporterTest(TestCase, ImporterTest): importer1.categories.add(subcategories[0]) self.marker_importers = [(importer1, 7),] +class XmlXsltImporterTest(TestCase, ImporterTest): + def setUp(self): + subcategories = subcategory_setup() + xslt1 = File(open(test_dir_path + 'tests/magny-xml.xslt')) + importer1 = Importer.objects.create(importer_type='XXLT', + source='http://www.chymeres.net/test/magny.xml', + source_file=xslt1, + default_localisation='SRID=4326;POINT(-4.5 48.4)',) + importer1.categories.add(subcategories[0]) + self.marker_importers = [(importer1, 10),] + class FeedsTest(TestCase): def setUp(self): self.areas = areas_setup() @@ -442,7 +454,7 @@ class DynamicCategoryTest(TestCase): response = self.client.get(url) self.assertEqual(200, response.status_code) cats = json.loads(response.content)['categories'] - self.assertEqual(len(cats), 2) + self.assertEqual(len(cats), 5) class NewsTest(TestCase): def setUp(self): diff --git a/chimere/utils.py b/chimere/utils.py index 73e38ba..ba5e558 100644 --- a/chimere/utils.py +++ b/chimere/utils.py @@ -870,17 +870,17 @@ except UnicodeEncodeError: for locale in MONTH_NAMES} DATE_PARSINGS = {'fr_FR':[ - re.compile(r'(?P\d{1,2}) '\ - r'(?P'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\ - r'(?P\d{4})?[^\d]*'\ - r'(?P\d{1,2}) '\ - r'(?P'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\ - r'(?P\d{4})?.*'), - re.compile(r'(?P\d{1,2}) '\ - r'(?P'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\ - r'(?P\d{4})?') - ] - } + re.compile(r'(?P\d{1,2}) '\ + r'(?P'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\ + r'(?P\d{4})?[^\d]*'\ + r'(?P\d{1,2}) '\ + r'(?P'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\ + r'(?P\d{4})?.*'), + re.compile(r'(?P\d{1,2}) '\ + r'(?P'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\ + r'(?P\d{4})?') + ] + } def clean_field(value): return value.strip() @@ -897,6 +897,7 @@ class HtmlXsltManager(ImportManager): - error detail on error. """ from models import Marker + self.marker_cls = Marker try: main_page = urllib2.urlopen(self.importer_instance.source) assert main_page.getcode() == 200 @@ -966,71 +967,81 @@ class HtmlXsltManager(ImportManager): for r, replaced in RE_CLEANS: val = re.sub(r, replaced % {'base_url':base_url}, val) item[k] = html_unescape(val) - updated_item, new_item = 0, 0 - key_categories = self.importer_instance.get_key_category_dict() - missing_cats = set() + self.key_categories = self.importer_instance.get_key_category_dict() + self.missing_cats = set() + self.updated_item, self.new_item = 0, 0 for item in items: - if not self.importer_instance.default_localisation and \ - not "point" in item and not ("lat" in item and item['lat']): - continue - cls = None - dct = {'origin':"%s" % (item['link'], - self.importer_instance.origin), - 'license':self.importer_instance.license, - 'name':item['name']} - category = None - if 'category' in item and item['category']: - if item['category'] in key_categories: - category = key_categories[item['category']] - else: - missing_cats.add(item['category']) - cls = Marker - if 'point' in item: - x, y = item['point'].split(",") - dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y) - elif 'lat' in item and item['lat']: - dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'], - item['lat']) - else: - dct['point'] = self.importer_instance.default_localisation - dct['description'] = item['description'] - if 'date' in item: - has_dates = False - for locale in DATE_PARSINGS: - if has_dates: - break - for r in DATE_PARSINGS[locale]: - m = r.search(item['date']) - if not m: - continue - has_dates = True - values = m.groupdict() - year1 = datetime.date.today().year if 'year1' not in values \ - else int(values['year1']) - dct['start_date'] = datetime.date(year1, - MONTH_NAMES[locale].index(values['month1'].encode('utf-8')) + 1, - int(values['day1'])) - if 'day2' not in values: - break - year2 = datetime.date.today().year if 'year2' not in values \ - else int(values['year2']) - dct['end_date'] = datetime.date(year2, - MONTH_NAMES[locale].index(values['month2'].encode('utf-8')) + 1, - int(values['day2'])) - break - key = item['key'] - it, updated, created = self.create_or_update_item(cls, dct, key, - category=category) - if updated: - updated_item += 1 - if created: - new_item += 1 + self.add_dct_item(item) msg = '' - if missing_cats: + if self.missing_cats: msg = _(u"Names \"%s\" doesn't match existing categories. " u"Modify the import to match theses names with categories.") % ( - u'", "'.join(missing_cats)) - return (new_item, updated_item, msg) + u'", "'.join(self.missing_cats)) + return (self.new_item, self.updated_item, msg) + + def parse_date(self, date): + dct = {} + has_dates = False + for locale in DATE_PARSINGS: + if has_dates: + break + for r in DATE_PARSINGS[locale]: + m = r.search(date) + if not m: + continue + has_dates = True + values = m.groupdict() + year1 = datetime.date.today().year if 'year1' not in values \ + else int(values['year1']) + dct['start_date'] = datetime.date(year1, + MONTH_NAMES[locale].index(values['month1'].encode('utf-8') + ) + 1, + int(values['day1'])) + if 'day2' not in values: + break + year2 = datetime.date.today().year if 'year2' not in values \ + else int(values['year2']) + dct['end_date'] = datetime.date(year2, + MONTH_NAMES[locale].index(values['month2'].encode('utf-8') + ) + 1, + int(values['day2'])) + break + return dct + + def add_dct_item(self, item): + if not self.importer_instance.default_localisation and \ + not "point" in item and not ("lat" in item and item['lat']): + return + cls = None + dct = {'origin':"%s" % (item['link'], + self.importer_instance.origin), + 'license':self.importer_instance.license, + 'name':item['name']} + category = None + if 'category' in item and item['category']: + if item['category'] in self.key_categories: + category = self.key_categories[item['category']] + else: + self.missing_cats.add(item['category']) + cls = self.marker_cls + if 'point' in item: + x, y = item['point'].split(",") + dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y) + elif 'lat' in item and item['lat']: + dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'], + item['lat']) + else: + dct['point'] = self.importer_instance.default_localisation + dct['description'] = item['description'] + if 'date' in item: + dct.update(self.parse_date(item['date'])) + key = item['key'] + it, updated, created = self.create_or_update_item(cls, dct, key, + category=category) + if updated: + self.updated_item += 1 + if created: + self.new_item += 1 class XMLXsltManager(HtmlXsltManager): PARSER = 'XMLParser' -- cgit v1.2.3