summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--chimere/tests.py18
-rw-r--r--chimere/utils.py155
2 files changed, 98 insertions, 75 deletions
diff --git a/chimere/tests.py b/chimere/tests.py
index ef54d0b..3a3144e 100644
--- a/chimere/tests.py
+++ b/chimere/tests.py
@@ -137,8 +137,9 @@ class ImporterTest:
nb, nb_updated, res = importer.manager.get()
if awaited_nb == None:
continue
- self.assertEqual(nb, awaited_nb, msg=u"%s: get test failed" %
- unicode(self.__class__))
+ self.assertEqual(nb, awaited_nb,
+ msg=u"%s: get test failed - got %d when %d was awaited" %
+ (unicode(self.__class__), nb, awaited_nb))
self.assertEqual(nb_updated, 0)
for cat in importer.categories.all():
if cat not in nb_by_cat:
@@ -311,6 +312,17 @@ class HtmlXsltImporterTest(TestCase, ImporterTest):
importer1.categories.add(subcategories[0])
self.marker_importers = [(importer1, 7),]
+class XmlXsltImporterTest(TestCase, ImporterTest):
+    """
+    Run the shared ImporterTest checks against an importer of type 'XXLT',
+    fed with a remote XML source and a local XSLT stylesheet.
+    """
+
+    def setUp(self):
+        """Create the importer under test and register it with its count."""
+        available_subcategories = subcategory_setup()
+        stylesheet = File(open(test_dir_path + 'tests/magny-xml.xslt'))
+        xml_importer = Importer.objects.create(
+            importer_type='XXLT',
+            source='http://www.chymeres.net/test/magny.xml',
+            source_file=stylesheet,
+            default_localisation='SRID=4326;POINT(-4.5 48.4)')
+        xml_importer.categories.add(available_subcategories[0])
+        # (importer, awaited number) pairs - presumably the number of items
+        # ImporterTest awaits from the importer's get(); verify against
+        # ImporterTest.
+        self.marker_importers = [(xml_importer, 10)]
+
class FeedsTest(TestCase):
def setUp(self):
self.areas = areas_setup()
@@ -442,7 +454,7 @@ class DynamicCategoryTest(TestCase):
response = self.client.get(url)
self.assertEqual(200, response.status_code)
cats = json.loads(response.content)['categories']
- self.assertEqual(len(cats), 2)
+ self.assertEqual(len(cats), 5)
class NewsTest(TestCase):
def setUp(self):
diff --git a/chimere/utils.py b/chimere/utils.py
index 73e38ba..ba5e558 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -870,17 +870,17 @@ except UnicodeEncodeError:
for locale in MONTH_NAMES}
DATE_PARSINGS = {'fr_FR':[
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
- r'(?P<year1>\d{4})?[^\d]*'\
- r'(?P<day2>\d{1,2}) '\
- r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year2>\d{4})?.*'),
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year1>\d{4})?')
- ]
- }
+ re.compile(r'(?P<day1>\d{1,2}) '\
+ r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
+ r'(?P<year1>\d{4})?[^\d]*'\
+ r'(?P<day2>\d{1,2}) '\
+ r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+ r'(?P<year2>\d{4})?.*'),
+ re.compile(r'(?P<day1>\d{1,2}) '\
+ r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+ r'(?P<year1>\d{4})?')
+ ]
+ }
def clean_field(value):
return value.strip()
@@ -897,6 +897,7 @@ class HtmlXsltManager(ImportManager):
- error detail on error.
"""
from models import Marker
+ self.marker_cls = Marker
try:
main_page = urllib2.urlopen(self.importer_instance.source)
assert main_page.getcode() == 200
@@ -966,71 +967,81 @@ class HtmlXsltManager(ImportManager):
for r, replaced in RE_CLEANS:
val = re.sub(r, replaced % {'base_url':base_url}, val)
item[k] = html_unescape(val)
- updated_item, new_item = 0, 0
- key_categories = self.importer_instance.get_key_category_dict()
- missing_cats = set()
+ self.key_categories = self.importer_instance.get_key_category_dict()
+ self.missing_cats = set()
+ self.updated_item, self.new_item = 0, 0
for item in items:
- if not self.importer_instance.default_localisation and \
- not "point" in item and not ("lat" in item and item['lat']):
- continue
- cls = None
- dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
- self.importer_instance.origin),
- 'license':self.importer_instance.license,
- 'name':item['name']}
- category = None
- if 'category' in item and item['category']:
- if item['category'] in key_categories:
- category = key_categories[item['category']]
- else:
- missing_cats.add(item['category'])
- cls = Marker
- if 'point' in item:
- x, y = item['point'].split(",")
- dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
- elif 'lat' in item and item['lat']:
- dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
- item['lat'])
- else:
- dct['point'] = self.importer_instance.default_localisation
- dct['description'] = item['description']
- if 'date' in item:
- has_dates = False
- for locale in DATE_PARSINGS:
- if has_dates:
- break
- for r in DATE_PARSINGS[locale]:
- m = r.search(item['date'])
- if not m:
- continue
- has_dates = True
- values = m.groupdict()
- year1 = datetime.date.today().year if 'year1' not in values \
- else int(values['year1'])
- dct['start_date'] = datetime.date(year1,
- MONTH_NAMES[locale].index(values['month1'].encode('utf-8')) + 1,
- int(values['day1']))
- if 'day2' not in values:
- break
- year2 = datetime.date.today().year if 'year2' not in values \
- else int(values['year2'])
- dct['end_date'] = datetime.date(year2,
- MONTH_NAMES[locale].index(values['month2'].encode('utf-8')) + 1,
- int(values['day2']))
- break
- key = item['key']
- it, updated, created = self.create_or_update_item(cls, dct, key,
- category=category)
- if updated:
- updated_item += 1
- if created:
- new_item += 1
+ self.add_dct_item(item)
msg = ''
- if missing_cats:
+ if self.missing_cats:
msg = _(u"Names \"%s\" doesn't match existing categories. "
u"Modify the import to match theses names with categories.") % (
- u'", "'.join(missing_cats))
- return (new_item, updated_item, msg)
+ u'", "'.join(self.missing_cats))
+ return (self.new_item, self.updated_item, msg)
+
+    def parse_date(self, date):
+        """
+        Extract a start date, and an end date when available, from a text.
+
+        The patterns of DATE_PARSINGS are tried locale by locale, in order,
+        and only the first pattern matching ``date`` is used.
+
+        Return a dict with a 'start_date' key, plus an 'end_date' key when
+        the matching pattern captured a second day. The dict is empty when
+        no pattern matches.
+        A day/month/year combination rejected by datetime.date raises
+        ValueError.
+        """
+        for locale in DATE_PARSINGS:
+            for regexp in DATE_PARSINGS[locale]:
+                match = regexp.search(date)
+                if not match:
+                    continue
+                # groupdict() lists every named group of the pattern and maps
+                # the groups that did not take part in the match to None: an
+                # optional group has to be tested on its value, a membership
+                # test always succeeds
+                values = match.groupdict()
+                dct = {'start_date': self._extract_date(locale, values, '1')}
+                # 'day2' is absent from single date patterns
+                if values.get('day2'):
+                    dct['end_date'] = self._extract_date(locale, values, '2')
+                return dct
+        return {}
+
+    def _extract_date(self, locale, values, suffix):
+        """
+        Build a datetime.date from the 'day', 'month' and 'year' groups of
+        ``values`` ending with ``suffix`` ('1' or '2').
+
+        A year that was not captured falls back to the current year.
+        """
+        year = values.get('year' + suffix)
+        year = int(year) if year else datetime.date.today().year
+        # the matched name is encoded before the lookup - presumably
+        # MONTH_NAMES holds utf-8 byte strings (to be confirmed)
+        month = MONTH_NAMES[locale].index(
+            values['month' + suffix].encode('utf-8')) + 1
+        return datetime.date(year, month, int(values['day' + suffix]))
+
+    def add_dct_item(self, item):
+        """
+        Create or update the marker described by ``item``.
+
+        ``item`` is a mapping with 'link', 'name', 'description' and 'key'
+        entries and optional 'category', 'point' ("x,y"), 'lat'/'lon' and
+        'date' entries.
+
+        The item is ignored when no location is available: no default
+        localisation on the importer, no 'point' and no filled 'lat'.
+        A category name missing from self.key_categories is recorded in
+        self.missing_cats. self.updated_item and self.new_item are
+        incremented according to the result of create_or_update_item.
+
+        Relies on the attributes set by get(): marker_cls, key_categories,
+        missing_cats, updated_item and new_item.
+        """
+        importer = self.importer_instance
+        if not (importer.default_localisation or "point" in item
+                or ("lat" in item and item['lat'])):
+            return
+        origin = "<a href='%s'>%s</a>" % (item['link'], importer.origin)
+        dct = {'origin':origin,
+               'license':importer.license,
+               'name':item['name']}
+
+        category = None
+        if 'category' in item and item['category']:
+            category_name = item['category']
+            if category_name not in self.key_categories:
+                self.missing_cats.add(category_name)
+            else:
+                category = self.key_categories[category_name]
+        marker_cls = self.marker_cls
+
+        # location by priority: explicit point, lon/lat pair, importer default
+        ewkt_point = 'SRID=4326;POINT(%s %s)'
+        if 'point' in item:
+            x, y = item['point'].split(",")
+            dct['point'] = ewkt_point % (x, y)
+        elif 'lat' in item and item['lat']:
+            dct['point'] = ewkt_point % (item['lon'], item['lat'])
+        else:
+            dct['point'] = importer.default_localisation
+
+        dct['description'] = item['description']
+        if 'date' in item:
+            parsed_dates = self.parse_date(item['date'])
+            dct.update(parsed_dates)
+
+        marker, updated, created = self.create_or_update_item(
+            marker_cls, dct, item['key'], category=category)
+        if updated:
+            self.updated_item += 1
+        if created:
+            self.new_item += 1
class XMLXsltManager(HtmlXsltManager):
PARSER = 'XMLParser'