summaryrefslogtreecommitdiff
path: root/chimere/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'chimere/utils.py')
-rw-r--r--chimere/utils.py155
1 files changed, 83 insertions, 72 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
index 73e38ba..ba5e558 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -870,17 +870,17 @@ except UnicodeEncodeError:
for locale in MONTH_NAMES}
DATE_PARSINGS = {'fr_FR':[
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
- r'(?P<year1>\d{4})?[^\d]*'\
- r'(?P<day2>\d{1,2}) '\
- r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year2>\d{4})?.*'),
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year1>\d{4})?')
- ]
- }
+ re.compile(r'(?P<day1>\d{1,2}) '\
+ r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
+ r'(?P<year1>\d{4})?[^\d]*'\
+ r'(?P<day2>\d{1,2}) '\
+ r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+ r'(?P<year2>\d{4})?.*'),
+ re.compile(r'(?P<day1>\d{1,2}) '\
+ r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
+ r'(?P<year1>\d{4})?')
+ ]
+ }
def clean_field(value):
return value.strip()
@@ -897,6 +897,7 @@ class HtmlXsltManager(ImportManager):
- error detail on error.
"""
from models import Marker
+ self.marker_cls = Marker
try:
main_page = urllib2.urlopen(self.importer_instance.source)
assert main_page.getcode() == 200
@@ -966,71 +967,81 @@ class HtmlXsltManager(ImportManager):
for r, replaced in RE_CLEANS:
val = re.sub(r, replaced % {'base_url':base_url}, val)
item[k] = html_unescape(val)
- updated_item, new_item = 0, 0
- key_categories = self.importer_instance.get_key_category_dict()
- missing_cats = set()
+ self.key_categories = self.importer_instance.get_key_category_dict()
+ self.missing_cats = set()
+ self.updated_item, self.new_item = 0, 0
for item in items:
- if not self.importer_instance.default_localisation and \
- not "point" in item and not ("lat" in item and item['lat']):
- continue
- cls = None
- dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
- self.importer_instance.origin),
- 'license':self.importer_instance.license,
- 'name':item['name']}
- category = None
- if 'category' in item and item['category']:
- if item['category'] in key_categories:
- category = key_categories[item['category']]
- else:
- missing_cats.add(item['category'])
- cls = Marker
- if 'point' in item:
- x, y = item['point'].split(",")
- dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
- elif 'lat' in item and item['lat']:
- dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
- item['lat'])
- else:
- dct['point'] = self.importer_instance.default_localisation
- dct['description'] = item['description']
- if 'date' in item:
- has_dates = False
- for locale in DATE_PARSINGS:
- if has_dates:
- break
- for r in DATE_PARSINGS[locale]:
- m = r.search(item['date'])
- if not m:
- continue
- has_dates = True
- values = m.groupdict()
- year1 = datetime.date.today().year if 'year1' not in values \
- else int(values['year1'])
- dct['start_date'] = datetime.date(year1,
- MONTH_NAMES[locale].index(values['month1'].encode('utf-8')) + 1,
- int(values['day1']))
- if 'day2' not in values:
- break
- year2 = datetime.date.today().year if 'year2' not in values \
- else int(values['year2'])
- dct['end_date'] = datetime.date(year2,
- MONTH_NAMES[locale].index(values['month2'].encode('utf-8')) + 1,
- int(values['day2']))
- break
- key = item['key']
- it, updated, created = self.create_or_update_item(cls, dct, key,
- category=category)
- if updated:
- updated_item += 1
- if created:
- new_item += 1
+ self.add_dct_item(item)
msg = ''
- if missing_cats:
+ if self.missing_cats:
msg = _(u"Names \"%s\" doesn't match existing categories. "
u"Modify the import to match theses names with categories.") % (
- u'", "'.join(missing_cats))
- return (new_item, updated_item, msg)
+ u'", "'.join(self.missing_cats))
+ return (self.new_item, self.updated_item, msg)
+
+ def parse_date(self, date):
+ dct = {}
+ has_dates = False
+ for locale in DATE_PARSINGS:
+ if has_dates:
+ break
+ for r in DATE_PARSINGS[locale]:
+ m = r.search(date)
+ if not m:
+ continue
+ has_dates = True
+ values = m.groupdict()
+ year1 = datetime.date.today().year if 'year1' not in values \
+ else int(values['year1'])
+ dct['start_date'] = datetime.date(year1,
+ MONTH_NAMES[locale].index(values['month1'].encode('utf-8')
+ ) + 1,
+ int(values['day1']))
+ if 'day2' not in values:
+ break
+ year2 = datetime.date.today().year if 'year2' not in values \
+ else int(values['year2'])
+ dct['end_date'] = datetime.date(year2,
+ MONTH_NAMES[locale].index(values['month2'].encode('utf-8')
+ ) + 1,
+ int(values['day2']))
+ break
+ return dct
+
+ def add_dct_item(self, item):
+ if not self.importer_instance.default_localisation and \
+ not "point" in item and not ("lat" in item and item['lat']):
+ return
+ cls = None
+ dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
+ self.importer_instance.origin),
+ 'license':self.importer_instance.license,
+ 'name':item['name']}
+ category = None
+ if 'category' in item and item['category']:
+ if item['category'] in self.key_categories:
+ category = self.key_categories[item['category']]
+ else:
+ self.missing_cats.add(item['category'])
+ cls = self.marker_cls
+ if 'point' in item:
+ x, y = item['point'].split(",")
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif 'lat' in item and item['lat']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+ item['lat'])
+ else:
+ dct['point'] = self.importer_instance.default_localisation
+ dct['description'] = item['description']
+ if 'date' in item:
+ dct.update(self.parse_date(item['date']))
+ key = item['key']
+ it, updated, created = self.create_or_update_item(cls, dct, key,
+ category=category)
+ if updated:
+ self.updated_item += 1
+ if created:
+ self.new_item += 1
class XMLXsltManager(HtmlXsltManager):
PARSER = 'XMLParser'