diff options
author | Étienne Loks <etienne.loks@proxience.com> | 2015-02-11 18:07:03 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@proxience.com> | 2015-02-11 18:07:03 +0100 |
commit | ee58fc47a79779c5b712fafb19207664e9171e68 (patch) | |
tree | 828d1f1a167f32d09eea0dde555df1bc7aec805c /chimere/utils.py | |
parent | c481c44e52175f16571654bc9948d3e14097badb (diff) | |
parent | ec824db12597389c87184497796e8d0763c80b51 (diff) | |
download | Chimère-ee58fc47a79779c5b712fafb19207664e9171e68.tar.bz2 Chimère-ee58fc47a79779c5b712fafb19207664e9171e68.zip |
Merge branch 'v2.1' into v2.2
Diffstat (limited to 'chimere/utils.py')
-rw-r--r-- | chimere/utils.py | 45 |
1 files changed, 36 insertions, 9 deletions
diff --git a/chimere/utils.py b/chimere/utils.py index 2d74095..73e38ba 100644 --- a/chimere/utils.py +++ b/chimere/utils.py @@ -73,7 +73,7 @@ class ImportManager(object): raise NotImplementedError def create_or_update_item(self, cls, values, import_key, version=None, - key='', pk=None): + key='', pk=None, category=None): updated, created, item = False, False, None import_key = unicode(import_key).replace(':', '^') if not values.get('name'): @@ -85,6 +85,7 @@ class ImportManager(object): dct_import = { 'import_key__icontains':'%s:%s;' % (key, import_key), 'import_source':self.importer_instance.source} + ref_item = cls.objects.filter(**dct_import) try: item = None if pk: @@ -137,8 +138,11 @@ class ImportManager(object): if import_key: item.set_key(key, import_key) item.categories.clear() - for cat in self.importer_instance.categories.all(): - item.categories.add(cat) + if category: + item.categories.add(category) + else: + for cat in self.importer_instance.categories.all(): + item.categories.add(cat) return item, updated, created @classmethod @@ -878,8 +882,11 @@ DATE_PARSINGS = {'fr_FR':[ ] } +def clean_field(value): + return value.strip() class HtmlXsltManager(ImportManager): + PARSER = 'HTMLParser' def get(self): u""" Get data from the source @@ -904,7 +911,7 @@ class HtmlXsltManager(ImportManager): # convert it to valid XHTML #doc, errors = tidy_document(main_page) doc = main_page - dom = etree.HTML(doc, etree.HTMLParser()) + dom = etree.HTML(doc, getattr(etree, self.PARSER)()) try: xslt = etree.parse(self.importer_instance.source_file) self.importer_instance.source_file.seek(0) @@ -926,8 +933,8 @@ class HtmlXsltManager(ImportManager): base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1]) base_url += u"/" for item in newdom.getroot(): - c_item = {child.tag:child.text for child in item.getchildren() - if child.text} + c_item = {child.tag:clean_field(child.text) + for child in item.getchildren() if child.text} # try to have more information on the linked page if transform_child and 'link' in c_item: # not an absolute address @@ -960,19 +967,30 @@ class HtmlXsltManager(ImportManager): val = re.sub(r, replaced % {'base_url':base_url}, val) item[k] = html_unescape(val) updated_item, new_item = 0, 0 + key_categories = self.importer_instance.get_key_category_dict() + missing_cats = set() for item in items: if not self.importer_instance.default_localisation and \ - not "point" in item: + not "point" in item and not ("lat" in item and item['lat']): continue cls = None dct = {'origin':"<a href='%s'>%s</a>" % (item['link'], self.importer_instance.origin), 'license':self.importer_instance.license, 'name':item['name']} + category = None + if 'category' in item and item['category']: + if item['category'] in key_categories: + category = key_categories[item['category']] + else: + missing_cats.add(item['category']) cls = Marker if 'point' in item: x, y = item['point'].split(",") dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y) + elif 'lat' in item and item['lat']: + dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'], + item['lat']) else: dct['point'] = self.importer_instance.default_localisation dct['description'] = item['description'] @@ -1001,9 +1019,18 @@ class HtmlXsltManager(ImportManager): int(values['day2'])) break key = item['key'] - it, updated, created = self.create_or_update_item(cls, dct, key) + it, updated, created = self.create_or_update_item(cls, dct, key, + category=category) if updated: updated_item += 1 if created: new_item += 1 - return (new_item, updated_item, '') + msg = '' + if missing_cats: + msg = _(u"Names \"%s\" doesn't match existing categories. " + u"Modify the import to match theses names with categories.") % ( + u'", "'.join(missing_cats)) + return (new_item, updated_item, msg) + +class XMLXsltManager(HtmlXsltManager): + PARSER = 'XMLParser' |