summary | refs | log | tree | commit | diff
path: root/chimere/utils.py
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@proxience.com> 2015-02-11 18:07:03 +0100
committer Étienne Loks <etienne.loks@proxience.com> 2015-02-11 18:07:03 +0100
commit ee58fc47a79779c5b712fafb19207664e9171e68 (patch)
tree 828d1f1a167f32d09eea0dde555df1bc7aec805c /chimere/utils.py
parent c481c44e52175f16571654bc9948d3e14097badb (diff)
parent ec824db12597389c87184497796e8d0763c80b51 (diff)
download Chimère-ee58fc47a79779c5b712fafb19207664e9171e68.tar.bz2
Chimère-ee58fc47a79779c5b712fafb19207664e9171e68.zip
Merge branch 'v2.1' into v2.2
Diffstat (limited to 'chimere/utils.py')
-rw-r--r-- chimere/utils.py 45
1 files changed, 36 insertions, 9 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
index 2d74095..73e38ba 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -73,7 +73,7 @@ class ImportManager(object):
raise NotImplementedError
def create_or_update_item(self, cls, values, import_key, version=None,
- key='', pk=None):
+ key='', pk=None, category=None):
updated, created, item = False, False, None
import_key = unicode(import_key).replace(':', '^')
if not values.get('name'):
@@ -85,6 +85,7 @@ class ImportManager(object):
dct_import = {
'import_key__icontains':'%s:%s;' % (key, import_key),
'import_source':self.importer_instance.source}
+ ref_item = cls.objects.filter(**dct_import)
try:
item = None
if pk:
@@ -137,8 +138,11 @@ class ImportManager(object):
if import_key:
item.set_key(key, import_key)
item.categories.clear()
- for cat in self.importer_instance.categories.all():
- item.categories.add(cat)
+ if category:
+ item.categories.add(category)
+ else:
+ for cat in self.importer_instance.categories.all():
+ item.categories.add(cat)
return item, updated, created
@classmethod
@@ -878,8 +882,11 @@ DATE_PARSINGS = {'fr_FR':[
]
}
+def clean_field(value):
+ return value.strip()
class HtmlXsltManager(ImportManager):
+ PARSER = 'HTMLParser'
def get(self):
u"""
Get data from the source
@@ -904,7 +911,7 @@ class HtmlXsltManager(ImportManager):
# convert it to valid XHTML
#doc, errors = tidy_document(main_page)
doc = main_page
- dom = etree.HTML(doc, etree.HTMLParser())
+ dom = etree.HTML(doc, getattr(etree, self.PARSER)())
try:
xslt = etree.parse(self.importer_instance.source_file)
self.importer_instance.source_file.seek(0)
@@ -926,8 +933,8 @@ class HtmlXsltManager(ImportManager):
base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
base_url += u"/"
for item in newdom.getroot():
- c_item = {child.tag:child.text for child in item.getchildren()
- if child.text}
+ c_item = {child.tag:clean_field(child.text)
+ for child in item.getchildren() if child.text}
# try to have more information on the linked page
if transform_child and 'link' in c_item:
# not an absolute address
@@ -960,19 +967,30 @@ class HtmlXsltManager(ImportManager):
val = re.sub(r, replaced % {'base_url':base_url}, val)
item[k] = html_unescape(val)
updated_item, new_item = 0, 0
+ key_categories = self.importer_instance.get_key_category_dict()
+ missing_cats = set()
for item in items:
if not self.importer_instance.default_localisation and \
- not "point" in item:
+ not "point" in item and not ("lat" in item and item['lat']):
continue
cls = None
dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
self.importer_instance.origin),
'license':self.importer_instance.license,
'name':item['name']}
+ category = None
+ if 'category' in item and item['category']:
+ if item['category'] in key_categories:
+ category = key_categories[item['category']]
+ else:
+ missing_cats.add(item['category'])
cls = Marker
if 'point' in item:
x, y = item['point'].split(",")
dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif 'lat' in item and item['lat']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+ item['lat'])
else:
dct['point'] = self.importer_instance.default_localisation
dct['description'] = item['description']
@@ -1001,9 +1019,18 @@ class HtmlXsltManager(ImportManager):
int(values['day2']))
break
key = item['key']
- it, updated, created = self.create_or_update_item(cls, dct, key)
+ it, updated, created = self.create_or_update_item(cls, dct, key,
+ category=category)
if updated:
updated_item += 1
if created:
new_item += 1
- return (new_item, updated_item, '')
+ msg = ''
+ if missing_cats:
+ msg = _(u"Names \"%s\" doesn't match existing categories. "
+ u"Modify the import to match theses names with categories.") % (
+ u'", "'.join(missing_cats))
+ return (new_item, updated_item, msg)
+
+class XMLXsltManager(HtmlXsltManager):
+ PARSER = 'XMLParser'