summary | refs | log | tree | commit | diff
path: root/chimere/utils.py
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@proxience.com> 2015-02-11 11:37:03 +0100
committer: Étienne Loks <etienne.loks@proxience.com> 2015-02-11 11:37:03 +0100
commit: c70c2c8f4f58436df8d1c694a74c457954bd1070 (patch)
tree: e521bfdf3e7377954d2cfd7a1647106d74a402e1 /chimere/utils.py
parent: 3c779a01f8320bb833ed95eb871c20e988a4b026 (diff)
download: Chimère-c70c2c8f4f58436df8d1c694a74c457954bd1070.tar.bz2
Chimère-c70c2c8f4f58436df8d1c694a74c457954bd1070.zip
Manage XML-XSLT import
Diffstat (limited to 'chimere/utils.py')
-rw-r--r-- chimere/utils.py 18
1 file changed, 14 insertions, 4 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
index 2d74095..f3ec751 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -85,6 +85,7 @@ class ImportManager(object):
dct_import = {
'import_key__icontains':'%s:%s;' % (key, import_key),
'import_source':self.importer_instance.source}
+ ref_item = cls.objects.filter(**dct_import)
try:
item = None
if pk:
@@ -878,8 +879,11 @@ DATE_PARSINGS = {'fr_FR':[
]
}
+def clean_field(value):
+ return value.strip()
class HtmlXsltManager(ImportManager):
+ PARSER = 'HTMLParser'
def get(self):
u"""
Get data from the source
@@ -904,7 +908,7 @@ class HtmlXsltManager(ImportManager):
# convert it to valid XHTML
#doc, errors = tidy_document(main_page)
doc = main_page
- dom = etree.HTML(doc, etree.HTMLParser())
+ dom = etree.HTML(doc, getattr(etree, self.PARSER)())
try:
xslt = etree.parse(self.importer_instance.source_file)
self.importer_instance.source_file.seek(0)
@@ -926,8 +930,8 @@ class HtmlXsltManager(ImportManager):
base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
base_url += u"/"
for item in newdom.getroot():
- c_item = {child.tag:child.text for child in item.getchildren()
- if child.text}
+ c_item = {child.tag:clean_field(child.text)
+ for child in item.getchildren() if child.text}
# try to have more information on the linked page
if transform_child and 'link' in c_item:
# not an absolute address
@@ -962,7 +966,7 @@ class HtmlXsltManager(ImportManager):
updated_item, new_item = 0, 0
for item in items:
if not self.importer_instance.default_localisation and \
- not "point" in item:
+ not "point" in item and not ("lat" in item and item['lat']):
continue
cls = None
dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
@@ -973,6 +977,9 @@ class HtmlXsltManager(ImportManager):
if 'point' in item:
x, y = item['point'].split(",")
dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif 'lat' in item and item['lat']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+ item['lat'])
else:
dct['point'] = self.importer_instance.default_localisation
dct['description'] = item['description']
@@ -1007,3 +1014,6 @@ class HtmlXsltManager(ImportManager):
if created:
new_item += 1
return (new_item, updated_item, '')
+
+class XMLXsltManager(HtmlXsltManager):
+ PARSER = 'XMLParser'