diff options
author | Étienne Loks <etienne.loks@proxience.com> | 2015-02-11 11:37:03 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@proxience.com> | 2015-02-11 11:37:03 +0100 |
commit | c70c2c8f4f58436df8d1c694a74c457954bd1070 (patch) | |
tree | e521bfdf3e7377954d2cfd7a1647106d74a402e1 /chimere/utils.py | |
parent | 3c779a01f8320bb833ed95eb871c20e988a4b026 (diff) | |
download | Chimère-c70c2c8f4f58436df8d1c694a74c457954bd1070.tar.bz2 Chimère-c70c2c8f4f58436df8d1c694a74c457954bd1070.zip |
Manage XML-XSLT import
Diffstat (limited to 'chimere/utils.py')
-rw-r--r-- | chimere/utils.py | 18 |
1 files changed, 14 insertions, 4 deletions
```diff
diff --git a/chimere/utils.py b/chimere/utils.py
index 2d74095..f3ec751 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -85,6 +85,7 @@ class ImportManager(object):
         dct_import = {
             'import_key__icontains':'%s:%s;' % (key, import_key),
             'import_source':self.importer_instance.source}
+        ref_item = cls.objects.filter(**dct_import)
         try:
             item = None
             if pk:
@@ -878,8 +879,11 @@ DATE_PARSINGS = {'fr_FR':[
     ]
 }
 
+def clean_field(value):
+    return value.strip()
 
 class HtmlXsltManager(ImportManager):
+    PARSER = 'HTMLParser'
     def get(self):
         u"""
         Get data from the source
@@ -904,7 +908,7 @@ class HtmlXsltManager(ImportManager):
         # convert it to valid XHTML
         #doc, errors = tidy_document(main_page)
         doc = main_page
-        dom = etree.HTML(doc, etree.HTMLParser())
+        dom = etree.HTML(doc, getattr(etree, self.PARSER)())
         try:
             xslt = etree.parse(self.importer_instance.source_file)
             self.importer_instance.source_file.seek(0)
@@ -926,8 +930,8 @@ class HtmlXsltManager(ImportManager):
         base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
         base_url += u"/"
         for item in newdom.getroot():
-            c_item = {child.tag:child.text for child in item.getchildren()
-                      if child.text}
+            c_item = {child.tag:clean_field(child.text)
+                      for child in item.getchildren() if child.text}
             # try to have more information on the linked page
             if transform_child and 'link' in c_item:
                 # not an absolute address
@@ -962,7 +966,7 @@ class HtmlXsltManager(ImportManager):
         updated_item, new_item = 0, 0
         for item in items:
             if not self.importer_instance.default_localisation and \
-               not "point" in item:
+               not "point" in item and not ("lat" in item and item['lat']):
                 continue
             cls = None
             dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
@@ -973,6 +977,9 @@ class HtmlXsltManager(ImportManager):
             if 'point' in item:
                 x, y = item['point'].split(",")
                 dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+            elif 'lat' in item and item['lat']:
+                dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+                                                           item['lat'])
             else:
                 dct['point'] = self.importer_instance.default_localisation
             dct['description'] = item['description']
@@ -1007,3 +1014,6 @@ class HtmlXsltManager(ImportManager):
                 if created:
                     new_item += 1
         return (new_item, updated_item, '')
+
+class XMLXsltManager(HtmlXsltManager):
+    PARSER = 'XMLParser'
```