From 5b41f143a3956fbd9d3a2288e607e6aa8d8c3451 Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Wed, 30 Oct 2013 03:08:06 +0100 Subject: Importer HTML-XSLT: retrieving and parsing of external website using an URL and XSLT files --- chimere/models.py | 3 ++- chimere/tests.py | 7 ++++++ chimere/utils.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ debian/control | 3 ++- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/chimere/models.py b/chimere/models.py index 952594c..727c3c7 100644 --- a/chimere/models.py +++ b/chimere/models.py @@ -326,7 +326,8 @@ class Importer(models.Model): filtr = models.CharField(_(u"Filter"), max_length=200, blank=True, null=True) source = models.CharField(_(u"Web address"), max_length=200, - blank=True, null=True) + blank=True, null=True, + help_text=_(u"Don't forget the trailing slash")) source_file = models.FileField(_(u"Source file"), upload_to='import_files', blank=True, null=True) source_file_alt = models.FileField(_(u"Alt source file"), diff --git a/chimere/tests.py b/chimere/tests.py index 070443e..5c76d2b 100644 --- a/chimere/tests.py +++ b/chimere/tests.py @@ -296,6 +296,13 @@ class GeoRSSImporterTest(TestCase, ImporterTest): self.marker_importers = [(importer1, 1), (importer2, 32)] +class HtmlXsltImporterTest(TestCase, ImporterTest): + def setUp(self): + subcategories = subcategory_setup() + importer1 = Importer.objects.create(importer_type='XSLT', + source='http://www.ville-villierslebacle.fr/') + self.marker_importers = [(importer1, 5),] + class FeedsTest(TestCase): def setUp(self): self.areas = areas_setup() diff --git a/chimere/utils.py b/chimere/utils.py index efaf084..894c13c 100644 --- a/chimere/utils.py +++ b/chimere/utils.py @@ -835,6 +835,17 @@ class OSMManager(ImportManager): api.ChangesetClose() return idx+1, None +import urllib2 +from BeautifulSoup import BeautifulSoup +#from tidylib import tidy_document +from lxml import etree + +RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''), + (re.compile(' ( )*'), ' '), + (re.compile(r"""