Diffstat (limited to 'chimere/utils.py')
-rw-r--r-- | chimere/utils.py | 1357
1 file changed, 1357 insertions, 0 deletions
diff --git a/chimere/utils.py b/chimere/utils.py new file mode 100644 index 0000000..0d84be3 --- /dev/null +++ b/chimere/utils.py @@ -0,0 +1,1357 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2012-2016 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet> + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# See the file COPYING for details. + +""" +Utilitaries +""" + +import csv +import collections +import datetime +import feedparser +import json +import os +import re +import StringIO +import tempfile +import urllib2 +import unicodedata +import zipfile + +from osgeo import ogr, osr +from lxml import etree + +from django.conf import settings +from django.contrib.gis.gdal import DataSource, OGRGeomType, check_err +from django.contrib.gis.geos import GEOSGeometry +from django.core.exceptions import ObjectDoesNotExist +from django.shortcuts import render_to_response +from django.utils.translation import ugettext_lazy as _ + +from chimere import get_version +from external_utils import OsmApi + + +def unicode_normalize(string): + if type(string) == str: + string = unicode(string.decode('utf-8')) + return ''.join( + (c for c in unicodedata.normalize('NFD', string) + if unicodedata.category(c) not in ('Mn', 'Sm', 'Sc'))) + + +class ImportManager(object): + u""" + Generic class for specific importers + """ + default_source = None + + def __init__(self, importer_instance): + self.importer_instance = importer_instance + if self.importer_instance.default_name: + self.default_name = self.importer_instance.default_name + else: + self.default_name = " - ".join([ + cat.name + for cat in self.importer_instance.categories.order_by( + 'name').all()]) + + def get(self): + raise NotImplementedError + + def put(self, extra_args={}): + raise NotImplementedError + + def create_or_update_item(self, cls, values, import_key, version=None, + key='', pk=None, category=None): + from models import PropertyModel + updated, created, item = False, False, None + import_key = unicode(import_key).replace(':', '^') + if not values.get('name'): + values['name'] = self.default_name + if not key: + key = self.importer_instance.importer_type + item = None + pms = [pm["slug"] for pm in PropertyModel.objects.values('slug').all()] + properties = {} + for k in values.keys(): + if k in pms: + properties[k] = values.pop(k) + if import_key or pk: + dct_import = { + 'import_key__icontains': '%s:%s;' % (key, import_key), + 'import_source': self.importer_instance.source} + ref_item = cls.objects.filter(**dct_import) + try: + item = None + if pk: + ref_item = cls.objects.get(pk=pk) + else: + ref_item = cls.objects.filter(**dct_import) + if not ref_item.count(): + raise ObjectDoesNotExist + ref_item = ref_item.all()[0] + if version and ref_item.import_version == int(version): + # no update since the last import + return ref_item, None, None + if not self.importer_instance.overwrite \ + and ref_item.modified_since_import: + return ref_item, 
None, None + else: + item = ref_item + for k in values: + if values[k]: + setattr(item, k, values[k]) + try: + item.save() + # force the modified_since_import status + item.modified_since_import = False + item.save() + except TypeError: + # error on data source + return None, False, False + updated = True + except ObjectDoesNotExist: + pass + if not item: + if not self.importer_instance.get_description and \ + self.importer_instance.default_description: + values['description'] = \ + self.importer_instance.default_description + values.update({ + 'import_source': self.importer_instance.source}) + values['status'] = self.importer_instance.default_status + if not self.importer_instance.associate_marker_to_way\ + and cls.__name__ == 'Route': + values['has_associated_marker'] = False + + try: + item = cls.objects.create(**values) + item.modified_since_import = False + item.save() + except TypeError: + # error on data source + return None, False, False + created = True + if import_key: + item.set_key(key, import_key) + item.categories.clear() + if category: + item.categories.add(category) + else: + for cat in self.importer_instance.categories.all(): + item.categories.add(cat) + for prop in properties: + item.setProperty(prop, properties[prop]) + return item, updated, created + + @classmethod + def get_files_inside_zip(cls, zippedfile, suffixes, dest_dir=None): + try: + flz = zipfile.ZipFile(zippedfile) + except zipfile.BadZipfile: + return [], _(u"Bad zip file") + namelist = flz.namelist() + filenames = [] + for suffix in suffixes: + current_file_name = None + for name in namelist: + if name.endswith(suffix) \ + or name.endswith(suffix.lower()) \ + or name.endswith(suffix.upper()): + current_file_name = name + filenames.append(current_file_name) + files = [] + for filename in filenames: + if filename: + if dest_dir: + files.append(filename) + flz.extract(filename, dest_dir) + else: + files.append(flz.open(filename)) + else: + files.append(None) + return files + + def get_source_file(self, suffixes, dest_dir=None, + extra_url=None): + source = self.importer_instance.source_file + if not hasattr(source, 'read'): + if not source: + source = self.importer_instance.source \ + if self.importer_instance.source else self.default_source + try: + url = source + if extra_url: + url += extra_url + remotehandle = urllib2.urlopen(url) + source = StringIO.StringIO(remotehandle.read()) + remotehandle.close() + except ValueError: + # assume it is a local file + try: + source = open(source) + except IOError, msg: + return (None, msg) + except (urllib2.URLError, AttributeError) as error: + return (None, error.message) + if self.importer_instance.zipped: + try: + files = self.get_files_inside_zip(source, suffixes, dest_dir) + except zipfile.BadZipfile: + return (None, _(u"Bad zip file")) + if not files or None in files: + return (None, + _(u"Missing file(s) inside the zip file")) + source = files[0] if len(suffixes) == 1 else files + return (source, None) + + +class KMLManager(ImportManager): + u""" + KML importer + The filtr argument has to be defined as the exact name of the folder to be + imported + """ + XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark' + DEFAULT_XPATH = '//kml:Placemark' + + def __init__(self, importer_instance, ns=''): + super(KMLManager, self).__init__(importer_instance) + self.ns = ns + + def get(self): + u""" + Get data from a KML source + + Return a tuple with: + - number of new item ; + - number of item updated ; + - error detail on error + """ + from models import Marker, 
Route + new_item, updated_item, msg = 0, 0, '' + source, msg = self.get_source_file(['.kml']) + if msg: + return (0, 0, msg) + doc = source + # remove empty lines before declaration (bad XML file) + if hasattr(source, 'getvalue'): + splitted = source.getvalue().split('\n') + for idx, line in enumerate(splitted): + if line.strip(): + break + doc = StringIO.StringIO("\n".join(splitted[idx:])) + try: + tree = etree.parse(doc) + except: + return (0, 0, _(u"Bad XML file")) + # try to get default namespace + if not self.ns: + self.ns = tree.getroot().nsmap[None] + xpath = self.XPATH % self.importer_instance.filtr \ + if self.importer_instance.filtr else self.DEFAULT_XPATH + for placemark in tree.xpath(xpath, + namespaces={'kml': self.ns}): + name, point, line = None, None, None + pl_id = placemark.attrib.get('id') + pl_key = 'kml-%d' % self.importer_instance.pk + ns = '{%s}' % self.ns + description = '' + for item in placemark: + if item.tag == ns + 'name': + name = item.text + if not pl_id: + # if no ID is provided assume that name is a key + pl_id = name + elif item.tag == ns + 'description': + if self.importer_instance.get_description: + description = item.text + elif item.tag == ns + 'Point': + for coord in item: + if coord.tag == ns + 'coordinates': + x, y, z = coord.text.split(',') + point = 'SRID=4326;POINT(%s %s)' % (x, y) + elif item.tag == ns + 'LineString': + for coord in item: + if coord.tag == ns + 'coordinates': + points = coord.text.replace('\n', ' ').split(' ') + points = ",".join([" ".join(p.split(',')[:2]) + for p in points if p]) + line = 'SRID=4326;LINESTRING(%s)' % points + cls = None + dct = {'description': description, + 'name': name, + 'origin': self.importer_instance.origin, + 'license': self.importer_instance.license} + if point: + dct['point'] = point + cls = Marker + if line: + dct['route'] = line + dct.pop('description') + cls = Route + if cls: + item, updated, created = self.create_or_update_item( + cls, dct, pl_id, key=pl_key) + if updated: + updated_item += 1 + if created: + new_item += 1 + return (new_item, updated_item, msg) + + @classmethod + def export(cls, queryset): + dct = { + 'name': settings.PROJECT_NAME, + 'description': unicode(datetime.date.today()), + 'locations': queryset.all() + } + filename = unicode_normalize(settings.PROJECT_NAME + dct['description'] + + '.kml') + result = render_to_response('chimere/export.kml', dct) + return filename, result + + +class ShapefileManager(ImportManager): + u""" + Shapefile importer + """ + def get(self): + u""" + Get data from a Shapefile source + + Return a tuple with: + - number of new item ; + - number of item updated ; + - error detail on error + + The filtr argument allow to specify match between the shapefile cols + and the db. JSON format is used. 
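+        For example (column names hypothetical): {"id": "id",
+        "NAME": "name", "DESC": "description"} maps shapefile columns
+        (keys) to Chimère fields (values); an "id" entry is required
+        and is used as the import key.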
+ """ + from models import Marker, Route, Polygon + new_item, updated_item, msg = 0, 0, '' + tmpdir = tempfile.mkdtemp() + sources, msg = self.get_source_file(['.shp', '.dbf', '.prj', '.shx'], + dest_dir=tmpdir) + if msg: + return (0, 0, msg) + if not sources: + return (0, 0, _(u"Error while reading the data source.")) + # get the srid + srid = self.importer_instance.srid + if not srid: + prjfilename = tmpdir + os.sep + sources[2] + try: + from osgeo import osr + with open(prjfilename, 'r') as prj_file: + prj_txt = prj_file.read() + srs = osr.SpatialReference() + srs.ImportFromESRI([prj_txt]) + srs.AutoIdentifyEPSG() + srid = srs.GetAuthorityCode(None) + except ImportError: + pass + if not srid: + # try with the default projection + srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION + msg = _(u"SRID cannot be guessed. The default SRID (%s) has " + u"been used.") % srid + # If imported items are not well located " + # u"ask your data provider for the SRID to use.") % srid + shapefilename = tmpdir + os.sep + sources[0] + ds = DataSource(shapefilename) + lyr = ds[0] + default_dct = {} + filtr = self.importer_instance.filtr + if filtr: + try: + filtr = json.JSONDecoder().decode(self.importer_instance.filtr) + except ValueError: + return ( + new_item, updated_item, + _(u"Bad configuration: filter must be a valid " + u"JSON string")) + for k in ('id',): + if k not in filtr: + return ( + new_item, updated_item, + _(u"The key \"%s\" is missing in the " + u"filter.") % k) + for k in filtr: + try: + ids = lyr.get_fields(k) + except: + return ( + new_item, updated_item, + _(u"Config: {} is not an appropriate column name " + u"for this Shapefile. Available columns " + u" are: {}").format(k, u", ".join( + [j for j in lyr.fields]))) + default_dct = {'origin': self.importer_instance.origin, + 'license': self.importer_instance.license} + if 'prefix_name' in filtr: + default_dct['name'] = filtr.pop('prefix_name') + if 'prefix_description' in filtr: + default_dct['description'] = filtr.pop('prefix_description') + else: + # if no filtr it is assumed that the first field is a + # id name and the second field is the name + id_name = lyr.fields[0] if len(lyr.fields) > 0 else None + # test if id_name is well guess + if id_name: + ids = lyr.get_fields(id_name) + if len(ids) != len(set(ids)): + id_name = None + filtr['id'] = id_name + if len(lyr.fields) > 1: + filtr["name"] = lyr.fields[1] + elif id_name: + filtr["name"] = id_name + + if lyr.geom_type not in ('Point', 'LineString', 'Polygon'): + return (0, 0, _(u"Type of geographic item (%s) of this shapefile " + u"is not managed by Chimère.") % lyr.geom_type) + geom_key = '' + geom_cls = None + if lyr.geom_type == 'Point': + geom_key = 'point' + geom_cls = Marker + elif lyr.geom_type == 'Polygon': + geom_key = 'polygon' + geom_cls = Polygon + else: + geom_key = 'route' + geom_cls = Route + # indexes = [] + for idx, feat in enumerate(lyr): + dct = default_dct.copy() + for k in filtr: + val = feat.get(k) + try: + val = unicode(val) + except UnicodeDecodeError: + try: + val = unicode( + val.decode(settings.CHIMERE_SHAPEFILE_ENCODING)) + except: + continue + if filtr[k] not in dct: + dct[filtr[k]] = '' + dct[filtr[k]] += val + try: + geoms = [feat.geom.wkt] + except: + return (0, 0, _(u"Bad Shapefile")) + if feat.geom.geom_type == 'MultiLineString': + geoms = [geom.wkt for geom in feat.geom] + import_key = dct.pop('id') + for geom in geoms: + dct[geom_key] = 'SRID=%s;%s' % (srid, geom) + item, updated, created = self.create_or_update_item( + geom_cls, dct, import_key) + 
if updated: + updated_item += 1 + if created: + new_item += 1 + # clean up + tmpdirs = set() + for src in sources: + dirs = os.sep.join(src.split(os.sep)[:-1]) + if dirs: + tmpdirs.add(tmpdir + os.sep + dirs) + os.remove(tmpdir + os.sep + src) + for dr in tmpdirs: + os.removedirs(dr) + return (new_item, updated_item, msg) + + @classmethod + def export(cls, queryset): + date = unicode(datetime.date.today()) + + tmp = tempfile.NamedTemporaryFile(suffix='.shp', mode='w+b') + tmp.close() + + tmp_name = tmp.name + field_names = [field.name for field in queryset.model._meta.fields] + geo_field = getattr( + queryset.model, + 'point' if 'point' in field_names else 'route')._field + + dr = ogr.GetDriverByName('ESRI Shapefile') + ds = dr.CreateDataSource(tmp_name) + if ds is None: + raise Exception(_(u'Could not create file!')) + ogr_type = OGRGeomType(geo_field.geom_type).num + srs = osr.SpatialReference() + srs.ImportFromEPSG(geo_field.srid) + + layer = ds.CreateLayer('lyr', srs=srs, geom_type=ogr_type) + + for field_name in ('name', 'category'): + field_defn = ogr.FieldDefn(str(field_name), ogr.OFTString) + field_defn.SetWidth(255) + if layer.CreateField(field_defn) != 0: + raise Exception(_(u'Failed to create field')) + + feature_def = layer.GetLayerDefn() + + for item in queryset: + # duplicate items when in several categories + q = item.categories + if not q.count(): + categories = [None] + else: + categories = q.all() + for category in categories: + feat = ogr.Feature(feature_def) + feat.SetField('name', str(unicode_normalize(item.name)[:80])) + if category: + feat.SetField('category', + str(unicode_normalize(category.name)[:80])) + + geom = getattr(item, geo_field.name) + if not geom: + continue + ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt) + check_err(feat.SetGeometry(ogr_geom)) + check_err(layer.CreateFeature(feat)) + # Cleaning up + ds.Destroy() + + # writing to a zip file + filename = unicode_normalize(settings.PROJECT_NAME) + '-' + date + buff = StringIO.StringIO() + zip_file = zipfile.ZipFile(buff, 'w', zipfile.ZIP_DEFLATED) + suffixes = ['shp', 'shx', 'prj', 'dbf'] + for suffix in suffixes: + name = tmp_name.replace('.shp', '.' 
+ suffix) + arcname = '.'.join((filename, suffix)) + zip_file.write(name, arcname=arcname) + zip_file.close() + buff.flush() + zip_stream = buff.getvalue() + buff.close() + return filename, zip_stream + + +class CSVManager(ImportManager): + u""" + CSV importer + """ + @classmethod + def set_categories(value): + return + + # (label, getter, setter) + COLS = [("Id", 'pk', 'pk'), (_(u"Name"), 'name', 'name'), + (_(u"Categories"), lambda obj: ", ".join( + [c.name for c in obj.categories.all()]), set_categories), + (_(u"State"), 'status', lambda x: x), + (_(u"Description"), 'description', 'description'), + (_(u"Localisation"), 'geometry', 'geometry')] + + def get(self): + u""" + Get data from a CSV source + + Return a tuple with: + - number of new item ; + - number of item updated ; + - error detail on error + """ + from models import Marker, Route + new_item, updated_item, msg = 0, 0, '' + source, msg = self.get_source_file(['.csv']) + if msg: + return (0, 0, msg) + reader = csv.reader(source, delimiter=';', quotechar='"') + prop_cols = [] + for pm in Marker.all_properties(): + prop_cols.append((pm.name, pm.getAttrName(), + pm.getAttrName() + '_set')) + cols = list(self.COLS) + prop_cols + # datas = [] + for idx, row in enumerate(reader): + if not idx: # first row + try: + assert(len(row) >= len(cols)) + except AssertionError: + return (0, 0, _(u"Invalid CSV format")) + continue + if len(row) < len(cols): + continue + # pk, name, cats, state = row[0], row[1], row[2], row[3] + pk, name = row[0], row[1] + geom = row[5] + description = '' + if self.importer_instance.get_description: + description = row[4] + COL_INDEX = 6 + dct = {'description': description, + 'name': name, + 'origin': self.importer_instance.origin, + 'license': self.importer_instance.license} + cls = None + if 'POINT' in geom: + cls = Marker + dct['point'] = geom + elif 'LINE' in geom: + cls = Route + dct['route'] = geom + else: + continue + import_key = pk if pk else name.decode('utf-8') + item, updated, created = self.create_or_update_item( + cls, dct, import_key, pk=pk) + if updated: + updated_item += 1 + if created: + new_item += 1 + for idx, col in enumerate(cols[COL_INDEX:]): + name, getter, setter_val = col + setter = getattr(item, setter_val) + val = row[idx + COL_INDEX] + setter(item, val) + return (new_item, updated_item, msg) + + @classmethod + def export(cls, queryset): + dct = {'description': unicode(datetime.date.today()), 'data': []} + # cls_name = queryset.model.__name__.lower() + cols = list(cls.COLS) + for pm in queryset.model.all_properties(): + cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set')) + header = [col[0] for col in cols] + dct['data'].append(header) + for item in queryset.all(): + data = [] + for (lbl, attr, setr) in cols: + if callable(attr): + data.append(attr(item)) + else: + data.append(getattr(item, attr)) + dct['data'].append(data) + filename = unicode_normalize(settings.PROJECT_NAME + dct['description'] + + '.csv') + result = render_to_response('chimere/export.csv', dct) + return filename, result + + +class GeoRSSManager(ImportManager): + u""" + RSS importer. 
+ This manager only gets and do not produce GeoRSSFeed + """ + + def get(self): + u""" + Get data from a GeoRSS simple source + + Return a tuple with: + - number of new item ; + - number of item updated ; + - error detail on error + """ + from models import Marker, Route + new_item, updated_item, msg = 0, 0, '' + feed = feedparser.parse(self.importer_instance.source) + if feed['bozo'] and not isinstance( + feed['bozo_exception'], feedparser.CharacterEncodingOverride): + return (0, 0, _(u"RSS feed is not well formed")) + for item in feed['items']: + if "georss_point" not in item and 'georss_line' not in item \ + and not ("geo_lat" in item and "geo_long" in item): + continue + cls = None + dct = {'origin': self.importer_instance.origin, + 'license': self.importer_instance.license} + if 'georss_point' in item or "geo_lat" in item: + cls = Marker + if 'georss_point' in item: + try: + y, x = item['georss_point'].split(' ') + except ValueError: + continue + else: + y = item['geo_lat'] + x = item['geo_long'] + dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y) + if self.importer_instance.get_description: + for k in ['description', 'summary', 'value']: + if k in item: + dct['description'] = item[k] + break + else: + cls = Route + points = item['georss_line'].split(' ') + reordered_points = [] + # lat, lon -> x, y + for idx in xrange(len(points) / 2): + reordered_points.append("%s %s" % (points[idx * 2 + 1], + points[idx * 2])) + dct['route'] = 'SRID=4326;LINESTRING(%s)' % \ + ",".join(reordered_points) + + dct['name'] = item['title'] + pl_id = item['id'] if 'id' in item else item['title'] + it, updated, created = self.create_or_update_item(cls, dct, pl_id) + if updated: + updated_item += 1 + if created: + new_item += 1 + return (new_item, updated_item, msg) + + +class JsonManager(ImportManager): + u""" + Json importer. 
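+    The filtr argument maps source JSON keys to Chimère fields; the
+    values must include "name", "id" and "description". For example
+    (source keys hypothetical):
+    {"title": "name", "uid": "id", "summary": "description"}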
+ This manager only gets and do not produce Json feed + """ + + def get(self): + u""" + Get data from a json simple source + + Return a tuple with: + - number of new item ; + - number of item updated ; + - error detail on error + """ + from models import Marker + new_item, updated_item, msg = 0, 0, '' + source, msg = self.get_source_file(['.json']) + if msg: + return (0, 0, msg) + + vals = source.read().replace('\n', ' ') + try: + values = json.JSONDecoder( + object_pairs_hook=collections.OrderedDict).decode(vals) + except ValueError as e: + return (new_item, updated_item, + _(u"JSON file is not well formed: " + e.message)) + # configuration in filtr + try: + filtr = json.JSONDecoder().decode(self.importer_instance.filtr) + except ValueError: + return ( + new_item, updated_item, + _(u"Bad configuration: filter field must be a valid " + u"JSON string")) + + vls = filtr.values() + for k in ('name', 'id', 'description'): + if k not in vls: + return ( + new_item, updated_item, + _(u"A key must be associated to \"%s\" in the " + u"filter.") % k) + + default_dct = {'origin': self.importer_instance.origin, + 'license': self.importer_instance.license} + if 'prefix_name' in filtr: + default_dct['name'] = filtr.pop('prefix_name') + if 'prefix_description' in filtr: + default_dct['description'] = filtr.pop('prefix_description') + if self.importer_instance.default_localisation: + default_dct['point'] = self.importer_instance.default_localisation + + for item in values: + dct = default_dct.copy() + for k in filtr: + if k.startswith('prefix_') or k.startswith('suffix_'): + continue + if k in item and item[k]: + if filtr[k] not in dct: + dct[filtr[k]] = "" + else: + if filtr[k] == 'description': + dct[filtr[k]] += "<br/>" + else: + dct[filtr[k]] += " " + dct[filtr[k]] += item[k] + if 'point' in item: + x, y = item['point'].split(",") + dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y) + elif 'lat' in item and item['lat'] \ + and 'lon' in item and item['lon']: + dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'], + item['lat']) + elif 'x' in item and item['x'] \ + and 'y' in item and item['y']: + dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['x'], + item['y']) + if not dct['point']: + continue + for k in filtr: + if k.startswith('prefix_') or k.startswith('suffix_'): + pos = k.split('_')[0] + key = '_'.join(k.split('_')[1:]) + if key in dct: + if pos == 'prefix': + dct[key] = filtr[k] + dct[key] + else: + dct[key] += filtr[k] + cls = Marker + pl_id = (dct.pop('id') if 'id' in dct else dct['name']) \ + + "-" + unicode(self.importer_instance.pk) + it, updated, created = self.create_or_update_item(cls, dct, pl_id) + if updated: + updated_item += 1 + if created: + new_item += 1 + return (new_item, updated_item, msg) + +RE_HOOK = re.compile('\[([^\]]*)\]') + +# TODO: manage deleted item from OSM + + +class OSMManager(ImportManager): + u""" + OSM importer/exporter + The source url is a path to an OSM file or a XAPI url + The filtr argument is XAPI args or empty if it is an OSM file. + """ + default_source = settings.CHIMERE_XAPI_URL + + def get(self): + u""" + Get data from the source + + Return a tuple with: + - new items; + - updated items; + - error detail on error. 
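+
+        The filtr value is appended as-is to the XAPI url; for example
+        (tag and bounding box hypothetical):
+        [amenity=drinking_water][bbox=-4.57,47.75,-4.41,47.89]
+        A bounding box and at least one unambiguous tag are required
+        to export back with put().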
+ """ + source, msg = self.get_source_file( + ['.osm'], extra_url=self.importer_instance.filtr) + if not source: + return (0, 0, msg) + + tree = etree.parse(source) + # only import node or ways + if tree.xpath('count(//way)') and tree.xpath('count(//node)'): + return self.import_ways(tree) + elif tree.xpath('count(//node)'): + return self.import_nodes(tree) + return 0, 0, _(u"Nothing to import") + + def import_ways(self, tree): + from chimere.models import Route + msg, items, new_item, updated_item = "", [], 0, 0 + nodes = {} + for node in tree.xpath('//node'): + node_id = node.attrib.get('id') + for item in node: + k = item.attrib.get('k') + if node_id: + nodes[node_id] = '%s %s' % (node.get('lon'), + node.get('lat')) + for way in tree.xpath('//way'): + name = None + points = [] + node_id = way.attrib.get('id') + version = way.attrib.get('version') + for item in way: + k = item.attrib.get('k') + if k == 'name': + name = item.attrib.get('v') + if item.tag == 'nd': + points.append(item.get('ref')) + if not points: + continue + wkt = 'SRID=4326;LINESTRING(%s)' % ",".join( + [nodes[point_id] for point_id in points if point_id in nodes]) + dct = {'route': wkt, + 'name': name, + 'origin': self.importer_instance.origin + or u'OpenStreetMap.org', + 'license': self.importer_instance.license + or u'ODbL', + 'import_version': version} + item, updated, created = self.create_or_update_item( + Route, dct, node_id, version) + if updated: + updated_item += 1 + if created: + new_item += 1 + items.append(item) + return new_item, updated_item, msg + + def import_nodes(self, tree): + from chimere.models import Marker + msg, items, new_item, updated_item = "", [], 0, 0 + for node in tree.xpath('//node'): + name = None + node_id = node.attrib.get('id') + if not node_id: + continue + version = node.attrib.get('version') + for item in node: + k = item.attrib.get('k') + if k == 'name': + name = item.attrib.get('v') + point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'), + node.get('lat')) + dct = {'point': point, + 'name': name, + 'origin': self.importer_instance.origin + or u'OpenStreetMap.org', + 'license': self.importer_instance.license + or u'ODbL', + 'import_version': version} + item, updated, created = self.create_or_update_item( + Marker, dct, node_id, version) + if updated: + updated_item += 1 + if created: + new_item += 1 + items.append(item) + return (new_item, updated_item, msg) + + def put(self, extra_args={}): + # first of all: reimport in order to verify that no changes has been + # made since the last import + from models import Marker + new_item, updated_item, msg = self.get() + # check if import is possible + if msg: + return 0, msg + if new_item: + return 0, _(u"New items imported - validate them before exporting") + if Marker.objects.filter(status='I').count(): + return 0, _(u"There are items from a former import not yet " + u"validated - validate them before exporting") + # start import + api = settings.CHIMERE_OSM_API_URL + username = settings.CHIMERE_OSM_USER + password = settings.CHIMERE_OSM_PASSWORD + if extra_args: + try: + api = extra_args['api'] + username = extra_args['username'] + password = extra_args['password'] + except KeyError: + return 0, _(u"Bad params - programming error") + username = username.encode('latin1') + password = password.encode('latin1') + api = OsmApi.OsmApi(api=api, username=username, password=password) + api.ChangesetCreate({u"comment": u"Import from Chimère %s" % + get_version()}) + hooks = RE_HOOK.findall(self.importer_instance.filtr) + if not hooks: + hooks = 
RE_HOOK.findall(self.importer_instance.source) + if not hooks: + return 0, _(u"Bad param") + tags = {} + bbox = [] + for hook in hooks: + key, value = hook.split('=') + if '*' in value or '|' in key or '|' in value: + continue + if key == 'bbox': + x1, y1, x2, y2 = [float(val) for val in value.split(',')] + bbox = GEOSGeometry( + 'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % ( + x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326) + continue + tags[key] = value + if not tags: + return 0, _(u"No non ambigious tag is defined in the XAPI request") + if not bbox: + return 0, _( + u"No bounding box is defined in the XAPI request." + u"If you are sure to manage the entire planet set the " + u"bounding box to -180,-90,180,90") + default_dct = {'tag': tags, + 'import_source': self.importer_instance.source} + idx = -1 + for idx, item in enumerate( + Marker.objects.filter( + status='A', + point__contained=bbox, + categories=self.importer_instance.categories.all(), + not_for_osm=False, modified_since_import=True, + route=None).all()): + dct = default_dct.copy() + dct.update({'lon': item.point.x, + 'lat': item.point.y}) + dct['tag']['name'] = item.name + node = None + import_key = item.get_key('OSM') + updated = False + if import_key: + try: + dct['id'] = import_key + dct['version'] = item.import_version + node = api.NodeUpdate(dct) + updated = True + except OsmApi.ApiError, error: + if error.status == 404: + dct.pop('id') + dct.pop('version') + pass # if the node doesn't exist it is created + else: + raise + if not updated: + node = api.NodeCreate(dct) + item.set_key('OSM', node['id']) + item.import_version = node['version'] + item.save() + api.ChangesetClose() + return idx + 1, None + + +import chardet +import HTMLParser +from BeautifulSoup import BeautifulSoup + + +RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''), + (re.compile(' ( )*'), ' '), + (re.compile(r"""<a href=["'](?!https?)(.*)["']"""), + '<a href="%(base_url)s\\1"'), + ) + +from calendar import TimeEncoding, month_name + + +def get_month_name(month_no, locale): + with TimeEncoding(locale) as encoding: + s = month_name[month_no] + if encoding is not None: + s = s.decode(encoding) + return s + +MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8') + for no_month in xrange(1, 13)] for locale in ['fr_FR']} + +try: + UNI_MONTH_NAMES = {locale: [m.decode('utf-8') for m in MONTH_NAMES[locale]] + for locale in MONTH_NAMES} +except UnicodeEncodeError: + UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]] + for locale in MONTH_NAMES} + +DATE_PARSINGS = { + 'fr_FR': [ + re.compile(r'(?P<day1>\d{1,2}) ' + r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') ' + r'(?P<year1>\d{4})?[^\d]*' + r'(?P<day2>\d{1,2}) ' + r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *' + r'(?P<year2>\d{4})?.*'), + re.compile(r'(?P<day1>\d{1,2}) ' + r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') * ' + r'(?P<year1>\d{4})?')], + 'en': [ + re.compile(r'(?P<year1>\d{4})-' + r'(?P<month1>\d{2})-' + r'(?P<day1>\d{2})' + r'(?:T' + r'(?P<hour1>\d{2})?:' + r'(?P<minut1>\d{2})?:' + r'(?P<second1>\d{2})' + r')?.*' + r'(?P<year2>\d{4})-' + r'(?P<month2>\d{2})-' + r'(?P<day2>\d{2})' + r'(?:T' + r'(?P<hour2>\d{2})?:' + r'(?P<minut2>\d{2})?:' + r'(?P<second2>\d{2})' + r')?.*'), + re.compile(r'(?P<year1>\d{4})-' + r'(?P<month1>\d{2})-' + r'(?P<day1>\d{2})' + r'(?:T' + r'(?P<hour1>\d{2})?:' + r'(?P<minut1>\d{2})?:' + r'(?P<second1>\d{2})' + r')?')], +} + + +def clean_field(value): + return value.strip() + + +class 
HtmlXsltManager(ImportManager): + PARSER = 'HTMLParser' + + def get(self): + u""" + Get data from the source + + Return a tuple with: + - new items; + - updated items; + - error detail on error. + """ + from models import Marker + self.marker_cls = Marker + try: + main_page = urllib2.urlopen(self.importer_instance.source) + assert main_page.getcode() == 200 + except (urllib2.URLError, AssertionError): + return (0, 0, _(u"Source page is unreachable.")) + data = main_page.read() + encoding = chardet.detect(data) + data = data.decode(encoding['encoding']) + + soup = BeautifulSoup(data) + main_page = soup.prettify() + # convert it to valid XHTML + # doc, errors = tidy_document(main_page) + doc = main_page + dom = etree.HTML(doc, getattr(etree, self.PARSER)()) + try: + xslt = etree.parse(self.importer_instance.source_file) + self.importer_instance.source_file.seek(0) + transform = etree.XSLT(xslt) + except (etree.XSLTParseError, etree.XMLSyntaxError, TypeError): + return (0, 0, _(u"The source file is not a valid XSLT file.")) + newdom = transform(dom) + items = [] + # load an alternate xslt file to apply to linked page + transform_child = None + if self.importer_instance.source_file_alt: + try: + alt_xslt = etree.parse(self.importer_instance.source_file_alt) + self.importer_instance.source_file_alt.seek(0) + transform_child = etree.XSLT(alt_xslt) + except (etree.XSLTParseError, etree.XMLSyntaxError, TypeError): + return (0, 0, + _(u"The alt source file is not a valid XSLT file.")) + base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1]) + base_url += u"/" + for item in newdom.getroot(): + c_item = {child.tag: clean_field(child.text) + for child in item.getchildren() if child.text} + # try to have more information on the linked page + if transform_child and 'link' in c_item: + # not an absolute address + if not c_item['link'].startswith('http://') and \ + not c_item['link'].startswith('https://'): + c_item['link'] = base_url + c_item['link'] + try: + child_page = urllib2.urlopen(c_item['link']) + assert child_page.getcode() == 200 + except (urllib2.URLError, AssertionError): + # don't stop the export for a bad link + items.append(c_item) + continue + data = child_page.read() + encoding = chardet.detect(data) + data = data.decode(encoding['encoding']) + child_page = BeautifulSoup(data).prettify() + child_dom = etree.HTML(child_page, etree.HTMLParser()) + extra_keys = transform_child(child_dom).getroot() + if len(extra_keys): + c_item.update({extra.tag: etree.tostring(extra) + for extra in extra_keys[0].getchildren()}) + items.append(c_item) + # change relative link to full link, simplify, unescape HTML entities + html_unescape = HTMLParser.HTMLParser().unescape + for item in items: + for k in item: + val = item[k] + for r, replaced in RE_CLEANS: + val = re.sub(r, replaced % {'base_url': base_url}, val) + item[k] = html_unescape(val) + self.key_categories = self.importer_instance.get_key_category_dict() + self.missing_cats = set() + self.updated_item, self.new_item = 0, 0 + for item in items: + self.add_dct_item(item) + msg = '' + if self.missing_cats: + msg = _( + u"Names \"%s\" doesn't match existing categories. 
" + u"Modify the import to match theses names with categories.") %\ + (u'", "'.join(self.missing_cats)) + return (self.new_item, self.updated_item, msg) + + @classmethod + def _internal_parse_date(cls, locale, year, month, day): + try: + year = datetime.date.today().year if not year else int(year) + except ValueError: + return + month = month.encode('utf-8') + if locale in MONTH_NAMES and month in MONTH_NAMES[locale]: + month = MONTH_NAMES[locale].index(month) + 1 + else: + try: + month = int(month) + except ValueError: + return + try: + day = int(day) + except ValueError: + return + try: + return datetime.date(year, month, day) + except ValueError: + return + + def parse_date(self, date): + dct = {} + has_dates = False + for locale in DATE_PARSINGS: + if has_dates: + break + for r in DATE_PARSINGS[locale]: + m = r.search(date) + if not m: + continue + values = m.groupdict() + date = self._internal_parse_date( + locale, 'year1' in values and values['year1'], + values['month1'], values['day1']) + if not date: + continue + dct['start_date'] = date + has_dates = True + if 'day2' not in values: + break + date = self._internal_parse_date( + locale, 'year2' in values and values['year2'], + values['month2'], values['day2']) + if date: + dct['end_date'] = date + break + return dct + + def add_dct_item(self, item): + if not self.importer_instance.default_localisation and \ + "point" not in item and not ("lat" in item and item['lat']): + return + cls = None + origin = self.importer_instance.origin + origin_lnk = item.get('link') + if origin_lnk: + origin = u"<a href='%s' target='_blank'>%s</a>" % ( + origin_lnk, origin) + dct = { + 'origin': origin, + 'license': self.importer_instance.license, + 'name': item['name']} + category = None + if 'category' in item and item['category']: + if item['category'] in self.key_categories: + category = self.key_categories[item['category']] + else: + self.missing_cats.add(item['category']) + cls = self.marker_cls + if 'point' in item: + x, y = item['point'].split(",") + dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y) + elif 'lat' in item and item['lat']: + dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'], + item['lat']) + else: + dct['point'] = self.importer_instance.default_localisation + dct['description'] = item.get('description', '') + if 'date' in item: + dct.update(self.parse_date(item['date'])) + if "start_date" in item and item["start_date"]: + dct['start_date'] = item["start_date"] + if "end_date" in item and item["end_date"]: + dct['end_date'] = item["end_date"] + key = item['key'] + it, updated, created = self.create_or_update_item(cls, dct, key, + category=category) + if updated: + self.updated_item += 1 + if created: + self.new_item += 1 + + +class XMLXsltManager(HtmlXsltManager): + PARSER = 'XMLParser' + +import icalendar + + +class IcalManager(ImportManager): + def get(self): + u""" + Get data from an icalendar source + """ + from models import Marker + new_item, updated_item, msg = 0, 0, '' + source, msg = self.get_source_file([]) + if msg: + return (0, 0, msg) + + data = source.read() + try: + cal = icalendar.Calendar.from_ical(data) + except ValueError as e: + return (new_item, updated_item, + _(u"Error on icalendar parsing: " + e.message)) + + default_dct = {'origin': self.importer_instance.origin, + 'license': self.importer_instance.license} + if self.importer_instance.default_localisation: + default_dct['point'] = self.importer_instance.default_localisation + + for event in cal.walk('VEVENT'): + dct = default_dct.copy() + dct['name'] = 
event.get('SUMMARY', '') + if dct['name']: + dct['name'] = unicode(dct['name']) + dct['description'] = event.get('DESCRIPTION', '') + if dct['description']: + dct['description'] = unicode(dct['description']) + loc = event.get('LOCATION', None) + if loc: + dct['description'] += u"<br/>{}".format(unicode(loc)) + url = event.get('URL', None) + if url: + dct['description'] += u"<br/><a href='{}'>{}</a>".format( + unicode(url), unicode(_(u'Link'))) + dct['start_date'] = event.get('DTSTART', None) + if dct['start_date']: + dct['start_date'] = event.decoded('DTSTART') + dct['end_date'] = event.get('DTEND', None) + if dct['end_date']: + dct['end_date'] = event.decoded('DTEND') + point = event.get('GEO', None) + if point: + dct['point'] = 'SRID=4326;POINT(%s %s)' % (point.longitude, + point.latitude) + + if not dct.get('point', None): + continue + + cls = Marker + pl_id = event.get('UID', None) + if not pl_id: + pl_id = dct['name'] + "-" + unicode(self.importer_instance.pk) + pl_id += "-" + unicode(self.importer_instance.pk) + it, updated, created = self.create_or_update_item(cls, dct, pl_id) + if updated: + updated_item += 1 + if created: + new_item += 1 + return (new_item, updated_item, msg) |
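A minimal usage sketch of the managers above, assuming an Importer model instance that exposes the attributes read by ImportManager (source, source_file, filtr, categories, overwrite, ...); the run_import helper is hypothetical:

    from chimere.utils import KMLManager

    def run_import(importer_instance):
        # Each manager wraps an importer instance; get() fetches the
        # source and returns (new items, updated items, error message).
        manager = KMLManager(importer_instance)
        new_items, updated_items, msg = manager.get()
        if msg:
            return u"Import failed: %s" % msg
        return u"%d item(s) created, %d item(s) updated" % (
            new_items, updated_items)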