Diffstat (limited to 'chimere/utils.py')
-rw-r--r--  chimere/utils.py  1357
1 file changed, 1357 insertions, 0 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
new file mode 100644
index 0000000..0d84be3
--- /dev/null
+++ b/chimere/utils.py
@@ -0,0 +1,1357 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2012-2016 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# See the file COPYING for details.
+
+"""
+Utilities
+"""
+
+import csv
+import collections
+import datetime
+import feedparser
+import json
+import os
+import re
+import StringIO
+import tempfile
+import urllib2
+import unicodedata
+import zipfile
+
+from osgeo import ogr, osr
+from lxml import etree
+
+from django.conf import settings
+from django.contrib.gis.gdal import DataSource, OGRGeomType, check_err
+from django.contrib.gis.geos import GEOSGeometry
+from django.core.exceptions import ObjectDoesNotExist
+from django.shortcuts import render_to_response
+from django.utils.translation import ugettext_lazy as _
+
+from chimere import get_version
+from external_utils import OsmApi
+
+
+def unicode_normalize(string):
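+    u"""
+    Strip combining marks and math/currency symbols from a string after
+    NFD decomposition, e.g. u'Étienne' becomes u'Etienne'.
+    """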
+    if isinstance(string, str):
+        string = string.decode('utf-8')
+ return ''.join(
+ (c for c in unicodedata.normalize('NFD', string)
+ if unicodedata.category(c) not in ('Mn', 'Sm', 'Sc')))
+
+
+class ImportManager(object):
+ u"""
+ Generic class for specific importers
+ """
+ default_source = None
+
+ def __init__(self, importer_instance):
+ self.importer_instance = importer_instance
+ if self.importer_instance.default_name:
+ self.default_name = self.importer_instance.default_name
+ else:
+ self.default_name = " - ".join([
+ cat.name
+ for cat in self.importer_instance.categories.order_by(
+ 'name').all()])
+
+ def get(self):
+ raise NotImplementedError
+
+ def put(self, extra_args={}):
+ raise NotImplementedError
+
+ def create_or_update_item(self, cls, values, import_key, version=None,
+ key='', pk=None, category=None):
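+        u"""
+        Create or update an item of class ``cls`` from ``values``.
+
+        Return an (item, updated, created) tuple: (item, None, None) when
+        the item is skipped (unchanged import version, or local changes
+        with overwrite disabled), (None, False, False) on a data source
+        error.
+        """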
+ from models import PropertyModel
+ updated, created, item = False, False, None
+ import_key = unicode(import_key).replace(':', '^')
+ if not values.get('name'):
+ values['name'] = self.default_name
+ if not key:
+ key = self.importer_instance.importer_type
+ item = None
+ pms = [pm["slug"] for pm in PropertyModel.objects.values('slug').all()]
+ properties = {}
+ for k in values.keys():
+ if k in pms:
+ properties[k] = values.pop(k)
+        if import_key or pk:
+            dct_import = {
+                'import_key__icontains': '%s:%s;' % (key, import_key),
+                'import_source': self.importer_instance.source}
+            try:
+                item = None
+                if pk:
+                    ref_item = cls.objects.get(pk=pk)
+                else:
+                    ref_items = cls.objects.filter(**dct_import)
+                    if not ref_items.count():
+                        raise ObjectDoesNotExist
+                    ref_item = ref_items.all()[0]
+ if version and ref_item.import_version == int(version):
+ # no update since the last import
+ return ref_item, None, None
+ if not self.importer_instance.overwrite \
+ and ref_item.modified_since_import:
+ return ref_item, None, None
+ else:
+ item = ref_item
+ for k in values:
+ if values[k]:
+ setattr(item, k, values[k])
+ try:
+ item.save()
+ # force the modified_since_import status
+ item.modified_since_import = False
+ item.save()
+ except TypeError:
+ # error on data source
+ return None, False, False
+ updated = True
+ except ObjectDoesNotExist:
+ pass
+ if not item:
+ if not self.importer_instance.get_description and \
+ self.importer_instance.default_description:
+ values['description'] = \
+ self.importer_instance.default_description
+ values.update({
+ 'import_source': self.importer_instance.source})
+ values['status'] = self.importer_instance.default_status
+ if not self.importer_instance.associate_marker_to_way\
+ and cls.__name__ == 'Route':
+ values['has_associated_marker'] = False
+
+ try:
+ item = cls.objects.create(**values)
+ item.modified_since_import = False
+ item.save()
+ except TypeError:
+ # error on data source
+ return None, False, False
+ created = True
+ if import_key:
+ item.set_key(key, import_key)
+ item.categories.clear()
+ if category:
+ item.categories.add(category)
+ else:
+ for cat in self.importer_instance.categories.all():
+ item.categories.add(cat)
+ for prop in properties:
+ item.setProperty(prop, properties[prop])
+ return item, updated, created
+
+ @classmethod
+ def get_files_inside_zip(cls, zippedfile, suffixes, dest_dir=None):
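+        u"""
+        Return the archive members matching each suffix: file names
+        extracted to ``dest_dir`` when it is given, open file objects
+        otherwise. A None entry marks a suffix with no match; a
+        zipfile.BadZipfile exception propagates for invalid archives.
+        """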
+        # let zipfile.BadZipfile propagate: callers catch it, and the
+        # return type stays a plain list
+        flz = zipfile.ZipFile(zippedfile)
+ namelist = flz.namelist()
+ filenames = []
+ for suffix in suffixes:
+ current_file_name = None
+ for name in namelist:
+ if name.endswith(suffix) \
+ or name.endswith(suffix.lower()) \
+ or name.endswith(suffix.upper()):
+ current_file_name = name
+ filenames.append(current_file_name)
+ files = []
+ for filename in filenames:
+ if filename:
+ if dest_dir:
+ files.append(filename)
+ flz.extract(filename, dest_dir)
+ else:
+ files.append(flz.open(filename))
+ else:
+ files.append(None)
+ return files
+
+ def get_source_file(self, suffixes, dest_dir=None,
+ extra_url=None):
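+        u"""
+        Return a (source, error_message) tuple. The source is the
+        importer's uploaded file, a remote URL or a local path; zipped
+        sources are unpacked and a list of files is returned when
+        several suffixes are requested.
+        """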
+ source = self.importer_instance.source_file
+ if not hasattr(source, 'read'):
+ if not source:
+ source = self.importer_instance.source \
+ if self.importer_instance.source else self.default_source
+ try:
+ url = source
+ if extra_url:
+ url += extra_url
+ remotehandle = urllib2.urlopen(url)
+ source = StringIO.StringIO(remotehandle.read())
+ remotehandle.close()
+ except ValueError:
+ # assume it is a local file
+ try:
+ source = open(source)
+            except IOError as msg:
+ return (None, msg)
+ except (urllib2.URLError, AttributeError) as error:
+ return (None, error.message)
+ if self.importer_instance.zipped:
+ try:
+ files = self.get_files_inside_zip(source, suffixes, dest_dir)
+ except zipfile.BadZipfile:
+ return (None, _(u"Bad zip file"))
+ if not files or None in files:
+ return (None,
+ _(u"Missing file(s) inside the zip file"))
+ source = files[0] if len(suffixes) == 1 else files
+ return (source, None)
+
+
+class KMLManager(ImportManager):
+ u"""
+ KML importer
+    The filtr argument must be the exact name of the KML folder to
+    import.
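+    e.g. with filtr set to "My places" (an illustrative value), only the
+    Placemark elements of the folder named "My places" are imported.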
+ """
+ XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
+ DEFAULT_XPATH = '//kml:Placemark'
+
+ def __init__(self, importer_instance, ns=''):
+ super(KMLManager, self).__init__(importer_instance)
+ self.ns = ns
+
+ def get(self):
+ u"""
+ Get data from a KML source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
+ """
+ from models import Marker, Route
+ new_item, updated_item, msg = 0, 0, ''
+ source, msg = self.get_source_file(['.kml'])
+ if msg:
+ return (0, 0, msg)
+ doc = source
+ # remove empty lines before declaration (bad XML file)
+ if hasattr(source, 'getvalue'):
+ splitted = source.getvalue().split('\n')
+ for idx, line in enumerate(splitted):
+ if line.strip():
+ break
+ doc = StringIO.StringIO("\n".join(splitted[idx:]))
+ try:
+ tree = etree.parse(doc)
+        except etree.XMLSyntaxError:
+ return (0, 0, _(u"Bad XML file"))
+ # try to get default namespace
+ if not self.ns:
+ self.ns = tree.getroot().nsmap[None]
+ xpath = self.XPATH % self.importer_instance.filtr \
+ if self.importer_instance.filtr else self.DEFAULT_XPATH
+ for placemark in tree.xpath(xpath,
+ namespaces={'kml': self.ns}):
+ name, point, line = None, None, None
+ pl_id = placemark.attrib.get('id')
+ pl_key = 'kml-%d' % self.importer_instance.pk
+ ns = '{%s}' % self.ns
+ description = ''
+ for item in placemark:
+ if item.tag == ns + 'name':
+ name = item.text
+ if not pl_id:
+ # if no ID is provided assume that name is a key
+ pl_id = name
+ elif item.tag == ns + 'description':
+ if self.importer_instance.get_description:
+ description = item.text
+ elif item.tag == ns + 'Point':
+ for coord in item:
+                    if coord.tag == ns + 'coordinates':
+                        # the altitude component is optional in KML
+                        x, y = coord.text.split(',')[:2]
+                        point = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif item.tag == ns + 'LineString':
+ for coord in item:
+ if coord.tag == ns + 'coordinates':
+ points = coord.text.replace('\n', ' ').split(' ')
+ points = ",".join([" ".join(p.split(',')[:2])
+ for p in points if p])
+ line = 'SRID=4326;LINESTRING(%s)' % points
+ cls = None
+ dct = {'description': description,
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ if point:
+ dct['point'] = point
+ cls = Marker
+ if line:
+ dct['route'] = line
+ dct.pop('description')
+ cls = Route
+ if cls:
+ item, updated, created = self.create_or_update_item(
+ cls, dct, pl_id, key=pl_key)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ return (new_item, updated_item, msg)
+
+ @classmethod
+ def export(cls, queryset):
+ dct = {
+ 'name': settings.PROJECT_NAME,
+ 'description': unicode(datetime.date.today()),
+ 'locations': queryset.all()
+ }
+ filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
+ + '.kml')
+ result = render_to_response('chimere/export.kml', dct)
+ return filename, result
+
+
+class ShapefileManager(ImportManager):
+ u"""
+ Shapefile importer
+ """
+ def get(self):
+ u"""
+ Get data from a Shapefile source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
+
+        The filtr argument allows specifying the mapping between the
+        shapefile columns and the database fields, as a JSON string.
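+
+        A hypothetical filter (column names are illustrative):
+        {"GID": "id", "LABEL": "name"} maps the shapefile column GID to
+        the import key and LABEL to the item name; prefix_name and
+        prefix_description entries provide constant prefixes.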
+ """
+ from models import Marker, Route, Polygon
+ new_item, updated_item, msg = 0, 0, ''
+ tmpdir = tempfile.mkdtemp()
+ sources, msg = self.get_source_file(['.shp', '.dbf', '.prj', '.shx'],
+ dest_dir=tmpdir)
+ if msg:
+ return (0, 0, msg)
+ if not sources:
+ return (0, 0, _(u"Error while reading the data source."))
+ # get the srid
+ srid = self.importer_instance.srid
+ if not srid:
+ prjfilename = tmpdir + os.sep + sources[2]
+            try:
+                # osr is imported at module level; guard against a
+                # missing or unreadable .prj file
+                with open(prjfilename, 'r') as prj_file:
+                    prj_txt = prj_file.read()
+                srs = osr.SpatialReference()
+                srs.ImportFromESRI([prj_txt])
+                srs.AutoIdentifyEPSG()
+                srid = srs.GetAuthorityCode(None)
+            except IOError:
+                pass
+ if not srid:
+ # try with the default projection
+ srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
+ msg = _(u"SRID cannot be guessed. The default SRID (%s) has "
+ u"been used.") % srid
+ shapefilename = tmpdir + os.sep + sources[0]
+ ds = DataSource(shapefilename)
+ lyr = ds[0]
+ default_dct = {}
+ filtr = self.importer_instance.filtr
+ if filtr:
+ try:
+ filtr = json.JSONDecoder().decode(self.importer_instance.filtr)
+ except ValueError:
+ return (
+ new_item, updated_item,
+ _(u"Bad configuration: filter must be a valid "
+ u"JSON string"))
+ for k in ('id',):
+ if k not in filtr:
+ return (
+ new_item, updated_item,
+ _(u"The key \"%s\" is missing in the "
+ u"filter.") % k)
+            for k in filtr:
+                try:
+                    lyr.get_fields(k)
+                except:  # the exception type depends on the GDAL version
+                    return (
+                        new_item, updated_item,
+                        _(u"Config: {} is not an appropriate column name "
+                          u"for this Shapefile. Available columns "
+                          u"are: {}").format(k, u", ".join(
+                              [j for j in lyr.fields])))
+ default_dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ if 'prefix_name' in filtr:
+ default_dct['name'] = filtr.pop('prefix_name')
+ if 'prefix_description' in filtr:
+ default_dct['description'] = filtr.pop('prefix_description')
+ else:
+            # if no filtr, it is assumed that the first field is an id
+            # and the second field is the name
+            id_name = lyr.fields[0] if len(lyr.fields) > 0 else None
+            # check that the guessed id is really unique
+ if id_name:
+ ids = lyr.get_fields(id_name)
+ if len(ids) != len(set(ids)):
+ id_name = None
+ filtr['id'] = id_name
+ if len(lyr.fields) > 1:
+ filtr["name"] = lyr.fields[1]
+ elif id_name:
+ filtr["name"] = id_name
+
+ if lyr.geom_type not in ('Point', 'LineString', 'Polygon'):
+ return (0, 0, _(u"Type of geographic item (%s) of this shapefile "
+ u"is not managed by Chimère.") % lyr.geom_type)
+ geom_key = ''
+ geom_cls = None
+ if lyr.geom_type == 'Point':
+ geom_key = 'point'
+ geom_cls = Marker
+ elif lyr.geom_type == 'Polygon':
+ geom_key = 'polygon'
+ geom_cls = Polygon
+ else:
+ geom_key = 'route'
+ geom_cls = Route
+ for idx, feat in enumerate(lyr):
+ dct = default_dct.copy()
+ for k in filtr:
+ val = feat.get(k)
+ try:
+ val = unicode(val)
+ except UnicodeDecodeError:
+ try:
+ val = unicode(
+ val.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
+ except:
+ continue
+ if filtr[k] not in dct:
+ dct[filtr[k]] = ''
+ dct[filtr[k]] += val
+ try:
+ geoms = [feat.geom.wkt]
+ except:
+ return (0, 0, _(u"Bad Shapefile"))
+ if feat.geom.geom_type == 'MultiLineString':
+ geoms = [geom.wkt for geom in feat.geom]
+ import_key = dct.pop('id')
+ for geom in geoms:
+ dct[geom_key] = 'SRID=%s;%s' % (srid, geom)
+ item, updated, created = self.create_or_update_item(
+ geom_cls, dct, import_key)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ # clean up
+ tmpdirs = set()
+ for src in sources:
+ dirs = os.sep.join(src.split(os.sep)[:-1])
+ if dirs:
+ tmpdirs.add(tmpdir + os.sep + dirs)
+ os.remove(tmpdir + os.sep + src)
+ for dr in tmpdirs:
+ os.removedirs(dr)
+ return (new_item, updated_item, msg)
+
+ @classmethod
+ def export(cls, queryset):
+ date = unicode(datetime.date.today())
+
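+        # the temporary file is closed at once: only its unique name is
+        # reused below as the path of the OGR datasource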
+ tmp = tempfile.NamedTemporaryFile(suffix='.shp', mode='w+b')
+ tmp.close()
+
+ tmp_name = tmp.name
+ field_names = [field.name for field in queryset.model._meta.fields]
+ geo_field = getattr(
+ queryset.model,
+ 'point' if 'point' in field_names else 'route')._field
+
+ dr = ogr.GetDriverByName('ESRI Shapefile')
+ ds = dr.CreateDataSource(tmp_name)
+ if ds is None:
+ raise Exception(_(u'Could not create file!'))
+ ogr_type = OGRGeomType(geo_field.geom_type).num
+ srs = osr.SpatialReference()
+ srs.ImportFromEPSG(geo_field.srid)
+
+ layer = ds.CreateLayer('lyr', srs=srs, geom_type=ogr_type)
+
+ for field_name in ('name', 'category'):
+ field_defn = ogr.FieldDefn(str(field_name), ogr.OFTString)
+ field_defn.SetWidth(255)
+ if layer.CreateField(field_defn) != 0:
+ raise Exception(_(u'Failed to create field'))
+
+ feature_def = layer.GetLayerDefn()
+
+ for item in queryset:
+ # duplicate items when in several categories
+ q = item.categories
+ if not q.count():
+ categories = [None]
+ else:
+ categories = q.all()
+ for category in categories:
+ feat = ogr.Feature(feature_def)
+ feat.SetField('name', str(unicode_normalize(item.name)[:80]))
+ if category:
+ feat.SetField('category',
+ str(unicode_normalize(category.name)[:80]))
+
+ geom = getattr(item, geo_field.name)
+ if not geom:
+ continue
+ ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
+ check_err(feat.SetGeometry(ogr_geom))
+ check_err(layer.CreateFeature(feat))
+ # Cleaning up
+ ds.Destroy()
+
+ # writing to a zip file
+ filename = unicode_normalize(settings.PROJECT_NAME) + '-' + date
+ buff = StringIO.StringIO()
+ zip_file = zipfile.ZipFile(buff, 'w', zipfile.ZIP_DEFLATED)
+ suffixes = ['shp', 'shx', 'prj', 'dbf']
+ for suffix in suffixes:
+ name = tmp_name.replace('.shp', '.' + suffix)
+ arcname = '.'.join((filename, suffix))
+ zip_file.write(name, arcname=arcname)
+ zip_file.close()
+ buff.flush()
+ zip_stream = buff.getvalue()
+ buff.close()
+ return filename, zip_stream
+
+
+class CSVManager(ImportManager):
+ u"""
+ CSV importer
+ """
+    @classmethod
+    def set_categories(cls, value):
+        # setting categories from a CSV import is not handled yet
+        return
+
+ # (label, getter, setter)
+ COLS = [("Id", 'pk', 'pk'), (_(u"Name"), 'name', 'name'),
+ (_(u"Categories"), lambda obj: ", ".join(
+ [c.name for c in obj.categories.all()]), set_categories),
+ (_(u"State"), 'status', lambda x: x),
+ (_(u"Description"), 'description', 'description'),
+ (_(u"Localisation"), 'geometry', 'geometry')]
+
+ def get(self):
+ u"""
+ Get data from a CSV source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
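+
+        Expected columns, separated by ";": Id, Name, Categories, State,
+        Description, Localisation (a WKT geometry containing POINT or
+        LINE), then one column per property defined in the project.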
+ """
+ from models import Marker, Route
+ new_item, updated_item, msg = 0, 0, ''
+ source, msg = self.get_source_file(['.csv'])
+ if msg:
+ return (0, 0, msg)
+ reader = csv.reader(source, delimiter=';', quotechar='"')
+ prop_cols = []
+ for pm in Marker.all_properties():
+ prop_cols.append((pm.name, pm.getAttrName(),
+ pm.getAttrName() + '_set'))
+ cols = list(self.COLS) + prop_cols
+ # datas = []
+ for idx, row in enumerate(reader):
+            if not idx:  # header row
+                if len(row) < len(cols):
+                    return (0, 0, _(u"Invalid CSV format"))
+                continue
+ if len(row) < len(cols):
+ continue
+ pk, name = row[0], row[1]
+ geom = row[5]
+ description = ''
+ if self.importer_instance.get_description:
+ description = row[4]
+ COL_INDEX = 6
+ dct = {'description': description,
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ cls = None
+ if 'POINT' in geom:
+ cls = Marker
+ dct['point'] = geom
+ elif 'LINE' in geom:
+ cls = Route
+ dct['route'] = geom
+ else:
+ continue
+ import_key = pk if pk else name.decode('utf-8')
+ item, updated, created = self.create_or_update_item(
+ cls, dct, import_key, pk=pk)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ for idx, col in enumerate(cols[COL_INDEX:]):
+ name, getter, setter_val = col
+ setter = getattr(item, setter_val)
+ val = row[idx + COL_INDEX]
+ setter(item, val)
+ return (new_item, updated_item, msg)
+
+ @classmethod
+ def export(cls, queryset):
+ dct = {'description': unicode(datetime.date.today()), 'data': []}
+ # cls_name = queryset.model.__name__.lower()
+ cols = list(cls.COLS)
+ for pm in queryset.model.all_properties():
+ cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set'))
+ header = [col[0] for col in cols]
+ dct['data'].append(header)
+ for item in queryset.all():
+ data = []
+ for (lbl, attr, setr) in cols:
+ if callable(attr):
+ data.append(attr(item))
+ else:
+ data.append(getattr(item, attr))
+ dct['data'].append(data)
+ filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
+ + '.csv')
+ result = render_to_response('chimere/export.csv', dct)
+ return filename, result
+
+
+class GeoRSSManager(ImportManager):
+ u"""
+    GeoRSS importer.
+    This manager only imports and does not produce GeoRSS feeds.
+ """
+
+ def get(self):
+ u"""
+ Get data from a GeoRSS simple source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
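+
+        A georss:point element carries coordinates as "lat lon"; they
+        are swapped to x/y order when the WKT point is built.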
+ """
+ from models import Marker, Route
+ new_item, updated_item, msg = 0, 0, ''
+ feed = feedparser.parse(self.importer_instance.source)
+ if feed['bozo'] and not isinstance(
+ feed['bozo_exception'], feedparser.CharacterEncodingOverride):
+ return (0, 0, _(u"RSS feed is not well formed"))
+ for item in feed['items']:
+ if "georss_point" not in item and 'georss_line' not in item \
+ and not ("geo_lat" in item and "geo_long" in item):
+ continue
+ cls = None
+ dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ if 'georss_point' in item or "geo_lat" in item:
+ cls = Marker
+ if 'georss_point' in item:
+ try:
+ y, x = item['georss_point'].split(' ')
+ except ValueError:
+ continue
+ else:
+ y = item['geo_lat']
+ x = item['geo_long']
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ if self.importer_instance.get_description:
+ for k in ['description', 'summary', 'value']:
+ if k in item:
+ dct['description'] = item[k]
+ break
+ else:
+ cls = Route
+ points = item['georss_line'].split(' ')
+ reordered_points = []
+ # lat, lon -> x, y
+            for idx in xrange(len(points) // 2):
+ reordered_points.append("%s %s" % (points[idx * 2 + 1],
+ points[idx * 2]))
+ dct['route'] = 'SRID=4326;LINESTRING(%s)' % \
+ ",".join(reordered_points)
+
+ dct['name'] = item['title']
+ pl_id = item['id'] if 'id' in item else item['title']
+ it, updated, created = self.create_or_update_item(cls, dct, pl_id)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ return (new_item, updated_item, msg)
+
+
+class JsonManager(ImportManager):
+ u"""
+    JSON importer.
+    This manager only imports and does not produce JSON feeds.
+ """
+
+ def get(self):
+ u"""
+ Get data from a json simple source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
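+
+        The filtr field maps source keys to Chimère fields and must
+        associate keys with "name", "id" and "description", e.g. with
+        illustrative source keys:
+        {"uid": "id", "title": "name", "body": "description"}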
+ """
+ from models import Marker
+ new_item, updated_item, msg = 0, 0, ''
+ source, msg = self.get_source_file(['.json'])
+ if msg:
+ return (0, 0, msg)
+
+ vals = source.read().replace('\n', ' ')
+ try:
+ values = json.JSONDecoder(
+ object_pairs_hook=collections.OrderedDict).decode(vals)
+        except ValueError as e:
+            return (new_item, updated_item,
+                    _(u"JSON file is not well formed: %s") % e.message)
+ # configuration in filtr
+ try:
+ filtr = json.JSONDecoder().decode(self.importer_instance.filtr)
+ except ValueError:
+ return (
+ new_item, updated_item,
+ _(u"Bad configuration: filter field must be a valid "
+ u"JSON string"))
+
+ vls = filtr.values()
+ for k in ('name', 'id', 'description'):
+ if k not in vls:
+ return (
+ new_item, updated_item,
+ _(u"A key must be associated to \"%s\" in the "
+ u"filter.") % k)
+
+ default_dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ if 'prefix_name' in filtr:
+ default_dct['name'] = filtr.pop('prefix_name')
+ if 'prefix_description' in filtr:
+ default_dct['description'] = filtr.pop('prefix_description')
+ if self.importer_instance.default_localisation:
+ default_dct['point'] = self.importer_instance.default_localisation
+
+ for item in values:
+ dct = default_dct.copy()
+ for k in filtr:
+ if k.startswith('prefix_') or k.startswith('suffix_'):
+ continue
+ if k in item and item[k]:
+ if filtr[k] not in dct:
+ dct[filtr[k]] = ""
+ else:
+ if filtr[k] == 'description':
+ dct[filtr[k]] += "<br/>"
+ else:
+ dct[filtr[k]] += " "
+ dct[filtr[k]] += item[k]
+ if 'point' in item:
+ x, y = item['point'].split(",")
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif 'lat' in item and item['lat'] \
+ and 'lon' in item and item['lon']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+ item['lat'])
+ elif 'x' in item and item['x'] \
+ and 'y' in item and item['y']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['x'],
+ item['y'])
+            if not dct.get('point'):
+ continue
+ for k in filtr:
+ if k.startswith('prefix_') or k.startswith('suffix_'):
+ pos = k.split('_')[0]
+ key = '_'.join(k.split('_')[1:])
+ if key in dct:
+ if pos == 'prefix':
+ dct[key] = filtr[k] + dct[key]
+ else:
+ dct[key] += filtr[k]
+ cls = Marker
+ pl_id = (dct.pop('id') if 'id' in dct else dct['name']) \
+ + "-" + unicode(self.importer_instance.pk)
+ it, updated, created = self.create_or_update_item(cls, dct, pl_id)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ return (new_item, updated_item, msg)
+
+RE_HOOK = re.compile(r'\[([^\]]*)\]')
+
+# TODO: manage deleted item from OSM
+
+
+class OSMManager(ImportManager):
+ u"""
+ OSM importer/exporter
+    The source url is a path to an OSM file or an XAPI URL.
+    The filtr argument holds XAPI arguments, or is empty for an OSM
+    file.
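+    e.g. an illustrative XAPI filtr:
+    node[amenity=drinking_water][bbox=-1.68,47.15,-1.48,47.28]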
+ """
+ default_source = settings.CHIMERE_XAPI_URL
+
+ def get(self):
+ u"""
+ Get data from the source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
+ """
+ source, msg = self.get_source_file(
+ ['.osm'], extra_url=self.importer_instance.filtr)
+ if not source:
+ return (0, 0, msg)
+
+ tree = etree.parse(source)
+        # only import nodes or ways
+ if tree.xpath('count(//way)') and tree.xpath('count(//node)'):
+ return self.import_ways(tree)
+ elif tree.xpath('count(//node)'):
+ return self.import_nodes(tree)
+ return 0, 0, _(u"Nothing to import")
+
+ def import_ways(self, tree):
+ from chimere.models import Route
+ msg, items, new_item, updated_item = "", [], 0, 0
+ nodes = {}
+        for node in tree.xpath('//node'):
+            node_id = node.attrib.get('id')
+            # record coordinates for every node, tagged or not: ways
+            # reference them through their nd elements
+            if node_id:
+                nodes[node_id] = '%s %s' % (node.get('lon'),
+                                            node.get('lat'))
+ for way in tree.xpath('//way'):
+ name = None
+ points = []
+ node_id = way.attrib.get('id')
+ version = way.attrib.get('version')
+ for item in way:
+ k = item.attrib.get('k')
+ if k == 'name':
+ name = item.attrib.get('v')
+ if item.tag == 'nd':
+ points.append(item.get('ref'))
+ if not points:
+ continue
+ wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(
+ [nodes[point_id] for point_id in points if point_id in nodes])
+ dct = {'route': wkt,
+ 'name': name,
+ 'origin': self.importer_instance.origin
+ or u'OpenStreetMap.org',
+ 'license': self.importer_instance.license
+ or u'ODbL',
+ 'import_version': version}
+ item, updated, created = self.create_or_update_item(
+ Route, dct, node_id, version)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ items.append(item)
+ return new_item, updated_item, msg
+
+ def import_nodes(self, tree):
+ from chimere.models import Marker
+ msg, items, new_item, updated_item = "", [], 0, 0
+ for node in tree.xpath('//node'):
+ name = None
+ node_id = node.attrib.get('id')
+ if not node_id:
+ continue
+ version = node.attrib.get('version')
+ for item in node:
+ k = item.attrib.get('k')
+ if k == 'name':
+ name = item.attrib.get('v')
+ point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
+ node.get('lat'))
+ dct = {'point': point,
+ 'name': name,
+ 'origin': self.importer_instance.origin
+ or u'OpenStreetMap.org',
+ 'license': self.importer_instance.license
+ or u'ODbL',
+ 'import_version': version}
+ item, updated, created = self.create_or_update_item(
+ Marker, dct, node_id, version)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ items.append(item)
+ return (new_item, updated_item, msg)
+
+ def put(self, extra_args={}):
+        # first of all: re-import to check that no changes have been
+        # made since the last import
+ from models import Marker
+ new_item, updated_item, msg = self.get()
+ # check if import is possible
+ if msg:
+ return 0, msg
+ if new_item:
+ return 0, _(u"New items imported - validate them before exporting")
+ if Marker.objects.filter(status='I').count():
+ return 0, _(u"There are items from a former import not yet "
+ u"validated - validate them before exporting")
+ # start import
+ api = settings.CHIMERE_OSM_API_URL
+ username = settings.CHIMERE_OSM_USER
+ password = settings.CHIMERE_OSM_PASSWORD
+ if extra_args:
+ try:
+ api = extra_args['api']
+ username = extra_args['username']
+ password = extra_args['password']
+ except KeyError:
+ return 0, _(u"Bad params - programming error")
+ username = username.encode('latin1')
+ password = password.encode('latin1')
+ api = OsmApi.OsmApi(api=api, username=username, password=password)
+ api.ChangesetCreate({u"comment": u"Import from Chimère %s" %
+ get_version()})
+ hooks = RE_HOOK.findall(self.importer_instance.filtr)
+ if not hooks:
+ hooks = RE_HOOK.findall(self.importer_instance.source)
+ if not hooks:
+ return 0, _(u"Bad param")
+ tags = {}
+ bbox = []
+ for hook in hooks:
+ key, value = hook.split('=')
+ if '*' in value or '|' in key or '|' in value:
+ continue
+ if key == 'bbox':
+ x1, y1, x2, y2 = [float(val) for val in value.split(',')]
+ bbox = GEOSGeometry(
+ 'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % (
+ x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
+ continue
+ tags[key] = value
+ if not tags:
+ return 0, _(u"No non ambigious tag is defined in the XAPI request")
+ if not bbox:
+ return 0, _(
+ u"No bounding box is defined in the XAPI request."
+ u"If you are sure to manage the entire planet set the "
+ u"bounding box to -180,-90,180,90")
+ default_dct = {'tag': tags,
+ 'import_source': self.importer_instance.source}
+ idx = -1
+ for idx, item in enumerate(
+ Marker.objects.filter(
+ status='A',
+ point__contained=bbox,
+                categories__in=self.importer_instance.categories.all(),
+ not_for_osm=False, modified_since_import=True,
+ route=None).all()):
+            dct = default_dct.copy()
+            # the copy is shallow: take a private tag dict before adding
+            # the per-item name
+            dct['tag'] = dct['tag'].copy()
+            dct.update({'lon': item.point.x,
+                        'lat': item.point.y})
+ dct['tag']['name'] = item.name
+ node = None
+ import_key = item.get_key('OSM')
+ updated = False
+ if import_key:
+ try:
+ dct['id'] = import_key
+ dct['version'] = item.import_version
+ node = api.NodeUpdate(dct)
+ updated = True
+                except OsmApi.ApiError as error:
+                    if error.status == 404:
+                        # if the node no longer exists it is recreated
+                        dct.pop('id')
+                        dct.pop('version')
+                    else:
+                        raise
+ if not updated:
+ node = api.NodeCreate(dct)
+ item.set_key('OSM', node['id'])
+ item.import_version = node['version']
+ item.save()
+ api.ChangesetClose()
+ return idx + 1, None
+
+
+import chardet
+import HTMLParser
+from BeautifulSoup import BeautifulSoup
+
+
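+# cleaning passes applied to scraped values: strip leading/trailing
+# whitespace and newline runs, collapse repeated spaces and rewrite
+# relative <a href> targets against %(base_url)s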
+RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''),
+ (re.compile(' ( )*'), ' '),
+ (re.compile(r"""<a href=["'](?!https?)(.*)["']"""),
+ '<a href="%(base_url)s\\1"'),
+ )
+
+from calendar import TimeEncoding, month_name
+
+
+def get_month_name(month_no, locale):
+ with TimeEncoding(locale) as encoding:
+ s = month_name[month_no]
+ if encoding is not None:
+ s = s.decode(encoding)
+ return s
+
+MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8')
+ for no_month in xrange(1, 13)] for locale in ['fr_FR']}
+
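+# depending on the locale data the month names may already be unicode;
+# decoding then raises UnicodeEncodeError and the names are kept as-is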
+try:
+ UNI_MONTH_NAMES = {locale: [m.decode('utf-8') for m in MONTH_NAMES[locale]]
+ for locale in MONTH_NAMES}
+except UnicodeEncodeError:
+ UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]]
+ for locale in MONTH_NAMES}
+
+DATE_PARSINGS = {
+ 'fr_FR': [
+ re.compile(r'(?P<day1>\d{1,2}) '
+ r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') '
+ r'(?P<year1>\d{4})?[^\d]*'
+ r'(?P<day2>\d{1,2}) '
+ r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
+ r'(?P<year2>\d{4})?.*'),
+ re.compile(r'(?P<day1>\d{1,2}) '
+ r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') * '
+ r'(?P<year1>\d{4})?')],
+ 'en': [
+ re.compile(r'(?P<year1>\d{4})-'
+ r'(?P<month1>\d{2})-'
+ r'(?P<day1>\d{2})'
+ r'(?:T'
+ r'(?P<hour1>\d{2})?:'
+ r'(?P<minut1>\d{2})?:'
+ r'(?P<second1>\d{2})'
+ r')?.*'
+ r'(?P<year2>\d{4})-'
+ r'(?P<month2>\d{2})-'
+ r'(?P<day2>\d{2})'
+ r'(?:T'
+ r'(?P<hour2>\d{2})?:'
+ r'(?P<minut2>\d{2})?:'
+ r'(?P<second2>\d{2})'
+ r')?.*'),
+ re.compile(r'(?P<year1>\d{4})-'
+ r'(?P<month1>\d{2})-'
+ r'(?P<day1>\d{2})'
+ r'(?:T'
+ r'(?P<hour1>\d{2})?:'
+ r'(?P<minut1>\d{2})?:'
+ r'(?P<second1>\d{2})'
+ r')?')],
+}
+
+
+def clean_field(value):
+ return value.strip()
+
+
+class HtmlXsltManager(ImportManager):
+ PARSER = 'HTMLParser'
+
+ def get(self):
+ u"""
+ Get data from the source
+
+ Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
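+
+        The XSLT output is expected to be a root element whose children
+        describe items, with child tags such as key, name, description,
+        link, category, date, point or lat/lon (used by add_dct_item).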
+ """
+ from models import Marker
+ self.marker_cls = Marker
+ try:
+ main_page = urllib2.urlopen(self.importer_instance.source)
+ assert main_page.getcode() == 200
+ except (urllib2.URLError, AssertionError):
+ return (0, 0, _(u"Source page is unreachable."))
+ data = main_page.read()
+ encoding = chardet.detect(data)
+ data = data.decode(encoding['encoding'])
+
+        # re-serialize through BeautifulSoup to get parseable markup
+        soup = BeautifulSoup(data)
+        doc = soup.prettify()
+ dom = etree.HTML(doc, getattr(etree, self.PARSER)())
+ try:
+ xslt = etree.parse(self.importer_instance.source_file)
+ self.importer_instance.source_file.seek(0)
+ transform = etree.XSLT(xslt)
+ except (etree.XSLTParseError, etree.XMLSyntaxError, TypeError):
+ return (0, 0, _(u"The source file is not a valid XSLT file."))
+ newdom = transform(dom)
+ items = []
+ # load an alternate xslt file to apply to linked page
+ transform_child = None
+ if self.importer_instance.source_file_alt:
+ try:
+ alt_xslt = etree.parse(self.importer_instance.source_file_alt)
+ self.importer_instance.source_file_alt.seek(0)
+ transform_child = etree.XSLT(alt_xslt)
+ except (etree.XSLTParseError, etree.XMLSyntaxError, TypeError):
+ return (0, 0,
+ _(u"The alt source file is not a valid XSLT file."))
+ base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
+ base_url += u"/"
+ for item in newdom.getroot():
+ c_item = {child.tag: clean_field(child.text)
+ for child in item.getchildren() if child.text}
+ # try to have more information on the linked page
+ if transform_child and 'link' in c_item:
+ # not an absolute address
+ if not c_item['link'].startswith('http://') and \
+ not c_item['link'].startswith('https://'):
+ c_item['link'] = base_url + c_item['link']
+ try:
+ child_page = urllib2.urlopen(c_item['link'])
+ assert child_page.getcode() == 200
+ except (urllib2.URLError, AssertionError):
+                    # don't stop the import for a bad link
+ items.append(c_item)
+ continue
+ data = child_page.read()
+ encoding = chardet.detect(data)
+ data = data.decode(encoding['encoding'])
+ child_page = BeautifulSoup(data).prettify()
+ child_dom = etree.HTML(child_page, etree.HTMLParser())
+ extra_keys = transform_child(child_dom).getroot()
+ if len(extra_keys):
+ c_item.update({extra.tag: etree.tostring(extra)
+ for extra in extra_keys[0].getchildren()})
+ items.append(c_item)
+ # change relative link to full link, simplify, unescape HTML entities
+ html_unescape = HTMLParser.HTMLParser().unescape
+ for item in items:
+ for k in item:
+ val = item[k]
+ for r, replaced in RE_CLEANS:
+ val = re.sub(r, replaced % {'base_url': base_url}, val)
+ item[k] = html_unescape(val)
+ self.key_categories = self.importer_instance.get_key_category_dict()
+ self.missing_cats = set()
+ self.updated_item, self.new_item = 0, 0
+ for item in items:
+ self.add_dct_item(item)
+ msg = ''
+ if self.missing_cats:
+ msg = _(
+ u"Names \"%s\" doesn't match existing categories. "
+ u"Modify the import to match theses names with categories.") %\
+ (u'", "'.join(self.missing_cats))
+ return (self.new_item, self.updated_item, msg)
+
+ @classmethod
+ def _internal_parse_date(cls, locale, year, month, day):
+ try:
+ year = datetime.date.today().year if not year else int(year)
+ except ValueError:
+ return
+ month = month.encode('utf-8')
+ if locale in MONTH_NAMES and month in MONTH_NAMES[locale]:
+ month = MONTH_NAMES[locale].index(month) + 1
+ else:
+ try:
+ month = int(month)
+ except ValueError:
+ return
+ try:
+ day = int(day)
+ except ValueError:
+ return
+ try:
+ return datetime.date(year, month, day)
+ except ValueError:
+ return
+
+ def parse_date(self, date):
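+        u"""
+        Extract dates from a free-text string using the locale patterns
+        in DATE_PARSINGS. Return a dict with a 'start_date' key and,
+        when a second date is matched, an 'end_date' key.
+        """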
+ dct = {}
+ has_dates = False
+ for locale in DATE_PARSINGS:
+ if has_dates:
+ break
+ for r in DATE_PARSINGS[locale]:
+ m = r.search(date)
+ if not m:
+ continue
+ values = m.groupdict()
+ date = self._internal_parse_date(
+ locale, 'year1' in values and values['year1'],
+ values['month1'], values['day1'])
+ if not date:
+ continue
+ dct['start_date'] = date
+ has_dates = True
+ if 'day2' not in values:
+ break
+ date = self._internal_parse_date(
+ locale, 'year2' in values and values['year2'],
+ values['month2'], values['day2'])
+ if date:
+ dct['end_date'] = date
+ break
+ return dct
+
+ def add_dct_item(self, item):
+ if not self.importer_instance.default_localisation and \
+ "point" not in item and not ("lat" in item and item['lat']):
+ return
+ cls = None
+ origin = self.importer_instance.origin
+ origin_lnk = item.get('link')
+ if origin_lnk:
+ origin = u"<a href='%s' target='_blank'>%s</a>" % (
+ origin_lnk, origin)
+ dct = {
+ 'origin': origin,
+ 'license': self.importer_instance.license,
+ 'name': item['name']}
+ category = None
+ if 'category' in item and item['category']:
+ if item['category'] in self.key_categories:
+ category = self.key_categories[item['category']]
+ else:
+ self.missing_cats.add(item['category'])
+ cls = self.marker_cls
+ if 'point' in item:
+ x, y = item['point'].split(",")
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif 'lat' in item and item['lat']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+ item['lat'])
+ else:
+ dct['point'] = self.importer_instance.default_localisation
+ dct['description'] = item.get('description', '')
+ if 'date' in item:
+ dct.update(self.parse_date(item['date']))
+ if "start_date" in item and item["start_date"]:
+ dct['start_date'] = item["start_date"]
+ if "end_date" in item and item["end_date"]:
+ dct['end_date'] = item["end_date"]
+ key = item['key']
+ it, updated, created = self.create_or_update_item(cls, dct, key,
+ category=category)
+ if updated:
+ self.updated_item += 1
+ if created:
+ self.new_item += 1
+
+
+class XMLXsltManager(HtmlXsltManager):
+ PARSER = 'XMLParser'
+
+import icalendar
+
+
+class IcalManager(ImportManager):
+ def get(self):
+ u"""
+ Get data from an icalendar source
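+
+        Return a tuple with:
+        - number of new items;
+        - number of updated items;
+        - error detail on error.
+
+        Each VEVENT needs a GEO property (or a default localisation set
+        on the importer); SUMMARY, DESCRIPTION, LOCATION, URL, DTSTART
+        and DTEND are mapped to item fields.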
+ """
+ from models import Marker
+ new_item, updated_item, msg = 0, 0, ''
+ source, msg = self.get_source_file([])
+ if msg:
+ return (0, 0, msg)
+
+ data = source.read()
+ try:
+ cal = icalendar.Calendar.from_ical(data)
+ except ValueError as e:
+ return (new_item, updated_item,
+ _(u"Error on icalendar parsing: " + e.message))
+
+ default_dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ if self.importer_instance.default_localisation:
+ default_dct['point'] = self.importer_instance.default_localisation
+
+ for event in cal.walk('VEVENT'):
+ dct = default_dct.copy()
+ dct['name'] = event.get('SUMMARY', '')
+ if dct['name']:
+ dct['name'] = unicode(dct['name'])
+ dct['description'] = event.get('DESCRIPTION', '')
+ if dct['description']:
+ dct['description'] = unicode(dct['description'])
+ loc = event.get('LOCATION', None)
+ if loc:
+ dct['description'] += u"<br/>{}".format(unicode(loc))
+ url = event.get('URL', None)
+ if url:
+ dct['description'] += u"<br/><a href='{}'>{}</a>".format(
+ unicode(url), unicode(_(u'Link')))
+ dct['start_date'] = event.get('DTSTART', None)
+ if dct['start_date']:
+ dct['start_date'] = event.decoded('DTSTART')
+ dct['end_date'] = event.get('DTEND', None)
+ if dct['end_date']:
+ dct['end_date'] = event.decoded('DTEND')
+ point = event.get('GEO', None)
+ if point:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (point.longitude,
+ point.latitude)
+
+ if not dct.get('point', None):
+ continue
+
+ cls = Marker
+            pl_id = event.get('UID', None)
+            if not pl_id:
+                pl_id = dct['name']
+            pl_id += "-" + unicode(self.importer_instance.pk)
+ it, updated, created = self.create_or_update_item(cls, dct, pl_id)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ return (new_item, updated_item, msg)