#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2012  Étienne Loks

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# See the file COPYING for details.

"""
Utilities
"""

import datetime
import os
import re
import shutil
import StringIO
import tempfile
import urllib2
import unicodedata
import zipfile

from osgeo import ogr, osr
from lxml import etree

from django.conf import settings
from django.contrib.gis.gdal import DataSource, OGRGeomType, check_err
from django.core.exceptions import ObjectDoesNotExist
from django.shortcuts import render_to_response
from django.utils.translation import ugettext_lazy as _

from chimere import get_version
from external_utils import OsmApi


def unicode_normalize(string):
    u"""
    Return `string` with its combining marks removed.

    The string is decomposed (NFD) and every character whose Unicode
    category is 'Mn' (non-spacing mark) is dropped.
    """
    return ''.join(
        (c for c in unicodedata.normalize('NFD', string)
         if unicodedata.category(c) != 'Mn'))


class ImportManager:
    u"""
    Generic class for specific importers
    """
    # source used by get_source_file when neither the caller nor the
    # importer instance provides one
    default_source = None

    def __init__(self, importer_instance):
        self.importer_instance = importer_instance
        # default item name: the importer category names joined by " - "
        self.default_name = " - ".join([
            cat.name
            for cat in self.importer_instance.categories.order_by(
                'name').all()])

    def get(self):
        u"""
        Import items from the source (to be defined by subclasses)
        """
        pass

    def put(self):
        u"""
        Export items to the source (to be defined by subclasses)
        """
        pass

    def create_or_update_item(self, cls, values, import_key, version=None):
        u"""
        Create or update an item of the model `cls`

        Args:
            - cls: model class of the item;
            - values: dict of field values to set on the item (left
              unchanged by this method);
            - import_key: identifier of the item in the source - when
              empty a new item is always created;
            - version (None): version of the item in the source.

        Return a tuple (item, updated, created). When an existing item
        already has the given version it is returned untouched as
        (item, None, None).
        """
        updated, created, item = False, False, None
        importer = self.importer_instance
        if import_key:
            dct_import = {
                'import_key__icontains': '%s:%s;' % (
                    importer.importer_type, import_key),
                'import_source': importer.source}
            try:
                item = cls.objects.get(**dct_import)
            except ObjectDoesNotExist:
                item = None
            else:
                if version and item.import_version == int(version):
                    # no update since the last import
                    return item, None, None
                for k in values:
                    setattr(item, k, values[k])
                item.save()
                updated = True
        if not item:
            # work on a copy: the dict of the caller is not modified
            create_values = dict(values)
            create_values['import_source'] = importer.source
            create_values['status'] = 'I'
            item = cls.objects.create(**create_values)
            created = True
            if import_key:
                item.set_key(importer.importer_type, import_key)
        # categories of the item always mirror those of the importer
        item.categories.clear()
        for cat in importer.categories.all():
            item.categories.add(cat)
        return item, updated, created

    @classmethod
    def get_files_inside_zip(cls, zippedfile, suffixes, dest_dir=None):
        u"""
        Look inside a zip file for one member per suffix

        Args:
            - zippedfile: path or file-like object of the zip file;
            - suffixes: list of file name suffixes to look for;
            - dest_dir (None): when provided matching members are
              extracted inside this directory.

        Return a list with one entry per suffix (same order): the member
        name when `dest_dir` is provided, an opened member otherwise and
        None when no member matches. When several members match a suffix
        the last one of the archive is kept.
        When the file is not a valid zip file the tuple
        ([], error message) is returned instead.
        """
        try:
            flz = zipfile.ZipFile(zippedfile)
        except zipfile.BadZipfile:
            return [], _(u"Bad zip file")
        namelist = flz.namelist()
        filenames = []
        for suffix in suffixes:
            variants = (suffix, suffix.lower(), suffix.upper())
            current_file_name = None
            for name in namelist:
                if name.endswith(variants):
                    current_file_name = name
            filenames.append(current_file_name)
        files = []
        for filename in filenames:
            if not filename:
                files.append(None)
            elif dest_dir:
                files.append(filename)
                flz.extract(filename, dest_dir)
            else:
                files.append(flz.open(filename))
        return files

    def get_source_file(self, source, suffixes, dest_dir=None,
                        extra_url=None):
        u"""
        Get the file(s) to import

        Args:
            - source: file-like object, url or local path - when empty
              the source of the importer instance then `default_source`
              are used;
            - suffixes: suffixes of the files to get when the source is
              zipped;
            - dest_dir (None): directory where zipped files are
              extracted;
            - extra_url (None): string appended to the url.

        Return a tuple (source, error): on success `error` is None and
        `source` is a file-like object or, for a zipped source with
        several suffixes, the list returned by get_files_inside_zip. On
        error `source` is None.
        """
        if not hasattr(source, 'read'):
            if not source:
                source = self.importer_instance.source \
                    if self.importer_instance.source \
                    else self.default_source
            if not source:
                return (None, _(u"Error while reading the data source."))
            try:
                url = source
                if extra_url:
                    url += extra_url
                remotehandle = urllib2.urlopen(url)
                try:
                    source = StringIO.StringIO(remotehandle.read())
                finally:
                    remotehandle.close()
            except ValueError:
                # not an url: assume it is a local file
                try:
                    # binary mode: the file can be a zip file
                    source = open(source, 'rb')
                except IOError as msg:
                    return (None, msg)
            except urllib2.URLError as error:
                # `error.message` is empty for URLError: without a real
                # message callers would consider the download as a success
                return (None, str(error))
        if self.importer_instance.zipped:
            try:
                files = self.get_files_inside_zip(source, suffixes,
                                                  dest_dir)
            except zipfile.BadZipfile:
                return (None, _(u"Bad zip file"))
            if isinstance(files, tuple):
                # get_files_inside_zip reports an unreadable archive with
                # ([], message) and not with a list of files
                return (None, files[1])
            if not files or None in files:
                return (None, _(u"Missing file(s) inside the zip file"))
            source = files[0] if len(suffixes) == 1 else files
        return (source, None)


class KMLManager(ImportManager):
    u"""
    KML importer
    The filtr argument has to be defined as the exact name of the folder
    to be imported
    """
    XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
    DEFAULT_XPATH = '//kml:Placemark'

    def __init__(self, importer_instance, ns=''):
        self.importer_instance = importer_instance
        # XML namespace of the KML document - when empty it is read from
        # the document itself
        self.ns = ns

    def _parse_placemark(self, placemark, ns):
        u"""
        Return (name, description, point) of a Placemark element.

        `ns` is the namespace prefix of the tags ("{namespace}"). `point`
        is an EWKT string or None when the placemark has no usable Point.
        """
        name, description, point = None, None, None
        for item in placemark:
            if item.tag == ns + 'name':
                name = item.text
            elif item.tag == ns + 'description':
                description = item.text
            elif item.tag == ns + 'Point':
                for coord in item:
                    if coord.tag != ns + 'coordinates':
                        continue
                    # coordinates are "lon,lat" or "lon,lat,altitude"
                    coords = (coord.text or '').strip().split(',')
                    if len(coords) >= 2:
                        point = 'SRID=4326;POINT(%s %s)' % (
                            coords[0].strip(), coords[1].strip())
        return name, description, point

    def get(self, source=None):
        u"""
        Get data from the source
        Args:
            - source (None): input file if not provided get it from the
              distant source provided in the importer instance.

        Return a tuple with:
            - number of new item ;
            - number of item updated ;
            - error detail on error
        """
        from models import Marker
        new_item, updated_item, msg = 0, 0, ''
        source, msg = self.get_source_file(source, ['.kml'])
        if msg:
            return (0, 0, msg)
        doc = source
        # remove empty lines before declaration (bad XML file)
        if hasattr(source, 'getvalue'):
            splitted = source.getvalue().split('\n')
            for idx, line in enumerate(splitted):
                if line.strip():
                    break
            doc = StringIO.StringIO("\n".join(splitted[idx:]))
        tree = etree.parse(doc)
        # try to get default namespace
        if not self.ns:
            self.ns = tree.getroot().nsmap[None]
        xpath = self.XPATH % self.importer_instance.filtr \
            if self.importer_instance.filtr else self.DEFAULT_XPATH
        ns = '{%s}' % self.ns
        pl_key = 'kml-%d' % self.importer_instance.pk
        for placemark in tree.xpath(xpath, namespaces={'kml': self.ns}):
            name, description, point = self._parse_placemark(placemark, ns)
            if not point:
                continue
            pl_id = placemark.attrib.get('id')
            dct = {'point': point,
                   'description': description,
                   'name': name}
            m = None
            if pl_id:
                dct_import = {
                    'import_key__icontains': '%s:%s;' % (pl_key, pl_id),
                    'import_source': self.importer_instance.source}
                try:
                    m = Marker.objects.get(**dct_import)
                except ObjectDoesNotExist:
                    m = None
                else:
                    for k in dct:
                        setattr(m, k, dct[k])
                    m.save()
                    updated_item += 1
            if not m:
                # every created item records its source, as done by
                # ImportManager.create_or_update_item
                dct['import_source'] = self.importer_instance.source
                dct['status'] = 'I'
                m = Marker.objects.create(**dct)
                new_item += 1
                if pl_id:
                    m.set_key(pl_key, pl_id)
            m.categories.clear()
            for cat in self.importer_instance.categories.all():
                m.categories.add(cat)
        return (new_item, updated_item, msg)

    @classmethod
    def export(cls, queryset):
        u"""
        Export the items of the queryset as KML

        Return a tuple (file name, response rendered from the
        'chimere/export.kml' template).
        """
        dct = {'name': settings.PROJECT_NAME,
               'description': unicode(datetime.date.today()),
               'locations': queryset.all()}
        filename = unicode_normalize(
            settings.PROJECT_NAME + dct['description'] + '.kml')
        result = render_to_response('chimere/export.kml', dct)
        return filename, result


class ShapefileManager(ImportManager):
    u"""
    Shapefile importer
    """
    def get(self, source=None):
        u"""
        Get data from the source
        Args:
            - source (None): input file if not provided get it from the
              distant source provided in the importer instance.

        Return a tuple with:
            - number of new item ;
            - number of item updated ;
            - error detail on error
        """
        tmpdir = tempfile.mkdtemp()
        try:
            return self._import_shapefile(source, tmpdir)
        finally:
            # the extracted files are removed whatever the result is
            shutil.rmtree(tmpdir, ignore_errors=True)

    def _guess_srid(self, prjfilename):
        u"""
        Return the EPSG code read from a .prj file (None when not found)
        """
        srid = None
        try:
            from osgeo import osr
            with open(prjfilename, 'r') as prj_file:
                prj_txt = prj_file.read()
                srs = osr.SpatialReference()
                srs.ImportFromESRI([prj_txt])
                srs.AutoIdentifyEPSG()
                srid = srs.GetAuthorityCode(None)
        except ImportError:
            pass
        return srid

    def _import_shapefile(self, source, tmpdir):
        u"""
        Extract the shapefile inside `tmpdir` and import its features.

        Return the tuple documented in `get`.
        """
        from models import Marker, Route
        new_item, updated_item, msg = 0, 0, ''
        sources, msg = self.get_source_file(
            source, ['.shp', '.dbf', '.prj', '.shx'], dest_dir=tmpdir)
        if msg:
            return (0, 0, msg)
        if not sources:
            return (0, 0, _(u"Error while reading the data source."))
        # get the srid
        srid = self.importer_instance.srid
        if not srid:
            srid = self._guess_srid(tmpdir + os.sep + sources[2])
        if not srid:
            # try with the default projection
            srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
        shapefilename = tmpdir + os.sep + sources[0]
        ds = DataSource(shapefilename)
        lyr = ds[0]
        # for this first version it is assumed that the first field is a
        # id name and the second field is the name
        id_name = lyr.fields[0] if len(lyr.fields) > 0 else None
        # test if id_name is well guess: ids have to be unique
        if id_name:
            ids = lyr.get_fields(id_name)
            if len(ids) != len(set(ids)):
                id_name = None
        lbl_name = None
        if len(lyr.fields) > 1:
            lbl_name = lyr.fields[1]
        elif id_name:
            lbl_name = id_name
        if lyr.geom_type not in ('Point', 'LineString'):
            return (0, 0, _(u"Type of geographic item of this shapefile "
                            u"is not managed by Chimère."))
        geom_key = 'point' if lyr.geom_type == 'Point' else 'route'
        geom_cls = Marker if lyr.geom_type == 'Point' else Route
        for idx, feat in enumerate(lyr):
            name = unicode(idx)
            if lbl_name:
                name = feat.get(lbl_name)
                try:
                    name = unicode(name)
                except UnicodeDecodeError:
                    try:
                        name = unicode(name.decode(
                            settings.CHIMERE_SHAPEFILE_ENCODING))
                    except (UnicodeError, LookupError, AttributeError):
                        # the name cannot be decoded: skip the feature
                        continue
            dct = {geom_key: 'SRID=%s;%s' % (srid, feat.geom.wkt),
                   'name': name}
            import_key = feat.get(id_name) if id_name else ''
            item, updated, created = self.create_or_update_item(
                geom_cls, dct, import_key)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
        return (new_item, updated_item, msg)

    @classmethod
    def export(cls, queryset):
        u"""
        Export the items of the queryset as a zipped shapefile

        An item is written once for each of its categories.

        Return a tuple (base name of the files, content of the zip file).
        """
        date = unicode(datetime.date.today())
        field_names = [field.name for field in queryset.model._meta.fields]
        geo_field = getattr(
            queryset.model,
            'point' if 'point' in field_names else 'route')._field

        # every file of the shapefile is written inside a private
        # directory removed at the end of the export
        tmpdir = tempfile.mkdtemp()
        try:
            tmp_name = os.path.join(tmpdir, 'export.shp')
            dr = ogr.GetDriverByName('ESRI Shapefile')
            ds = dr.CreateDataSource(tmp_name)
            if ds is None:
                raise Exception(_(u'Could not create file!'))
            ogr_type = OGRGeomType(geo_field.geom_type).num
            srs = osr.SpatialReference()
            srs.ImportFromEPSG(geo_field.srid)

            layer = ds.CreateLayer('lyr', srs=srs, geom_type=ogr_type)

            for field_name in ('name', 'category'):
                field_defn = ogr.FieldDefn(str(field_name), ogr.OFTString)
                field_defn.SetWidth(255)
                if layer.CreateField(field_defn) != 0:
                    raise Exception(_(u'Failed to create field'))

            feature_def = layer.GetLayerDefn()

            for item in queryset:
                geom = getattr(item, geo_field.name)
                if not geom:
                    continue
                # characters still not ASCII after the normalization are
                # replaced instead of raising an UnicodeEncodeError
                item_name = unicode_normalize(item.name)[:80].encode(
                    'ascii', 'replace')
                # duplicate items when in several categories
                for category in item.categories.all():
                    feat = ogr.Feature(feature_def)
                    feat.SetField('name', item_name)
                    feat.SetField(
                        'category',
                        unicode_normalize(category.name)[:80].encode(
                            'ascii', 'replace'))
                    ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
                    check_err(feat.SetGeometry(ogr_geom))
                    check_err(layer.CreateFeature(feat))
            # Cleaning up: flush the files on the disk
            ds.Destroy()

            # writing to a zip file
            filename = unicode_normalize(settings.PROJECT_NAME) + '-' + date
            buff = StringIO.StringIO()
            zip_file = zipfile.ZipFile(buff, 'w', zipfile.ZIP_DEFLATED)
            for suffix in ('shp', 'shx', 'prj', 'dbf'):
                name = os.path.join(tmpdir, 'export.' + suffix)
                arcname = '.'.join((filename, suffix))
                zip_file.write(name, arcname=arcname)
            zip_file.close()
            buff.flush()
            zip_stream = buff.getvalue()
            buff.close()
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)
        return filename, zip_stream


# extract the content of the brackets of a "node[key=value]" filter
RE_NODE = re.compile(r'node\[([^\]]*)\]')


# manage deleted item from OSM
class OSMManager(ImportManager):
    u"""
    OSM importer/exporter
    The source url is a path to an OSM file or a XAPI url
    The filtr argument is XAPI args or empty if it is an OSM file.
    """
    default_source = settings.CHIMERE_XAPI_URL

    def get(self, source=None):
        u"""
        Get data from the source
        Args:
            - source (None): input file if not provided get it from the
              distant source provided in the importer instance.

        Return a tuple with:
            - new items;
            - updated items;
            - error detail on error.
        """
        source, msg = self.get_source_file(
            source, ['.osm'], extra_url=self.importer_instance.filtr)
        if not source:
            return (0, 0, msg)

        tree = etree.parse(source)
        # only import node or ways
        if tree.xpath('count(//way)') and tree.xpath('count(//node)'):
            return self.import_ways(tree)
        elif tree.xpath('count(//node)'):
            return self.import_nodes(tree)
        return 0, 0, _(u"Nothing to import")

    def import_ways(self, tree):
        u"""
        Import the ways of an OSM document as routes

        Return a tuple (new items, updated items, error message).
        """
        from chimere.models import Route
        msg, new_item, updated_item = "", 0, 0
        # coordinates ("lon lat") of the nodes by node id
        nodes = {}
        for node in tree.xpath('//node'):
            node_id = node.attrib.get('id')
            if node_id:
                nodes[node_id] = '%s %s' % (node.get('lon'),
                                            node.get('lat'))
        for way in tree.xpath('//way'):
            name = None
            points = []
            node_id = way.attrib.get('id')
            version = way.attrib.get('version')
            for item in way:
                k = item.attrib.get('k')
                if k == 'name':
                    name = item.attrib.get('v')
                if item.tag == 'nd':
                    points.append(item.get('ref'))
            if not name:
                name = self.default_name
            coords = [nodes[point_id] for point_id in points
                      if point_id in nodes]
            if len(coords) < 2:
                # a line needs at least two known points
                continue
            wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(coords)
            dct = {'route': wkt,
                   'name': name,
                   'import_version': version}
            item, updated, created = self.create_or_update_item(
                Route, dct, node_id, version)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
        return new_item, updated_item, msg

    def import_nodes(self, tree):
        u"""
        Import the nodes of an OSM document as markers

        Return a tuple (new items, updated items, error message).
        """
        from chimere.models import Marker
        msg, new_item, updated_item = "", 0, 0
        for node in tree.xpath('//node'):
            name = None
            node_id = node.attrib.get('id')
            if not node_id:
                continue
            version = node.attrib.get('version')
            for item in node:
                k = item.attrib.get('k')
                if k == 'name':
                    name = item.attrib.get('v')
            if not name:
                name = self.default_name
            point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
                                                node.get('lat'))
            dct = {'point': point,
                   'name': name,
                   'import_version': version}
            item, updated, created = self.create_or_update_item(
                Marker, dct, node_id, version)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
        return (new_item, updated_item, msg)

    def put(self):
        u"""
        Export the available markers of the importer categories to OSM

        Return a tuple (number of items exported, error message) - the
        error message is None on success.
        """
        # first of all: reimport in order to verify that no changes has been
        # made since the last import
        from models import Marker
        new_item, updated_item, msg = self.get()
        # check if export is possible
        if msg:
            return 0, msg
        if new_item:
            return 0, _(u"New items imported - validate them before "
                        u"exporting")
        if Marker.objects.filter(status='I').count():
            return 0, _(u"There are items from a former import not yet "
                        u"validated - validate them before exporting")
        # check the filter before opening a changeset
        tag = RE_NODE.findall(self.importer_instance.filtr or '')
        if not tag:
            return 0, _(u"Bad param")
        tag = tag[0].split('=', 1)
        if len(tag) != 2:
            return 0, _(u"Bad param")
        tag_key, tag_value = tag
        # start export
        api = OsmApi.OsmApi(api=settings.CHIMERE_OSM_API_URL,
                            username=settings.CHIMERE_OSM_USER,
                            password=settings.CHIMERE_OSM_PASSWORD)
        api.ChangesetCreate({u"comment": u"Import from Chimère %s" %
                             get_version()})
        markers = Marker.objects.filter(
            status='A',
            categories__in=self.importer_instance.categories.all()
        ).distinct()
        exported = 0
        try:
            for item in markers:
                # NOTE(review): the importers read the name from the "name"
                # tag so it is also sent as a tag - confirm this is what is
                # wanted on the OSM side
                # NOTE(review): x and y are sent as is - assumes the points
                # are stored as longitude/latitude, to be confirmed
                dct = {'tag': {tag_key: tag_value, 'name': item.name},
                       'import_source': self.importer_instance.source,
                       'name': item.name,
                       'lon': item.point.x,
                       'lat': item.point.y}
                import_key = item.get_key('OSM')
                if not import_key:
                    node = api.NodeCreate(dct)
                    item.set_key('OSM', node['id'])
                else:
                    dct['id'] = import_key
                    node = api.NodeUpdate(dct)
                item.import_version = node['version']
                item.save()
                exported += 1
        finally:
            # never leave the changeset opened, even on error
            api.ChangesetClose()
        return exported, None