Diffstat (limited to 'chimere/utils.py')
-rw-r--r--  chimere/utils.py | 516
1 file changed, 300 insertions(+), 216 deletions(-)
diff --git a/chimere/utils.py b/chimere/utils.py
index 790fd56..8066255 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -22,14 +22,14 @@ Utilitaries
"""
import csv
+import collections
import datetime
import feedparser
-import simplejson as json
+import json
import os
import re
import StringIO
import tempfile
-from urllib import urlencode
import urllib2
import unicodedata
import zipfile
@@ -47,26 +47,30 @@ from django.utils.translation import ugettext_lazy as _
from chimere import get_version
from external_utils import OsmApi
+
def unicode_normalize(string):
if type(string) == str:
string = unicode(string.decode('utf-8'))
return ''.join(
(c for c in unicodedata.normalize('NFD', string)
- if unicodedata.category(c) != 'Mn'))
+ if unicodedata.category(c) != 'Mn'))
+
class ImportManager(object):
u"""
Generic class for specific importers
"""
default_source = None
+
def __init__(self, importer_instance):
self.importer_instance = importer_instance
if self.importer_instance.default_name:
self.default_name = self.importer_instance.default_name
else:
- self.default_name = " - ".join([cat.name
+ self.default_name = " - ".join([
+ cat.name
for cat in self.importer_instance.categories.order_by(
- 'name').all()])
+ 'name').all()])
def get(self):
raise NotImplementedError
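Note: unicode_normalize folds accented characters to their unaccented base by dropping combining marks after NFD decomposition. A quick sketch:

    >>> unicode_normalize(u'Chimère')   # NFD splits the accent off; category 'Mn' drops it
    u'Chimere'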
@@ -85,8 +89,8 @@ class ImportManager(object):
item = None
if import_key or pk:
dct_import = {
- 'import_key__icontains':'%s:%s;' % (key, import_key),
- 'import_source':self.importer_instance.source}
+ 'import_key__icontains': '%s:%s;' % (key, import_key),
+ 'import_source': self.importer_instance.source}
ref_item = cls.objects.filter(**dct_import)
try:
item = None
@@ -102,7 +106,7 @@ class ImportManager(object):
return ref_item, None, None
if not self.importer_instance.overwrite \
and ref_item.modified_since_import:
- dct_import['ref_item'] = ref_item
+ return ref_item, None, None
else:
item = ref_item
for k in values:
@@ -123,16 +127,17 @@ class ImportManager(object):
if not self.importer_instance.get_description and \
self.importer_instance.default_description:
values['description'] = \
- self.importer_instance.default_description
+ self.importer_instance.default_description
values.update({
- 'import_source':self.importer_instance.source})
- values['status'] = 'I' \
- if not self.importer_instance.automatic_update else 'A'
+ 'import_source': self.importer_instance.source})
+ values['status'] = self.importer_instance.default_status
if not self.importer_instance.associate_marker_to_way\
- and cls.__name__ == 'Route':
+ and cls.__name__ == 'Route':
values['has_associated_marker'] = False
try:
item = cls.objects.create(**values)
+ item.modified_since_import = False
+ item.save()
except TypeError:
# error on data source
return None, False, False
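Note: imported items keep one "key:value;" fragment per source in import_key, which is why the lookup above uses import_key__icontains. A minimal sketch, assuming a KML importer with pk 3 and a placemark id 'pm42':

    >>> '%s:%s;' % ('kml-3', 'pm42')   # fragment searched in import_key
    'kml-3:pm42;'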
@@ -159,8 +164,8 @@ class ImportManager(object):
current_file_name = None
for name in namelist:
if name.endswith(suffix) \
- or name.endswith(suffix.lower()) \
- or name.endswith(suffix.upper()):
+ or name.endswith(suffix.lower()) \
+ or name.endswith(suffix.upper()):
current_file_name = name
filenames.append(current_file_name)
files = []
@@ -181,7 +186,7 @@ class ImportManager(object):
if not hasattr(source, 'read'):
if not source:
source = self.importer_instance.source \
- if self.importer_instance.source else self.default_source
+ if self.importer_instance.source else self.default_source
try:
url = source
if extra_url:
@@ -208,6 +213,7 @@ class ImportManager(object):
source = files[0] if len(suffixes) == 1 else files
return (source, None)
+
class KMLManager(ImportManager):
u"""
KML importer
@@ -216,6 +222,7 @@ class KMLManager(ImportManager):
"""
XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
DEFAULT_XPATH = '//kml:Placemark'
+
def __init__(self, importer_instance, ns=''):
super(KMLManager, self).__init__(importer_instance)
self.ns = ns
@@ -250,9 +257,9 @@ class KMLManager(ImportManager):
if not self.ns:
self.ns = tree.getroot().nsmap[None]
xpath = self.XPATH % self.importer_instance.filtr \
- if self.importer_instance.filtr else self.DEFAULT_XPATH
+ if self.importer_instance.filtr else self.DEFAULT_XPATH
for placemark in tree.xpath(xpath,
- namespaces={'kml':self.ns}):
+ namespaces={'kml': self.ns}):
name, point, line = None, None, None
pl_id = placemark.attrib.get('id')
pl_key = 'kml-%d' % self.importer_instance.pk
@@ -280,10 +287,10 @@ class KMLManager(ImportManager):
for p in points if p])
line = 'SRID=4326;LINESTRING(%s)' % points
cls = None
- dct = {'description':description,
- 'name':name,
- 'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license}
+ dct = {'description': description,
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
if point:
dct['point'] = point
cls = Marker
@@ -293,7 +300,7 @@ class KMLManager(ImportManager):
cls = Route
if cls:
item, updated, created = self.create_or_update_item(
- cls, dct, pl_id, key=pl_key)
+ cls, dct, pl_id, key=pl_key)
if updated:
updated_item += 1
if created:
@@ -302,15 +309,17 @@ class KMLManager(ImportManager):
@classmethod
def export(cls, queryset):
- dct = {'name':settings.PROJECT_NAME,
- 'description':unicode(datetime.date.today()),
- 'locations':queryset.all()
+ dct = {
+ 'name': settings.PROJECT_NAME,
+ 'description': unicode(datetime.date.today()),
+ 'locations': queryset.all()
}
- filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+ filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
+ '.kml')
result = render_to_response('chimere/export.kml', dct)
return filename, result
+
class ShapefileManager(ImportManager):
u"""
Shapefile importer
@@ -352,7 +361,7 @@ class ShapefileManager(ImportManager):
srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
msg = _(u"SRID cannot be guessed. The default SRID (%s) has "
u"been used.") % srid
- #If imported items are not well located "
+ # If imported items are not well located "
# u"ask your data provider for the SRID to use.") % srid
shapefilename = tmpdir + os.sep + sources[0]
ds = DataSource(shapefilename)
@@ -375,7 +384,7 @@ class ShapefileManager(ImportManager):
u"is not managed by Chimère.") % lyr.geom_type)
geom_key = 'point' if lyr.geom_type == 'Point' else 'route'
geom_cls = Marker if lyr.geom_type == 'Point' else Route
- indexes = []
+ # indexes = []
for idx, feat in enumerate(lyr):
name = unicode(idx)
if lbl_name:
@@ -385,7 +394,7 @@ class ShapefileManager(ImportManager):
except UnicodeDecodeError:
try:
name = unicode(
- name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
+ name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
except:
continue
try:
@@ -394,15 +403,17 @@ class ShapefileManager(ImportManager):
return (0, 0, _(u"Bad Shapefile"))
if feat.geom.geom_type == 'MultiLineString':
geoms = [geom.wkt for geom in feat.geom]
- import_key = feat.get(id_name) if id_name and len(geoms) == 1 else ''
+ import_key = feat.get(id_name) if id_name and len(geoms) == 1 \
+ else ''
for geom in geoms:
- dct = {geom_key:'SRID=%s;%s' % (srid, geom),
- 'name':name,
- 'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license
- }
+ dct = {
+ geom_key: 'SRID=%s;%s' % (srid, geom),
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license
+ }
item, updated, created = self.create_or_update_item(
- geom_cls, dct, import_key)
+ geom_cls, dct, import_key)
if updated:
updated_item += 1
if created:
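Note: each feature is handed to GeoDjango as an EWKT string carrying the guessed SRID. A sketch, assuming a Lambert-93 shapefile (EPSG:2154) and hypothetical coordinates:

    >>> 'SRID=%s;%s' % (2154, 'POINT(351234.5 6789123.4)')
    'SRID=2154;POINT(351234.5 6789123.4)'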
@@ -427,8 +438,9 @@ class ShapefileManager(ImportManager):
tmp_name = tmp.name
field_names = [field.name for field in queryset.model._meta.fields]
- geo_field = getattr(queryset.model,
- 'point' if 'point' in field_names else 'route')._field
+ geo_field = getattr(
+ queryset.model,
+ 'point' if 'point' in field_names else 'route')._field
dr = ogr.GetDriverByName('ESRI Shapefile')
ds = dr.CreateDataSource(tmp_name)
@@ -454,7 +466,7 @@ class ShapefileManager(ImportManager):
feat = ogr.Feature(feature_def)
feat.SetField('name', str(unicode_normalize(item.name)[:80]))
feat.SetField('category',
- str(unicode_normalize(category.name)[:80]))
+ str(unicode_normalize(category.name)[:80]))
geom = getattr(item, geo_field.name)
if not geom:
@@ -480,6 +492,7 @@ class ShapefileManager(ImportManager):
buff.close()
return filename, zip_stream
+
class CSVManager(ImportManager):
u"""
CSV importer
@@ -490,9 +503,8 @@ class CSVManager(ImportManager):
# (label, getter, setter)
COLS = [("Id", 'pk', 'pk'), (_(u"Name"), 'name', 'name'),
- (_(u"Categories"), lambda obj:", ".join(
- [c.name for c in obj.categories.all()]),
- set_categories),
+ (_(u"Categories"), lambda obj: ", ".join(
+ [c.name for c in obj.categories.all()]), set_categories),
(_(u"State"), 'status', lambda x: x),
(_(u"Description"), 'description', 'description'),
(_(u"Localisation"), 'geometry', 'geometry')]
@@ -512,40 +524,32 @@ class CSVManager(ImportManager):
if msg:
return (0, 0, msg)
reader = csv.reader(source, delimiter=';', quotechar='"')
- prop_cols, nominatim_fields = [], {}
- reverse_nominatim_dct = dict((v, k)
- for k, v in settings.CHIMERE_NOMINATIM_FIELDS.iteritems())
- nominatim_default_query = settings.CHIMERE_NOMINATIM_FIELDS
- for idx, pm in enumerate(Marker.all_properties()):
+ prop_cols = []
+ for pm in Marker.all_properties():
prop_cols.append((pm.name, pm.getAttrName(),
- pm.getAttrName()+'_set'))
- if settings.CHIMERE_NOMINATIM_FIELDS and \
- pm.slug in reverse_nominatim_dct:
- nominatim_fields[idx+len(self.COLS)] = \
- reverse_nominatim_dct[pm.slug]
- nominatim_default_query.pop(reverse_nominatim_dct[pm.slug])
+ pm.getAttrName() + '_set'))
cols = list(self.COLS) + prop_cols
- datas = []
+ # datas = []
for idx, row in enumerate(reader):
- if not idx: # first row
+ if not idx: # first row
try:
assert(len(row) >= len(cols))
except AssertionError:
- return (0, 0, _(u"Invalid CSV format - not enough columns "
- u"check a reference CSV file"))
+                return (0, 0, _(u"Invalid CSV format: not enough columns"))
continue
if len(row) < len(cols):
continue
- pk, name, cats, state = row[0], row[1], row[2], row[3]
+ # pk, name, cats, state = row[0], row[1], row[2], row[3]
+ pk, name = row[0], row[1]
geom = row[5]
description = ''
if self.importer_instance.get_description:
description = row[4]
COL_INDEX = 6
- dct = {'description':description,
- 'name':name,
- 'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license}
+ dct = {'description': description,
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
cls = None
if 'POINT' in geom:
cls = Marker
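Note: the reader expects semicolon-delimited, double-quoted rows whose first six columns follow COLS: row[0] Id, row[1] Name, row[2] Categories, row[3] State, row[4] Description, row[5] Localisation as WKT; property columns start at index 6. A hypothetical row:

    "12";"Town hall";"Public buildings";"A";"Open daily";"POINT(-1.6794 48.1147)"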
@@ -553,27 +557,11 @@ class CSVManager(ImportManager):
elif 'LINE' in geom:
cls = Route
dct['route'] = geom
- elif settings.CHIMERE_NOMINATIM_FIELDS:
- nominatim_query = settings.NOMINATIM_URL + "?"
- nominatim_keys = nominatim_default_query.copy()
- nominatim_keys['format'] = 'json'
- for idx in nominatim_fields:
- nominatim_keys[nominatim_fields[idx]] = row[idx]
- nominatim_query += urlencode(nominatim_keys)
- remotehandle = urllib2.urlopen(nominatim_query)
- result = StringIO.StringIO(remotehandle.read())
- remotehandle.close()
- result = json.load(result)
- if not result:
- continue
- result = result[0]
- cls = Marker
- dct['point'] = "POINT(%s %s)" % (result['lon'], result['lat'])
else:
continue
import_key = pk if pk else name.decode('utf-8')
- item, updated, created = self.create_or_update_item(cls, dct,
- import_key, pk=pk)
+ item, updated, created = self.create_or_update_item(
+ cls, dct, import_key, pk=pk)
if updated:
updated_item += 1
if created:
@@ -581,19 +569,17 @@ class CSVManager(ImportManager):
for idx, col in enumerate(cols[COL_INDEX:]):
name, getter, setter_val = col
setter = getattr(item, setter_val)
- val = row[idx+COL_INDEX]
+ val = row[idx + COL_INDEX]
setter(item, val)
return (new_item, updated_item, msg)
@classmethod
- def export(cls, queryset, cols=[]):
- dct = {'description':unicode(datetime.date.today()), 'data':[]}
- cls_name = queryset.model.__name__.lower()
- if not cols:
- cols = list(cls.COLS)
- if hasattr(queryset.model, 'all_properties'):
- for pm in queryset.model.all_properties():
- cols.append((pm.name, pm.getAttrName(), pm.getAttrName()+'_set'))
+ def export(cls, queryset):
+ dct = {'description': unicode(datetime.date.today()), 'data': []}
+ # cls_name = queryset.model.__name__.lower()
+ cols = list(cls.COLS)
+ for pm in queryset.model.all_properties():
+ cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set'))
header = [col[0] for col in cols]
dct['data'].append(header)
for item in queryset.all():
@@ -602,16 +588,14 @@ class CSVManager(ImportManager):
if callable(attr):
data.append(attr(item))
else:
- v = getattr(item, attr)
- if v == None:
- v = ''
- data.append(v)
+ data.append(getattr(item, attr))
dct['data'].append(data)
- filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+ filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
+ '.csv')
result = render_to_response('chimere/export.csv', dct)
return filename, result
+
class GeoRSSManager(ImportManager):
u"""
RSS importer.
@@ -627,19 +611,19 @@ class GeoRSSManager(ImportManager):
    - number of items updated;
- error detail on error
"""
- from models import Marker
+ from models import Marker, Route
new_item, updated_item, msg = 0, 0, ''
feed = feedparser.parse(self.importer_instance.source)
- if feed['bozo'] and not isinstance(feed['bozo_exception'],
- feedparser.CharacterEncodingOverride):
+ if feed['bozo'] and not isinstance(
+ feed['bozo_exception'], feedparser.CharacterEncodingOverride):
return (0, 0, _(u"RSS feed is not well formed"))
for item in feed['items']:
if "georss_point" not in item and 'georss_line' not in item \
and not ("geo_lat" in item and "geo_long" in item):
continue
cls = None
- dct = {'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license}
+ dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
if 'georss_point' in item or "geo_lat" in item:
cls = Marker
if 'georss_point' in item:
@@ -661,11 +645,11 @@ class GeoRSSManager(ImportManager):
points = item['georss_line'].split(' ')
reordered_points = []
# lat, lon -> x, y
- for idx in xrange(len(points)/2):
- reordered_points.append("%s %s" % (points[idx*2+1],
- points[idx*2]))
+ for idx in xrange(len(points) / 2):
+ reordered_points.append("%s %s" % (points[idx * 2 + 1],
+ points[idx * 2]))
dct['route'] = 'SRID=4326;LINESTRING(%s)' % \
- ",".join(reordered_points)
+ ",".join(reordered_points)
dct['name'] = item['title']
pl_id = item['id'] if 'id' in item else item['title']
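Note: georss:line lists lat lon pairs while WKT expects lon lat, hence the reordering above. A quick check with hypothetical coordinates:

    >>> points = '48.11 -1.68 48.12 -1.67'.split(' ')
    >>> ["%s %s" % (points[i * 2 + 1], points[i * 2]) for i in xrange(len(points) / 2)]
    ['-1.68 48.11', '-1.67 48.12']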
@@ -676,10 +660,101 @@ class GeoRSSManager(ImportManager):
new_item += 1
return (new_item, updated_item, msg)
+
+class JsonManager(ImportManager):
+ u"""
+ Json importer.
+    This manager only gets and does not produce JSON feeds.
+ """
+
+ def get(self):
+ u"""
+        Get data from a simple JSON source
+
+ Return a tuple with:
+        - number of new items;
+        - number of items updated;
+ - error detail on error
+ """
+ from models import Marker
+ new_item, updated_item, msg = 0, 0, ''
+ source, msg = self.get_source_file(['.json'])
+ if msg:
+ return (0, 0, msg)
+
+ vals = source.read().replace('\n', ' ')
+ try:
+ values = json.JSONDecoder(
+ object_pairs_hook=collections.OrderedDict).decode(vals)
+ except ValueError as e:
+ return (new_item, updated_item,
+ _(u"JSON file is not well formed: " + e.message))
+ # configuration in filtr
+ try:
+ filtr = json.JSONDecoder().decode(self.importer_instance.filtr)
+ except ValueError:
+ return (
+ new_item, updated_item,
+ _(u"Bad configuration: filter field must be a valid "
+ u"JSON string"))
+
+ vls = filtr.values()
+ for k in ('name', 'id', 'description'):
+ if k not in vls:
+ return (
+ new_item, updated_item,
+ _(u"A key must be associated to \"%s\" in the "
+ u"filter.") % k)
+
+ default_dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
+ if 'prefix_name' in filtr:
+ default_dct['name'] = filtr.pop('prefix_name')
+ if 'prefix_description' in filtr:
+ default_dct['description'] = filtr.pop('prefix_description')
+ if self.importer_instance.default_localisation:
+ default_dct['point'] = self.importer_instance.default_localisation
+
+ for item in values:
+ dct = default_dct.copy()
+ for k in filtr:
+ if k in item and item[k]:
+ if filtr[k] not in dct:
+ dct[filtr[k]] = ""
+ else:
+ if filtr[k] == 'description':
+ dct[filtr[k]] += "<br/>"
+ else:
+ dct[filtr[k]] += " "
+ dct[filtr[k]] += item[k]
+ if 'point' in item:
+ x, y = item['point'].split(",")
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
+ elif 'lat' in item and item['lat'] \
+ and 'lon' in item and item['lon']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
+ item['lat'])
+ elif 'x' in item and item['x'] \
+ and 'y' in item and item['y']:
+ dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['x'],
+ item['y'])
+            if not dct.get('point'):  # no usable localisation: skip item
+ continue
+ cls = Marker
+ pl_id = (dct.pop('id') if 'id' in dct else dct['name']) \
+ + "-" + unicode(self.importer_instance.pk)
+ it, updated, created = self.create_or_update_item(cls, dct, pl_id)
+ if updated:
+ updated_item += 1
+ if created:
+ new_item += 1
+ return (new_item, updated_item, msg)
+
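Note: the importer's filtr field is itself JSON, mapping source keys to Chimère fields; 'name', 'id' and 'description' must appear among its values, and the optional prefix_name / prefix_description entries seed the defaults. Coordinates come from a 'point' key ('lon,lat'), a 'lat'/'lon' pair or an 'x'/'y' pair. A minimal sketch with hypothetical source keys:

    >>> filtr = '{"title": "name", "uid": "id", "summary": "description"}'
    >>> sorted(json.JSONDecoder().decode(filtr).values())
    [u'description', u'id', u'name']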
RE_HOOK = re.compile('\[([^\]]*)\]')
# TODO: manage deleted items from OSM
+
class OSMManager(ImportManager):
u"""
OSM importer/exporter
@@ -697,8 +772,8 @@ class OSMManager(ImportManager):
- updated items;
- error detail on error.
"""
- source, msg = self.get_source_file(['.osm'],
- extra_url=self.importer_instance.filtr)
+ source, msg = self.get_source_file(
+ ['.osm'], extra_url=self.importer_instance.filtr)
if not source:
return (0, 0, msg)
@@ -711,8 +786,8 @@ class OSMManager(ImportManager):
return 0, 0, _(u"Nothing to import")
def import_ways(self, tree):
- from chimere.models import Marker, Route
- msg, items, new_item, updated_item = "", [], 0 , 0
+ from chimere.models import Route
+ msg, items, new_item, updated_item = "", [], 0, 0
nodes = {}
for node in tree.xpath('//node'):
node_id = node.attrib.get('id')
@@ -734,17 +809,17 @@ class OSMManager(ImportManager):
points.append(item.get('ref'))
if not points:
continue
- wkt = 'SRID=4326;LINESTRING(%s)' % ",".join([nodes[point_id]
- for point_id in points if point_id in nodes])
- dct = {'route':wkt,
- 'name':name,
- 'origin':self.importer_instance.origin \
- or u'OpenStreetMap.org',
- 'license':self.importer_instance.license \
- or u'ODbL',
- 'import_version':version}
+ wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(
+ [nodes[point_id] for point_id in points if point_id in nodes])
+ dct = {'route': wkt,
+ 'name': name,
+ 'origin': self.importer_instance.origin
+ or u'OpenStreetMap.org',
+ 'license': self.importer_instance.license
+ or u'ODbL',
+ 'import_version': version}
item, updated, created = self.create_or_update_item(
- Route, dct, node_id, version)
+ Route, dct, node_id, version)
if updated:
updated_item += 1
if created:
@@ -754,7 +829,7 @@ class OSMManager(ImportManager):
def import_nodes(self, tree):
from chimere.models import Marker
- msg, items, new_item, updated_item = "", [], 0 , 0
+ msg, items, new_item, updated_item = "", [], 0, 0
for node in tree.xpath('//node'):
name = None
node_id = node.attrib.get('id')
@@ -767,15 +842,15 @@ class OSMManager(ImportManager):
name = item.attrib.get('v')
point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
node.get('lat'))
- dct = {'point':point,
- 'name':name,
- 'origin':self.importer_instance.origin \
- or u'OpenStreetMap.org',
- 'license':self.importer_instance.license \
- or u'ODbL',
- 'import_version':version}
+ dct = {'point': point,
+ 'name': name,
+ 'origin': self.importer_instance.origin
+ or u'OpenStreetMap.org',
+ 'license': self.importer_instance.license
+ or u'ODbL',
+ 'import_version': version}
item, updated, created = self.create_or_update_item(
- Marker, dct, node_id, version)
+ Marker, dct, node_id, version)
if updated:
updated_item += 1
if created:
@@ -810,8 +885,8 @@ class OSMManager(ImportManager):
username = username.encode('latin1')
password = password.encode('latin1')
api = OsmApi.OsmApi(api=api, username=username, password=password)
- api.ChangesetCreate({u"comment": u"Import from Chimère %s" % \
- get_version()})
+ api.ChangesetCreate({u"comment": u"Import from Chimère %s" %
+ get_version()})
hooks = RE_HOOK.findall(self.importer_instance.filtr)
if not hooks:
hooks = RE_HOOK.findall(self.importer_instance.source)
@@ -825,28 +900,31 @@ class OSMManager(ImportManager):
continue
if key == 'bbox':
x1, y1, x2, y2 = [float(val) for val in value.split(',')]
- bbox = GEOSGeometry(
+ bbox = GEOSGeometry(
'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % (
- x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
+ x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
continue
tags[key] = value
if not tags:
return 0, _(u"No non ambigious tag is defined in the XAPI request")
if not bbox:
- return 0, _(u"No bounding box is defined in the XAPI request."\
- u"If you are sure to manage the entire planet set the bounding box"\
- u" to -180,-90,180,90")
- default_dct = {'tag':tags,
- 'import_source':self.importer_instance.source}
+ return 0, _(
+ u"No bounding box is defined in the XAPI request."
+ u"If you are sure to manage the entire planet set the "
+ u"bounding box to -180,-90,180,90")
+ default_dct = {'tag': tags,
+ 'import_source': self.importer_instance.source}
idx = -1
- for idx, item in enumerate(Marker.objects.filter(status='A',
- point__contained=bbox,
- categories=self.importer_instance.categories.all(),
- not_for_osm=False, modified_since_import=True,
- route=None).all()):
+ for idx, item in enumerate(
+ Marker.objects.filter(
+ status='A',
+ point__contained=bbox,
+ categories=self.importer_instance.categories.all(),
+ not_for_osm=False, modified_since_import=True,
+ route=None).all()):
dct = default_dct.copy()
- dct.update({'lon':item.point.x,
- 'lat':item.point.y})
+ dct.update({'lon': item.point.x,
+ 'lat': item.point.y})
dct['tag']['name'] = item.name
node = None
import_key = item.get_key('OSM')
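Note: RE_HOOK pulls the bracketed [key=value] hooks out of the XAPI filter; the tags and the mandatory bbox are read from them. A quick check on a hypothetical XAPI request:

    >>> RE_HOOK.findall(u'node[amenity=fountain][bbox=-1.7,48.0,-1.6,48.2]')
    [u'amenity=fountain', u'bbox=-1.7,48.0,-1.6,48.2']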
@@ -861,7 +939,7 @@ class OSMManager(ImportManager):
if error.status == 404:
dct.pop('id')
dct.pop('version')
- pass # if the node doesn't exist it is created
+ pass # if the node doesn't exist it is created
else:
raise
if not updated:
@@ -870,20 +948,23 @@ class OSMManager(ImportManager):
item.import_version = node['version']
item.save()
api.ChangesetClose()
- return idx+1, None
+ return idx + 1, None
+
-import urllib2, chardet, HTMLParser
+import chardet
+import HTMLParser
from BeautifulSoup import BeautifulSoup
-from lxml import etree
+
RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''),
(re.compile(' ( )*'), ' '),
(re.compile(r"""<a href=["'](?!https?)(.*)["']"""),
- '<a href="%(base_url)s\\1"'),
+ '<a href="%(base_url)s\\1"'),
)
from calendar import TimeEncoding, month_name
+
def get_month_name(month_no, locale):
with TimeEncoding(locale) as encoding:
s = month_name[month_no]
@@ -891,62 +972,62 @@ def get_month_name(month_no, locale):
s = s.decode(encoding)
return s
-MONTH_NAMES = {locale:[get_month_name(no_month, locale+'.UTF-8')
- for no_month in xrange(1, 13)] for locale in ['fr_FR']}
+MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8')
+ for no_month in xrange(1, 13)] for locale in ['fr_FR']}
try:
- UNI_MONTH_NAMES = {locale:[m.decode('utf-8') for m in MONTH_NAMES[locale]]
- for locale in MONTH_NAMES}
+ UNI_MONTH_NAMES = {locale: [m.decode('utf-8') for m in MONTH_NAMES[locale]]
+ for locale in MONTH_NAMES}
except UnicodeEncodeError:
- UNI_MONTH_NAMES = {locale:[m for m in MONTH_NAMES[locale]]
- for locale in MONTH_NAMES}
-
-DATE_PARSINGS = {'fr_FR':[
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
- r'(?P<year1>\d{4})?[^\d]*'\
- r'(?P<day2>\d{1,2}) '\
- r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year2>\d{4})?.*'),
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year1>\d{4})?')
- ],
- 'en':[
- re.compile(r'(?P<year1>\d{4})-'\
- r'(?P<month1>\d{2})-'\
- r'(?P<day1>\d{2})'\
- r'(?:T'\
- r'(?P<hour1>\d{2})?:'\
- r'(?P<minut1>\d{2})?:'\
- r'(?P<second1>\d{2})'\
- r')?.*'\
- r'(?P<year2>\d{4})-'\
- r'(?P<month2>\d{2})-'\
- r'(?P<day2>\d{2})'\
- r'(?:T'\
- r'(?P<hour2>\d{2})?:'\
- r'(?P<minut2>\d{2})?:'\
- r'(?P<second2>\d{2})'\
- r')?.*'
- ),
- re.compile(r'(?P<year1>\d{4})-'\
- r'(?P<month1>\d{2})-'\
- r'(?P<day1>\d{2})'\
- r'(?:T'\
- r'(?P<hour1>\d{2})?:'\
- r'(?P<minut1>\d{2})?:'\
- r'(?P<second1>\d{2})'\
- r')?'
- )
- ],
- }
+ UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]]
+ for locale in MONTH_NAMES}
+
+DATE_PARSINGS = {
+ 'fr_FR': [
+ re.compile(r'(?P<day1>\d{1,2}) '
+ r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') '
+ r'(?P<year1>\d{4})?[^\d]*'
+ r'(?P<day2>\d{1,2}) '
+ r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
+ r'(?P<year2>\d{4})?.*'),
+ re.compile(r'(?P<day1>\d{1,2}) '
+ r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') * '
+ r'(?P<year1>\d{4})?')],
+ 'en': [
+ re.compile(r'(?P<year1>\d{4})-'
+ r'(?P<month1>\d{2})-'
+ r'(?P<day1>\d{2})'
+ r'(?:T'
+ r'(?P<hour1>\d{2})?:'
+ r'(?P<minut1>\d{2})?:'
+ r'(?P<second1>\d{2})'
+ r')?.*'
+ r'(?P<year2>\d{4})-'
+ r'(?P<month2>\d{2})-'
+ r'(?P<day2>\d{2})'
+ r'(?:T'
+ r'(?P<hour2>\d{2})?:'
+ r'(?P<minut2>\d{2})?:'
+ r'(?P<second2>\d{2})'
+ r')?.*'),
+ re.compile(r'(?P<year1>\d{4})-'
+ r'(?P<month1>\d{2})-'
+ r'(?P<day1>\d{2})'
+ r'(?:T'
+ r'(?P<hour1>\d{2})?:'
+ r'(?P<minut1>\d{2})?:'
+ r'(?P<second1>\d{2})'
+ r')?')],
+}
+
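Note: a quick check of the second 'en' pattern (a single ISO date with optional time):

    >>> m = DATE_PARSINGS['en'][1].match('2012-07-14T10:30:00')
    >>> m.group('year1'), m.group('month1'), m.group('day1')
    ('2012', '07', '14')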
def clean_field(value):
return value.strip()
+
class HtmlXsltManager(ImportManager):
PARSER = 'HTMLParser'
+
def get(self):
u"""
Get data from the source
@@ -970,7 +1051,7 @@ class HtmlXsltManager(ImportManager):
soup = BeautifulSoup(data)
main_page = soup.prettify()
# convert it to valid XHTML
- #doc, errors = tidy_document(main_page)
+ # doc, errors = tidy_document(main_page)
doc = main_page
dom = etree.HTML(doc, getattr(etree, self.PARSER)())
try:
@@ -994,8 +1075,8 @@ class HtmlXsltManager(ImportManager):
base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
base_url += u"/"
for item in newdom.getroot():
- c_item = {child.tag:clean_field(child.text)
- for child in item.getchildren() if child.text}
+ c_item = {child.tag: clean_field(child.text)
+ for child in item.getchildren() if child.text}
# try to have more information on the linked page
if transform_child and 'link' in c_item:
# not an absolute address
@@ -1016,8 +1097,8 @@ class HtmlXsltManager(ImportManager):
child_dom = etree.HTML(child_page, etree.HTMLParser())
extra_keys = transform_child(child_dom).getroot()
if len(extra_keys):
- c_item.update({extra.tag:etree.tostring(extra)
- for extra in extra_keys[0].getchildren()})
+ c_item.update({extra.tag: etree.tostring(extra)
+ for extra in extra_keys[0].getchildren()})
items.append(c_item)
# change relative link to full link, simplify, unescape HTML entities
html_unescape = HTMLParser.HTMLParser().unescape
@@ -1025,7 +1106,7 @@ class HtmlXsltManager(ImportManager):
for k in item:
val = item[k]
for r, replaced in RE_CLEANS:
- val = re.sub(r, replaced % {'base_url':base_url}, val)
+ val = re.sub(r, replaced % {'base_url': base_url}, val)
item[k] = html_unescape(val)
self.key_categories = self.importer_instance.get_key_category_dict()
self.missing_cats = set()
@@ -1034,9 +1115,10 @@ class HtmlXsltManager(ImportManager):
self.add_dct_item(item)
msg = ''
if self.missing_cats:
- msg = _(u"Names \"%s\" doesn't match existing categories. "
- u"Modify the import to match theses names with categories.") % (
- u'", "'.join(self.missing_cats))
+ msg = _(
+ u"Names \"%s\" doesn't match existing categories. "
+ u"Modify the import to match theses names with categories.") %\
+ (u'", "'.join(self.missing_cats))
return (self.new_item, self.updated_item, msg)
@classmethod
@@ -1073,18 +1155,18 @@ class HtmlXsltManager(ImportManager):
if not m:
continue
values = m.groupdict()
- date = self._internal_parse_date(locale,
- 'year1' in values and values['year1'],
- values['month1'], values['day1'])
+ date = self._internal_parse_date(
+ locale, 'year1' in values and values['year1'],
+ values['month1'], values['day1'])
if not date:
continue
dct['start_date'] = date
has_dates = True
if 'day2' not in values:
break
- date = self._internal_parse_date(locale,
- 'year2' in values and values['year2'],
- values['month2'], values['day2'])
+ date = self._internal_parse_date(
+ locale, 'year2' in values and values['year2'],
+ values['month2'], values['day2'])
if date:
dct['end_date'] = date
break
@@ -1092,13 +1174,14 @@ class HtmlXsltManager(ImportManager):
def add_dct_item(self, item):
if not self.importer_instance.default_localisation and \
- not "point" in item and not ("lat" in item and item['lat']):
+ "point" not in item and not ("lat" in item and item['lat']):
return
cls = None
- dct = {'origin':"<a href='%s'>%s</a>" % (item['link'],
- self.importer_instance.origin),
- 'license':self.importer_instance.license,
- 'name':item['name']}
+ dct = {
+ 'origin': "<a href='%s' target='_blank'>%s</a>" % (
+ item.get('link') or '#', self.importer_instance.origin),
+ 'license': self.importer_instance.license,
+ 'name': item['name']}
category = None
if 'category' in item and item['category']:
if item['category'] in self.key_categories:
@@ -1114,7 +1197,7 @@ class HtmlXsltManager(ImportManager):
item['lat'])
else:
dct['point'] = self.importer_instance.default_localisation
- dct['description'] = item['description']
+ dct['description'] = item.get('description', '')
if 'date' in item:
dct.update(self.parse_date(item['date']))
key = item['key']
@@ -1125,5 +1208,6 @@ class HtmlXsltManager(ImportManager):
if created:
self.new_item += 1
+
class XMLXsltManager(HtmlXsltManager):
PARSER = 'XMLParser'