summaryrefslogtreecommitdiff
path: root/chimere/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'chimere/utils.py')
-rw-r--r--chimere/utils.py370
1 files changed, 196 insertions, 174 deletions
diff --git a/chimere/utils.py b/chimere/utils.py
index c5c59e9..e017762 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -45,26 +45,30 @@ from django.utils.translation import ugettext_lazy as _
from chimere import get_version
from external_utils import OsmApi
+
def unicode_normalize(string):
if type(string) == str:
string = unicode(string.decode('utf-8'))
return ''.join(
(c for c in unicodedata.normalize('NFD', string)
- if unicodedata.category(c) != 'Mn'))
+ if unicodedata.category(c) != 'Mn'))
+
class ImportManager(object):
u"""
Generic class for specific importers
"""
default_source = None
+
def __init__(self, importer_instance):
self.importer_instance = importer_instance
if self.importer_instance.default_name:
self.default_name = self.importer_instance.default_name
else:
- self.default_name = " - ".join([cat.name
+ self.default_name = " - ".join([
+ cat.name
for cat in self.importer_instance.categories.order_by(
- 'name').all()])
+ 'name').all()])
def get(self):
raise NotImplementedError
@@ -83,8 +87,8 @@ class ImportManager(object):
item = None
if import_key or pk:
dct_import = {
- 'import_key__icontains':'%s:%s;' % (key, import_key),
- 'import_source':self.importer_instance.source}
+ 'import_key__icontains': '%s:%s;' % (key, import_key),
+ 'import_source': self.importer_instance.source}
ref_item = cls.objects.filter(**dct_import)
try:
item = None
@@ -121,12 +125,12 @@ class ImportManager(object):
if not self.importer_instance.get_description and \
self.importer_instance.default_description:
values['description'] = \
- self.importer_instance.default_description
+ self.importer_instance.default_description
values.update({
- 'import_source':self.importer_instance.source})
+ 'import_source': self.importer_instance.source})
values['status'] = self.importer_instance.default_status
if not self.importer_instance.associate_marker_to_way\
- and cls.__name__ == 'Route':
+ and cls.__name__ == 'Route':
values['has_associated_marker'] = False
try:
item = cls.objects.create(**values)
@@ -158,8 +162,8 @@ class ImportManager(object):
current_file_name = None
for name in namelist:
if name.endswith(suffix) \
- or name.endswith(suffix.lower()) \
- or name.endswith(suffix.upper()):
+ or name.endswith(suffix.lower()) \
+ or name.endswith(suffix.upper()):
current_file_name = name
filenames.append(current_file_name)
files = []
@@ -180,7 +184,7 @@ class ImportManager(object):
if not hasattr(source, 'read'):
if not source:
source = self.importer_instance.source \
- if self.importer_instance.source else self.default_source
+ if self.importer_instance.source else self.default_source
try:
url = source
if extra_url:
@@ -207,6 +211,7 @@ class ImportManager(object):
source = files[0] if len(suffixes) == 1 else files
return (source, None)
+
class KMLManager(ImportManager):
u"""
KML importer
@@ -215,6 +220,7 @@ class KMLManager(ImportManager):
"""
XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
DEFAULT_XPATH = '//kml:Placemark'
+
def __init__(self, importer_instance, ns=''):
super(KMLManager, self).__init__(importer_instance)
self.ns = ns
@@ -249,9 +255,9 @@ class KMLManager(ImportManager):
if not self.ns:
self.ns = tree.getroot().nsmap[None]
xpath = self.XPATH % self.importer_instance.filtr \
- if self.importer_instance.filtr else self.DEFAULT_XPATH
+ if self.importer_instance.filtr else self.DEFAULT_XPATH
for placemark in tree.xpath(xpath,
- namespaces={'kml':self.ns}):
+ namespaces={'kml': self.ns}):
name, point, line = None, None, None
pl_id = placemark.attrib.get('id')
pl_key = 'kml-%d' % self.importer_instance.pk
@@ -279,10 +285,10 @@ class KMLManager(ImportManager):
for p in points if p])
line = 'SRID=4326;LINESTRING(%s)' % points
cls = None
- dct = {'description':description,
- 'name':name,
- 'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license}
+ dct = {'description': description,
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
if point:
dct['point'] = point
cls = Marker
@@ -292,7 +298,7 @@ class KMLManager(ImportManager):
cls = Route
if cls:
item, updated, created = self.create_or_update_item(
- cls, dct, pl_id, key=pl_key)
+ cls, dct, pl_id, key=pl_key)
if updated:
updated_item += 1
if created:
@@ -301,15 +307,17 @@ class KMLManager(ImportManager):
@classmethod
def export(cls, queryset):
- dct = {'name':settings.PROJECT_NAME,
- 'description':unicode(datetime.date.today()),
- 'locations':queryset.all()
+ dct = {
+ 'name': settings.PROJECT_NAME,
+ 'description': unicode(datetime.date.today()),
+ 'locations': queryset.all()
}
- filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+ filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
+ '.kml')
result = render_to_response('chimere/export.kml', dct)
return filename, result
+
class ShapefileManager(ImportManager):
u"""
Shapefile importer
@@ -351,7 +359,7 @@ class ShapefileManager(ImportManager):
srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
msg = _(u"SRID cannot be guessed. The default SRID (%s) has "
u"been used.") % srid
- #If imported items are not well located "
+ # If imported items are not well located "
# u"ask your data provider for the SRID to use.") % srid
shapefilename = tmpdir + os.sep + sources[0]
ds = DataSource(shapefilename)
@@ -374,7 +382,7 @@ class ShapefileManager(ImportManager):
u"is not managed by Chimère.") % lyr.geom_type)
geom_key = 'point' if lyr.geom_type == 'Point' else 'route'
geom_cls = Marker if lyr.geom_type == 'Point' else Route
- indexes = []
+ # indexes = []
for idx, feat in enumerate(lyr):
name = unicode(idx)
if lbl_name:
@@ -384,7 +392,7 @@ class ShapefileManager(ImportManager):
except UnicodeDecodeError:
try:
name = unicode(
- name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
+ name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
except:
continue
try:
@@ -393,15 +401,17 @@ class ShapefileManager(ImportManager):
return (0, 0, _(u"Bad Shapefile"))
if feat.geom.geom_type == 'MultiLineString':
geoms = [geom.wkt for geom in feat.geom]
- import_key = feat.get(id_name) if id_name and len(geoms) == 1 else ''
+ import_key = feat.get(id_name) if id_name and len(geoms) == 1 \
+ else ''
for geom in geoms:
- dct = {geom_key:'SRID=%s;%s' % (srid, geom),
- 'name':name,
- 'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license
- }
+ dct = {
+ geom_key: 'SRID=%s;%s' % (srid, geom),
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license
+ }
item, updated, created = self.create_or_update_item(
- geom_cls, dct, import_key)
+ geom_cls, dct, import_key)
if updated:
updated_item += 1
if created:
@@ -426,8 +436,9 @@ class ShapefileManager(ImportManager):
tmp_name = tmp.name
field_names = [field.name for field in queryset.model._meta.fields]
- geo_field = getattr(queryset.model,
- 'point' if 'point' in field_names else 'route')._field
+ geo_field = getattr(
+ queryset.model,
+ 'point' if 'point' in field_names else 'route')._field
dr = ogr.GetDriverByName('ESRI Shapefile')
ds = dr.CreateDataSource(tmp_name)
@@ -453,7 +464,7 @@ class ShapefileManager(ImportManager):
feat = ogr.Feature(feature_def)
feat.SetField('name', str(unicode_normalize(item.name)[:80]))
feat.SetField('category',
- str(unicode_normalize(category.name)[:80]))
+ str(unicode_normalize(category.name)[:80]))
geom = getattr(item, geo_field.name)
if not geom:
@@ -479,6 +490,7 @@ class ShapefileManager(ImportManager):
buff.close()
return filename, zip_stream
+
class CSVManager(ImportManager):
u"""
CSV importer
@@ -489,9 +501,8 @@ class CSVManager(ImportManager):
# (label, getter, setter)
COLS = [("Id", 'pk', 'pk'), (_(u"Name"), 'name', 'name'),
- (_(u"Categories"), lambda obj:", ".join(
- [c.name for c in obj.categories.all()]),
- set_categories),
+ (_(u"Categories"), lambda obj: ", ".join(
+ [c.name for c in obj.categories.all()]), set_categories),
(_(u"State"), 'status', lambda x: x),
(_(u"Description"), 'description', 'description'),
(_(u"Localisation"), 'geometry', 'geometry')]
@@ -514,11 +525,11 @@ class CSVManager(ImportManager):
prop_cols = []
for pm in Marker.all_properties():
prop_cols.append((pm.name, pm.getAttrName(),
- pm.getAttrName()+'_set'))
+ pm.getAttrName() + '_set'))
cols = list(self.COLS) + prop_cols
- datas = []
+ # datas = []
for idx, row in enumerate(reader):
- if not idx: # first row
+ if not idx: # first row
try:
assert(len(row) >= len(cols))
except AssertionError:
@@ -526,16 +537,17 @@ class CSVManager(ImportManager):
continue
if len(row) < len(cols):
continue
- pk, name, cats, state = row[0], row[1], row[2], row[3]
+ # pk, name, cats, state = row[0], row[1], row[2], row[3]
+ pk, name = row[0], row[1]
geom = row[5]
description = ''
if self.importer_instance.get_description:
description = row[4]
COL_INDEX = 6
- dct = {'description':description,
- 'name':name,
- 'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license}
+ dct = {'description': description,
+ 'name': name,
+ 'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
cls = None
if 'POINT' in geom:
cls = Marker
@@ -546,8 +558,8 @@ class CSVManager(ImportManager):
else:
continue
import_key = pk if pk else name.decode('utf-8')
- item, updated, created = self.create_or_update_item(cls, dct,
- import_key, pk=pk)
+ item, updated, created = self.create_or_update_item(
+ cls, dct, import_key, pk=pk)
if updated:
updated_item += 1
if created:
@@ -555,17 +567,17 @@ class CSVManager(ImportManager):
for idx, col in enumerate(cols[COL_INDEX:]):
name, getter, setter_val = col
setter = getattr(item, setter_val)
- val = row[idx+COL_INDEX]
+ val = row[idx + COL_INDEX]
setter(item, val)
return (new_item, updated_item, msg)
@classmethod
def export(cls, queryset):
- dct = {'description':unicode(datetime.date.today()), 'data':[]}
- cls_name = queryset.model.__name__.lower()
+ dct = {'description': unicode(datetime.date.today()), 'data': []}
+ # cls_name = queryset.model.__name__.lower()
cols = list(cls.COLS)
for pm in queryset.model.all_properties():
- cols.append((pm.name, pm.getAttrName(), pm.getAttrName()+'_set'))
+ cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set'))
header = [col[0] for col in cols]
dct['data'].append(header)
for item in queryset.all():
@@ -576,11 +588,12 @@ class CSVManager(ImportManager):
else:
data.append(getattr(item, attr))
dct['data'].append(data)
- filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+ filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
+ '.csv')
result = render_to_response('chimere/export.csv', dct)
return filename, result
+
class GeoRSSManager(ImportManager):
u"""
RSS importer.
@@ -596,19 +609,19 @@ class GeoRSSManager(ImportManager):
- number of item updated ;
- error detail on error
"""
- from models import Marker
+ from models import Marker, Route
new_item, updated_item, msg = 0, 0, ''
feed = feedparser.parse(self.importer_instance.source)
- if feed['bozo'] and not isinstance(feed['bozo_exception'],
- feedparser.CharacterEncodingOverride):
+ if feed['bozo'] and not isinstance(
+ feed['bozo_exception'], feedparser.CharacterEncodingOverride):
return (0, 0, _(u"RSS feed is not well formed"))
for item in feed['items']:
if "georss_point" not in item and 'georss_line' not in item \
and not ("geo_lat" in item and "geo_long" in item):
continue
cls = None
- dct = {'origin':self.importer_instance.origin,
- 'license':self.importer_instance.license}
+ dct = {'origin': self.importer_instance.origin,
+ 'license': self.importer_instance.license}
if 'georss_point' in item or "geo_lat" in item:
cls = Marker
if 'georss_point' in item:
@@ -630,11 +643,11 @@ class GeoRSSManager(ImportManager):
points = item['georss_line'].split(' ')
reordered_points = []
# lat, lon -> x, y
- for idx in xrange(len(points)/2):
- reordered_points.append("%s %s" % (points[idx*2+1],
- points[idx*2]))
+ for idx in xrange(len(points) / 2):
+ reordered_points.append("%s %s" % (points[idx * 2 + 1],
+ points[idx * 2]))
dct['route'] = 'SRID=4326;LINESTRING(%s)' % \
- ",".join(reordered_points)
+ ",".join(reordered_points)
dct['name'] = item['title']
pl_id = item['id'] if 'id' in item else item['title']
@@ -649,6 +662,7 @@ RE_HOOK = re.compile('\[([^\]]*)\]')
# TODO: manage deleted item from OSM
+
class OSMManager(ImportManager):
u"""
OSM importer/exporter
@@ -666,8 +680,8 @@ class OSMManager(ImportManager):
- updated items;
- error detail on error.
"""
- source, msg = self.get_source_file(['.osm'],
- extra_url=self.importer_instance.filtr)
+ source, msg = self.get_source_file(
+ ['.osm'], extra_url=self.importer_instance.filtr)
if not source:
return (0, 0, msg)
@@ -680,8 +694,8 @@ class OSMManager(ImportManager):
return 0, 0, _(u"Nothing to import")
def import_ways(self, tree):
- from chimere.models import Marker, Route
- msg, items, new_item, updated_item = "", [], 0 , 0
+ from chimere.models import Route
+ msg, items, new_item, updated_item = "", [], 0, 0
nodes = {}
for node in tree.xpath('//node'):
node_id = node.attrib.get('id')
@@ -703,17 +717,17 @@ class OSMManager(ImportManager):
points.append(item.get('ref'))
if not points:
continue
- wkt = 'SRID=4326;LINESTRING(%s)' % ",".join([nodes[point_id]
- for point_id in points if point_id in nodes])
- dct = {'route':wkt,
- 'name':name,
- 'origin':self.importer_instance.origin \
- or u'OpenStreetMap.org',
- 'license':self.importer_instance.license \
- or u'ODbL',
- 'import_version':version}
+ wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(
+ [nodes[point_id] for point_id in points if point_id in nodes])
+ dct = {'route': wkt,
+ 'name': name,
+ 'origin': self.importer_instance.origin
+ or u'OpenStreetMap.org',
+ 'license': self.importer_instance.license
+ or u'ODbL',
+ 'import_version': version}
item, updated, created = self.create_or_update_item(
- Route, dct, node_id, version)
+ Route, dct, node_id, version)
if updated:
updated_item += 1
if created:
@@ -723,7 +737,7 @@ class OSMManager(ImportManager):
def import_nodes(self, tree):
from chimere.models import Marker
- msg, items, new_item, updated_item = "", [], 0 , 0
+ msg, items, new_item, updated_item = "", [], 0, 0
for node in tree.xpath('//node'):
name = None
node_id = node.attrib.get('id')
@@ -736,15 +750,15 @@ class OSMManager(ImportManager):
name = item.attrib.get('v')
point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
node.get('lat'))
- dct = {'point':point,
- 'name':name,
- 'origin':self.importer_instance.origin \
- or u'OpenStreetMap.org',
- 'license':self.importer_instance.license \
- or u'ODbL',
- 'import_version':version}
+ dct = {'point': point,
+ 'name': name,
+ 'origin': self.importer_instance.origin
+ or u'OpenStreetMap.org',
+ 'license': self.importer_instance.license
+ or u'ODbL',
+ 'import_version': version}
item, updated, created = self.create_or_update_item(
- Marker, dct, node_id, version)
+ Marker, dct, node_id, version)
if updated:
updated_item += 1
if created:
@@ -779,8 +793,8 @@ class OSMManager(ImportManager):
username = username.encode('latin1')
password = password.encode('latin1')
api = OsmApi.OsmApi(api=api, username=username, password=password)
- api.ChangesetCreate({u"comment": u"Import from Chimère %s" % \
- get_version()})
+ api.ChangesetCreate({u"comment": u"Import from Chimère %s" %
+ get_version()})
hooks = RE_HOOK.findall(self.importer_instance.filtr)
if not hooks:
hooks = RE_HOOK.findall(self.importer_instance.source)
@@ -794,28 +808,31 @@ class OSMManager(ImportManager):
continue
if key == 'bbox':
x1, y1, x2, y2 = [float(val) for val in value.split(',')]
- bbox = GEOSGeometry(
+ bbox = GEOSGeometry(
'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % (
- x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
+ x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
continue
tags[key] = value
if not tags:
return 0, _(u"No non ambigious tag is defined in the XAPI request")
if not bbox:
- return 0, _(u"No bounding box is defined in the XAPI request."\
- u"If you are sure to manage the entire planet set the bounding box"\
- u" to -180,-90,180,90")
- default_dct = {'tag':tags,
- 'import_source':self.importer_instance.source}
+ return 0, _(
+ u"No bounding box is defined in the XAPI request."
+ u"If you are sure to manage the entire planet set the "
+ u"bounding box to -180,-90,180,90")
+ default_dct = {'tag': tags,
+ 'import_source': self.importer_instance.source}
idx = -1
- for idx, item in enumerate(Marker.objects.filter(status='A',
- point__contained=bbox,
- categories=self.importer_instance.categories.all(),
- not_for_osm=False, modified_since_import=True,
- route=None).all()):
+ for idx, item in enumerate(
+ Marker.objects.filter(
+ status='A',
+ point__contained=bbox,
+ categories=self.importer_instance.categories.all(),
+ not_for_osm=False, modified_since_import=True,
+ route=None).all()):
dct = default_dct.copy()
- dct.update({'lon':item.point.x,
- 'lat':item.point.y})
+ dct.update({'lon': item.point.x,
+ 'lat': item.point.y})
dct['tag']['name'] = item.name
node = None
import_key = item.get_key('OSM')
@@ -830,7 +847,7 @@ class OSMManager(ImportManager):
if error.status == 404:
dct.pop('id')
dct.pop('version')
- pass # if the node doesn't exist it is created
+ pass # if the node doesn't exist it is created
else:
raise
if not updated:
@@ -839,20 +856,23 @@ class OSMManager(ImportManager):
item.import_version = node['version']
item.save()
api.ChangesetClose()
- return idx+1, None
+ return idx + 1, None
+
-import urllib2, chardet, HTMLParser
+import chardet
+import HTMLParser
from BeautifulSoup import BeautifulSoup
-from lxml import etree
+
RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''),
(re.compile(' ( )*'), ' '),
(re.compile(r"""<a href=["'](?!https?)(.*)["']"""),
- '<a href="%(base_url)s\\1"'),
+ '<a href="%(base_url)s\\1"'),
)
from calendar import TimeEncoding, month_name
+
def get_month_name(month_no, locale):
with TimeEncoding(locale) as encoding:
s = month_name[month_no]
@@ -860,62 +880,62 @@ def get_month_name(month_no, locale):
s = s.decode(encoding)
return s
-MONTH_NAMES = {locale:[get_month_name(no_month, locale+'.UTF-8')
- for no_month in xrange(1, 13)] for locale in ['fr_FR']}
+MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8')
+ for no_month in xrange(1, 13)] for locale in ['fr_FR']}
try:
- UNI_MONTH_NAMES = {locale:[m.decode('utf-8') for m in MONTH_NAMES[locale]]
- for locale in MONTH_NAMES}
+ UNI_MONTH_NAMES = {locale: [m.decode('utf-8') for m in MONTH_NAMES[locale]]
+ for locale in MONTH_NAMES}
except UnicodeEncodeError:
- UNI_MONTH_NAMES = {locale:[m for m in MONTH_NAMES[locale]]
- for locale in MONTH_NAMES}
-
-DATE_PARSINGS = {'fr_FR':[
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
- r'(?P<year1>\d{4})?[^\d]*'\
- r'(?P<day2>\d{1,2}) '\
- r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year2>\d{4})?.*'),
- re.compile(r'(?P<day1>\d{1,2}) '\
- r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
- r'(?P<year1>\d{4})?')
- ],
- 'en':[
- re.compile(r'(?P<year1>\d{4})-'\
- r'(?P<month1>\d{2})-'\
- r'(?P<day1>\d{2})'\
- r'(?:T'\
- r'(?P<hour1>\d{2})?:'\
- r'(?P<minut1>\d{2})?:'\
- r'(?P<second1>\d{2})'\
- r')?.*'\
- r'(?P<year2>\d{4})-'\
- r'(?P<month2>\d{2})-'\
- r'(?P<day2>\d{2})'\
- r'(?:T'\
- r'(?P<hour2>\d{2})?:'\
- r'(?P<minut2>\d{2})?:'\
- r'(?P<second2>\d{2})'\
- r')?.*'
- ),
- re.compile(r'(?P<year1>\d{4})-'\
- r'(?P<month1>\d{2})-'\
- r'(?P<day1>\d{2})'\
- r'(?:T'\
- r'(?P<hour1>\d{2})?:'\
- r'(?P<minut1>\d{2})?:'\
- r'(?P<second1>\d{2})'\
- r')?'
- )
- ],
- }
+ UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]]
+ for locale in MONTH_NAMES}
+
+DATE_PARSINGS = {
+ 'fr_FR': [
+ re.compile(r'(?P<day1>\d{1,2}) '
+ r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') '
+ r'(?P<year1>\d{4})?[^\d]*'
+ r'(?P<day2>\d{1,2}) '
+ r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
+ r'(?P<year2>\d{4})?.*'),
+ re.compile(r'(?P<day1>\d{1,2}) '
+ r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') * '
+ r'(?P<year1>\d{4})?')],
+ 'en': [
+ re.compile(r'(?P<year1>\d{4})-'
+ r'(?P<month1>\d{2})-'
+ r'(?P<day1>\d{2})'
+ r'(?:T'
+ r'(?P<hour1>\d{2})?:'
+ r'(?P<minut1>\d{2})?:'
+ r'(?P<second1>\d{2})'
+ r')?.*'
+ r'(?P<year2>\d{4})-'
+ r'(?P<month2>\d{2})-'
+ r'(?P<day2>\d{2})'
+ r'(?:T'
+ r'(?P<hour2>\d{2})?:'
+ r'(?P<minut2>\d{2})?:'
+ r'(?P<second2>\d{2})'
+ r')?.*'),
+ re.compile(r'(?P<year1>\d{4})-'
+ r'(?P<month1>\d{2})-'
+ r'(?P<day1>\d{2})'
+ r'(?:T'
+ r'(?P<hour1>\d{2})?:'
+ r'(?P<minut1>\d{2})?:'
+ r'(?P<second1>\d{2})'
+ r')?')],
+}
+
def clean_field(value):
return value.strip()
+
class HtmlXsltManager(ImportManager):
PARSER = 'HTMLParser'
+
def get(self):
u"""
Get data from the source
@@ -939,7 +959,7 @@ class HtmlXsltManager(ImportManager):
soup = BeautifulSoup(data)
main_page = soup.prettify()
# convert it to valid XHTML
- #doc, errors = tidy_document(main_page)
+ # doc, errors = tidy_document(main_page)
doc = main_page
dom = etree.HTML(doc, getattr(etree, self.PARSER)())
try:
@@ -963,8 +983,8 @@ class HtmlXsltManager(ImportManager):
base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
base_url += u"/"
for item in newdom.getroot():
- c_item = {child.tag:clean_field(child.text)
- for child in item.getchildren() if child.text}
+ c_item = {child.tag: clean_field(child.text)
+ for child in item.getchildren() if child.text}
# try to have more information on the linked page
if transform_child and 'link' in c_item:
# not an absolute address
@@ -985,8 +1005,8 @@ class HtmlXsltManager(ImportManager):
child_dom = etree.HTML(child_page, etree.HTMLParser())
extra_keys = transform_child(child_dom).getroot()
if len(extra_keys):
- c_item.update({extra.tag:etree.tostring(extra)
- for extra in extra_keys[0].getchildren()})
+ c_item.update({extra.tag: etree.tostring(extra)
+ for extra in extra_keys[0].getchildren()})
items.append(c_item)
# change relative link to full link, simplify, unescape HTML entities
html_unescape = HTMLParser.HTMLParser().unescape
@@ -994,7 +1014,7 @@ class HtmlXsltManager(ImportManager):
for k in item:
val = item[k]
for r, replaced in RE_CLEANS:
- val = re.sub(r, replaced % {'base_url':base_url}, val)
+ val = re.sub(r, replaced % {'base_url': base_url}, val)
item[k] = html_unescape(val)
self.key_categories = self.importer_instance.get_key_category_dict()
self.missing_cats = set()
@@ -1003,9 +1023,10 @@ class HtmlXsltManager(ImportManager):
self.add_dct_item(item)
msg = ''
if self.missing_cats:
- msg = _(u"Names \"%s\" doesn't match existing categories. "
- u"Modify the import to match theses names with categories.") % (
- u'", "'.join(self.missing_cats))
+ msg = _(
+ u"Names \"%s\" doesn't match existing categories. "
+ u"Modify the import to match theses names with categories.") %\
+ (u'", "'.join(self.missing_cats))
return (self.new_item, self.updated_item, msg)
@classmethod
@@ -1042,18 +1063,18 @@ class HtmlXsltManager(ImportManager):
if not m:
continue
values = m.groupdict()
- date = self._internal_parse_date(locale,
- 'year1' in values and values['year1'],
- values['month1'], values['day1'])
+ date = self._internal_parse_date(
+ locale, 'year1' in values and values['year1'],
+ values['month1'], values['day1'])
if not date:
continue
dct['start_date'] = date
has_dates = True
if 'day2' not in values:
break
- date = self._internal_parse_date(locale,
- 'year2' in values and values['year2'],
- values['month2'], values['day2'])
+ date = self._internal_parse_date(
+ locale, 'year2' in values and values['year2'],
+ values['month2'], values['day2'])
if date:
dct['end_date'] = date
break
@@ -1061,14 +1082,14 @@ class HtmlXsltManager(ImportManager):
def add_dct_item(self, item):
if not self.importer_instance.default_localisation and \
- not "point" in item and not ("lat" in item and item['lat']):
+ "point" not in item and not ("lat" in item and item['lat']):
return
cls = None
- dct = {'origin':"<a href='%s' target='_blank'>%s</a>" % (
- item.get('link') or '#',
- self.importer_instance.origin),
- 'license':self.importer_instance.license,
- 'name':item['name']}
+ dct = {
+ 'origin': "<a href='%s' target='_blank'>%s</a>" % (
+ item.get('link') or '#', self.importer_instance.origin),
+ 'license': self.importer_instance.license,
+ 'name': item['name']}
category = None
if 'category' in item and item['category']:
if item['category'] in self.key_categories:
@@ -1095,5 +1116,6 @@ class HtmlXsltManager(ImportManager):
if created:
self.new_item += 1
+
class XMLXsltManager(HtmlXsltManager):
PARSER = 'XMLParser'