| author | Étienne Loks <etienne.loks@proxience.com> | 2015-11-19 16:39:40 +0100 |
|---|---|---|
| committer | Étienne Loks <etienne.loks@proxience.com> | 2015-11-19 16:39:40 +0100 |
| commit | d2ca6b5ea37832ba7477b3d02425b83ca0938f49 (patch) | |
| tree | dd7ac96bc78afc59c29ab5da656faf02b9e05869 /chimere/utils.py | |
| parent | 29ac339c908a590af01e38daf3b072dc476fa2a0 (diff) | |
| download | Chimère-d2ca6b5ea37832ba7477b3d02425b83ca0938f49.tar.bz2 Chimère-d2ca6b5ea37832ba7477b3d02425b83ca0938f49.zip | |

Flake8
Diffstat (limited to 'chimere/utils.py')

| | | |
|---|---|---|
| -rw-r--r-- | chimere/utils.py | 370 |

1 file changed, 196 insertions, 174 deletions
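
The commit is a pure style cleanup: no behaviour changes, only PEP 8 / flake8 fixes (whitespace after colons in dict literals, two blank lines around top-level definitions, hanging indents for wrapped calls, comment spacing). As a rough illustration of the rules being applied (this is not code taken from `chimere/utils.py`; the names and values are invented for the example), the corrected patterns look like this:

```python
# Illustrative sketch only: the pycodestyle rules this changeset enforces,
# shown on toy values rather than on the real importer code.

name, origin = "Town hall", "OpenStreetMap.org"

# E231: a space is required after the ':' in dict literals,
# i.e. {'name': name} rather than {'name':name}.
dct = {'name': name, 'origin': origin}


# E302: two blank lines before a top-level definition, not one.
def describe(values):
    # E127/E128: wrapped arguments use a consistent hanging indent.
    return " - ".join(
        "%s=%s" % (key, value)
        for key, value in sorted(values.items()))


print(describe(dct))  # E261: at least two spaces before an inline comment.
```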
```diff
diff --git a/chimere/utils.py b/chimere/utils.py
index c5c59e9..e017762 100644
--- a/chimere/utils.py
+++ b/chimere/utils.py
@@ -45,26 +45,30 @@ from django.utils.translation import ugettext_lazy as _
 from chimere import get_version
 from external_utils import OsmApi
 
+
 def unicode_normalize(string):
     if type(string) == str:
         string = unicode(string.decode('utf-8'))
     return ''.join(
         (c for c in unicodedata.normalize('NFD', string)
-        if unicodedata.category(c) != 'Mn'))
+         if unicodedata.category(c) != 'Mn'))
+
 
 class ImportManager(object):
     u"""
     Generic class for specific importers
     """
     default_source = None
+
     def __init__(self, importer_instance):
         self.importer_instance = importer_instance
         if self.importer_instance.default_name:
             self.default_name = self.importer_instance.default_name
         else:
-            self.default_name = " - ".join([cat.name
+            self.default_name = " - ".join([
+                cat.name
                 for cat in self.importer_instance.categories.order_by(
-                'name').all()])
+                    'name').all()])
 
     def get(self):
         raise NotImplementedError
@@ -83,8 +87,8 @@ class ImportManager(object):
         item = None
         if import_key or pk:
             dct_import = {
-                'import_key__icontains':'%s:%s;' % (key, import_key),
-                'import_source':self.importer_instance.source}
+                'import_key__icontains': '%s:%s;' % (key, import_key),
+                'import_source': self.importer_instance.source}
             ref_item = cls.objects.filter(**dct_import)
             try:
                 item = None
@@ -121,12 +125,12 @@ class ImportManager(object):
         if not self.importer_instance.get_description and \
            self.importer_instance.default_description:
             values['description'] = \
-                    self.importer_instance.default_description
+                self.importer_instance.default_description
         values.update({
-            'import_source':self.importer_instance.source})
+            'import_source': self.importer_instance.source})
         values['status'] = self.importer_instance.default_status
         if not self.importer_instance.associate_marker_to_way\
-           and cls.__name__ == 'Route':
+                and cls.__name__ == 'Route':
             values['has_associated_marker'] = False
         try:
             item = cls.objects.create(**values)
@@ -158,8 +162,8 @@ class ImportManager(object):
             current_file_name = None
             for name in namelist:
                 if name.endswith(suffix) \
-                  or name.endswith(suffix.lower()) \
-                  or name.endswith(suffix.upper()):
+                   or name.endswith(suffix.lower()) \
+                   or name.endswith(suffix.upper()):
                     current_file_name = name
             filenames.append(current_file_name)
         files = []
@@ -180,7 +184,7 @@ class ImportManager(object):
         if not hasattr(source, 'read'):
             if not source:
                 source = self.importer_instance.source \
-                         if self.importer_instance.source else self.default_source
+                    if self.importer_instance.source else self.default_source
             try:
                 url = source
                 if extra_url:
@@ -207,6 +211,7 @@ class ImportManager(object):
         source = files[0] if len(suffixes) == 1 else files
         return (source, None)
 
+
 class KMLManager(ImportManager):
     u"""
     KML importer
@@ -215,6 +220,7 @@
     """
     XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
     DEFAULT_XPATH = '//kml:Placemark'
+
     def __init__(self, importer_instance, ns=''):
         super(KMLManager, self).__init__(importer_instance)
         self.ns = ns
@@ -249,9 +255,9 @@
         if not self.ns:
             self.ns = tree.getroot().nsmap[None]
         xpath = self.XPATH % self.importer_instance.filtr \
-                if self.importer_instance.filtr else self.DEFAULT_XPATH
+            if self.importer_instance.filtr else self.DEFAULT_XPATH
         for placemark in tree.xpath(xpath,
-                                    namespaces={'kml':self.ns}):
+                                    namespaces={'kml': self.ns}):
             name, point, line = None, None, None
             pl_id = placemark.attrib.get('id')
             pl_key = 'kml-%d' % self.importer_instance.pk
@@ -279,10 +285,10 @@
                     for p in points if p])
                 line = 'SRID=4326;LINESTRING(%s)' % points
             cls = None
-            dct = {'description':description,
-                   'name':name,
-                   'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license}
+            dct = {'description': description,
+                   'name': name,
+                   'origin': self.importer_instance.origin,
+                   'license': self.importer_instance.license}
             if point:
                 dct['point'] = point
                 cls = Marker
@@ -292,7 +298,7 @@
                 cls = Route
             if cls:
                 item, updated, created = self.create_or_update_item(
-                                             cls, dct, pl_id, key=pl_key)
+                    cls, dct, pl_id, key=pl_key)
                 if updated:
                     updated_item += 1
                 if created:
@@ -301,15 +307,17 @@
 
     @classmethod
     def export(cls, queryset):
-        dct = {'name':settings.PROJECT_NAME,
-               'description':unicode(datetime.date.today()),
-               'locations':queryset.all()
+        dct = {
+            'name': settings.PROJECT_NAME,
+            'description': unicode(datetime.date.today()),
+            'locations': queryset.all()
         }
-        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
                                      + '.kml')
         result = render_to_response('chimere/export.kml', dct)
         return filename, result
 
+
 class ShapefileManager(ImportManager):
     u"""
     Shapefile importer
@@ -351,7 +359,7 @@ class ShapefileManager(ImportManager):
             srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
             msg = _(u"SRID cannot be guessed. The default SRID (%s) has "
                     u"been used.") % srid
-            #If imported items are not well located "
+            # If imported items are not well located "
             # u"ask your data provider for the SRID to use.") % srid
         shapefilename = tmpdir + os.sep + sources[0]
         ds = DataSource(shapefilename)
@@ -374,7 +382,7 @@
                       u"is not managed by Chimère.") % lyr.geom_type)
         geom_key = 'point' if lyr.geom_type == 'Point' else 'route'
         geom_cls = Marker if lyr.geom_type == 'Point' else Route
-        indexes = []
+        # indexes = []
         for idx, feat in enumerate(lyr):
             name = unicode(idx)
             if lbl_name:
@@ -384,7 +392,7 @@
                 except UnicodeDecodeError:
                     try:
                         name = unicode(
-                              name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
+                            name.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
                     except:
                         continue
             try:
@@ -393,15 +401,17 @@
                 return (0, 0, _(u"Bad Shapefile"))
             if feat.geom.geom_type == 'MultiLineString':
                 geoms = [geom.wkt for geom in feat.geom]
-            import_key = feat.get(id_name) if id_name and len(geoms) == 1 else ''
+            import_key = feat.get(id_name) if id_name and len(geoms) == 1 \
+                else ''
             for geom in geoms:
-                dct = {geom_key:'SRID=%s;%s' % (srid, geom),
-                       'name':name,
-                       'origin':self.importer_instance.origin,
-                       'license':self.importer_instance.license
-                       }
+                dct = {
+                    geom_key: 'SRID=%s;%s' % (srid, geom),
+                    'name': name,
+                    'origin': self.importer_instance.origin,
+                    'license': self.importer_instance.license
+                }
                 item, updated, created = self.create_or_update_item(
-                                             geom_cls, dct, import_key)
+                    geom_cls, dct, import_key)
                 if updated:
                     updated_item += 1
                 if created:
@@ -426,8 +436,9 @@
         tmp_name = tmp.name
         field_names = [field.name for field in queryset.model._meta.fields]
-        geo_field = getattr(queryset.model,
-                   'point' if 'point' in field_names else 'route')._field
+        geo_field = getattr(
+            queryset.model,
+            'point' if 'point' in field_names else 'route')._field
 
         dr = ogr.GetDriverByName('ESRI Shapefile')
         ds = dr.CreateDataSource(tmp_name)
@@ -453,7 +464,7 @@ class ShapefileManager(ImportManager):
             feat = ogr.Feature(feature_def)
             feat.SetField('name', str(unicode_normalize(item.name)[:80]))
             feat.SetField('category',
-                           str(unicode_normalize(category.name)[:80]))
+                          str(unicode_normalize(category.name)[:80]))
 
             geom = getattr(item, geo_field.name)
             if not geom:
@@ -479,6 +490,7 @@
         buff.close()
         return filename, zip_stream
 
+
 class CSVManager(ImportManager):
     u"""
     CSV importer
@@ -489,9 +501,8 @@
     # (label, getter, setter)
     COLS = [("Id", 'pk', 'pk'),
             (_(u"Name"), 'name', 'name'),
-            (_(u"Categories"), lambda obj:", ".join(
-                [c.name for c in obj.categories.all()]),
-             set_categories),
+            (_(u"Categories"), lambda obj: ", ".join(
+                [c.name for c in obj.categories.all()]), set_categories),
             (_(u"State"), 'status', lambda x: x),
             (_(u"Description"), 'description', 'description'),
             (_(u"Localisation"), 'geometry', 'geometry')]
@@ -514,11 +525,11 @@
         prop_cols = []
         for pm in Marker.all_properties():
             prop_cols.append((pm.name, pm.getAttrName(),
-                              pm.getAttrName()+'_set'))
+                              pm.getAttrName() + '_set'))
         cols = list(self.COLS) + prop_cols
-        datas = []
+        # datas = []
         for idx, row in enumerate(reader):
-            if not idx: # first row
+            if not idx:  # first row
                 try:
                     assert(len(row) >= len(cols))
                 except AssertionError:
@@ -526,16 +537,17 @@
                     continue
             if len(row) < len(cols):
                 continue
-            pk, name, cats, state = row[0], row[1], row[2], row[3]
+            # pk, name, cats, state = row[0], row[1], row[2], row[3]
+            pk, name = row[0], row[1]
             geom = row[5]
             description = ''
             if self.importer_instance.get_description:
                 description = row[4]
             COL_INDEX = 6
-            dct = {'description':description,
-                   'name':name,
-                   'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license}
+            dct = {'description': description,
+                   'name': name,
+                   'origin': self.importer_instance.origin,
+                   'license': self.importer_instance.license}
             cls = None
             if 'POINT' in geom:
                 cls = Marker
@@ -546,8 +558,8 @@
             else:
                 continue
             import_key = pk if pk else name.decode('utf-8')
-            item, updated, created = self.create_or_update_item(cls, dct,
-                    import_key, pk=pk)
+            item, updated, created = self.create_or_update_item(
+                cls, dct, import_key, pk=pk)
             if updated:
                 updated_item += 1
             if created:
@@ -555,17 +567,17 @@
             for idx, col in enumerate(cols[COL_INDEX:]):
                 name, getter, setter_val = col
                 setter = getattr(item, setter_val)
-                val = row[idx+COL_INDEX]
+                val = row[idx + COL_INDEX]
                 setter(item, val)
         return (new_item, updated_item, msg)
 
     @classmethod
     def export(cls, queryset):
-        dct = {'description':unicode(datetime.date.today()), 'data':[]}
-        cls_name = queryset.model.__name__.lower()
+        dct = {'description': unicode(datetime.date.today()), 'data': []}
+        # cls_name = queryset.model.__name__.lower()
         cols = list(cls.COLS)
         for pm in queryset.model.all_properties():
-            cols.append((pm.name, pm.getAttrName(), pm.getAttrName()+'_set'))
+            cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set'))
         header = [col[0] for col in cols]
         dct['data'].append(header)
         for item in queryset.all():
@@ -576,11 +588,12 @@
             else:
                 data.append(getattr(item, attr))
             dct['data'].append(data)
-        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']\
+        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
                                      + '.csv')
         result = render_to_response('chimere/export.csv', dct)
         return filename, result
 
+
 class GeoRSSManager(ImportManager):
     u"""
     RSS importer.
@@ -596,19 +609,19 @@
         - number of item updated ;
         - error detail on error
         """
-        from models import Marker
+        from models import Marker, Route
         new_item, updated_item, msg = 0, 0, ''
         feed = feedparser.parse(self.importer_instance.source)
-        if feed['bozo'] and not isinstance(feed['bozo_exception'],
-                            feedparser.CharacterEncodingOverride):
+        if feed['bozo'] and not isinstance(
+                feed['bozo_exception'], feedparser.CharacterEncodingOverride):
             return (0, 0, _(u"RSS feed is not well formed"))
         for item in feed['items']:
             if "georss_point" not in item and 'georss_line' not in item \
               and not ("geo_lat" in item and "geo_long" in item):
                 continue
             cls = None
-            dct = {'origin':self.importer_instance.origin,
-                   'license':self.importer_instance.license}
+            dct = {'origin': self.importer_instance.origin,
+                   'license': self.importer_instance.license}
             if 'georss_point' in item or "geo_lat" in item:
                 cls = Marker
                 if 'georss_point' in item:
@@ -630,11 +643,11 @@
                 points = item['georss_line'].split(' ')
                 reordered_points = []
                 # lat, lon -> x, y
-                for idx in xrange(len(points)/2):
-                    reordered_points.append("%s %s" % (points[idx*2+1],
-                                                       points[idx*2]))
+                for idx in xrange(len(points) / 2):
+                    reordered_points.append("%s %s" % (points[idx * 2 + 1],
+                                                       points[idx * 2]))
                 dct['route'] = 'SRID=4326;LINESTRING(%s)' % \
-                               ",".join(reordered_points)
+                    ",".join(reordered_points)
             dct['name'] = item['title']
             pl_id = item['id'] if 'id' in item else item['title']
 
@@ -649,6 +662,7 @@
 RE_HOOK = re.compile('\[([^\]]*)\]')
 # TODO: manage deleted item from OSM
 
+
 class OSMManager(ImportManager):
     u"""
     OSM importer/exporter
@@ -666,8 +680,8 @@
         - updated items;
         - error detail on error.
         """
-        source, msg = self.get_source_file(['.osm'],
-                          extra_url=self.importer_instance.filtr)
+        source, msg = self.get_source_file(
+            ['.osm'], extra_url=self.importer_instance.filtr)
         if not source:
             return (0, 0, msg)
 
@@ -680,8 +694,8 @@
         return 0, 0, _(u"Nothing to import")
 
     def import_ways(self, tree):
-        from chimere.models import Marker, Route
-        msg, items, new_item, updated_item = "", [], 0 , 0
+        from chimere.models import Route
+        msg, items, new_item, updated_item = "", [], 0, 0
         nodes = {}
         for node in tree.xpath('//node'):
             node_id = node.attrib.get('id')
@@ -703,17 +717,17 @@
                     points.append(item.get('ref'))
             if not points:
                 continue
-            wkt = 'SRID=4326;LINESTRING(%s)' % ",".join([nodes[point_id]
-                for point_id in points if point_id in nodes])
-            dct = {'route':wkt,
-                   'name':name,
-                   'origin':self.importer_instance.origin \
-                            or u'OpenStreetMap.org',
-                   'license':self.importer_instance.license \
-                             or u'ODbL',
-                   'import_version':version}
+            wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(
+                [nodes[point_id] for point_id in points if point_id in nodes])
+            dct = {'route': wkt,
+                   'name': name,
+                   'origin': self.importer_instance.origin
+                   or u'OpenStreetMap.org',
+                   'license': self.importer_instance.license
+                   or u'ODbL',
+                   'import_version': version}
             item, updated, created = self.create_or_update_item(
-                                         Route, dct, node_id, version)
+                Route, dct, node_id, version)
             if updated:
                 updated_item += 1
             if created:
@@ -723,7 +737,7 @@
 
     def import_nodes(self, tree):
         from chimere.models import Marker
-        msg, items, new_item, updated_item = "", [], 0 , 0
+        msg, items, new_item, updated_item = "", [], 0, 0
         for node in tree.xpath('//node'):
             name = None
             node_id = node.attrib.get('id')
@@ -736,15 +750,15 @@
                     name = item.attrib.get('v')
             point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
                                                 node.get('lat'))
-            dct = {'point':point,
-                   'name':name,
-                   'origin':self.importer_instance.origin \
-                            or u'OpenStreetMap.org',
-                   'license':self.importer_instance.license \
-                             or u'ODbL',
-                   'import_version':version}
+            dct = {'point': point,
+                   'name': name,
+                   'origin': self.importer_instance.origin
+                   or u'OpenStreetMap.org',
+                   'license': self.importer_instance.license
+                   or u'ODbL',
+                   'import_version': version}
             item, updated, created = self.create_or_update_item(
-                                         Marker, dct, node_id, version)
+                Marker, dct, node_id, version)
             if updated:
                 updated_item += 1
             if created:
@@ -779,8 +793,8 @@
         username = username.encode('latin1')
         password = password.encode('latin1')
         api = OsmApi.OsmApi(api=api, username=username, password=password)
-        api.ChangesetCreate({u"comment": u"Import from Chimère %s" % \
-                                         get_version()})
+        api.ChangesetCreate({u"comment": u"Import from Chimère %s" %
+                            get_version()})
         hooks = RE_HOOK.findall(self.importer_instance.filtr)
         if not hooks:
             hooks = RE_HOOK.findall(self.importer_instance.source)
@@ -794,28 +808,31 @@
                 continue
             if key == 'bbox':
                 x1, y1, x2, y2 = [float(val) for val in value.split(',')]
-                bbox = GEOSGeometry(
+                bbox = GEOSGeometry(
                     'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % (
-                    x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
+                        x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
                 continue
             tags[key] = value
         if not tags:
             return 0, _(u"No non ambigious tag is defined in the XAPI request")
         if not bbox:
-            return 0, _(u"No bounding box is defined in the XAPI request."\
-                u"If you are sure to manage the entire planet set the bounding box"\
-                u" to -180,-90,180,90")
-        default_dct = {'tag':tags,
-                       'import_source':self.importer_instance.source}
+            return 0, _(
+                u"No bounding box is defined in the XAPI request."
+                u"If you are sure to manage the entire planet set the "
+                u"bounding box to -180,-90,180,90")
+        default_dct = {'tag': tags,
+                       'import_source': self.importer_instance.source}
         idx = -1
-        for idx, item in enumerate(Marker.objects.filter(status='A',
-                point__contained=bbox,
-                categories=self.importer_instance.categories.all(),
-                not_for_osm=False, modified_since_import=True,
-                route=None).all()):
+        for idx, item in enumerate(
+            Marker.objects.filter(
+                status='A',
+                point__contained=bbox,
+                categories=self.importer_instance.categories.all(),
+                not_for_osm=False, modified_since_import=True,
+                route=None).all()):
             dct = default_dct.copy()
-            dct.update({'lon':item.point.x,
-                        'lat':item.point.y})
+            dct.update({'lon': item.point.x,
+                        'lat': item.point.y})
             dct['tag']['name'] = item.name
             node = None
             import_key = item.get_key('OSM')
@@ -830,7 +847,7 @@
                     if error.status == 404:
                         dct.pop('id')
                         dct.pop('version')
-                        pass # if the node doesn't exist it is created
+                        pass  # if the node doesn't exist it is created
                     else:
                         raise
             if not updated:
@@ -839,20 +856,23 @@
                 item.import_version = node['version']
                 item.save()
         api.ChangesetClose()
-        return idx+1, None
+        return idx + 1, None
+
 
-import urllib2, chardet, HTMLParser
+import chardet
+import HTMLParser
 from BeautifulSoup import BeautifulSoup
-from lxml import etree
+
 
 RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''),
             (re.compile(' ( )*'), ' '),
             (re.compile(r"""<a href=["'](?!https?)(.*)["']"""),
-            '<a href="%(base_url)s\\1"'),
+             '<a href="%(base_url)s\\1"'),
            )
 
 from calendar import TimeEncoding, month_name
+
 
 def get_month_name(month_no, locale):
     with TimeEncoding(locale) as encoding:
         s = month_name[month_no]
@@ -860,62 +880,62 @@
         s = s.decode(encoding)
     return s
 
-MONTH_NAMES = {locale:[get_month_name(no_month, locale+'.UTF-8')
-                       for no_month in xrange(1, 13)] for locale in ['fr_FR']}
+MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8')
+                        for no_month in xrange(1, 13)] for locale in ['fr_FR']}
 try:
-    UNI_MONTH_NAMES = {locale:[m.decode('utf-8') for m in MONTH_NAMES[locale]]
-                       for locale in MONTH_NAMES}
+    UNI_MONTH_NAMES = {locale: [m.decode('utf-8') for m in MONTH_NAMES[locale]]
+                       for locale in MONTH_NAMES}
 except UnicodeEncodeError:
-    UNI_MONTH_NAMES = {locale:[m for m in MONTH_NAMES[locale]]
-                       for locale in MONTH_NAMES}
-
-DATE_PARSINGS = {'fr_FR':[
-    re.compile(r'(?P<day1>\d{1,2}) '\
-               r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') '\
-               r'(?P<year1>\d{4})?[^\d]*'\
-               r'(?P<day2>\d{1,2}) '\
-               r'(?P<month2>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-               r'(?P<year2>\d{4})?.*'),
-    re.compile(r'(?P<day1>\d{1,2}) '\
-               r'(?P<month1>'+ '|'.join(UNI_MONTH_NAMES['fr_FR']) +') *'\
-               r'(?P<year1>\d{4})?')
-    ],
-    'en':[
-    re.compile(r'(?P<year1>\d{4})-'\
-               r'(?P<month1>\d{2})-'\
-               r'(?P<day1>\d{2})'\
-               r'(?:T'\
-               r'(?P<hour1>\d{2})?:'\
-               r'(?P<minut1>\d{2})?:'\
-               r'(?P<second1>\d{2})'\
-               r')?.*'\
-               r'(?P<year2>\d{4})-'\
-               r'(?P<month2>\d{2})-'\
-               r'(?P<day2>\d{2})'\
-               r'(?:T'\
-               r'(?P<hour2>\d{2})?:'\
-               r'(?P<minut2>\d{2})?:'\
-               r'(?P<second2>\d{2})'\
-               r')?.*'
-               ),
-    re.compile(r'(?P<year1>\d{4})-'\
-               r'(?P<month1>\d{2})-'\
-               r'(?P<day1>\d{2})'\
-               r'(?:T'\
-               r'(?P<hour1>\d{2})?:'\
-               r'(?P<minut1>\d{2})?:'\
-               r'(?P<second1>\d{2})'\
-               r')?'
-               )
-    ],
-    }
+    UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]]
+                       for locale in MONTH_NAMES}
+
+DATE_PARSINGS = {
+    'fr_FR': [
+        re.compile(r'(?P<day1>\d{1,2}) '
+                   r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') '
+                   r'(?P<year1>\d{4})?[^\d]*'
+                   r'(?P<day2>\d{1,2}) '
+                   r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
+                   r'(?P<year2>\d{4})?.*'),
+        re.compile(r'(?P<day1>\d{1,2}) '
+                   r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') * '
+                   r'(?P<year1>\d{4})?')],
+    'en': [
+        re.compile(r'(?P<year1>\d{4})-'
+                   r'(?P<month1>\d{2})-'
+                   r'(?P<day1>\d{2})'
+                   r'(?:T'
+                   r'(?P<hour1>\d{2})?:'
+                   r'(?P<minut1>\d{2})?:'
+                   r'(?P<second1>\d{2})'
+                   r')?.*'
+                   r'(?P<year2>\d{4})-'
+                   r'(?P<month2>\d{2})-'
+                   r'(?P<day2>\d{2})'
+                   r'(?:T'
+                   r'(?P<hour2>\d{2})?:'
+                   r'(?P<minut2>\d{2})?:'
+                   r'(?P<second2>\d{2})'
+                   r')?.*'),
+        re.compile(r'(?P<year1>\d{4})-'
+                   r'(?P<month1>\d{2})-'
+                   r'(?P<day1>\d{2})'
+                   r'(?:T'
+                   r'(?P<hour1>\d{2})?:'
+                   r'(?P<minut1>\d{2})?:'
+                   r'(?P<second1>\d{2})'
+                   r')?')],
+}
+
 
 def clean_field(value):
     return value.strip()
 
+
 class HtmlXsltManager(ImportManager):
     PARSER = 'HTMLParser'
+
     def get(self):
         u"""
         Get data from the source
@@ -939,7 +959,7 @@
         soup = BeautifulSoup(data)
         main_page = soup.prettify()
         # convert it to valid XHTML
-        #doc, errors = tidy_document(main_page)
+        # doc, errors = tidy_document(main_page)
         doc = main_page
         dom = etree.HTML(doc, getattr(etree, self.PARSER)())
         try:
@@ -963,8 +983,8 @@
         base_url = u"/".join(self.importer_instance.source.split(u'/')[:-1])
         base_url += u"/"
         for item in newdom.getroot():
-            c_item = {child.tag:clean_field(child.text)
-                      for child in item.getchildren() if child.text}
+            c_item = {child.tag: clean_field(child.text)
+                      for child in item.getchildren() if child.text}
             # try to have more information on the linked page
            if transform_child and 'link' in c_item:
                 # not an absolute address
@@ -985,8 +1005,8 @@
                 child_dom = etree.HTML(child_page, etree.HTMLParser())
                 extra_keys = transform_child(child_dom).getroot()
                 if len(extra_keys):
-                    c_item.update({extra.tag:etree.tostring(extra)
-                            for extra in extra_keys[0].getchildren()})
+                    c_item.update({extra.tag: etree.tostring(extra)
+                                   for extra in extra_keys[0].getchildren()})
             items.append(c_item)
         # change relative link to full link, simplify, unescape HTML entities
         html_unescape = HTMLParser.HTMLParser().unescape
@@ -994,7 +1014,7 @@
             for k in item:
                 val = item[k]
                 for r, replaced in RE_CLEANS:
-                    val = re.sub(r, replaced % {'base_url':base_url}, val)
+                    val = re.sub(r, replaced % {'base_url': base_url}, val)
                 item[k] = html_unescape(val)
         self.key_categories = self.importer_instance.get_key_category_dict()
         self.missing_cats = set()
@@ -1003,9 +1023,10 @@
             self.add_dct_item(item)
         msg = ''
         if self.missing_cats:
-            msg = _(u"Names \"%s\" doesn't match existing categories. "
-                u"Modify the import to match theses names with categories.") % (
-                u'", "'.join(self.missing_cats))
+            msg = _(
+                u"Names \"%s\" doesn't match existing categories. "
+                u"Modify the import to match theses names with categories.") %\
+                (u'", "'.join(self.missing_cats))
         return (self.new_item, self.updated_item, msg)
 
     @classmethod
@@ -1042,18 +1063,18 @@
             if not m:
                 continue
             values = m.groupdict()
-            date = self._internal_parse_date(locale,
-                'year1' in values and values['year1'],
-                values['month1'], values['day1'])
+            date = self._internal_parse_date(
+                locale, 'year1' in values and values['year1'],
+                values['month1'], values['day1'])
             if not date:
                 continue
             dct['start_date'] = date
             has_dates = True
             if 'day2' not in values:
                 break
-            date = self._internal_parse_date(locale,
-                'year2' in values and values['year2'],
-                values['month2'], values['day2'])
+            date = self._internal_parse_date(
+                locale, 'year2' in values and values['year2'],
+                values['month2'], values['day2'])
             if date:
                 dct['end_date'] = date
             break
@@ -1061,14 +1082,14 @@
 
     def add_dct_item(self, item):
         if not self.importer_instance.default_localisation and \
-                not "point" in item and not ("lat" in item and item['lat']):
+                "point" not in item and not ("lat" in item and item['lat']):
             return
         cls = None
-        dct = {'origin':"<a href='%s' target='_blank'>%s</a>" % (
-                   item.get('link') or '#',
-                   self.importer_instance.origin),
-               'license':self.importer_instance.license,
-               'name':item['name']}
+        dct = {
+            'origin': "<a href='%s' target='_blank'>%s</a>" % (
+                item.get('link') or '#', self.importer_instance.origin),
+            'license': self.importer_instance.license,
+            'name': item['name']}
         category = None
         if 'category' in item and item['category']:
             if item['category'] in self.key_categories:
@@ -1095,5 +1116,6 @@
         if created:
             self.new_item += 1
 
+
 class XMLXsltManager(HtmlXsltManager):
     PARSER = 'XMLParser'
```