#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2012-2017  Étienne Loks  <etienne.loks_AT_peacefrogsDOTnet>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# See the file COPYING for details.

"""
Utilitaries
"""

from copy import deepcopy
import csv
import collections
import datetime
import feedparser
import html
import io
import json
import os
from tidylib import tidy_document
import re
import tempfile
import urllib
import unicodedata
import zipfile

from osgeo import ogr, osr
from osmapi import OsmApi
from lxml import etree

from django.conf import settings
from django.contrib.gis.gdal import DataSource, OGRGeomType, check_err
from django.contrib.gis.geos import GEOSGeometry
from django.core.exceptions import ObjectDoesNotExist
from django.shortcuts import render_to_response
from django.utils.translation import ugettext_lazy as _

from chimere import get_version


def unicode_normalize(string):
    return ''.join(
        (c for c in unicodedata.normalize('NFD', string)
         if unicodedata.category(c) not in ('Mn', 'Sm', 'Sc')))


class ImportManager(object):
    """
    Generic class for specific importers
    """
    default_source = None

    def __init__(self, importer_instance):
        self.importer_instance = importer_instance
        if self.importer_instance.default_name:
            self.default_name = self.importer_instance.default_name
        else:
            self.default_name = " - ".join([
                cat.name
                for cat in self.importer_instance.categories.order_by(
                    'name').all()])

    def get(self):
        raise NotImplementedError

    def put(self, extra_args={}):
        raise NotImplementedError

    def create_or_update_item(self, cls, values, import_key, version=None,
                              key='', pk=None, category=None):
        from chimere.models import PropertyModel
        updated, created, item = False, False, None
        import_key = str(import_key).replace(':', '^')
        if not values.get('name'):
            values['name'] = self.default_name
        if not key:
            key = self.importer_instance.importer_type
        item = None
        pms = [pm["slug"] for pm in PropertyModel.objects.values('slug').all()]
        properties = {}
        for k in values.keys():
            if k in pms:
                properties[k] = values.pop(k)
        if import_key or pk:
            dct_import = {
                'import_key__icontains': '%s:%s;' % (key, import_key),
                'import_source': self.importer_instance.source}
            ref_item = cls.objects.filter(**dct_import)
            try:
                item = None
                if pk:
                    ref_item = cls.objects.get(pk=pk)
                else:
                    ref_item = cls.objects.filter(**dct_import)
                    if not ref_item.count():
                        raise ObjectDoesNotExist
                    ref_item = ref_item.all()[0]
                if version and ref_item.import_version == int(version):
                    # no update since the last import
                    return ref_item, None, None
                if not self.importer_instance.overwrite \
                   and ref_item.modified_since_import:
                    return ref_item, None, None
                else:
                    item = ref_item
                    for k in values:
                        if values[k]:
                            setattr(item, k, values[k])
                    try:
                        item.save()
                        # force the modified_since_import status
                        item.modified_since_import = False
                        item.save()
                    except TypeError:
                        # error on data source
                        return None, False, False
                    updated = True
            except ObjectDoesNotExist:
                pass
        if not item:
            if not self.importer_instance.get_description and \
               self.importer_instance.default_description:
                values['description'] = \
                    self.importer_instance.default_description
            values.update({
                'import_source': self.importer_instance.source})
            values['status'] = self.importer_instance.default_status
            item = cls.objects.create(**values)
            item.modified_since_import = False
            item.save()
            try:
                item = cls.objects.create(**values)
                item.modified_since_import = False
                item.save()
            except TypeError:
                # error on data source
                return None, False, False
            created = True
        if import_key:
            item.set_key(key, import_key)
        item.categories.clear()
        if category:
            item.categories.add(category)
        else:
            for cat in self.importer_instance.categories.all():
                item.categories.add(cat)
        for prop in properties:
            item.setProperty(prop, properties[prop])
        return item, updated, created

    @classmethod
    def get_files_inside_zip(cls, zippedfile, suffixes, dest_dir=None):
        try:
            flz = zipfile.ZipFile(zippedfile)
        except zipfile.BadZipfile:
            return [], _("Bad zip file")
        namelist = flz.namelist()
        filenames = []
        for suffix in suffixes:
            current_file_name = None
            for name in namelist:
                if name.endswith(suffix) \
                        or name.endswith(suffix.lower()) \
                        or name.endswith(suffix.upper()):
                    current_file_name = name
            filenames.append(current_file_name)
        files = []
        for filename in filenames:
            if filename:
                if dest_dir:
                    files.append(filename)
                    flz.extract(filename, dest_dir)
                else:
                    files.append(flz.open(filename))
            else:
                files.append(None)
        return files

    def get_source_file(self, suffixes, dest_dir=None,
                        extra_url=None):
        source = self.importer_instance.source_file
        try:
            source.read
        except ValueError:
            if not source:
                source = self.importer_instance.source \
                    if self.importer_instance.source else self.default_source
            try:
                url = source
                if extra_url:
                    url += extra_url
                remotehandle = urllib.request.urlopen(url)
                source = io.BytesIO(remotehandle.read())
                remotehandle.close()
            except ValueError:
                # assume it is a local file
                try:
                    source = open(source)
                except IOError as msg:
                    return (None, msg)
            except (urllib.error.URLError, AttributeError) as error:
                return (None, str(error))
        if self.importer_instance.zipped:
            try:
                files = self.get_files_inside_zip(
                    self.importer_instance.source_file or
                    self.importer_instance.source , suffixes, dest_dir)
            except zipfile.BadZipfile:
                return (None, _("Bad zip file"))
            if not files or None in files or [] in files:
                return (None,
                        _("Missing file(s) inside the zip file"))
            source = files[0] if len(suffixes) == 1 else files
        return (source, None)


class KMLManager(ImportManager):
    """
    KML importer
    The filtr argument has to be defined as the exact name of the folder to be
    imported
    """
    XPATH = '//kml:Folder/kml:name[text()="%s"]/../kml:Placemark'
    DEFAULT_XPATH = '//kml:Placemark'

    def __init__(self, importer_instance, ns=''):
        super(KMLManager, self).__init__(importer_instance)
        self.ns = ns

    def get(self):
        """
        Get data from a KML source

        Return a tuple with:
         - number of new item ;
         - number of item updated ;
         - error detail on error
        """
        from chimere.models import Marker, Route
        new_item, updated_item, msg = 0, 0, ''
        source, msg = self.get_source_file(['.kml'])
        if msg:
            return (0, 0, msg)
        doc = source
        # remove empty lines before declaration (bad XML file)
        if hasattr(source, 'getvalue'):
            splitted = source.getvalue().decode('utf-8').split('\n')
            for idx, line in enumerate(splitted):
                if line.strip():
                    break
            doc = io.BytesIO("\n".join(splitted[idx:]).encode('utf-8'))
        try:
            tree = etree.parse(doc)
        except:
            return (0, 0, _("Bad XML file"))
        # try to get default namespace
        if not self.ns:
            self.ns = tree.getroot().nsmap[None]
        xpath = self.XPATH % self.importer_instance.filtr \
            if self.importer_instance.filtr else self.DEFAULT_XPATH
        for placemark in tree.xpath(xpath,
                                    namespaces={'kml': self.ns}):
            name, point, line = None, None, None
            pl_id = placemark.attrib.get('id')
            pl_key = 'kml-%d' % self.importer_instance.pk
            ns = '{%s}' % self.ns
            description = ''
            for item in placemark:
                if item.tag == ns + 'name':
                    name = item.text
                    if not pl_id:
                        # if no ID is provided assume that name is a key
                        pl_id = name
                elif item.tag == ns + 'description':
                    if self.importer_instance.get_description:
                        description = item.text
                elif item.tag == ns + 'Point':
                    for coord in item:
                        if coord.tag == ns + 'coordinates':
                            x, y, z = coord.text.split(',')
                            point = 'SRID=4326;POINT(%s %s)' % (x, y)
                elif item.tag == ns + 'LineString':
                    for coord in item:
                        if coord.tag == ns + 'coordinates':
                            points = coord.text.replace('\n', ' ').split(' ')
                            points = ",".join([" ".join(p.split(',')[:2])
                                               for p in points if p])
                            line = 'SRID=4326;LINESTRING(%s)' % points
            cls = None
            dct = {'description': description,
                   'name': name,
                   'origin': self.importer_instance.origin,
                   'license': self.importer_instance.license}
            if point:
                dct['point'] = point
                cls = Marker
            if line:
                dct['route'] = line
                dct.pop('description')
                cls = Route
            if cls:
                item, updated, created = self.create_or_update_item(
                    cls, dct, pl_id, key=pl_key)
                if updated:
                    updated_item += 1
                if created:
                    new_item += 1
        return (new_item, updated_item, msg)

    @classmethod
    def export(cls, queryset):
        dct = {
            'name': settings.PROJECT_NAME,
            'description': str(datetime.date.today()),
            'locations': queryset.all()
        }
        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
                                     + '.kml')
        result = render_to_response('chimere/export.kml', dct)
        return filename, result


class ShapefileManager(ImportManager):
    """
    Shapefile importer
    """
    def get(self):
        """
        Get data from a Shapefile source

        Return a tuple with:
         - number of new item ;
         - number of item updated ;
         - error detail on error

        The filtr argument allow to specify match between the shapefile cols
        and the db. JSON format is used.
        """
        from chimere.models import Marker, Route, Polygon
        new_item, updated_item, msg = 0, 0, ''
        tmpdir = tempfile.mkdtemp()
        res = self.get_source_file(['.shp', '.dbf', '.prj', '.shx'],
                                            dest_dir=tmpdir)
        sources, msg = self.get_source_file(['.shp', '.dbf', '.prj', '.shx'],
                                            dest_dir=tmpdir)
        if msg:
            return (0, 0, msg)
        if not sources:
            return (0, 0, _("Error while reading the data source."))
        # get the srid
        srid = self.importer_instance.srid
        if not srid:
            prjfilename = tmpdir + os.sep + sources[2]
            try:
                from osgeo import osr
                with open(prjfilename, 'r') as prj_file:
                    prj_txt = prj_file.read()
                    srs = osr.SpatialReference()
                    srs.ImportFromESRI([prj_txt])
                    srs.AutoIdentifyEPSG()
                    srid = srs.GetAuthorityCode(None)
            except ImportError:
                pass
            if not srid:
                # try with the default projection
                srid = settings.CHIMERE_EPSG_DISPLAY_PROJECTION
                msg = _("SRID cannot be guessed. The default SRID (%s) has "
                        "been used.") % srid
                # If imported items are not well located "
                #        "ask your data provider for the SRID to use.") % srid
        shapefilename = tmpdir + os.sep + sources[0]
        ds = DataSource(shapefilename)
        lyr = ds[0]
        default_dct = {}
        filtr = self.importer_instance.filtr or {}
        if filtr:
            try:
                filtr = json.JSONDecoder().decode(self.importer_instance.filtr)
            except ValueError:
                return (
                    new_item, updated_item,
                    _("Bad configuration: filter must be a valid "
                      "JSON string"))
            for k in ('id',):
                if k not in filtr:
                    return (
                        new_item, updated_item,
                        _("The key \"%s\" is missing in the "
                          "filter.") % k)
            for k in filtr:
                try:
                    ids = lyr.get_fields(k)
                except:
                    return (
                        new_item, updated_item,
                        _("Config: {} is not an appropriate column name "
                          "for this Shapefile. Available columns "
                          " are: {}").format(k, ", ".join(
                              [j for j in lyr.fields])))
            default_dct = {'origin': self.importer_instance.origin,
                           'license': self.importer_instance.license}
            if 'prefix_name' in filtr:
                default_dct['name'] = filtr.pop('prefix_name')
            if 'prefix_description' in filtr:
                default_dct['description'] = filtr.pop('prefix_description')
        else:
            # if no filtr it is assumed that the first field is a
            # id name and the second field is the name
            id_name = lyr.fields[0] if len(lyr.fields) > 0 else None
            # test if id_name is well guess
            if id_name:
                ids = lyr.get_fields(id_name)
                if len(ids) != len(set(ids)):
                    id_name = None
            filtr['id'] = id_name
            if len(lyr.fields) > 1:
                filtr["name"] = lyr.fields[1]
            elif id_name:
                filtr["name"] = id_name

        if lyr.geom_type not in ('Point', 'LineString', 'Polygon'):
            return (0, 0, _("Type of geographic item (%s) of this shapefile "
                            "is not managed by Chimère.") % lyr.geom_type)
        geom_key = ''
        geom_cls = None
        if lyr.geom_type == 'Point':
            geom_key = 'point'
            geom_cls = Marker
        elif lyr.geom_type == 'Polygon':
            geom_key = 'polygon'
            geom_cls = Polygon
        else:
            geom_key = 'route'
            geom_cls = Route
        # indexes = []
        for idx, feat in enumerate(lyr):
            dct = default_dct.copy()
            for k in filtr:
                val = feat.get(k)
                try:
                    val = str(val)
                except UnicodeDecodeError:
                    try:
                        val = str(
                            val.decode(settings.CHIMERE_SHAPEFILE_ENCODING))
                    except:
                        continue
                if filtr[k] not in dct:
                    dct[filtr[k]] = ''
                dct[filtr[k]] += val
            try:
                geoms = [feat.geom.wkt]
            except:
                return (0, 0, _("Bad Shapefile"))
            if feat.geom.geom_type == 'MultiLineString':
                geoms = [geom.wkt for geom in feat.geom]
            import_key = dct.pop('id')
            for geom in geoms:
                dct[geom_key] = 'SRID=%s;%s' % (srid, geom)
                item, updated, created = self.create_or_update_item(
                    geom_cls, dct, import_key)
                if updated:
                    updated_item += 1
                if created:
                    new_item += 1
        # clean up
        tmpdirs = set()
        for src in sources:
            dirs = os.sep.join(src.split(os.sep)[:-1])
            if dirs:
                tmpdirs.add(tmpdir + os.sep + dirs)
            os.remove(tmpdir + os.sep + src)
        for dr in tmpdirs:
            os.removedirs(dr)
        return (new_item, updated_item, msg)

    @classmethod
    def export(cls, queryset):
        date = str(datetime.date.today())

        tmp = tempfile.NamedTemporaryFile(suffix='.shp', mode='w+b')
        tmp.close()

        tmp_name = tmp.name
        field_names = [field.name for field in queryset.model._meta.fields]
        geo_field = getattr(
            queryset.model,
            'point' if 'point' in field_names else 'route')._field

        dr = ogr.GetDriverByName('ESRI Shapefile')
        ds = dr.CreateDataSource(tmp_name)
        if ds is None:
            raise Exception(_('Could not create file!'))
        ogr_type = OGRGeomType(geo_field.geom_type).num
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(geo_field.srid)

        layer = ds.CreateLayer('lyr', srs=srs, geom_type=ogr_type)

        for field_name in ('name', 'category'):
            field_defn = ogr.FieldDefn(str(field_name), ogr.OFTString)
            field_defn.SetWidth(255)
            if layer.CreateField(field_defn) != 0:
                raise Exception(_('Failed to create field'))

        feature_def = layer.GetLayerDefn()

        for item in queryset:
            # duplicate items when in several categories
            q = item.categories
            if not q.count():
                categories = [None]
            else:
                categories = q.all()
            for category in categories:
                feat = ogr.Feature(feature_def)
                feat.SetField('name', str(unicode_normalize(item.name)[:80]))
                if category:
                    feat.SetField('category',
                                  str(unicode_normalize(category.name)[:80]))

                geom = getattr(item, geo_field.name)
                if not geom:
                    continue
                ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
                check_err(feat.SetGeometry(ogr_geom))
            check_err(layer.CreateFeature(feat))
        # Cleaning up
        ds.Destroy()

        # writing to a zip file
        filename = unicode_normalize(settings.PROJECT_NAME) + '-' + date
        buff = io.BytesIO()
        zip_file = zipfile.ZipFile(buff, 'w', zipfile.ZIP_DEFLATED)
        suffixes = ['shp', 'shx', 'prj', 'dbf']
        for suffix in suffixes:
            name = tmp_name.replace('.shp', '.' + suffix)
            arcname = '.'.join((filename, suffix))
            zip_file.write(name, arcname=arcname)
        zip_file.close()
        buff.flush()
        zip_stream = buff.getvalue()
        buff.close()
        return filename, zip_stream


class CSVManager(ImportManager):
    """
    CSV importer
    """
    @classmethod
    def set_categories(value):
        return

    # (label, getter, setter)
    COLS = [("Id", 'pk', 'pk'), (_("Name"), 'name', 'name'),
            (_("Categories"), lambda obj: ", ".join(
                [c.name for c in obj.categories.all()]), set_categories),
            (_("State"), 'status', lambda x: x),
            (_("Description"), 'description', 'description'),
            (_("Localisation"), 'geometry', 'geometry')]

    def get(self):
        """
        Get data from a CSV source

        Return a tuple with:
         - number of new item ;
         - number of item updated ;
         - error detail on error
        """
        from chimere.models import Marker, Route
        new_item, updated_item, msg = 0, 0, ''
        source, msg = self.get_source_file(['.csv'])
        if msg:
            return (0, 0, msg)
        reader = csv.reader(source, delimiter=';', quotechar='"')
        prop_cols = []
        for pm in Marker.all_properties():
            prop_cols.append((pm.name, pm.getAttrName(),
                              pm.getAttrName() + '_set'))
        cols = list(self.COLS) + prop_cols
        # datas = []
        for idx, row in enumerate(reader):
            if not idx:  # first row
                try:
                    assert(len(row) >= len(cols))
                except AssertionError:
                    return (0, 0, _("Invalid CSV format"))
                continue
            if len(row) < len(cols):
                continue
            # pk, name, cats, state = row[0], row[1], row[2], row[3]
            pk, name = row[0], row[1]
            geom = row[5]
            description = ''
            if self.importer_instance.get_description:
                description = row[4]
            COL_INDEX = 6
            dct = {'description': description,
                   'name': name,
                   'origin': self.importer_instance.origin,
                   'license': self.importer_instance.license}
            cls = None
            if 'POINT' in geom:
                cls = Marker
                dct['point'] = geom
            elif 'LINE' in geom:
                cls = Route
                dct['route'] = geom
            else:
                continue
            import_key = pk if pk else name.decode('utf-8')
            item, updated, created = self.create_or_update_item(
                cls, dct, import_key, pk=pk)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
            for idx, col in enumerate(cols[COL_INDEX:]):
                name, getter, setter_val = col
                setter = getattr(item, setter_val)
                val = row[idx + COL_INDEX]
                setter(item, val)
        return (new_item, updated_item, msg)

    @classmethod
    def export(cls, queryset):
        dct = {'description': str(datetime.date.today()), 'data': []}
        # cls_name = queryset.model.__name__.lower()
        cols = list(cls.COLS)
        for pm in queryset.model.all_properties():
            cols.append((pm.name, pm.getAttrName(), pm.getAttrName() + '_set'))
        header = [col[0] for col in cols]
        dct['data'].append(header)
        for item in queryset.all():
            data = []
            for (lbl, attr, setr) in cols:
                if callable(attr):
                    data.append(attr(item))
                else:
                    data.append(getattr(item, attr))
            dct['data'].append(data)
        filename = unicode_normalize(settings.PROJECT_NAME + dct['description']
                                     + '.csv')
        result = render_to_response('chimere/export.csv', dct)
        return filename, result


class GeoRSSManager(ImportManager):
    """
    RSS importer.
    This manager only gets and do not produce GeoRSSFeed
    """

    def get(self):
        """
        Get data from a GeoRSS simple source

        Return a tuple with:
         - number of new item ;
         - number of item updated ;
         - error detail on error
        """
        from chimere.models import Marker, Route
        new_item, updated_item, msg = 0, 0, ''
        feed = feedparser.parse(self.importer_instance.source)
        if feed['bozo'] and not isinstance(
                feed['bozo_exception'], feedparser.CharacterEncodingOverride):
            return (0, 0, _("RSS feed is not well formed"))
        # differ with feed parser version
        item_key = 'items'
        if 'entries' in feed:
            item_key = 'entries'
        for item in feed[item_key]:
            if 'where' not in item and "georss_point" not in item \
               and 'georss_line' not in item \
               and not ("geo_lat" in item and "geo_long" in item):
                continue
            cls = None
            dct = {'origin': self.importer_instance.origin,
                   'license': self.importer_instance.license}
            if "where" in item and 'coordinates' in item['where']:
                coord = item['where']['coordinates']
                if item['where']['type'] == 'Point':
                    cls = Marker
                    dct['point'] = 'SRID=4326;POINT(%s %s)' % (
                        coord[0], coord[1])
                elif item['where']['type'] == 'LineString':
                    cls = Route
                    dct['route'] = 'SRID=4326;LINESTRING(%s)' % (
                        ",".join(["{} {}".format(c[0], c[1]) for c in coord]))
                else:
                    continue
            elif 'georss_point' in item or "geo_lat" in item:
                cls = Marker
                if 'georss_point' in item:
                    try:
                        y, x = item['georss_point'].split(' ')
                    except ValueError:
                        continue
                else:
                    y = item['geo_lat']
                    x = item['geo_long']
                dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
            elif "georss_line" in item:
                cls = Route
                points = item['georss_line'].split(' ')
                reordered_points = []
                # lat, lon -> x, y
                for idx in range(int(len(points) / 2)):
                    reordered_points.append("%s %s" % (points[idx * 2 + 1],
                                                       points[idx * 2]))
                dct['route'] = 'SRID=4326;LINESTRING(%s)' % \
                    ",".join(reordered_points)
            else:
                continue
            if self.importer_instance.get_description:
                for k in ['description', 'summary', 'value']:
                    if k in item:
                        dct['description'] = item[k]
                        break

            dct['name'] = item['title']
            pl_id = item['id'] if 'id' in item else item['title']
            it, updated, created = self.create_or_update_item(cls, dct, pl_id)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
        return (new_item, updated_item, msg)


class JsonManager(ImportManager):
    """
    Json importer.
    This manager only gets and do not produce Json feed
    """

    def extract_dict_values(self, item, filtr):
        """
        Extract values from a dict.

        :param item: the source dictionary
        :param filtr: the filter, a dictionary that contains keys or dictionary,
        each dictionary is parsed for each values
        :return: an iterator giving tuple of final keys and values.

        example:
        item = {'comment': {'fr': "Commentaire", 'en': "Comment"},
                'latitude': 1.0, 'longitude': -1.0}
        filtr = {'comment': {'fr': "description"}, 'latitude': 'y',
                 'longitude': 'x'}
        print(list(extract_dict_values(item, filtr)))
        [("description", "Commentaire"), ("y", 1.0), ("x", -1.0)]
        """
        for k in filtr:
            if k not in item:
                continue
            if not isinstance(filtr[k], dict):
                yield filtr[k], item[k]
                continue
            for key, value in self.extract_dict_values(item[k], filtr[k]):
                yield key, value

    def get(self):
        """
        Get data from a json simple source

        Return a tuple with:
         - number of new item ;
         - number of item updated ;
         - error detail on error
        """
        from chimere.models import Marker
        new_item, updated_item, msg = 0, 0, ''
        source, msg = self.get_source_file(['.json'])
        if msg:
            return (0, 0, msg)

        vals = source.read().decode("utf-8").replace('\n', ' ')
        try:
            values = json.JSONDecoder(
                object_pairs_hook=collections.OrderedDict).decode(vals)
        except ValueError as e:
            return (new_item, updated_item,
                    _("JSON file is not well formed: ") + str(e))

        filtr = self.importer_instance.filtr
        # a left part before "{" indicate keys to be used to access to the
        # event list - separated by ";"
        left_part = filtr.split('{')[0]
        if left_part:
            filtr = filtr[len(left_part):]
            for key in left_part.split(';'):
                if key not in values:
                    return (
                        new_item, updated_item,
                        _("Bad filter configuration a key doesn't "
                          "match with json source: ") + key)
                values = values[key]

        # configuration in filtr
        try:
            filtr = json.JSONDecoder().decode(filtr)
        except ValueError:
            return (
                new_item, updated_item,
                _("Bad configuration: filter field must be a valid "
                  "JSON string"))

        # check that mandatory fields are available
        vls = []
        cvalues = filtr.copy()
        while cvalues:
            new_values = {}
            for idx, val in enumerate(cvalues.values()):
                if isinstance(val, dict):
                    for k in val:
                        new_values["{}-{}".format(idx, k)] = val[k]
                else:
                    vls.append(val)
            cvalues = new_values

        for k in ('name', 'id', 'description'):
            if k not in vls:
                return (
                    new_item, updated_item,
                    _("A key must be associated to \"%s\" in the "
                      "filter.") % k)

        default_dct = {'origin': self.importer_instance.origin,
                       'license': self.importer_instance.license,
                       'description': ""}
        if 'prefix_name' in filtr:
            default_dct['name'] = filtr.pop('prefix_name')
        if 'prefix_description' in filtr:
            default_dct['description'] = filtr.pop('prefix_description')
        if self.importer_instance.default_localisation:
            default_dct['point'] = self.importer_instance.default_localisation

        for item in values:
            dct = default_dct.copy()

            for key, value in self.extract_dict_values(item, filtr):
                """
                for k in filtr:
                """
                if key.startswith('prefix_') or key.startswith('suffix_'):
                    continue
                if key == 'external_image':
                    value = '<img src="{}">'.format(value)
                if key not in dct:
                    dct[key] = ""
                else:
                    if key == 'description':
                        dct[key] += "<br/>"
                    else:
                        dct[key] += " "
                dct[key] += str(value) if value else ""

            if 'point' in dct and isinstance(dct['point'], str):
                x, y = dct['point'].split(",")
                dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
            elif 'lat' in dct and dct['lat'] \
                    and 'lon' in dct and dct['lon']:
                dct['point'] = 'SRID=4326;POINT(%s %s)' % (dct.pop('lon'),
                                                           dct.pop('lat'))
            elif 'x' in dct and dct['x'] \
                    and 'y' in dct and dct['y']:
                dct['point'] = 'SRID=4326;POINT(%s %s)' % (dct['x'],
                                                           dct['y'])
            if not dct['point']:
                continue
            # manage prefixes and suffixes
            for k in filtr:
                if k.startswith('prefix_') or k.startswith('suffix_'):
                    pos = k.split('_')[0]
                    key = '_'.join(k.split('_')[1:])
                    if key in dct:
                        if pos == 'prefix':
                            dct[key] = filtr[k] + dct[key]
                        else:
                            dct[key] += filtr[k]

            if 'external_image' in dct:
                dct['description'] = \
                    dct.pop('external_image') + dct['description']

            cls = Marker
            pl_id = (dct.pop('id') if 'id' in dct else dct['name']) \
                + "-" + str(self.importer_instance.pk)

            it, updated, created = self.create_or_update_item(cls, dct, pl_id)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
        return new_item, updated_item, msg

RE_HOOK = re.compile('\[([^\]]*)\]')

# TODO: manage deleted item from OSM


class OSMManager(ImportManager):
    """
    OSM importer/exporter
    The source url is a path to an OSM file or a XAPI url
    The filtr argument is XAPI args or empty if it is an OSM file.
    """
    default_source = settings.CHIMERE_XAPI_URL

    def get(self):
        """
        Get data from the source

        Return a tuple with:
        - new items;
        - updated items;
        - error detail on error.
        """
        source, msg = self.get_source_file(
            ['.osm'], extra_url=self.importer_instance.filtr)
        if not source:
            return (0, 0, msg)

        tree = etree.parse(source)
        # only import node or ways
        if tree.xpath('count(//way)') and tree.xpath('count(//node)'):
            return self.import_ways(tree)
        elif tree.xpath('count(//node)'):
            return self.import_nodes(tree)
        return 0, 0, _("Nothing to import")

    def import_ways(self, tree):
        from chimere.models import Route
        msg, items, new_item, updated_item = "", [], 0, 0
        nodes = {}
        for node in tree.xpath('//node'):
            node_id = node.attrib.get('id')
            for item in node:
                k = item.attrib.get('k')
            if node_id:
                nodes[node_id] = '%s %s' % (node.get('lon'),
                                            node.get('lat'))
        for way in tree.xpath('//way'):
            name = None
            points = []
            node_id = way.attrib.get('id')
            version = way.attrib.get('version')
            for item in way:
                k = item.attrib.get('k')
                if k == 'name':
                    name = item.attrib.get('v')
                if item.tag == 'nd':
                    points.append(item.get('ref'))
            if not points:
                continue
            wkt = 'SRID=4326;LINESTRING(%s)' % ",".join(
                [nodes[point_id] for point_id in points if point_id in nodes])
            dct = {'route': wkt,
                   'name': name,
                   'origin': self.importer_instance.origin
                   or 'OpenStreetMap.org',
                   'license': self.importer_instance.license
                   or 'ODbL',
                   'import_version': version}
            item, updated, created = self.create_or_update_item(
                Route, dct, node_id, version)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
            items.append(item)
        return new_item, updated_item, msg

    def import_nodes(self, tree):
        from chimere.models import Marker
        msg, items, new_item, updated_item = "", [], 0, 0
        for node in tree.xpath('//node'):
            name = None
            node_id = node.attrib.get('id')
            if not node_id:
                continue
            version = node.attrib.get('version')
            for item in node:
                k = item.attrib.get('k')
                if k == 'name':
                    name = item.attrib.get('v')
            point = 'SRID=4326;POINT(%s %s)' % (node.get('lon'),
                                                node.get('lat'))
            dct = {'point': point,
                   'name': name,
                   'origin': self.importer_instance.origin
                   or 'OpenStreetMap.org',
                   'license': self.importer_instance.license
                   or 'ODbL',
                   'import_version': version}
            item, updated, created = self.create_or_update_item(
                Marker, dct, node_id, version)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
            items.append(item)
        return (new_item, updated_item, msg)

    def put(self, extra_args={}):
        # first of all: reimport in order to verify that no changes has been
        # made since the last import
        from chimere.models import Marker
        new_item, updated_item, msg = self.get()
        # check if import is possible
        if msg:
            return 0, msg
        if new_item:
            return 0, _("New items imported - validate them before exporting")
        if Marker.objects.filter(status='I').count():
            return 0, _("There are items from a former import not yet "
                        "validated - validate them before exporting")
        # start import
        api = settings.CHIMERE_OSM_API_URL
        username = settings.CHIMERE_OSM_USER
        password = settings.CHIMERE_OSM_PASSWORD
        if extra_args:
            try:
                api = extra_args['api']
                username = extra_args['username']
                password = extra_args['password']
            except KeyError:
                return 0, _("Bad params - programming error")
        username = username.encode('latin1')
        password = password.encode('latin1')
        api = OsmApi.OsmApi(api=api, username=username, password=password)
        api.ChangesetCreate({"comment": "Import from Chimère %s" %
                             get_version()})
        hooks = RE_HOOK.findall(self.importer_instance.filtr)
        if not hooks:
            hooks = RE_HOOK.findall(self.importer_instance.source)
            if not hooks:
                return 0, _("Bad param")
        tags = {}
        bbox = []
        for hook in hooks:
            key, value = hook.split('=')
            if '*' in value or '|' in key or '|' in value:
                continue
            if key == 'bbox':
                x1, y1, x2, y2 = [float(val) for val in value.split(',')]
                bbox = GEOSGeometry(
                    'POLYGON((%f %f,%f %f,%f %f,%f %f,%f %f))' % (
                        x1, y1, x2, y1, x2, y2, x1, y2, x1, y1), srid=4326)
                continue
            tags[key] = value
        if not tags:
            return 0, _("No non ambigious tag is defined in the XAPI request")
        if not bbox:
            return 0, _(
                "No bounding box is defined in the XAPI request."
                "If you are sure to manage the entire planet set the "
                "bounding box to -180,-90,180,90")
        default_dct = {'tag': tags,
                       'import_source': self.importer_instance.source}
        idx = -1
        for idx, item in enumerate(
                Marker.objects.filter(
                    status='A',
                    point__contained=bbox,
                    categories=self.importer_instance.categories.all(),
                    not_for_osm=False, modified_since_import=True,
                    route=None).all()):
            dct = default_dct.copy()
            dct.update({'lon': item.point.x,
                        'lat': item.point.y})
            dct['tag']['name'] = item.name
            node = None
            import_key = item.get_key('OSM')
            updated = False
            if import_key:
                try:
                    dct['id'] = import_key
                    dct['version'] = item.import_version
                    node = api.NodeUpdate(dct)
                    updated = True
                except OsmApi.ApiError as error:
                    if error.status == 404:
                        dct.pop('id')
                        dct.pop('version')
                        pass  # if the node doesn't exist it is created
                    else:
                        raise
            if not updated:
                node = api.NodeCreate(dct)
                item.set_key('OSM', node['id'])
            item.import_version = node['version']
            item.save()
        api.ChangesetClose()
        return idx + 1, None


import chardet
from html.parser import HTMLParser
from bs4 import BeautifulSoup


RE_CLEANS = ((re.compile('(\n)*|^( )*(\n)*( )*|( )*(\n)*( )*$'), ''),
             (re.compile(' ( )*'), ' '),
             (re.compile(r"""<a href=["'](?!https?)(.*)["']"""),
              '<a href="%(base_url)s\\1"'),
             )

from calendar import month_name, different_locale


def get_month_name(month_no, locale):
    with different_locale(locale):
        return month_name[month_no]

MONTH_NAMES = {locale: [get_month_name(no_month, locale + '.UTF-8')
               for no_month in range(1, 13)] for locale in ['fr_FR']}

UNI_MONTH_NAMES = {locale: [m for m in MONTH_NAMES[locale]]
                   for locale in MONTH_NAMES}

DATE_PARSINGS = {
    'fr_FR': [
        re.compile(r'(?P<day1>\d{1,2}) '
                   r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') '
                   r'(?P<year1>\d{4})?[^\d]*'
                   r'(?P<day2>\d{1,2}) '
                   r'(?P<month2>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') *'
                   r'(?P<year2>\d{4})?.*'),
        re.compile(r'(?P<day1>\d{1,2}) '
                   r'(?P<month1>' + '|'.join(UNI_MONTH_NAMES['fr_FR']) + ') * '
                   r'(?P<year1>\d{4})?')],
    'en': [
        re.compile(r'(?P<year1>\d{4})-'
                   r'(?P<month1>\d{2})-'
                   r'(?P<day1>\d{2})'
                   r'(?:T'
                   r'(?P<hour1>\d{2})?:'
                   r'(?P<minut1>\d{2})?:'
                   r'(?P<second1>\d{2})'
                   r')?.*'
                   r'(?P<year2>\d{4})-'
                   r'(?P<month2>\d{2})-'
                   r'(?P<day2>\d{2})'
                   r'(?:T'
                   r'(?P<hour2>\d{2})?:'
                   r'(?P<minut2>\d{2})?:'
                   r'(?P<second2>\d{2})'
                   r')?.*'),
        re.compile(r'(?P<year1>\d{4})-'
                   r'(?P<month1>\d{2})-'
                   r'(?P<day1>\d{2})'
                   r'(?:T'
                   r'(?P<hour1>\d{2})?:'
                   r'(?P<minut1>\d{2})?:'
                   r'(?P<second1>\d{2})'
                   r')?')],
}


def clean_field(value):
    return value.strip()


class HtmlXsltManager(ImportManager):
    PARSER = 'HTMLParser'

    def get(self):
        """
        Get data from the source

        Return a tuple with:
        - new items;
        - updated items;
        - error detail on error.
        """
        from chimere.models import Marker
        self.marker_cls = Marker
        try:
            main_page = urllib.request.urlopen(self.importer_instance.source,
                                               timeout=20)
            assert main_page.getcode() == 200
        except (urllib.error.URLError, AssertionError):
            return (0, 0, _("Source page is unreachable."))
        data = main_page.read()
        encoding = chardet.detect(data)
        data = data.decode(encoding['encoding'])

        if 'HTML' in self.PARSER:
            soup = BeautifulSoup(data)
            main_page = soup.prettify()
            # convert it to valid XHTML
            doc, errors = tidy_document(main_page)
            dom = etree.HTML(doc, getattr(etree, self.PARSER)())
        else:
            main_page = data
            dom = etree.XML(main_page.encode('utf-8'), getattr(
                etree, self.PARSER)())
        try:
            xslt = etree.parse(self.importer_instance.source_file)
            self.importer_instance.source_file.seek(0)
            transform = etree.XSLT(xslt)
        except (etree.XSLTParseError, etree.XMLSyntaxError, TypeError):
            return (0, 0, _("The source file is not a valid XSLT file."))
        newdom = transform(dom)
        items = []
        # load an alternate xslt file to apply to linked page
        transform_child = None
        if self.importer_instance.source_file_alt:
            try:
                alt_xslt = etree.parse(self.importer_instance.source_file_alt)
                self.importer_instance.source_file_alt.seek(0)
                transform_child = etree.XSLT(alt_xslt)
            except (etree.XSLTParseError, etree.XMLSyntaxError, TypeError):
                return (0, 0,
                        _("The alt source file is not a valid XSLT file."))
        base_url = "/".join(self.importer_instance.source.split('/')[:-1])
        base_url += "/"
        for item in newdom.getroot():
            c_item = {child.tag: clean_field(child.text)
                      for child in item.getchildren() if child.text}
            # try to have more information on the linked page
            if transform_child and 'link' in c_item:
                # not an absolute address
                if not c_item['link'].startswith('http://') and \
                   not c_item['link'].startswith('https://'):
                    c_item['link'] = base_url + c_item['link']
                try:
                    child_page = urllib.request.urlopen(c_item['link'])
                    assert child_page.getcode() == 200
                except (urllib.error.URLError, AssertionError):
                    # don't stop the export for a bad link
                    items.append(c_item)
                    continue
                data = child_page.read()
                encoding = chardet.detect(data)
                data = data.decode(encoding['encoding'])
                child_page = BeautifulSoup(data).prettify()
                child_dom = etree.HTML(child_page, etree.HTMLParser())
                extra_keys = transform_child(child_dom).getroot()
                if len(extra_keys):
                    c_item.update({extra.tag: etree.tostring(extra)
                                   for extra in extra_keys[0].getchildren()})
            items.append(c_item)
        # change relative link to full link, simplify, unescape HTML entities
        html_unescape = html.unescape
        for item in items:
            for k in item:
                val = item[k]
                if type(val) == bytes:
                    val = val.decode('utf-8')
                for r, replaced in RE_CLEANS:
                    val = re.sub(r, replaced % {'base_url': base_url}, val)
                item[k] = html_unescape(val)
        self.key_categories = self.importer_instance.get_key_category_dict()
        self.missing_cats = set()
        self.updated_item, self.new_item = 0, 0
        for item in items:
            self.add_dct_item(item)
        msg = ''
        if self.missing_cats:
            msg = _(
                "Names \"%s\" doesn't match existing categories. "
                "Modify the import to match theses names with categories.") %\
                ('", "'.join(self.missing_cats))
        return (self.new_item, self.updated_item, msg)

    @classmethod
    def _internal_parse_date(cls, locale, year, month, day):
        try:
            year = datetime.date.today().year if not year else int(year)
        except ValueError:
            return
        month = month.encode('utf-8')
        if locale in MONTH_NAMES and month in MONTH_NAMES[locale]:
            month = MONTH_NAMES[locale].index(month) + 1
        else:
            try:
                month = int(month)
            except ValueError:
                return
        try:
            day = int(day)
        except ValueError:
            return
        try:
            return datetime.date(year, month, day)
        except ValueError:
            return

    def parse_date(self, date):
        dct = {}
        has_dates = False
        if type(date) == bytes:
            date = date.decode('utf-8')
        for locale in DATE_PARSINGS:
            if has_dates:
                break
            for r in DATE_PARSINGS[locale]:
                if not date:
                    continue
                m = r.search(date)
                if not m:
                    continue
                values = m.groupdict()
                date = self._internal_parse_date(
                    locale, 'year1' in values and values['year1'],
                    values['month1'], values['day1'])
                if not date:
                    continue
                dct['start_date'] = date
                has_dates = True
                if 'day2' not in values:
                    break
                date = self._internal_parse_date(
                    locale, 'year2' in values and values['year2'],
                    values['month2'], values['day2'])
                if date:
                    dct['end_date'] = date
                break
        return dct

    def add_dct_item(self, item):
        if not self.importer_instance.default_localisation and \
                "point" not in item and not ("lat" in item and item['lat']):
            return
        cls = None
        origin = self.importer_instance.origin
        origin_lnk = item.get('link')
        # filter non relevant links
        if origin_lnk and origin_lnk.startswith('http'):
            origin = "<a href='%s' target='_blank'>%s</a>" % (
                origin_lnk, origin)
        dct = {
            'origin': origin,
            'license': self.importer_instance.license,
            'name': item['name']}
        category = None
        if 'category' in item and item['category']:
            if item['category'] in self.key_categories:
                category = self.key_categories[item['category']]
            else:
                self.missing_cats.add(item['category'])
        cls = self.marker_cls
        if 'point' in item:
            x, y = item['point'].split(",")
            dct['point'] = 'SRID=4326;POINT(%s %s)' % (x, y)
        elif 'lat' in item and item['lat']:
            dct['point'] = 'SRID=4326;POINT(%s %s)' % (item['lon'],
                                                       item['lat'])
        else:
            dct['point'] = self.importer_instance.default_localisation
        dct['description'] = item.get('description', '')
        if 'date' in item:
            dct.update(self.parse_date(item['date']))
        if "start_date" in item and item["start_date"]:
            dct['start_date'] = item["start_date"]
            if "end_date" in item and item["end_date"]:
                dct['end_date'] = item["end_date"]
        key = item['key']
        it, updated, created = self.create_or_update_item(cls, dct, key,
                                                          category=category)
        if updated:
            self.updated_item += 1
        if created:
            self.new_item += 1


class XMLXsltManager(HtmlXsltManager):
    PARSER = 'XMLParser'

import icalendar


class IcalManager(ImportManager):
    def get(self):
        """
        Get data from an icalendar source
        """
        from chimere.models import Marker
        new_item, updated_item, msg = 0, 0, ''
        source, msg = self.get_source_file([])
        if msg:
            return (0, 0, msg)

        data = source.read()
        try:
            cal = icalendar.Calendar.from_ical(data)
        except ValueError as e:
            return (new_item, updated_item,
                    _("Error on icalendar parsing: ") + str(e))

        default_dct = {'origin': self.importer_instance.origin,
                       'license': self.importer_instance.license}
        if self.importer_instance.default_localisation:
            default_dct['point'] = self.importer_instance.default_localisation

        for event in cal.walk('VEVENT'):
            dct = default_dct.copy()
            dct['name'] = event.get('SUMMARY', '')
            if dct['name']:
                dct['name'] = str(dct['name'])
            dct['description'] = event.get('DESCRIPTION', '')
            if dct['description']:
                dct['description'] = str(dct['description'])
            loc = event.get('LOCATION', None)
            if loc:
                dct['description'] += "<br/>{}".format(str(loc))
            url = event.get('URL', None)
            if url:
                dct['description'] += "<br/><a href='{}'>{}</a>".format(
                    str(url), str(_('Link')))
            dct['start_date'] = event.get('DTSTART', None)
            if dct['start_date']:
                dct['start_date'] = event.decoded('DTSTART')
            dct['end_date'] = event.get('DTEND', None)
            if dct['end_date']:
                dct['end_date'] = event.decoded('DTEND')
            point = event.get('GEO', None)
            if point:
                dct['point'] = 'SRID=4326;POINT(%s %s)' % (point.longitude,
                                                           point.latitude)

            if not dct.get('point', None):
                continue

            cls = Marker
            pl_id = event.get('UID', None)
            if not pl_id:
                pl_id = dct['name'] + "-" + str(self.importer_instance.pk)
            pl_id += "-" + str(self.importer_instance.pk)
            it, updated, created = self.create_or_update_item(cls, dct, pl_id)
            if updated:
                updated_item += 1
            if created:
                new_item += 1
        return (new_item, updated_item, msg)