#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Management command that merges ``Document`` rows sharing identical images.

Images are compared by SHA-256 content hash. Documents whose attribute
values can be reconciled are merged into a single reference document; the
others are reported as conflicts. Two CSV reports (merged / conflicts) are
written to ``--output-path``.
"""
import csv
import datetime
import hashlib
import os
import sys

from django.core.management.base import BaseCommand

from ishtar_common.models import Document

# Chunk size (bytes) used when reading image files for hashing.
BLOCKSIZE = 65536


def get_hexdigest(filename):
    """Return the SHA-256 hex digest of *filename*, read in BLOCKSIZE chunks.

    :param filename: path of the file to hash.
    :raises IOError: if the file cannot be opened (missing image).
    """
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        buf = stream.read(BLOCKSIZE)
        while len(buf) > 0:
            digest.update(buf)
            buf = stream.read(BLOCKSIZE)
    return digest.hexdigest()


class Command(BaseCommand):
    help = 'Re-associate similar images in the database'

    def add_arguments(self, parser):
        parser.add_argument(
            '--merge-title', type=str, default='', dest='merged-title',
            help='If specified when title differs the given title will be '
                 'used.')
        parser.add_argument(
            '--output-path', type=str, default='', dest='output-path',
            help='Output path for results CSV files. Default to current '
                 'path.')
        parser.add_argument(
            '--ignore-reference', dest='ignore-reference',
            action='store_true',
            help='Ignore the reference on diff between models.')
        parser.add_argument(
            '--delete-missing', dest='delete-missing', action='store_true',
            default=False,
            help='Delete document with missing images.')
        parser.add_argument(
            '--quiet', dest='quiet', action='store_true',
            help='Quiet output.')

    @staticmethod
    def _write_csv(filename, header, rows):
        """Write a CSV report: one *header* row followed by *rows*.

        ``newline=''`` is required by the csv module to avoid blank
        interleaved lines on platforms with \\r\\n line endings.
        """
        with open(filename, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(header)
            writer.writerows(rows)

    def handle(self, *args, **options):
        """Hash all document images, optionally purge documents whose image
        file is missing, then merge documents sharing identical image bytes.

        Side effects: may delete ``Document`` rows, saves merged reference
        documents (history skipped) and writes ``<timestamp>-conflict.csv``
        / ``<timestamp>-merged.csv`` under ``--output-path``.
        """
        quiet = options['quiet']
        ignore_ref = options['ignore-reference']
        delete_missing = options['delete-missing']
        merged_title = options['merged-title']
        output_path = options['output-path']
        out = sys.stdout

        q = Document.objects.filter(image__isnull=False).exclude(image='')
        hashes = {}  # hex digest -> list of Document pks with that image
        missing_images = []  # pks whose image file could not be read
        count = q.count()
        if not quiet:
            out.write("* {} images\n".format(count))
        for idx, doc in enumerate(q.all()):
            if not quiet:
                out.write("\r* hashes calculation: {} %".format(
                    int(float(idx + 1) / count * 100)))
                out.flush()
            try:
                hexdigest = get_hexdigest(doc.image.path)
            except IOError:
                missing_images.append(doc.pk)
                continue
            hashes.setdefault(hexdigest, []).append(doc.pk)

        nb_missing_images = len(missing_images)
        if not quiet:
            out.write("\n* {} missing images\n".format(nb_missing_images))
        if missing_images and delete_missing:
            for nb, pk in enumerate(missing_images):
                if not quiet:
                    out.write(
                        "\r* delete document with missing images: "
                        "{} %".format(
                            int(float(nb + 1) / nb_missing_images * 100)))
                    out.flush()
                Document.objects.get(pk=pk).delete()
            if not quiet:
                out.write("\n")

        # Attributes compared (and copied) during a merge. A differing
        # non-empty value on both sides is a conflict, except 'title' when
        # --merge-title is given.
        attributes = [
            'title', 'associated_file', 'internal_reference', 'source_type',
            'support_type', 'format_type', 'scale', 'authors_raw',
            'associated_url', 'receipt_date', 'creation_date',
            'receipt_date_in_documentation', 'item_number', 'description',
            'comment', 'additional_information', 'duplicate'
        ]
        if not ignore_ref:
            attributes.append('reference')
        m2ms = ['authors', 'licenses']
        nb_conflicted_items = 0
        nb_merged_items = 0
        distinct_image = 0
        conflicts = []
        merged = []
        count = len(hashes)
        for idx, digest in enumerate(hashes):
            if not quiet:
                out.write("\r* merge similar images: {} %".format(
                    int(float(idx + 1) / count * 100)))
                out.flush()
            pks = hashes[digest]
            if len(pks) < 2:
                distinct_image += 1
                continue
            items = [Document.objects.get(pk=pk) for pk in pks]
            ref_item = items[0]
            for item in items[1:]:
                # Re-fetch the reference document: discards unsaved
                # attribute changes left over from a conflicted pass.
                ref_item = Document.objects.get(pk=ref_item.pk)
                conflicted_values = []
                for attr in attributes:
                    ref_value = getattr(ref_item, attr)
                    other_value = getattr(item, attr)
                    if not other_value:
                        continue
                    if not ref_value:
                        # Reference side empty: copy the other value over.
                        setattr(ref_item, attr, other_value)
                    elif other_value != ref_value:
                        if attr == 'title' and merged_title:
                            setattr(ref_item, 'title', merged_title)
                        else:
                            conflicted_values.append(
                                (attr, ref_value, other_value)
                            )
                base_csv = [
                    ref_item.pk,
                    ref_item.reference or "",
                    ref_item.cache_related_label or "",
                    ref_item.image.name,
                    item.pk,
                    item.reference or "",
                    item.cache_related_label or "",
                    item.image.name,
                ]
                if conflicted_values:
                    # Irreconcilable values: report, keep both documents.
                    nb_conflicted_items += 1
                    for attr, ref_value, other_value in conflicted_values:
                        conflicts.append(base_csv + [
                            attr, str(ref_value), str(other_value)
                        ])
                    continue
                merged.append(base_csv)
                # Copy missing many-to-many links onto the reference item.
                for m2m in m2ms:
                    for m2 in getattr(item, m2m).all():
                        if m2 not in getattr(ref_item, m2m).all():
                            getattr(ref_item, m2m).add(m2)
                # Re-attach related model links missing on the reference.
                for rel_attr in Document.RELATED_MODELS:
                    ref_rel_items = [
                        r.pk for r in getattr(ref_item, rel_attr).all()]
                    for rel_item in getattr(item, rel_attr).all():
                        if rel_item.pk not in ref_rel_items:
                            getattr(ref_item, rel_attr).add(rel_item)
                ref_item.skip_history_when_saving = True
                ref_item.save()
                item.delete()
                nb_merged_items += 1
        if not quiet:
            out.write("\n")

        # Filesystem-safe timestamp (':' is invalid on some systems).
        n = datetime.datetime.now().isoformat().split('.')[0].replace(
            ':', '-')
        base_header = [
            "Document 1 - pk", "Document 1 - Ref",
            "Document 1 - related", "Document 1 - image path",
            "Document 2 - pk", "Document 2 - Ref",
            "Document 2 - related", "Document 2 - image path",
        ]
        if conflicts:
            filename = os.path.join(
                output_path, "{}-conflict.csv".format(n))
            self._write_csv(
                filename,
                base_header + [
                    "Attribute", "Document 1 - value", "Document 2 - value"
                ],
                conflicts)
            if not quiet:
                out.write("* {} conflicted items ({})\n".format(
                    nb_conflicted_items, filename))
        if merged:
            filename = os.path.join(output_path, "{}-merged.csv".format(n))
            self._write_csv(filename, base_header, merged)
            if not quiet:
                out.write("* {} merged items ({})\n".format(nb_merged_items,
                                                            filename))
        if not quiet:
            out.write("* {} distinct images\n".format(distinct_image))