diff options
Diffstat (limited to 'archaeological_operations/import_from_dbf.py')
| -rw-r--r-- | archaeological_operations/import_from_dbf.py | 402 | 
1 files changed, 402 insertions, 0 deletions
| diff --git a/archaeological_operations/import_from_dbf.py b/archaeological_operations/import_from_dbf.py new file mode 100644 index 000000000..d0f55cdbe --- /dev/null +++ b/archaeological_operations/import_from_dbf.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2013  Étienne Loks  <etienne.loks_AT_peacefrogsDOTnet> + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. + +# See the file COPYING for details. 
+ +""" +Utils: import archaelogical operation from a DBF file +""" + +from __future__ import unicode_literals + +import datetime +import dbf +import re + +from django.contrib.auth.models import User +from django.db import transaction + +from archaeological_operations.import_from_csv import parse_operationtype, \ +                    parse_multivalue, parse_person as _parse_person, parse_date +from archaeological_operations.models import Operation, OperationType, Period, \ +                                             AdministrativeAct, ActType + +def parse_person(surname, name, owner): +    return _parse_person(surname, name, None, owner) + +def parse_ha(value): +    try: +        val = float(value) +    except ValueError: +        val = 0 +    return val * 10000 + +period_types = { +    '':'not_yet_documented', +    'IND':'indetermined', +    'CON': 'contemporan', +    'MOD': 'modern', +    'REC': 'recent_times', +    'BMA': 'low_middle_age', +    'MAC': 'classic_middle_age', +    'HMA': 'high_middle_age', +    'MA' : 'middle_age', +    'MED': 'middle_age', +    'BAS': 'low_empire', +    'HAU': 'high-empire', +    'NRE': 'republic', +    'GAL': 'gallo-roman', +    'FE2': 'second_iron_age', +    'FE1': 'first_iron_age', +    'BRF': 'final_bronze_age', +    'BRM': 'middle_bronze_age', +    'BRA': 'old_bronze_age', +    'FER': 'iron_age', +    'BRO': 'bronze_age', +    'PRO': 'protohistory', +    'NEF': 'final_neolithic', +    'NER': 'recent_neolithic', +    'NEM': 'middle_neolithic', +    'NEA': 'old_neolithic', +    'NEO': 'neolithic', +    'MER': 'recent_mesolithic', +    'MEM': 'middle_mesolithic', +    'MEA': 'old_mesolithic', +    'MES': 'mesolithic', +    'PAF': 'final_paleolithic', +    'PAS': 'late_paleolithic', +    'PAM': 'middle_paleolithic', +    'PAA': 'ancien_paleolithic', +    'PAL': 'paleolithic' +} + +_CACHED_PERIOD_TYPES = {} + +def _prepare_period_types(): +    for k in period_types.keys(): +        _CACHED_PERIOD_TYPES[k] = 
Period.objects.get(txt_idx=period_types[k]) + +_period_re_filter = re.compile('^EUR') +_period2_re_filter = re.compile('-*$') + +def parse_period(value): +    value = _period_re_filter.sub('', value) +    value = _period2_re_filter.sub('', value) +    if value not in _CACHED_PERIOD_TYPES.keys(): +        value = '' +    return _CACHED_PERIOD_TYPES[value] + +ope_types = { +    'AET':('other_study', +           'Autre étude', True), +    'APP':('assistance_preparation_help', +           'Aide à la préparation de publication', True), +    'DOC':('documents_study', +           'Étude documentaire', True), +    'EV':('evaluation', +          "Fouille d'évaluation", True), +    'FOU':('ancient_excavation', +           "Fouille ancienne", True), +    'FP':('prog_excavation', +          "Fouille programmée", False), +    'MH':('building_study', "Fouille avant MH", True), +    'OPD':('diag_prev_excavation', +           "Opération préventive de diagnostic", True), +    'PAN':('analysis_program', +           "Programme d'analyses", False), +    'PCR':('collective_research_project', +           "Projet collectif de recherche", False), +    'PMS':('specialized_eqp_prospection', +           "Prospection avec matériel spécialisé", False), +    'PRD':('diachronic_prospection', +           "Prospection diachronique", False), +    'PI':('diachronic_prospection', +           "Prospection diachronique", False), +    'PRM':('metal_detector_prospection', +           "Prospection détecteur de métaux", False), +    'PRT':('thematic_prospection', +           "Prospection thématique", False), +    'PT':('thematic_prospection', +           "Prospection thématique", False), +    'RAR':('cave_art_record', +           "Relevé d'art rupestre", False), +    'SD':('sampling_research', +          "Sondage", False), +    'SP':('prev_excavation', +          "Fouille préventive", True), +    'SU':('emergency_excavation', +          "Fouille préventive d'urgence", True), +} + +_CACHED_OPE_TYPES = {} + 
def _prepare_ope_types():
    """Get or create every OperationType and cache it by its DBF code.

    Must be called before parse_patriarche_operationtype() is used.
    """
    for k in ope_types.keys():
        txt_idx, label, preventive = ope_types[k]
        ot, created = OperationType.objects.get_or_create(txt_idx=txt_idx,
                              defaults={'label':label, 'preventive':preventive})
        if k not in _CACHED_OPE_TYPES.keys():
            _CACHED_OPE_TYPES[k] = ot

def parse_patriarche_operationtype(value):
    """Return the cached OperationType for a DBF code, or None if unknown."""
    if value not in _CACHED_OPE_TYPES.keys():
        return None
    return _CACHED_OPE_TYPES[value]

# Strip a leading department prefix such as "(35) " from operation names
_dpt_re_filter = re.compile('^\([0-9]*\) ')

def parse_ope_name(value):
    """Clean an operation name: strip spaces, drop literal "null" values
    and remove the leading "(<department>) " prefix."""
    if not value:
        return ''
    value = value.strip()
    if value.lower() == 'null':
        return ''
    value = _dpt_re_filter.sub('', value)
    return value

class Column:
    """Describe how one DBF column maps onto Operation attributes.

    col_models: tuple of (possibly nested) attribute names the parsed
        value is stored under.
    format: a callable, or the *name* (string) of a module-level parser
        resolved later through globals().
    associated_cols: indexes of extra DBF columns passed as additional
        parser arguments (an empty list behaves like None).
    multi: True when the target attribute is a many-to-many field.
    """
    def __init__(self, col_models, format, associated_cols=None, multi=False):
        self.col_models, self.format = col_models, format
        self.associated_cols, self.multi = associated_cols, multi


# Column layout of the "Patriarche" DBF export; empty lists mark DBF
# columns that are deliberately ignored (original column noted aside).
PATRIARCHE_DBF_OPE_COLS = [
    Column(('operation_type',), 'parse_patriarche_operationtype'),
    Column(('common_name',), 'parse_ope_name'),
    [],
    Column(('in_charge',), 'parse_person', [2]),
    [],  # 'etat'
    Column(('comment',), unicode),  #'adresse'
    [],  #'origine C(3)'
    Column(('periods',), 'parse_period', multi=True),
    [],  #'programme C(254)'
    [],  # 'rattach_pc C(254)'
    [],  # 'code_dossi N(8,0)'
    Column(('administrative_act', 'ref_sra'), unicode),
    Column(('administrative_act', 'signature_date'), parse_date),
    Column(('start_date',), parse_date),
    Column(('end_date',), parse_date),
    Column(('year',), int, []),
    [],  # 'identifica C(254)'
    Column(('code_patriarche',), int),
    [],  # 'x_degre N(16,6)'),
    [],  # 'y_degre N(16,6)'),
    [],  # 'x_saisi C(12)'),
    [],  # 'y_saisi C(12)'),
    [],  # 'georeferen C(3)'),
    [],  # 'geometrie C(3)'),
    Column(('surface',), parse_ha)
]

DBF_OPE_COLS = PATRIARCHE_DBF_OPE_COLS

def import_from_dbf(filename, update=False, col_defs=DBF_OPE_COLS,
                    person=None, stdout=None):
    """
    Import from a DBF file.
    Return number of operation treated and errors.
    """
    try:
        table =  dbf.Table(filename)
    except (dbf.DbfError, TypeError):
        return 0, ["Incorrect DBF file."]

    new_ops, errors = import_operations_dbf(table, col_defs=col_defs,
                                    update=update, person=person, stdout=stdout)
    return new_ops, errors

# Error counter keys -> human readable labels for the final report.
# NOTE(review): 'missing_patriarche_code' is declared but never
# incremented below, and import_operations_dbf also increments
# 'already_available_patriarche_code' which is NOT declared here.
ERROR_LBLS = {'missing_ope_type':'* Missing operation type: ',
              'missing_patriarche_code':'* Missing patriarche code: '}

@transaction.commit_manually
def import_operations_dbf(values, col_defs=DBF_OPE_COLS, update=False,
                          person=None, stdout=None):
    """Import operations from an iterable of DBF records.

    values: iterable of rows (a dbf.Table); the first row is skipped as
        a header — NOTE(review): dbf tables usually have no header row,
        confirm the export really contains one.
    col_defs: list of Column definitions (or [] placeholders) mapping
        row cells to Operation attributes.
    update: when True, existing operations (matched by code_patriarche)
        are updated instead of being counted as duplicates.
    person: User recorded as history_modifier (defaults to the first
        user in the database).
    stdout: optional stream for progress output.

    Returns (number of newly created operations, list of error strings).
    Transactions are managed manually: each record is committed on
    success and rolled back on failure so one bad row does not abort
    the whole import.
    """
    default_person = person or User.objects.order_by('pk').all()[0]
    # key : (class, default, reverse)
    # Nested dicts stored under these keys become separate model
    # instances; 'reverse' names the FK from that model back to the
    # operation (so it must be saved after the operation exists).
    key_classes = {
    'administrative_act':(AdministrativeAct, {'history_modifier':default_person,
                           'act_type':ActType.objects.get(
                                      txt_idx='excavation_order')}, 'operation'),
    }
    _prepare_ope_types()
    _prepare_period_types()

    ope_default = {'history_modifier':default_person}
    current_import = []  # pks of operations already touched by this run
    new_ops = 0
    errors_nb = {}
    for error in ERROR_LBLS.keys():
        errors_nb[error] = 0
    # NOTE(review): error_ope is never appended to below, so its report
    # section at the end is dead code.
    error_ope, error_reversed, error_multis = [], [], []
    # attribute names fed by multi-valued (m2m) columns
    multi_keys = set([column.col_models[0]
                      for column in col_defs if column and column.multi])
    for line_idx, vals in enumerate(values):
        if stdout:
            stdout.write("\r* line %d" % (line_idx))
        if not line_idx:
            continue # remove header
        # Parse each cell into args, a (possibly nested) dict of
        # attribute -> value following the Column definitions.
        args = {}
        for col_idx, val in enumerate(vals):
            if len(col_defs) <= col_idx or not col_defs[col_idx]:
                continue
            col_def = col_defs[col_idx]
            attrs, typ = col_def.col_models, col_def.format
            extra_cols = col_def.associated_cols
            if not callable(typ):
                # string formats name a module-level parser function
                typ = globals()[typ]
            # walk/create the nested dicts for multi-level attributes
            c_args = args
            for attr in attrs:
                if attr not in c_args:
                    c_args[attr] = {}
                c_args = c_args[attr]
            if not extra_cols:
                try:
                    v = typ(val)
                except TypeError:
                    v = None
            else:
                # pass the associated cells (plus the default person)
                # as extra parser arguments; skip if all are empty
                arguments = [vals[col_number] for col_number in extra_cols]
                if not [arg for arg in arguments if arg]:
                    continue
                arguments += [default_person]
                v = typ(val, *arguments)
            if len(attrs) == 1:
                args[attrs[0]] = v
            elif len(attrs) == 2:
                args[attrs[0]][attrs[1]] = v
            elif len(attrs) == 3:
                args[attrs[0]][attrs[1]][attrs[2]] = v
        # manage exploded dates: recompose X__year/X__month/X__day
        # triplets into a single datetime under key X
        for k in args.keys():
            if '__year' in k:
                key = k[:-len('__year')]
                try:
                    v = datetime.datetime(args[k], args[key+'__month'],
                                          args[key+'__day'])
                    args[key] = v
                except:
                    # invalid or incomplete date: drop the components
                    pass
                args.pop(k)
                args.pop(key+'__month')
                args.pop(key+'__day')
        # Split args into: related objects saved after the operation
        # (reversed_items), m2m values (multis), and plain attributes.
        reversed_items, multis = [], []
        for k in args.keys():
            if k in key_classes:
                cls, default, reverse = key_classes[k]
                default.update(args[k])
                if reverse:
                    # needs the operation pk: defer until it is saved
                    reversed_items.append((cls, default, reverse))
                    args.pop(k)
                    continue
                try:
                    obj = cls.objects.get(**default)
                except:
                    obj = cls.objects.create(**default)
                    obj.save()
                transaction.commit()
                args[k] = obj
            elif type(args[k]) == list or k in multi_keys:
                multis.append((k, args[k]))
                args.pop(k)
        op = None
        if not update and not args.get('operation_type'):
            errors_nb['missing_ope_type'] += 1
            continue
        try:
            op = Operation.objects.get(code_patriarche=args['code_patriarche'])
            if not update and op.pk not in current_import:
                # NOTE(review): this key is missing from ERROR_LBLS, so
                # this line raises KeyError — swallowed by the bare
                # except below, which also skips the `continue`, so a
                # pre-existing duplicate silently falls through to the
                # update branch even when update is False. Confirm and
                # fix upstream.
                errors_nb['already_available_patriarche_code'] += 1
                continue
        except:
            pass
        # check
        if not args.get('year') and args.get('start_date'):
            args['year'] = args['start_date'].year
        updated = False
        # creation
        if not op:
            args.update(ope_default)
            # drop empty values so model defaults apply on creation
            for k in args.keys():
                if not args[k]:
                    args.pop(k)
            op = Operation.objects.create(**args)
            new_ops += 1
            transaction.commit()
        else: # update of an existing operation
            try:
                for k in args:
                    if not args[k]:
                        args[k] = None
                    #if getattr(op, k):
                    #    continue
                    setattr(op, k, args[k])
                op.save()
            except:
                transaction.rollback()
                continue
            transaction.commit()
            updated = True
        # save deferred related objects pointing back to the operation
        try:
            for cls, default, reverse in reversed_items:
                default[reverse] = op
                it = cls(**default).save()
        except:
            transaction.rollback()
            current_import.append(op.pk)
            error_reversed.append((line_idx, reversed_items))
            continue
        transaction.commit()
        # attach m2m values; on the first update of an operation the
        # existing relations are cleared before re-adding
        try:
            for k, vals in multis:
                if op.pk not in current_import and updated:
                    getattr(op, k).clear()
                if type(vals) not in (list, tuple):
                    vals = [vals]
                for v in vals:
                    getattr(op, k).add(v)
                    op.save()
        except:
            transaction.rollback()
            current_import.append(op.pk)
            error_multis.append((line_idx, multis))
            continue
        transaction.commit()
        current_import.append(op.pk)

    # build the human readable error report
    errors = []
    for error_key in errors_nb:
        nb_error = errors_nb[error_key]
        if nb_error:
            errors.append(ERROR_LBLS[error_key] + str(nb_error))
    if error_ope:
        error = "Error while recording theses operations:\n"
        for line_idx, args in error_ope:
            error += "line: " + str(line_idx) + " args: " + str(args) + '\n'
        errors.append(error)
    if error_multis:
        error = "Error while recording theses multiples items attached to "\
                "operation:"
        for line_idx, args in error_multis:
            error += "line: " + str(line_idx) + " args: " + str(args) + '\n'
        errors.append(error)
    if error_reversed:
        error = "Error while recording theses items that depend to operation:"
        for line_idx, args in error_reversed:
            error += "line: " + str(line_idx) + " args: " + str(args) + '\n'
        errors.append(error)
    return new_ops, errors
