summaryrefslogtreecommitdiff
path: root/ishtar_common/scripts/import_ope_from_csv.py
diff options
context:
space:
mode:
Diffstat (limited to 'ishtar_common/scripts/import_ope_from_csv.py')
-rwxr-xr-xishtar_common/scripts/import_ope_from_csv.py411
1 files changed, 411 insertions, 0 deletions
diff --git a/ishtar_common/scripts/import_ope_from_csv.py b/ishtar_common/scripts/import_ope_from_csv.py
new file mode 100755
index 000000000..fd29fd3f6
--- /dev/null
+++ b/ishtar_common/scripts/import_ope_from_csv.py
@@ -0,0 +1,411 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2011 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# See the file COPYING for details.
+
+"""
+Scripts
+"""
+
# CSV dialect used by the exports this script consumes.
DELIMITER = ";"
QUOTECHAR = '"'

import sys
import datetime
import csv
import codecs

# Allow "import settings" from the current working directory: the script
# must be launched from an Ishtar project directory.
sys.path.append('.')

from django.core.management import setup_environ
import settings

setup_environ(settings)

from optparse import OptionParser

from ishtar_base import models

# Fixed: the usage string had a stray "% " before %prog.
usage = u"usage: %prog csv_file.csv update sra_nantes"
parser = OptionParser(usage=usage)

(options, args) = parser.parse_args()

# Input validation must not rely on assert (stripped under -O): test
# explicitly. parser.error() prints the message and exits.
if len(args) < 1:
    parser.error(u"You must provide one csv file.")

# A non-empty second positional argument switches to "update existing
# operations" mode instead of creating new ones.
update = bool(len(args) > 1 and args[1])
+
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield rows of unicode cells from an iterable of unicode lines.

    The Python 2 csv module only handles byte strings, so the input is
    encoded to UTF-8 before parsing and every cell is decoded back to
    unicode afterwards.
    """
    encoded_lines = utf_8_encoder(unicode_csv_data)
    reader = csv.reader(encoded_lines, dialect=dialect, **kwargs)
    for raw_row in reader:
        yield [unicode(cell, 'utf-8') for cell in raw_row]
+
def utf_8_encoder(unicode_csv_data):
    """Return an iterator over *unicode_csv_data* with every line encoded
    as UTF-8 byte strings (helper for unicode_csv_reader)."""
    return (line.encode('utf-8') for line in unicode_csv_data)
+
# Open the CSV file given on the command line; abort with a usage error
# when it cannot be read.
try:
    values = unicode_csv_reader(codecs.open(args[0], 'rb', "utf-8"),
                                delimiter=DELIMITER, quotechar=QUOTECHAR)
except (IOError):
    parser.error(u"Incorrect CSV file.")

from django.db import transaction

from ishtar_base.models import Operation, OperationType, User, Town,\
    Person, PersonType, Period, AdministrativeAct, ActType

# CSV operation-type code -> (txt_idx, label); each value is replaced by
# the actual OperationType object by the resolution loop below.
# lbl, txt_idx
TYPES = {
    u"SU":(u'emergency_excavation', u"Sauvetage urgent"),
    u"EV":(u'evaluation', u"Évaluation"),
    u"SD":(u'Sampling', u"Sondage"),
    u"FP":(u"prog_excavation", u"Fouille programmée"),
    u"SP":(u"prev_excavation", u"Fouille préventive"),
    u"OPD":(u"arch_diagnostic", u"Diagnostic")
}
+
# Resolve each CSV code to its OperationType database object, creating
# missing types on the fly.
for k in list(TYPES.keys()):
    txt_idx, label = TYPES[k]
    try:
        TYPES[k] = OperationType.objects.get(txt_idx=txt_idx)
    except OperationType.DoesNotExist:
        # Was a bare "except:": only a missing type should trigger
        # creation; any other database error must propagate.
        ot = OperationType(txt_idx=txt_idx, label=label)
        ot.save()
        TYPES[k] = ot
+
def parse_operationtype(value):
    """Map a CSV operation-type code (e.g. u"SD") to its OperationType
    object, or None when the code is unknown."""
    return TYPES.get(value)
+
# CSV chronology code -> Period.txt_idx; each value is replaced by the
# actual Period object just below.
# lbl, txt_idx
PERIODS = {
    u'MA':u'middle_age',
    u'IND':u'indetermined',
    u'CON':u'contemporan',
    u'MOD':u'modern',
    u'REC':u'recent_times',
    u'BMA':u'low_middle_age',
    u'MAC':u'classic_middle_age',
    u'HMA':u'high_middle_age',
    u'BAS':u'low_empire',
    u'HAU':u'high-empire',
    u'NRE':u'republic',
    u'GAL':u'gallo-roman',
    u'FE2':u'second_iron_age',
    u'FE1':u'first_iron_age',
    u'BRF':u'final_bronze_age',
    u'BRM':u'middle_bronze_age',
    u'BRA':u'old_bronze_age',
    u'FER':u'iron_age',
    u'BRO':u'bronze_age',
    u'PRO':u'protohistory',
    u'NEF':u'final_neolithic',
    u'NER':u'recent_neolithic',
    u'NEM':u'middle_neolithic',
    u'NEA':u'old_neolithic',
    u'NEO':u'neolithic',
    u'MER':u'recent_mesolithic',
    u'MEM':u'middle_mesolithic',
    u'MEA':u'old_mesolithic',
    u'MES':u'mesolithic',
    u'PAF':u'final_paleolithic',
    u'PAS':u'late_paleolithic',
    u'PAM':u'middle_paleolithic',
    u'PAA':u'ancien_paleolithic',
    u'PAL':u'paleolithic',
    u'':u'not_yet_documented',
}
# Resolve txt_idx values to Period objects; iterate over a snapshot of
# the keys since the dict values are rewritten in place.
for k in list(PERIODS.keys()):
    PERIODS[k] = Period.objects.get(txt_idx=PERIODS[k])
# Keys sorted longest-first so that parse_period tries real codes before
# the empty-string fallback (was keys()/sort()/reverse(), which breaks on
# Python 3 where keys() is a view).
PERIODS_KEYS = sorted(PERIODS.keys(), key=len, reverse=True)
+
def parse_period(value):
    """Parse a concatenation of chronology codes into a list of Periods.

    A leading 'EUR' prefix and trailing dashes are stripped (the prefix
    again after each dash).  An empty value maps to the
    "not yet documented" period.  Codes are matched greedily,
    longest-known-code first, until nothing more matches.
    """
    if value.startswith('EUR'):
        value = value[3:]
    while value.endswith('-'):
        value = value[:-1]
        if value.startswith('EUR'):
            value = value[3:]
    if not value:
        return [PERIODS[u'']]
    periods = []
    previous = u''
    # Stop as soon as a pass over the known codes consumes nothing.
    while value and previous != value:
        previous = value
        for code in PERIODS_KEYS:
            if value.startswith(code):
                periods.append(PERIODS[code])
                value = value[len(code):]
                break
    return periods
+
def parse_date(value):
    """Parse a DD/MM/YYYY string into a datetime, or None if unparsable.

    Was a bare "except:"; only the parsing failures strptime can raise
    are swallowed (ValueError for a malformed string, TypeError for a
    non-string cell).
    """
    try:
        return datetime.datetime.strptime(value, '%d/%m/%Y')
    except (ValueError, TypeError):
        return None
+
def parse_surface(value):
    """Convert a surface in hectares (French decimal comma accepted) to
    square metres; return None for zero or unparsable values."""
    value = value.replace(',', '.')
    try:
        # hectares to square metres
        surface = float(value) * 10000
    except ValueError:
        # Was a bare "except:": only a malformed number is a soft error.
        return None
    # A null surface is treated as "not documented".
    return surface or None
+
def parse_year(value):
    """Parse a year; only values in the range [1900, 2100] are accepted,
    anything else yields None."""
    try:
        yr = int(value)
    except (ValueError, TypeError):
        # Was a bare "except:": only conversion failures are soft errors.
        return None
    if yr < 1900 or yr > 2100:
        return None
    return yr
+
# User recorded as "history_modifier" on every object this script creates.
# NOTE(review): assumes pk=2 is the import account — confirm in the base.
default_person = User.objects.get(pk=2)


# if no start date: January 1st of the year

# Default CSV column layout: one entry per column.  An empty list means
# "ignore this column"; otherwise a (attrs, convert) tuple where attrs is
# the (possibly nested) attribute path set on the Operation and convert
# parses the raw cell value.
# attrs, convert
COLS = [
    [], # file number?
    (('operation_type',), parse_operationtype),
    (('common_name',), unicode),
    (('in_charge', 'name'), unicode),
    (('in_charge', 'surname'), unicode),
    [], # state?
    [], # address?
    [], # origin?
    (('periods',), parse_period),
    [], # program?
    [], # "Rattach PC"?
    [], # empty column
    (('administrative_act', 'ref_sra'), unicode),
    (('administrative_act', 'signature_date'), parse_date),
    (('start_date',), parse_date),
    (('excavation_end_date',), parse_date),
    (('year',), parse_year),
    [], # identification
    (('code_patriarche',), int),
    [], # X degree
    [], # Y degree
    [], # X as entered?
    [], # Y as entered?
    [], # georef
    [], # geometry
    (('surface',), parse_surface),
]
+
def parse_insee(value):
    """Split a concatenation of 5-digit INSEE codes and resolve each to a
    Town object.  Unknown codes are reported on stderr and skipped."""
    codes = []
    while len(value) > 4:
        codes.append(value[:5])
        value = value[5:]
    towns = []
    for code in codes:
        try:
            towns.append(Town.objects.get(numero_insee=code))
        except Town.DoesNotExist:
            # Was a bare "except:": only a missing town should be
            # skipped.  A trailing newline is added so that successive
            # messages do not run together with the progress output.
            sys.stderr.write('Numero INSEE : %s non existant en base\n'
                             % code)
    return towns
+
def parse_patriarche(value):
    """Build a full Patriarche code by prefixing the raw number with
    '18'; falsy input yields None.

    NOTE: '18' looks like a fixed administrative prefix for this
    dataset — confirm with the SRA export documentation.
    """
    if not value:
        return None
    return '18' + unicode(value)
+
# Column layout for the SRA Nantes CSV export; same conventions as COLS
# (empty list = ignored column, otherwise a (attrs, convert) tuple).
# Exploded date columns (start_date__year, ...) are recombined into real
# dates by import_operations.
COLS_SRA_NANTES = [
    [],
    [],
    [], #(('year',), int),
    (('towns',), parse_insee),
    [], # title TODO
    [], # site address... TODO
    [], # programmed/preventive
    [], #(('in_charge', 'attached_to', 'name'), unicode),
    [], #(('in_charge', 'name'), unicode),
    [], #(('in_charge' number
    [], #(('in_charge', 'surname'), unicode),
    [], # non-Patriarche operation number
    (('start_date__year',), int),
    (('excavation_end_date__year',), int),
    (('start_date__day',), int),
    (('excavation_end_date__day',), int),
    (('start_date__month',), int),
    (('excavation_end_date__month',), int),
    [],
    [],
    [],
    [],
    # Was a 2-item *list*: made a tuple for consistency with every other
    # active entry (unpacking behaviour is unchanged).
    (('code_patriarche',), parse_patriarche),
]
+
# Use the SRA Nantes column layout when a third positional argument is
# given on the command line.
col_defs = COLS_SRA_NANTES if len(args) > 2 and args[2] else COLS

# Attributes applied to every Operation created by the import.
ope_default = {'history_modifier':default_person}

# key : (class, default, reverse)
# class: related model; default: attributes used to fetch/create it;
# reverse: when not False, the attribute on the related object pointing
# back to the operation (the object is then created after the operation).
key_classes = {
'in_charge':(Person, {'history_modifier':default_person,
                      'title':'',
                      'person_type':PersonType.objects.get(
                          txt_idx='head_scientist')}, False),
'administrative_act':(AdministrativeAct, {'history_modifier':default_person,
                          'act_type':ActType.objects.get(
                              txt_idx='excavation_order')}, 'operation'),
}
+
@transaction.commit_manually
def import_operations(col_defs=COLS, update=False):
    """Import operations from the module-level CSV iterator ``values``.

    Each CSV row is converted into an Operation-attribute dict through
    ``col_defs``, then either creates a new Operation (update=False) or
    fills in empty fields of an existing one (update=True, matched on
    code_patriarche).  Related objects (key_classes) and many-to-many
    values are handled separately; each step is committed or rolled back
    individually and failures are collected and reported at the end.
    """
    new_ops = 0
    error_ope, error_reversed, error_multis = [], [], []
    for line_idx, vals in enumerate(values):
        # Progress indicator (carriage return keeps it on one line).
        sys.stdout.write("\r* line %d" % (line_idx))
        if not line_idx:
            continue # remove header
        args = {}
        # Build a (possibly nested) attribute dict from the row cells.
        for col_idx, val in enumerate(vals):
            if len(col_defs) <= col_idx or not col_defs[col_idx]:
                continue
            attrs, typ = col_defs[col_idx]
            # Create the nested dicts along the attribute path.
            c_args = args
            for attr in attrs:
                if attr not in c_args:
                    c_args[attr] = {}
                c_args = c_args[attr]
            # NOTE(review): bare except — any converter failure silently
            # becomes an empty string.
            try:
                v = typ(val)
            except:
                v = ""
            if len(attrs) == 1:
                args[attrs[0]] = v
            elif len(attrs) == 2:
                args[attrs[0]][attrs[1]] = v
            elif len(attrs) == 3:
                args[attrs[0]][attrs[1]][attrs[2]] = v
        # manage exploded dates: recombine foo__year/__month/__day
        # columns into a single datetime under "foo".
        for k in args.keys():
            if '__year' in k:
                key = k[:-len('__year')]
                try:
                    v = datetime.datetime(args[k], args[key+'__month'],
                                          args[key+'__day'])
                    args[key] = v
                except:
                    pass
                args.pop(k)
                args.pop(key+'__month')
                args.pop(key+'__day')
        # Extract related objects and many-to-many values: they cannot be
        # passed directly to Operation.objects.create().
        reversed_items, multis = [], []
        for k in args.keys():
            if k in key_classes:
                cls, default, reverse = key_classes[k]
                # NOTE(review): ``default`` is the shared dict stored in
                # key_classes — update() mutates it, so values leak from
                # one row to the next.  Should copy before updating.
                default.update(args[k])
                if reverse:
                    # Created after the operation, pointing back to it.
                    reversed_items.append((cls, default, reverse))
                    args.pop(k)
                    continue
                # NOTE(review): bare except — get() failures (including
                # MultipleObjectsReturned) all fall through to create().
                try:
                    obj = cls.objects.get(**default)
                except:
                    obj = cls.objects.create(**default)
                    obj.save()
                    transaction.commit()
                args[k] = obj
            elif type(args[k]) == list:
                multis.append((k, args[k]))
                args.pop(k)
        op = None
        if not update and not args['operation_type']:
            continue
        # Existing operations are skipped in creation mode; missing ones
        # are skipped in update mode.
        try:
            op = Operation.objects.get(code_patriarche=args['code_patriarche'])
            if not update:
                continue
        except:
            if update:
                continue
        # creation
        if not op:
            args.update(ope_default)
            args['operation_code'] = Operation.get_available_operation_code(
                args['year'])
            try:
                op = Operation.objects.create(**args)
                op.save()
                new_ops += 1
            except:
                error_ope.append((line_idx, args))
                transaction.rollback()
                continue
            transaction.commit()
        else: # update: only fill fields that are currently empty
            try:
                for k in args:
                    if getattr(op, k):
                        continue
                    setattr(op, k, args[k])
                op.save()
            except:
                transaction.rollback()
                continue
            transaction.commit()
        # Create the reverse-linked objects (e.g. administrative acts).
        # NOTE(review): ``it`` is assigned but unused (save() returns
        # None in this Django version).
        try:
            for cls, default, reverse in reversed_items:
                default[reverse] = op
                it = cls(**default).save()
        except:
            transaction.rollback()
            error_reversed.append((line_idx, reversed_items))
            continue
        transaction.commit()
        # Attach many-to-many values (e.g. towns, periods).
        try:
            for k, vals in multis:
                for v in vals:
                    getattr(op, k).add(v)
            op.save()
        except:
            transaction.rollback()
            error_multis.append((line_idx, multis))
            continue
        transaction.commit()

    # Final error report on stderr.
    if error_ope:
        sys.stderr.write("\nError while recording theses operations:\n")
        for line_idx, args in error_ope:
            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n')
    if error_multis:
        sys.stderr.write( "\nError while recording theses multiples items attached to operation:")
        for line_idx, args in error_multis:
            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n')
    if error_reversed:
        sys.stderr.write("\nError while recording theses items that depend to operation:")
        for line_idx, args in error_reversed:
            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n')

    sys.stdout.write("\n%d new operations recorded\n" % new_ops)

# Run the import with the command-line-selected layout and mode.
import_operations(col_defs=col_defs, update=update)