Diffstat (limited to 'ishtar_common/scripts/import_ope_from_csv.py')
-rwxr-xr-x | ishtar_common/scripts/import_ope_from_csv.py | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/ishtar_common/scripts/import_ope_from_csv.py b/ishtar_common/scripts/import_ope_from_csv.py
new file mode 100755
index 000000000..fd29fd3f6
--- /dev/null
+++ b/ishtar_common/scripts/import_ope_from_csv.py
@@ -0,0 +1,411 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2011 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# See the file COPYING for details.
+
+"""
+Import archaeological operations from a CSV file.
+"""
+
+DELIMITER = ";"
+QUOTECHAR = '"'
+
+import sys
+import datetime
+import csv, codecs
+sys.path.append('.')
+
+from django.core.management import setup_environ
+import settings
+
+setup_environ(settings)
+
+from optparse import OptionParser
+
+from ishtar_base import models
+
+usage = u"usage: %prog csv_file.csv update sra_nantes"
+parser = OptionParser(usage=usage)
+
+(options, args) = parser.parse_args()
+
+try:
+    assert len(args) >= 1
+except AssertionError:
+    parser.error(u"You must provide one csv file.")
+
+update = True if len(args) > 1 and args[1] else False
+
+def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
+    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
+    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
+                            dialect=dialect, **kwargs)
+    for row in csv_reader:
+        # decode UTF-8 back to Unicode, cell by cell:
+        yield [unicode(cell, 'utf-8') for cell in row]
+
+def utf_8_encoder(unicode_csv_data):
+    for line in unicode_csv_data:
+        yield line.encode('utf-8')
+
+try:
+    values = unicode_csv_reader(codecs.open(args[0], 'rb', "utf-8"),
+                                delimiter=DELIMITER, quotechar=QUOTECHAR)
+except IOError:
+    parser.error(u"Incorrect CSV file.")
+
+from django.db import transaction
+
+from ishtar_base.models import Operation, OperationType, User, Town,\
+    Person, PersonType, Period, AdministrativeAct, ActType
+
+# code: (txt_idx, label)
+TYPES = {
+    u"SU": (u'emergency_excavation', u"Sauvetage urgent"),
+    u"EV": (u'evaluation', u"Évaluation"),
+    u"SD": (u'Sampling', u"Sondage"),
+    u"FP": (u"prog_excavation", u"Fouille programmée"),
+    u"SP": (u"prev_excavation", u"Fouille préventive"),
+    u"OPD": (u"arch_diagnostic", u"Diagnostic")
+}
+
+for k in TYPES.keys():
+    try:
+        TYPES[k] = OperationType.objects.get(txt_idx=TYPES[k][0])
+    except:
+        ot = OperationType(txt_idx=TYPES[k][0], label=TYPES[k][1])
+        ot.save()
+        TYPES[k] = ot
+
+def parse_operationtype(value):
+    if value not in TYPES:
+        return None
+    return TYPES[value]
+
+# code: txt_idx
+PERIODS = {
+    u'MA': u'middle_age',
+    u'IND': u'indetermined',
+    u'CON': u'contemporan',
+    u'MOD': u'modern',
+    u'REC': u'recent_times',
+    u'BMA': u'low_middle_age',
+    u'MAC': u'classic_middle_age',
+    u'HMA': u'high_middle_age',
+    u'BAS': u'low_empire',
+    u'HAU': u'high-empire',
+    u'NRE': u'republic',
+    u'GAL': u'gallo-roman',
+    u'FE2': u'second_iron_age',
+    u'FE1': u'first_iron_age',
+    u'BRF': u'final_bronze_age',
+    u'BRM': u'middle_bronze_age',
+    u'BRA': u'old_bronze_age',
+    u'FER': u'iron_age',
+    u'BRO': u'bronze_age',
+    u'PRO': u'protohistory',
+    u'NEF': u'final_neolithic',
+    u'NER': u'recent_neolithic',
+    u'NEM': u'middle_neolithic',
+    u'NEA': u'old_neolithic',
+    u'NEO': u'neolithic',
+    u'MER': u'recent_mesolithic',
+    u'MEM': u'middle_mesolithic',
+    u'MEA': u'old_mesolithic',
+    u'MES': u'mesolithic',
+    u'PAF': u'final_paleolithic',
+    u'PAS': u'late_paleolithic',
+    u'PAM': u'middle_paleolithic',
+    u'PAA': u'ancien_paleolithic',
+    u'PAL': u'paleolithic',
+    u'': u'not_yet_documented',
+}
+for k in PERIODS.keys():
+    PERIODS[k] = Period.objects.get(txt_idx=PERIODS[k])
+# match longest period codes first
+PERIODS_KEYS = PERIODS.keys()
+PERIODS_KEYS.sort(key=len)
+PERIODS_KEYS.reverse()
+
+def parse_period(value):
+    value = value[3:] if value.startswith('EUR') else value
+    while value.endswith('-'):
+        value = value[:-1]
+    value = value[3:] if value.startswith('EUR') else value
+    if not value:
+        return [PERIODS[u'']]
+    periods, old_val = [], u''
+    while value and old_val != value:
+        old_val = value
+        for k in PERIODS_KEYS:
+            if value.startswith(k):
+                periods.append(PERIODS[k])
+                value = value[len(k):]
+                break
+    return periods
+
+def parse_date(value):
+    try:
+        return datetime.datetime.strptime(value, '%d/%m/%Y')
+    except:
+        return None
+
+def parse_surface(value):
+    value = value.replace(',', '.')
+    try:
+        # hectares to square metres
+        value = float(value) * 10000
+        if value:
+            return value
+        return None
+    except:
+        return None
+
+def parse_year(value):
+    try:
+        yr = int(value)
+    except:
+        return
+    if yr < 1900 or yr > 2100:
+        return
+    return yr
+
+default_person = User.objects.get(pk=2)
+
+
+# if no start date: January 1st of the year
+
+# (attrs, converter) for each CSV column; [] means the column is ignored
+COLS = [
+    [],  # file number?
+    (('operation_type',), parse_operationtype),
+    (('common_name',), unicode),
+    (('in_charge', 'name'), unicode),
+    (('in_charge', 'surname'), unicode),
+    [],  # state?
+    [],  # address?
+    [],  # origin?
+    (('periods',), parse_period),
+    [],  # programme?
+    [],  # Rattach PC ?
+    [],  # empty
+    (('administrative_act', 'ref_sra'), unicode),
+    (('administrative_act', 'signature_date'), parse_date),
+    (('start_date',), parse_date),
+    (('excavation_end_date',), parse_date),
+    (('year',), parse_year),
+    [],  # identification
+    (('code_patriarche',), int),
+    [],  # X (degrees)
+    [],  # Y (degrees)
+    [],  # X entered?
+    [],  # Y entered?
+    [],  # georef
+    [],  # geometry
+    (('surface',), parse_surface),
+]
+
+def parse_insee(value):
+    # split the concatenated 5-digit INSEE numbers and resolve them to towns
+    values = []
+    while len(value) > 4:
+        values.append(value[:5])
+        value = value[5:]
+    towns = []
+    for value in values:
+        try:
+            town = Town.objects.get(numero_insee=value)
+            towns.append(town)
+        except:
+            sys.stderr.write('Numero INSEE : %s non existant en base' % value)
+    return towns
+
+def parse_patriarche(value):
+    # prefix the raw value with '18' to build the full Patriarche code
+    if not value:
+        return
+    return '18' + unicode(value)
+
+COLS_SRA_NANTES = [
+    [],
+    [],
+    [],  # (('year',), int),
+    (('towns',), parse_insee),
+    [],  # Intit TODO
+    [],  # site address... TODO
+    [],  # prog/prev
+    [],  # (('in_charge', 'attached_to', 'name'), unicode),
+    [],  # (('in_charge', 'name'), unicode),
+    [],  # (('in_charge' num
+    [],  # (('in_charge', 'surname'), unicode),
+    [],  # operation number (non-Patriarche)
+    (('start_date__year',), int),
+    (('excavation_end_date__year',), int),
+    (('start_date__day',), int),
+    (('excavation_end_date__day',), int),
+    (('start_date__month',), int),
+    (('excavation_end_date__month',), int),
+    [],
+    [],
+    [],
+    [],
+    (('code_patriarche',), parse_patriarche)
+]
+
+col_defs = COLS_SRA_NANTES if len(args) > 2 and args[2] else COLS
+
+ope_default = {'history_modifier': default_person}
+
+# key: (class, defaults, reverse attribute)
+key_classes = {
+'in_charge': (Person, {'history_modifier': default_person,
+                       'title': '',
+                       'person_type': PersonType.objects.get(
+                                          txt_idx='head_scientist')}, False),
+'administrative_act': (AdministrativeAct, {'history_modifier': default_person,
+                       'act_type': ActType.objects.get(
+                                          txt_idx='excavation_order')}, 'operation'),
+}
+
+@transaction.commit_manually
+def import_operations(col_defs=COLS, update=False):
+    new_ops = 0
+    error_ope, error_reversed, error_multis = [], [], []
+    for line_idx, vals in enumerate(values):
+        sys.stdout.write("\r* line %d" % (line_idx))
+        if not line_idx:
+            continue  # skip the header line
+        args = {}
+        for col_idx, val in enumerate(vals):
+            if len(col_defs) <= col_idx or not col_defs[col_idx]:
+                continue
+            attrs, typ = col_defs[col_idx]
+            c_args = args
+            for attr in attrs:
+                if attr not in c_args:
+                    c_args[attr] = {}
+                c_args = c_args[attr]
+            try:
+                v = typ(val)
+            except:
+                v = ""
+            if len(attrs) == 1:
+                args[attrs[0]] = v
+            elif len(attrs) == 2:
+                args[attrs[0]][attrs[1]] = v
+            elif len(attrs) == 3:
+                args[attrs[0]][attrs[1]][attrs[2]] = v
+        # rebuild dates from exploded __year/__month/__day columns
+        for k in args.keys():
+            if '__year' in k:
+                key = k[:-len('__year')]
+                try:
+                    v = datetime.datetime(args[k], args[key+'__month'],
+                                          args[key+'__day'])
+                    args[key] = v
+                except:
+                    pass
+                args.pop(k)
+                args.pop(key+'__month')
+                args.pop(key+'__day')
+        reversed_items, multis = [], []
+        for k in args.keys():
+            if k in key_classes:
+                cls, default, reverse = key_classes[k]
+                default.update(args[k])
+                if reverse:
+                    reversed_items.append((cls, default, reverse))
+                    args.pop(k)
+                    continue
+                try:
+                    obj = cls.objects.get(**default)
+                except:
+                    obj = cls.objects.create(**default)
+                    obj.save()
+                    transaction.commit()
+                args[k] = obj
+            elif type(args[k]) == list:
+                multis.append((k, args[k]))
+                args.pop(k)
+        op = None
+        if not update and not args['operation_type']:
+            continue
+        try:
+            op = Operation.objects.get(code_patriarche=args['code_patriarche'])
+            if not update:
+                continue
+        except:
+            if update:
+                continue
+        # creation
+        if not op:
+            args.update(ope_default)
+            args['operation_code'] = Operation.get_available_operation_code(
+                                                                 args['year'])
+            try:
+                op = Operation.objects.create(**args)
+                op.save()
+                new_ops += 1
+            except:
+                error_ope.append((line_idx, args))
+                transaction.rollback()
+                continue
+            transaction.commit()
+        else:  # update existing operation
+            try:
+                for k in args:
+                    if getattr(op, k):
+                        continue
+                    setattr(op, k, args[k])
+                op.save()
+            except:
+                transaction.rollback()
+                continue
+            transaction.commit()
+        try:
+            for cls, default, reverse in reversed_items:
+                default[reverse] = op
+                it = cls(**default).save()
+        except:
+            transaction.rollback()
+            error_reversed.append((line_idx, reversed_items))
+            continue
+        transaction.commit()
+        try:
+            for k, vals in multis:
+                for v in vals:
+                    getattr(op, k).add(v)
+            op.save()
+        except:
+            transaction.rollback()
+            error_multis.append((line_idx, multis))
+            continue
+        transaction.commit()
+
+    if error_ope:
+        sys.stderr.write("\nError while recording these operations:\n")
+        for line_idx, args in error_ope:
+            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n')
+    if error_multis:
+        sys.stderr.write("\nError while recording these multiple items attached to operations:\n")
+        for line_idx, args in error_multis:
+            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n')
+    if error_reversed:
+        sys.stderr.write("\nError while recording these items that depend on operations:\n")
+        for line_idx, args in error_reversed:
+            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n')
+
+    sys.stdout.write("\n%d new operations recorded\n" % new_ops)
+
+import_operations(col_defs=col_defs, update=update)