#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2011 Étienne Loks # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as # published by the Free Software Foundation, either version 3 of the # License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # See the file COPYING for details. """ Scripts """ DELIMITER = ";" QUOTECHAR = '"' import sys import datetime import csv, codecs sys.path.append('.') from django.core.management import setup_environ import settings setup_environ(settings) from optparse import OptionParser from ishtar_base import models usage = u"usage: % %prog csv_file.csv update sra_nantes" parser = OptionParser(usage=usage) (options, args) = parser.parse_args() try: assert len(args) >= 1 except AssertionError: parser.error(u"You must provide one csv file.") update = True if len(args) > 1 and args[1] else False def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): # csv.py doesn't do Unicode; encode temporarily as UTF-8: csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), dialect=dialect, **kwargs) for row in csv_reader: # decode UTF-8 back to Unicode, cell by cell: yield [unicode(cell, 'utf-8') for cell in row] def utf_8_encoder(unicode_csv_data): for line in unicode_csv_data: yield line.encode('utf-8') try: values = unicode_csv_reader(codecs.open(args[0], 'rb', "utf-8"), delimiter=DELIMITER, quotechar=QUOTECHAR) except (IOError): parser.error(u"Incorrect CSV file.") from django.db import transaction from ishtar_base.models import Operation, OperationType, User, Town,\ Person, PersonType, Period, AdministrativeAct, ActType # lbl, txt_idx TYPES = { u"SU":(u'emergency_excavation', u"Sauvetage urgent"), u"EV":(u'evaluation', u"Évaluation"), u"SD":(u'Sampling', u"Sondage"), u"FP":(u"prog_excavation", u"Fouille programmée"), u"SP":(u"prev_excavation", u"Fouille préventive"), u"OPD":(u"arch_diagnostic", u"Diagnostic") } for k in TYPES.keys(): try: TYPES[k] = OperationType.objects.get(txt_idx=TYPES[k][0]) except: ot = OperationType(txt_idx=TYPES[k][0], label=TYPES[k][1]) ot.save() TYPES[k] = ot def parse_operationtype(value): if value not in TYPES: return None return TYPES[value] # lbl, txt_idx PERIODS = { u'MA':u'middle_age', u'IND':u'indetermined', u'CON':u'contemporan', u'MOD':u'modern', u'REC':u'recent_times', u'BMA':u'low_middle_age', u'MAC':u'classic_middle_age', u'HMA':u'high_middle_age', u'BAS':u'low_empire', u'HAU':u'high-empire', u'NRE':u'republic', u'GAL':u'gallo-roman', u'FE2':u'second_iron_age', u'FE1':u'first_iron_age', u'BRF':u'final_bronze_age', u'BRM':u'middle_bronze_age', u'BRA':u'old_bronze_age', u'FER':u'iron_age', u'BRO':u'bronze_age', u'PRO':u'protohistory', u'NEF':u'final_neolithic', u'NER':u'recent_neolithic', u'NEM':u'middle_neolithic', u'NEA':u'old_neolithic', u'NEO':u'neolithic', u'MER':u'recent_mesolithic', u'MEM':u'middle_mesolithic', u'MEA':u'old_mesolithic', u'MES':u'mesolithic', u'PAF':u'final_paleolithic', u'PAS':u'late_paleolithic', u'PAM':u'middle_paleolithic', u'PAA':u'ancien_paleolithic', u'PAL':u'paleolithic', u'':u'not_yet_documented', } for k in PERIODS.keys(): PERIODS[k] = Period.objects.get(txt_idx=PERIODS[k]) PERIODS_KEYS = PERIODS.keys() PERIODS_KEYS.sort(key=len) PERIODS_KEYS.reverse() def parse_period(value): value = value[3:] if value.startswith('EUR') else value while value.endswith('-'): value = value[:-1] value = value[3:] if value.startswith('EUR') else value if not value: return [PERIODS[u'']] periods, old_val = [], u'' while value and old_val != value: old_val = value for k in PERIODS_KEYS: if value.startswith(k): periods.append(PERIODS[k]) value = value[len(k):] break return periods def parse_date(value): try: return datetime.datetime.strptime(value, '%d/%m/%Y') except: return None def parse_surface(value): value = value.replace(',', '.') try: # hectare en metre carrés value = float(value) * 10000 if value: return value return None except: return None def parse_year(value): try: yr = int(value) except: return if yr < 1900 or yr > 2100: return return yr default_person = User.objects.get(pk=2) # si pas de start date : premier janvier de year # attrs, convert COLS = [ [], # numéro de dossier ? (('operation_type',), parse_operationtype), (('common_name',), unicode), (('in_charge', 'name'), unicode), (('in_charge', 'surname'), unicode), [], # État ? [], # Adresse ? [], # origine ? (('periods',), parse_period), [], # Programme ? [], # Rattach PC ? [], # vide (('administrative_act', 'ref_sra'), unicode), (('administrative_act', 'signature_date'), parse_date), (('start_date',), parse_date), (('excavation_end_date',), parse_date), (('year',), parse_year), [], # identification (('code_patriarche',), int), [], # X degré [], # Y degré [], # X saisi ? [], # Y saisi ? [], # georef [], # geometrie (('surface',), parse_surface), ] def parse_insee(value): values = [] while len(value) > 4: values.append(value[:5]) value = value[5:] towns = [] for value in values: try: town = Town.objects.get(numero_insee=value) towns.append(town) except: sys.stderr.write('Numero INSEE : %s non existant en base' % value) return towns def parse_patriarche(value): if not value: return return '18' + unicode(value) COLS_SRA_NANTES = [ [], [], [], #(('year',), int), (('towns',), parse_insee), [], # Intit TODO [], # Adresse site... TODO [], # prog/prev [], #(('in_charge', 'attached_to', 'name'), unicode), [], #(('in_charge', 'name'), unicode), [], #(('in_charge' num [], #(('in_charge', 'surname'), unicode), [], # num ope non patriarche (('start_date__year',), int), (('excavation_end_date__year',), int), (('start_date__day',), int), (('excavation_end_date__day',), int), (('start_date__month',), int), (('excavation_end_date__month',), int), [], [], [], [], [('code_patriarche',), parse_patriarche] ] col_defs = COLS_SRA_NANTES if len(args) > 2 and args[2] else COLS ope_default = {'history_modifier':default_person} # key : (class, default, reverse, extra) key_classes = { 'in_charge':(Person, {'history_modifier':default_person, 'title':'', 'person_type':PersonType.objects.get( txt_idx='head_scientist')}, False), 'administrative_act':(AdministrativeAct, {'history_modifier':default_person, 'act_type':ActType.objects.get( txt_idx='excavation_order')}, 'operation'), } @transaction.commit_manually def import_operations(col_defs=COLS, update=False): new_ops = 0 error_ope, error_reversed, error_multis = [], [], [] for line_idx, vals in enumerate(values): sys.stdout.write("\r* line %d" % (line_idx)) if not line_idx: continue # remove header args = {} for col_idx, val in enumerate(vals): if len(col_defs) <= col_idx or not col_defs[col_idx]: continue attrs, typ = col_defs[col_idx] c_args = args for attr in attrs: if attr not in c_args: c_args[attr] = {} c_args = c_args[attr] try: v = typ(val) except: v = "" if len(attrs) == 1: args[attrs[0]] = v elif len(attrs) == 2: args[attrs[0]][attrs[1]] = v elif len(attrs) == 3: args[attrs[0]][attrs[1]][attrs[2]] = v # manage exploded dates for k in args.keys(): if '__year' in k: key = k[:-len('__year')] try: v = datetime.datetime(args[k], args[key+'__month'], args[key+'__day']) args[key] = v except: pass args.pop(k) args.pop(key+'__month') args.pop(key+'__day') reversed_items, multis = [], [] for k in args.keys(): if k in key_classes: cls, default, reverse = key_classes[k] default.update(args[k]) if reverse: reversed_items.append((cls, default, reverse)) args.pop(k) continue try: obj = cls.objects.get(**default) except: obj = cls.objects.create(**default) obj.save() transaction.commit() args[k] = obj elif type(args[k]) == list: multis.append((k, args[k])) args.pop(k) op = None if not update and not args['operation_type']: continue try: op = Operation.objects.get(code_patriarche=args['code_patriarche']) if not update: continue except: if update: continue # creation if not op: args.update(ope_default) args['operation_code'] = Operation.get_available_operation_code( args['year']) try: op = Operation.objects.create(**args) op.save() new_ops += 1 except: error_ope.append((line_idx, args)) transaction.rollback() continue transaction.commit() else: # mise à jour try: for k in args: if getattr(op, k): continue setattr(op, k, args[k]) op.save() except: transaction.rollback() continue transaction.commit() try: for cls, default, reverse in reversed_items: default[reverse] = op it = cls(**default).save() except: transaction.rollback() error_reversed.append((line_idx, reversed_items)) continue transaction.commit() try: for k, vals in multis: for v in vals: getattr(op, k).add(v) op.save() except: transaction.rollback() error_multis.append((line_idx, multis)) continue transaction.commit() if error_ope: sys.stderr.write("\nError while recording theses operations:\n") for line_idx, args in error_ope: sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n') if error_multis: sys.stderr.write( "\nError while recording theses multiples items attached to operation:") for line_idx, args in error_multis: sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n') if error_reversed: sys.stderr.write("\nError while recording theses items that depend to operation:") for line_idx, args in error_reversed: sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n') sys.stdout.write("\n%d new operations recorded\n" % new_ops) import_operations(col_defs=col_defs, update=update)