Diffstat (limited to 'archaeological_operations/import_from_csv.py')
-rw-r--r-- | archaeological_operations/import_from_csv.py | 348
1 file changed, 318 insertions, 30 deletions
diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py
index 96ffcf085..b4e18635a 100644
--- a/archaeological_operations/import_from_csv.py
+++ b/archaeological_operations/import_from_csv.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2012 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+# Copyright (C) 2012-2013 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -32,13 +32,22 @@ from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import transaction
 
-from ishtar_common.models import Town, Person, PersonType
+from ishtar_common.models import Town, Person, PersonType, OrganizationType, \
+    Organization
+from archaeological_files.models import PermitType, File, FileType
 from archaeological_operations.models import Operation, OperationType, Period, \
-    AdministrativeAct, ActType
+    AdministrativeAct, ActType, OperationSource
 
+DEFAULT_PERSON = User.objects.order_by('pk').all()[0]
+
+class Column:
+    def __init__(self, col_models, format, associated_cols=None, multi=False):
+        self.col_models, self.format = col_models, format
+        self.associated_cols, self.multi = associated_cols, multi
 
 def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
     # csv.py doesn't do Unicode; encode temporarily as UTF-8:
+
     csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                             dialect=dialect, **kwargs)
     for row in csv_reader:
@@ -57,6 +66,13 @@ for k in settings.ISHTAR_OPE_TYPES.keys():
                                             'preventive':k[0]==u'préventive'})
     ope_types[k] = ot
 
+def parse_string(value):
+    value = value.strip()
+    if value == '#EMPTY':
+        value = ''
+    value = value.replace(' ', ' ')
+    return value
+
 def parse_multivalue(value):
     s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name)
     s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1)
@@ -76,6 +92,7 @@ periods_keys.sort(key=len)
 periods_keys.reverse()
 
 def parse_period(value):
+    value = parse_string(value)
     value = value[3:] if value.startswith('EUR') else value
     while value.endswith('-'):
         value = value[:-1]
@@ -92,13 +109,107 @@ def parse_period(value):
                 break
     return period
 
+period_names = {}
+for k in settings.ISHTAR_PERIODS.keys():
+    period = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k])
+    period_names[period.label] = period
+period_names_keys = period_names.keys()
+period_names_keys.sort(key=len)
+period_names_keys.reverse()
+
+def parse_period_name(value):
+    value = parse_string(value)
+    if not value:
+        return [period_names[u'']]
+    period, old_val = [], u''
+    while value and old_val != value:
+        old_val = value
+        for k in period_names_keys:
+            if value.startswith(k):
+                period.append(period_names[k])
+                value = value[len(k):]
+                break
+    return period
+
+_CACHED_PERMIT_TYPES = {}
+for k in settings.ISHTAR_PERMIT_TYPES:
+    txt_idx, label = settings.ISHTAR_PERMIT_TYPES[k]
+    permit_type, created = PermitType.objects.get_or_create(txt_idx=txt_idx,
+                                                defaults={'label':label,
+                                                          'available':True})
+    _CACHED_PERMIT_TYPES[k] = permit_type
+
+def parse_permittype(value):
+    value = parse_string(value).lower()
+    if value not in _CACHED_PERMIT_TYPES:
+        if not "" in _CACHED_PERMIT_TYPES:
+            return
+        value = ""
+    return _CACHED_PERMIT_TYPES[value]
+
+_CACHED_ADMIN_ACT_TYPES = {}
+def parse_admin_act_typ(value, code, owner):
+    value = parse_string(value).lower()
+    code = parse_string(code).lower()
+    if not value or not code:
+        return
+    if code not in _CACHED_ADMIN_ACT_TYPES:
+        act_type, created = ActType.objects.get_or_create(txt_idx=code,
+                                                defaults={'label':value})
+        _CACHED_ADMIN_ACT_TYPES[code] = act_type
+    return _CACHED_ADMIN_ACT_TYPES[code]
+
+def parse_fileref(value):
+    value = parse_string(value).split('/')[0]
+    match = re.search('[0-9].[0-9]*', value)
+    if not match:
+        return None
+    return int(match.group())
+
+def parse_orga(value, alternate_value, owner):
+    value = parse_string(value)
+    if not value:
+        value = parse_string(alternate_value)
+        if not value:
+            return
+    q = Organization.objects.filter(name__iexact=value)
+    if q.count():
+        return q.all()[0]
+    try:
+        organization_type = OrganizationType.objects.get(label__iexact=value)
+    except ObjectDoesNotExist:
+        organization_type = OrganizationType.objects.get(txt_idx='undefined')
+    orga = Organization.objects.create(name=value,
+                                       organization_type=organization_type,
+                                       history_modifier=owner)
+    return orga
+
+def parse_bool(value):
+    value = parse_string(value)
+    if value.lower() in ('yes', 'oui'):
+        value = True
+    elif value.lower() in ('no', 'non'):
+        value = False
+    else:
+        value = None
+    return value
+
 def parse_date(value):
+    value = parse_string(value).split(' ')[0]
     try:
         return datetime.datetime.strptime(value, '%d/%m/%Y')
     except:
         return None
 
+def parse_yearref(value):
+    value = parse_string(value).split('.')[0]
+    match = re.search('[0-9].[0-9]*', value)
+    if not match:
+        return None
+    return int(match.group())
+
 def parse_surface(value):
+    value = parse_string(value)
     value = value.replace(',', '.')
     try:
         # hectare en metre carrés
@@ -110,6 +221,7 @@ def parse_surface(value):
     return None
 
 def parse_year(value):
+    value = parse_string(value)
     try:
         yr = int(value)
     except:
@@ -119,6 +231,7 @@ def parse_year(value):
     return yr
 
 def parse_insee(value):
+    value = parse_string(value)
     values = []
     while len(value) > 4:
         values.append(value[:5])
@@ -133,7 +246,8 @@ def parse_insee(value):
             continue
     return towns
 
-def parse_patriarche(value):
+def parse_trunc_patriarche(value):
+    value = parse_string(value)
     if not value:
         return
     value = value.replace(' ', '')
@@ -144,6 +258,7 @@ def parse_patriarche(value):
     return '18' + unicode(value)
 
 def parse_operation_code(value):
+    value = parse_string(value)
     code = value.split('.')[-1]
     try:
         return int(code)
@@ -151,11 +266,37 @@ def parse_operation_code(value):
         return
 
 def parse_title(value):
+    value = parse_string(value)
     if not value:
         return
     return value.title()
 
+def parse_name_surname(value):
+    value = parse_string(value)
+    items = value.split(' ')
+    name = items[0]
+    surname = ""
+    if len(items) > 1:
+        name = " ".join(items[:-1])
+        surname = items[-1]
+    values = {"surname":parse_title(surname),
+              "name":parse_title(name)}
+    if not values['surname'] and not values['name']:
+        return
+    q = Person.objects.filter(**values)
+    if q.count():
+        return q.all()[0]
+    else:
+        defaults = {'history_modifier':DEFAULT_PERSON,
+                    'title':'',
+                    'person_type':PersonType.objects.get(
+                                            txt_idx='head_scientist')}
+        defaults.update(values)
+        p = Person.objects.create(**defaults)
+        return p
+
 def parse_person(surname, name, old_ref, owner):
+    value = parse_string(value)
     values = {"surname":parse_title(surname),
               "name":parse_title(name)}
     if not values['surname'] and not values['name']:
@@ -171,8 +312,115 @@ def parse_person(surname, name, old_ref, owner):
         defaults.update(values)
         p = Person.objects.create(**defaults)
         return p
+
+def parse_comment_addr_nature(addr, nature, owner):
+    addr = parse_string(addr)
+    nature = parse_string(nature)
+    comments = []
+    if nature:
+        comments += [u"Amanégement :", nature]
+    if addr:
+        comments += [u"Adresse :", addr]
+    if not comments:
+        return ""
+    return u"\n".join(comments)
+
 # si pas de start date : premier janvier de year
+ope_types = {
+    'AET':('other_study',
+           'Autre étude', True),
+    'APP':('assistance_preparation_help',
+           'Aide à la préparation de publication', True),
+    'DOC':('documents_study',
+           'Étude documentaire', True),
+    'EV':('evaluation',
+          "Fouille d'évaluation", True),
+    'FOU':('ancient_excavation',
+           "Fouille ancienne", True),
+    'FP':('prog_excavation',
+          "Fouille programmée", False),
+    'MH':('building_study', "Fouille avant MH", True),
+    'OPD':('arch_diagnostic',
+           "Diagnostic archéologique", True),
+    'PAN':('analysis_program',
+           "Programme d'analyses", False),
+    'PCR':('collective_research_project',
+           "Projet collectif de recherche", False),
+    'PMS':('specialized_eqp_prospection',
+           "Prospection avec matériel spécialisé", False),
+    'PRD':('diachronic_prospection',
+           "Prospection diachronique", False),
+    'PI':('diachronic_prospection',
+          "Prospection diachronique", False),
+    'PRM':('metal_detector_prospection',
+           "Prospection détecteur de métaux", False),
+    'PRT':('thematic_prospection',
+           "Prospection thématique", False),
+    'PT':('thematic_prospection',
+          "Prospection thématique", False),
+    'RAR':('cave_art_record',
+           "Relevé d'art rupestre", False),
+    'SD':('sampling_research',
+          "Sondage", False),
+    'SP':('prev_excavation',
+          "Fouille préventive", True),
+    'SU':('emergency_excavation',
+          "Fouille préventive d'urgence", True),
+}
+
+_CACHED_OPE_TYPES = {}
+
+def _prepare_ope_types():
+    for k in ope_types.keys():
+        txt_idx, label, preventive = ope_types[k]
+        ot, created = OperationType.objects.get_or_create(txt_idx=txt_idx,
+                        defaults={'label':label, 'preventive':preventive})
+        if k not in _CACHED_OPE_TYPES.keys():
+            _CACHED_OPE_TYPES[k] = ot
+
+def parse_patriarche_operationtype(value):
+    if value not in _CACHED_OPE_TYPES.keys():
+        return None
+    return _CACHED_OPE_TYPES[value]
+
+_dpt_re_filter = re.compile('^\([0-9]*\) ')
+
+def parse_ope_name(value):
+    if not value:
+        return ''
+    value = value.strip()
+    if value.lower() == 'null':
+        return ''
+    value = _dpt_re_filter.sub('', value)
+    return value
+
+def parse_ha(value):
+    value = parse_string(value)
+    try:
+        value = float(value)
+    except:
+        value = None
+    return value
+
+def parse_rapp_index(value):
+    value = parse_string(value)
+    items = re.findall(r'[0-9]+$', value)
+    if items:
+        return int(items[-1])
+
+_CACHED_DOC_TYPES = {}
+
+def parse_doc_types(value):
+    value = parse_string(value)
+    if value not in _CACHED_DOC_TYPES:
+        if value not in settings.ISHTAR_DOC_TYPES:
+            return
+        _CACHED_DOC_TYPES[value], created = SourceType.objects.get_or_create(
+            txt_idx=value,
+            defaults={"label":settings.ISHTAR_DOC_TYPES[value]})
+    return _CACHED_DOC_TYPES[value]
+
 # attrs, convert
 DEFAULT_OPE_COLS = [
     [], # numéro de dossier ?
@@ -203,11 +451,18 @@ DEFAULT_OPE_COLS = [
     (('surface',), parse_surface),
 ]
 
-OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \
+_OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \
            else DEFAULT_OPE_COLS
 
-@transaction.commit_manually
-def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
+OPE_COLS = []
+for cols in _OPE_COLS:
+    if cols:
+        OPE_COLS.append(Column(*cols))
+    else:
+        OPE_COLS.append(None)
+
+#@transaction.commit_manually
+def import_operations_csv(values, col_defs=OPE_COLS, update=True, person=None,
                           stdout=None):
     default_person = person or User.objects.order_by('pk').all()[0]
     # key : (class, default, reverse)
@@ -215,11 +470,17 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
         'administrative_act':(AdministrativeAct, {'history_modifier':default_person,
                               'act_type':ActType.objects.get(
                                   txt_idx='excavation_order')}, 'operation'),
+        'associated_file':(File, {'history_modifier':default_person,
+                           'file_type':FileType.objects.get(txt_idx='undefined')}, None),
+        'source':(OperationSource, {}, 'operation')
         }
+    _prepare_ope_types()
     ope_default = {'history_modifier':default_person}
     new_ops = 0
     error_ope, error_reversed, error_multis = [], [], []
+    multi_keys = set([column.col_models[0]
+                      for column in col_defs if column and column.multi])
     for line_idx, vals in enumerate(values):
         if stdout:
             stdout.write("\r* line %d" % (line_idx))
@@ -229,7 +490,9 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
         for col_idx, val in enumerate(vals):
             if len(col_defs) <= col_idx or not col_defs[col_idx]:
                 continue
-            attrs, typ, extra_cols = col_defs[col_idx]
+            col_def = col_defs[col_idx]
+            attrs, typ = col_def.col_models, col_def.format
+            extra_cols = col_def.associated_cols
             if not callable(typ):
                 typ = globals()[typ]
             c_args = args
@@ -268,39 +531,48 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
                 args.pop(key+'__month')
                 args.pop(key+'__day')
         reversed_items, multis = [], []
+        attached_models, attached_instance_models = {}, {}
         for k in args.keys():
            if k in key_classes:
                cls, default, reverse = key_classes[k]
                default.update(args[k])
                if reverse:
-                    reversed_items.append((cls, default, reverse))
+                    reversed_items.append((cls, default.copy(), reverse))
                    args.pop(k)
                    continue
-                try:
-                    obj = cls.objects.get(**default)
-                except:
-                    obj = cls.objects.create(**default)
-                    obj.save()
-                    transaction.commit()
-                args[k] = obj
-            elif type(args[k]) == list:
+                args.pop(k)
+                attached_instance_models[k] = default.copy()
+            elif type(args[k]) == list or k in multi_keys:
                multis.append((k, args[k]))
                args.pop(k)
+            elif '__' in k:
+                mod, value = k.split('__')
+                attached_models[mod] = args.pop(k)
        op = None
-        if not update and not args['operation_type']:
+        if not update and not args.get('operation_type'):
            #print "Pas d'operation_type"
            continue
+        #transaction.commit()
+        q = Operation.objects.filter(code_patriarche=args['code_patriarche'])
        try:
-            op = Operation.objects.get(code_patriarche=args['code_patriarche'])
-            if not update:
-                #print "Code patriarche existant"
-                continue
+            if q.count():
+                if not update:
+                    #print "Code patriarche existant"
+                    continue
+                op = q.all()[0]
        except:
-            pass
+            continue
        # check
        if not args.get('year') and args.get('start_date'):
            args['year'] = args['start_date'].year
        # creation
+        """
+        print args
+        print reversed_items
+        print multis
+        print attached_models
+        print attached_instance_models
+        """
        if not op:
            args.update(ope_default)
            #if not args.get('operation_code'):
@@ -314,7 +586,7 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
            #    error_ope.append((line_idx, args))
            #    transaction.rollback()
            #    continue
-            transaction.commit()
+            #transaction.commit()
        else: # mise à jour
            try:
                for k in args:
@@ -323,28 +595,43 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
                    setattr(op, k, args[k])
                op.save()
            except:
-                transaction.rollback()
+                #transaction.rollback()
                continue
-            transaction.commit()
+            #transaction.commit()
        try:
            for cls, default, reverse in reversed_items:
                default[reverse] = op
                it = cls(**default).save()
+                #transaction.commit()
        except:
-            transaction.rollback()
+            #transaction.rollback()
            error_reversed.append((line_idx, reversed_items))
            continue
-        transaction.commit()
        try:
            for k, vals in multis:
                for v in vals:
                    getattr(op, k).add(v)
            op.save()
        except:
-            transaction.rollback()
+            #transaction.rollback()
            error_multis.append((line_idx, multis))
            continue
-        transaction.commit()
+        for attr in attached_models:
+            setattr(op, attr, attached_models[attr])
+        op.save()
+        #transaction.commit()
+        for attr in attached_instance_models:
+            default = attached_instance_models[attr]
+            obj = getattr(op, attr)
+            if not obj:
+                obj = cls.objects.create(**default)
+                obj.save()
+                setattr(op, attr, obj)
+            else:
+                for k in default:
+                    setattr(obj, k, default[k])
+                obj.save()
+        #transaction.commit()
 
    errors = []
    if error_ope:
@@ -363,6 +650,7 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
        for line_idx, args in error_reversed:
            error += "line: " + str(line_idx) + " args: " + str(args) + '\n'
        errors.append(error)
+    #transaction.commit()
    return new_ops, errors
 
 def import_from_csv(filename, update=False, col_defs=OPE_COLS,
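Not part of the commit above: a minimal sketch of how the tuple-based column format (settings.ISHTAR_OPE_COL_FORMAT or DEFAULT_OPE_COLS) is wrapped by the new Column class introduced in this diff. Only the Column definition and the conversion step mirror the committed code; the column layout, the stand-in parse_surface body and the 'towns' attribute are invented for illustration.

# Illustration only -- not from the commit. Assumed layout: each entry is
# either an empty list (column ignored) or a tuple
# (target attributes, parser[, associated columns, multi]).
class Column:
    def __init__(self, col_models, format, associated_cols=None, multi=False):
        self.col_models, self.format = col_models, format
        self.associated_cols, self.multi = associated_cols, multi

def parse_surface(value):
    # stand-in parser: "12,5" given in hectares -> square metres
    try:
        return float(value.replace(',', '.')) * 10000
    except ValueError:
        return None

_OPE_COLS = [
    [],                                       # column skipped by the importer
    (('surface',), parse_surface),            # single attribute, direct callable
    (('towns',), 'parse_insee', None, True),  # parser named as a string, multi-valued
]

# Equivalent of the conversion loop added in the diff: empty entries become
# None so import_operations_csv() ignores those CSV columns.
OPE_COLS = [Column(*cols) if cols else None for cols in _OPE_COLS]

Parsers given as strings are looked up with globals()[typ] inside import_operations_csv(), so a settings-defined column format can reference parser functions by name rather than importing them.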