#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2012-2013 Étienne Loks

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# See the file COPYING for details.

"""
Utils: import archaeological operations from a CSV file
"""

import codecs
import csv
import datetime
import re

from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.template.defaultfilters import slugify

from ishtar_common.models import Town, Person, PersonType, OrganizationType, \
    Organization, SourceType
from archaeological_files.models import PermitType, File, FileType
from archaeological_operations.models import Operation, OperationType, \
    Period, AdministrativeAct, ActType, OperationSource, Parcel
# parse_parcels can be referenced by name in column format definitions
from archaeological_operations.utils import parse_parcels

DELIMITER = ";"
QUOTECHAR = '"'

# fallback owner attached to created items when none is provided
DEFAULT_PERSON = User.objects.order_by('pk').all()[0]


class Column(object):
    """Describe how a CSV column maps to model attributes."""
    def __init__(self, col_models, format, associated_cols=None, multi=False):
        self.col_models, self.format = col_models, format
        self.associated_cols, self.multi = associated_cols, multi


def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        yield [unicode(cell, 'utf-8') for cell in row]


def utf_8_encoder(unicode_csv_data):
    for line in unicode_csv_data:
        yield line.encode('utf-8')
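# ``unicode_csv_reader`` and ``utf_8_encoder`` follow the Unicode recipe
# from the Python 2 ``csv`` module documentation. A minimal usage sketch
# (illustrative only; the file name is a placeholder):
#
#   csv_file = codecs.open('operations.csv', 'rb', 'utf-8')
#   for row in unicode_csv_reader(csv_file, delimiter=DELIMITER,
#                                 quotechar=QUOTECHAR):
#       print u" | ".join(row)
#   csv_file.close()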
# map (preventive?, label) couples from the settings to OperationType items
ope_types = {}
for k in settings.ISHTAR_OPE_TYPES.keys():
    ot, created = OperationType.objects.get_or_create(
        txt_idx=settings.ISHTAR_OPE_TYPES[k][0],
        defaults={'label': settings.ISHTAR_OPE_TYPES[k][1],
                  'preventive': k[0] == u'préventive'})
    ope_types[k] = ot


def _get_parse_string(trunc_number=None):
    def parse_string(value):
        value = value.strip()
        if value == '#EMPTY':
            value = ''
        # non-breaking spaces may appear in exports: normalise them
        value = value.replace(u'\xa0', u' ')
        if trunc_number:
            value = value[:trunc_number]
        return value
    return parse_string

parse_string = _get_parse_string()


def parse_multivalue(value):
    # insert spaces in CamelCase values, e.g. "GalloRomain" -> "Gallo Romain"
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', value)
    s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1)
    return re.sub('([0-9])([a-z])', r'\1 \2', s1)


def parse_operationtype(value, preventive, owner):
    value = (preventive.strip(), value.strip())
    if value not in ope_types:
        return None
    return ope_types[value]


periods = {}
for k in settings.ISHTAR_PERIODS.keys():
    periods[k] = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k])

# longest keys first so that longer codes are matched before their prefixes
periods_keys = periods.keys()
periods_keys.sort(key=len)
periods_keys.reverse()


def parse_period(value):
    value = parse_string(value)
    value = value[3:] if value.startswith('EUR') else value
    while value.endswith('-'):
        value = value[:-1]
    value = value[3:] if value.startswith('EUR') else value
    if not value:
        return [periods[u'']]
    period, old_val = [], u''
    while value and old_val != value:
        old_val = value
        for k in periods_keys:
            if value.startswith(k):
                period.append(periods[k])
                value = value[len(k):]
                break
    return period
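# ``parse_period`` greedily consumes period codes from the start of the
# cell, longest known code first, until nothing more matches. A sketch,
# assuming 'NEO' and 'BRO' are keys of settings.ISHTAR_PERIODS:
#
#   parse_period(u'EURNEOBRO')
#   # -> [periods[u'NEO'], periods[u'BRO']]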
_REPLACED_PERIOD = [('deuxieme', 'second')]
_REPLACED_PERIOD += [(y, x) for x, y in _REPLACED_PERIOD]
REPLACED_PERIOD_DCT = dict(_REPLACED_PERIOD)

period_names = {}
for k in settings.ISHTAR_PERIODS.keys():
    period = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k])
    slug = slugify(period.label)
    period_names[slug] = period
    # also register the slug with known spelling variants
    for replaced in REPLACED_PERIOD_DCT.keys():
        if replaced in slug:
            period_names[slug.replace(replaced,
                                      REPLACED_PERIOD_DCT[replaced])] = period

# longest names first to avoid partial matches
period_names_keys = period_names.keys()
period_names_keys.sort(key=len)
period_names_keys.reverse()


def parse_period_name(value):
    value = parse_string(value)
    if not value:
        return [period_names[u'']]
    period, old_val = [], u''
    value = slugify(value)
    while value and old_val != value:
        old_val = value
        for k in period_names_keys:
            if value.startswith(k):
                period.append(period_names[k])
                value = value[len(k):]
                break
    return period


_CACHED_PERMIT_TYPES = {}
for k in settings.ISHTAR_PERMIT_TYPES:
    txt_idx, label = settings.ISHTAR_PERMIT_TYPES[k]
    permit_type, created = PermitType.objects.get_or_create(
        txt_idx=txt_idx, defaults={'label': label, 'available': True})
    _CACHED_PERMIT_TYPES[k] = permit_type


def parse_permittype(value):
    value = parse_string(value).lower()
    if value not in _CACHED_PERMIT_TYPES:
        if "" not in _CACHED_PERMIT_TYPES:
            return
        value = ""
    return _CACHED_PERMIT_TYPES[value]


_CACHED_ADMIN_ACT_TYPES = {}


def parse_admin_act_typ(value, code, owner):
    value = parse_string(value).lower()
    code = parse_string(code).lower()
    if not value or not code:
        return
    if code not in _CACHED_ADMIN_ACT_TYPES:
        act_type, created = ActType.objects.get_or_create(
            txt_idx=code, defaults={'label': value})
        _CACHED_ADMIN_ACT_TYPES[code] = act_type
    return _CACHED_ADMIN_ACT_TYPES[code]


def parse_fileref(value):
    value = parse_string(value).split('/')[0]
    value = value.split('.')[0]
    match = re.search('[0-9]+', value)
    if not match:
        return None
    return int(match.group())


def parse_orga(value, alternate_value, owner):
    value = parse_string(value)
    if not value:
        value = parse_string(alternate_value)
        if not value:
            return
    q = Organization.objects.filter(name__iexact=value)
    if q.count():
        return q.all()[0]
    try:
        organization_type = OrganizationType.objects.get(label__iexact=value)
    except ObjectDoesNotExist:
        organization_type = OrganizationType.objects.get(txt_idx='undefined')
    orga = Organization.objects.create(
        name=value, organization_type=organization_type,
        history_modifier=owner)
    return orga


def parse_bool(value):
    value = parse_string(value)
    if value.lower() in ('yes', 'oui'):
        value = True
    elif value.lower() in ('no', 'non'):
        value = False
    else:
        value = None
    return value


def parse_date(value):
    value = parse_string(value).split(' ')[0]
    try:
        return datetime.datetime.strptime(value, '%d/%m/%Y')
    except ValueError:
        return None


def parse_yearref(value):
    value = parse_string(value).split('.')[0]
    match = re.search('[0-9]+', value)
    if not match:
        return None
    return int(match.group())


def parse_surface(value):
    value = parse_string(value)
    value = value.replace(',', '.')
    try:
        # hectares to square metres
        value = float(value) * 10000
    except ValueError:
        return None
    return value or None


def parse_year(value):
    value = parse_string(value)
    try:
        yr = int(value)
    except ValueError:
        return None
    if yr < 1900 or yr > 2100:
        return None
    return yr


def parse_insee(value):
    # INSEE numbers are five characters long and may be concatenated
    value = parse_string(value)
    values = []
    while len(value) > 4:
        values.append(value[:5])
        value = value[5:]
    towns = []
    for value in values:
        try:
            town = Town.objects.get(numero_insee=value)
            towns.append(town)
        except ObjectDoesNotExist:
            # sys.stderr.write('INSEE number %s not in the database\n'
            #                  % value)
            continue
    return towns


def parse_trunc_patriarche(value):
    value = parse_string(value)
    if not value:
        return
    value = value.replace(' ', '')
    try:
        int(value)
    except ValueError:
        return
    return '18' + unicode(value)


def parse_operation_code(value):
    value = parse_string(value)
    code = value.split('.')[-1]
    try:
        return int(code)
    except ValueError:
        return


def parse_title(value):
    value = parse_string(value)
    if not value:
        return
    return value.title()


def parse_name_surname(value):
    value = parse_string(value)
    items = value.split(' ')
    name = items[0]
    surname = ""
    if len(items) > 1:
        name = " ".join(items[:-1])
        surname = items[-1]
    values = {"surname": (parse_title(surname) or '')[:30],
              "name": (parse_title(name) or '')[:30]}
    if not values['surname'] and not values['name']:
        return
    q = Person.objects.filter(**values)
    if q.count():
        return q.all()[0]
    defaults = {'history_modifier': DEFAULT_PERSON, 'title': ''}
    defaults.update(values)
    p = Person.objects.create(**defaults)
    p.person_types.add(PersonType.objects.get(txt_idx='head_scientist'))
    return p


def parse_person(surname, name, old_ref, owner):
    values = {"surname": parse_title(surname) or '',
              "name": parse_title(name) or ''}
    if not values['surname'] and not values['name']:
        return
    q = Person.objects.filter(**values)
    if q.count():
        return q.all()[0]
    defaults = {'history_modifier': owner, 'title': ''}
    defaults.update(values)
    p = Person.objects.create(**defaults)
    p.person_types.add(PersonType.objects.get(txt_idx='head_scientist'))
    return p


def parse_comment_addr_nature(nature, addr, owner):
    addr = parse_string(addr)
    nature = parse_string(nature)
    comments = []
    if nature:
        comments += [u"Aménagement :", nature]
    if addr:
        comments += [u"Adresse :", addr]
    if not comments:
        return ""
    return u"\n".join(comments)


# if no start date: first of January of the year

# operation types indexed by their Patriarche code
PATRIARCHE_OPE_TYPES = {
    'AET': ('other_study', "Autre étude", True),
    'APP': ('assistance_preparation_help',
            "Aide à la préparation de publication", True),
    'DOC': ('documents_study', "Étude documentaire", True),
    'EV': ('evaluation', "Fouille d'évaluation", True),
    'FOU': ('ancient_excavation', "Fouille ancienne", True),
    'FP': ('prog_excavation', "Fouille programmée", False),
    'MH': ('building_study', "Fouille avant MH", True),
    'OPD': ('arch_diagnostic', "Diagnostic archéologique", True),
    'PAN': ('analysis_program', "Programme d'analyses", False),
    'PCR': ('collective_research_project',
            "Projet collectif de recherche", False),
    'PMS': ('specialized_eqp_prospection',
            "Prospection avec matériel spécialisé", False),
    'PRD': ('diachronic_prospection', "Prospection diachronique", False),
    'PI': ('diachronic_prospection', "Prospection diachronique", False),
    'PRM': ('metal_detector_prospection',
            "Prospection détecteur de métaux", False),
    'PRT': ('thematic_prospection', "Prospection thématique", False),
    'PT': ('thematic_prospection', "Prospection thématique", False),
    'RAR': ('cave_art_record', "Relevé d'art rupestre", False),
    'SD': ('sampling_research', "Sondage", False),
    'SP': ('prev_excavation', "Fouille préventive", True),
    'SU': ('emergency_excavation', "Fouille préventive d'urgence", True),
}

_CACHED_OPE_TYPES = {}


def _prepare_ope_types():
    for k in PATRIARCHE_OPE_TYPES.keys():
        txt_idx, label, preventive = PATRIARCHE_OPE_TYPES[k]
        ot, created = OperationType.objects.get_or_create(
            txt_idx=txt_idx,
            defaults={'label': label, 'preventive': preventive})
        if k not in _CACHED_OPE_TYPES:
            _CACHED_OPE_TYPES[k] = ot


def parse_patriarche_operationtype(value):
    if value not in _CACHED_OPE_TYPES:
        return None
    return _CACHED_OPE_TYPES[value]
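# Once ``_prepare_ope_types()`` has been called, Patriarche type codes
# resolve to OperationType items. Illustrative sketch using the 'OPD' code
# listed above:
#
#   _prepare_ope_types()
#   ope_type = parse_patriarche_operationtype('OPD')
#   # ope_type.txt_idx == 'arch_diagnostic'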
_dpt_re_filter = re.compile('^\([0-9]*\) ')


def parse_ope_name(value):
    if not value:
        return ''
    value = value.strip()
    if value.lower() == 'null':
        return ''
    # strip a leading "(department number) " prefix
    value = _dpt_re_filter.sub('', value)
    return value


def parse_ha(value):
    value = parse_string(value)
    try:
        # hectares to square metres
        value = float(value) * 10000
    except ValueError:
        value = None
    return value


def parse_rapp_index(value):
    value = parse_string(value)
    items = re.findall(r'[0-9]+$', value)
    if items:
        return int(items[-1])


_CACHED_DOC_TYPES = {}


def parse_doc_types(value):
    value = parse_string(value)
    if value not in _CACHED_DOC_TYPES:
        if value not in settings.ISHTAR_DOC_TYPES:
            return
        _CACHED_DOC_TYPES[value], created = SourceType.objects.get_or_create(
            txt_idx=value,
            defaults={"label": settings.ISHTAR_DOC_TYPES[value]})
    return _CACHED_DOC_TYPES[value]


# default column description: (model attributes, conversion callable);
# empty lists mark columns that are not imported
DEFAULT_OPE_COLS = [
    [],  # file number?
    (('operation_type',), parse_operationtype),
    (('common_name',), unicode),
    (('in_charge', 'name'), unicode),
    (('in_charge', 'surname'), unicode),
    [],  # state?
    [],  # address?
    [],  # origin?
    (('periods',), parse_period),
    [],  # program?
    [],  # attached PC (building permit)?
    [],  # empty
    (('administrative_act', 'ref_sra'), unicode),
    (('administrative_act', 'signature_date'), parse_date),
    (('start_date',), parse_date),
    (('excavation_end_date',), parse_date),
    (('year',), parse_year),
    [],  # identification
    (('code_patriarche',), int),
    [],  # X degree
    [],  # Y degree
    [],  # X input?
    [],  # Y input?
    [],  # georef
    [],  # geometry
    (('surface',), parse_surface),
]

_OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \
    else DEFAULT_OPE_COLS

OPE_COLS = []
for cols in _OPE_COLS:
    if cols:
        OPE_COLS.append(Column(*cols))
    else:
        OPE_COLS.append(None)


def ope_postimportfix(ope, dct):
    # if the year is missing, deduce it from the start or end date
    changed = False
    if not ope.year:
        sd = dct.get('start_date')
        ed = dct.get('end_date')
        if sd:
            ope.year = sd.year
            changed = True
        elif ed:
            ope.year = ed.year
            changed = True
    if changed:
        ope.save()
    return ope


class BreakIt(Exception):
    pass


class RelatedClass(object):
    def __init__(self, key, cls, default_data={}, reverse_key=False,
                 unique_keys=[], extra_data=[], mandatory_fields=[],
                 multi=None):
        self.key, self.cls, self.default_data = key, cls, default_data
        self.reverse_key, self.unique_keys = reverse_key, unique_keys
        self.extra_data, self.multi = extra_data, multi
        self.mandatory_fields = mandatory_fields

    def create_object(self, data):
        for mandatory_field in self.mandatory_fields:
            if not data.get(mandatory_field):
                return None
        if self.unique_keys:
            unique_data = {}
            for k in self.unique_keys:
                unique_data[k] = data.pop(k)
            created = False
            filtr = unique_data.copy()
            q = None
            # only query if all unique keys have a value
            if not [k for k in filtr if not filtr[k]]:
                q = self.cls.objects.filter(**unique_data)
            if q and q.count() > 1:
                # ambiguous match: arbitrarily take the first item
                obj = q.all()[0]
            else:
                unique_data['defaults'] = data
                try:
                    obj, created = self.cls.objects.get_or_create(
                        **unique_data)
                except ValueError:
                    return None
                if not created:
                    for k in unique_data['defaults']:
                        try:
                            setattr(obj, k, unique_data['defaults'][k])
                        except ValueError:
                            continue
                    obj.save()
        else:
            obj = self.cls.objects.create(**data)
        return obj

    def create(self, item, data, attr=None):
        datas = data
        if not self.multi:
            datas = [data]
        objs = []
        for data in datas:
            if self.reverse_key:
                data[self.reverse_key] = item
                if self.reverse_key not in self.unique_keys:
                    self.unique_keys.append(self.reverse_key)
                obj = self.create_object(data)
            else:
                obj = getattr(item, attr)
                if not obj:
                    obj = self.create_object(data)
                    setattr(item, attr, obj)
                    item.save()
                else:
                    for k in data:
                        setattr(obj, k, data[k])
                    obj.save()
            objs.append(obj)
        if not self.multi:
            return objs[0]
        return objs
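# Column definitions are (col_models, format[, associated_cols, multi])
# tuples wrapped in ``Column``. A hedged sketch of a custom mapping (the
# column indexes, the extra "preventive" column and the ``towns`` m2m
# attribute are assumptions, not part of the default format):
#
#   CUSTOM_OPE_COLS = [
#       None,  # column 0 is not imported
#       # column 1 holds the type label; column 2 says if it is preventive
#       Column(('operation_type',), parse_operationtype,
#              associated_cols=[2]),
#       Column(('towns',), parse_insee, multi=True),
#   ]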
#@transaction.commit_manually
def import_operations_csv(values, col_defs=OPE_COLS, update=True,
                          person=None, stdout=None, lines=None):
    default_person = person or User.objects.order_by('pk').all()[0]
    RELATED_CLASSES = [
        RelatedClass('administrative_act', AdministrativeAct,
                     default_data={'history_modifier': default_person,
                                   'act_type': ActType.objects.get(
                                       txt_idx='excavation_order')},
                     reverse_key='operation',
                     unique_keys=['ref_sra']),
        #RelatedClass('associated_file', File,
        #             extra_data=['year'],
        #             default_data={'history_modifier': default_person,
        #                           'file_type': FileType.objects.get(
        #                               txt_idx='undefined')},
        #             unique_keys=['internal_reference', 'year']),
        RelatedClass('source', OperationSource,
                     reverse_key='operation',
                     unique_keys=['index']),
        RelatedClass('parcels', Parcel,
                     reverse_key='operation',
                     unique_keys=['operation', 'town', 'section',
                                  'parcel_number'],
                     multi=True),
    ]
    related_classes = dict([(rel_cls.key, rel_cls)
                            for rel_cls in RELATED_CLASSES])
    _prepare_ope_types()
    ope_default = {'history_modifier': default_person}
    new_ops = 0
    error_ope, error_reversed, error_multis = [], [], []
    multi_keys = set([column.col_models[0] for column in col_defs
                      if column and column.multi])
    # restrict the treated lines: either a comma separated list ("2,5,7")
    # or an inclusive range ("10-20")
    restrict_lines = []
    start_line, end_line = None, None
    if lines:
        if '-' not in lines:
            restrict_lines = [int(line) for line in lines.split(',')]
        else:
            start_line, end_line = lines.split('-')
            start_line, end_line = int(start_line), int(end_line) + 1
    if end_line:
        # slice the end first so that both indexes refer to the source file
        values = list(values)[:end_line]
    if start_line:
        values = list(values)[start_line:]
    for line_idx, vals in enumerate(values):
        if restrict_lines:
            if line_idx > max(restrict_lines):
                break
            if line_idx not in restrict_lines:
                continue
        if start_line:
            line_idx += start_line
        if stdout:
            stdout.write("\r* line %d" % line_idx)
        if not line_idx:
            continue  # skip the header
        args = {}
        for col_idx, val in enumerate(vals):
            if len(col_defs) <= col_idx or not col_defs[col_idx]:
                continue
            col_def = col_defs[col_idx]
            attrs, typ = col_def.col_models, col_def.format
            extra_cols = col_def.associated_cols
            if not callable(typ):
                # the format can be given by name, e.g. "parse_string_30"
                if typ.startswith('parse_string_'):
                    typ = _get_parse_string(int(typ.split('_')[-1]))
                else:
                    typ = globals()[typ]
            c_args = args
            for attr in attrs:
                if attr not in c_args:
                    c_args[attr] = {}
                c_args = c_args[attr]
            try:
                if not extra_cols:
                    v = typ(val)
                else:
                    arguments = [vals[col_number]
                                 for col_number in extra_cols]
                    if not [arg for arg in arguments if arg]:
                        continue
                    arguments += [default_person]
                    v = typ(val, *arguments)
            except Exception:
                v = None
            if len(attrs) == 1:
                args[attrs[0]] = v
            elif len(attrs) == 2:
                args[attrs[0]][attrs[1]] = v
            elif len(attrs) == 3:
                args[attrs[0]][attrs[1]][attrs[2]] = v
        # manage exploded dates ("xxx__year", "xxx__month", "xxx__day")
        for k in args.keys():
            if '__year' in k:
                key = k[:-len('__year')]
                try:
                    v = datetime.datetime(args[k], args[key + '__month'],
                                          args[key + '__day'])
                    args[key] = v
                except (KeyError, TypeError, ValueError):
                    pass
                args.pop(k)
                args.pop(key + '__month', None)
                args.pop(key + '__day', None)
        related_items = []
        multis = []
        attached_models = {}
        for k in args.keys():
            try:
                if k in related_classes:
                    rel_cls = related_classes[k]
                    default = rel_cls.default_data
                    if rel_cls.multi:
                        rel_values = []
                        for v in args[k]:
                            v.update(default)
                            rel_values.append(v)
                    else:
                        rel_values = default.copy()
                        rel_values.update(args[k])
                    for extra in rel_cls.extra_data:
                        if not args.get(extra):
                            raise BreakIt
                        rel_values[extra] = args[extra]
                    args.pop(k)
                    related_items.append((rel_cls, rel_values, k))
                elif k in multi_keys:
                    multis.append((k, args[k]))
                    args.pop(k)
                elif '__' in k:
                    mod, value = k.split('__')
                    attached_models[(mod, value)] = args.pop(k)
            except BreakIt:
                args.pop(k)
                continue
        op = None
        if not args.get('operation_type'):
            # no operation type: the line cannot be imported
            continue
        try:
            int(args['code_patriarche'])
        except (KeyError, TypeError, ValueError):
            continue
        q = Operation.objects.filter(
            code_patriarche=args['code_patriarche'])
        if q.count():
            if not update:
                # the Patriarche code already exists and no update is asked
                continue
            op = q.all()[0]
        # check the year
        if not args.get('year') and args.get('start_date'):
            args['year'] = args['start_date'].year
        if not op:
            # creation
            args.update(ope_default)
            #if not args.get('operation_code'):
            #    args['operation_code'] = \
            #        Operation.get_available_operation_code(args['year'])
            try:
                op = Operation.objects.create(**args)
                new_ops += 1
            except Exception:
                error_ope.append((line_idx, args))
                continue
        else:
            # update: only fill empty fields
            try:
                for k in args:
                    if getattr(op, k):
                        continue
                    setattr(op, k, args[k])
                op.save()
            except Exception:
                continue
        try:
            for k, multi_vals in multis:
                if not multi_vals:
                    continue
                for v in multi_vals:
                    getattr(op, k).add(v)
                op.save()
        except Exception:
            error_multis.append((line_idx, multis))
        for attr, attached_attr in attached_models:
            field = getattr(op, attr)
            if field:
                setattr(field, attached_attr,
                        attached_models[(attr, attached_attr)])
                field.save()
        for rel_cls, data, attr in related_items:
            try:
                rel_cls.create(op, data, attr)
            except Exception:
                error_reversed.append((line_idx, data))
        ope_postimportfix(op, args)
    errors = []
    if error_ope:
        error = "Error while recording these operations:\n"
        for line_idx, args in error_ope:
            error += "line: %s args: %s\n" % (line_idx, args)
        errors.append(error)
    if error_multis:
        error = "Error while recording these multiple items attached to " \
                "an operation:\n"
        for line_idx, args in error_multis:
            error += "line: %s args: %s\n" % (line_idx, args)
        errors.append(error)
    if error_reversed:
        error = "Error while recording these items depending on an " \
                "operation:\n"
        for line_idx, args in error_reversed:
            error += "line: %s args: %s\n" % (line_idx, args)
        errors.append(error)
    return new_ops, errors
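# ``import_operations_csv`` accepts any iterable of rows, not only rows
# read from a file, which makes small dry runs possible. Illustrative
# sketch (the placeholder rows below are skipped by the importer, as they
# carry no operation type):
#
#   header = [u''] * len(OPE_COLS)
#   row = [u''] * len(OPE_COLS)
#   new_ops, errors = import_operations_csv([header, row])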
def import_from_csv(filename, update=True, col_defs=OPE_COLS, person=None,
                    stdout=None, lines=None):
    """
    Import from a CSV file.
    Return the number of operations treated and a list of errors.
    """
    try:
        values = unicode_csv_reader(codecs.open(filename, 'rb', 'utf-8'),
                                    delimiter=DELIMITER,
                                    quotechar=QUOTECHAR)
    except IOError:
        return 0, [u"Incorrect CSV file."]
    new_ops, errors = import_operations_csv(values, col_defs=col_defs,
                                            update=update, person=person,
                                            stdout=stdout, lines=lines)
    return new_ops, errors
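# Typical usage from a Django shell or a management command (the file name
# and line range are placeholders):
#
#   new_ops, errors = import_from_csv('operations.csv', lines='1-100')
#   print "%d new operation(s) imported" % new_ops
#   for error in errors:
#       print error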