summary | refs | log | tree | commit | diff
path: root/archaeological_operations/import_from_csv.py
diff options
context:
space:
mode:
Diffstat (limited to 'archaeological_operations/import_from_csv.py')
-rw-r--r-- archaeological_operations/import_from_csv.py | 348
1 files changed, 318 insertions, 30 deletions
diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py
index 96ffcf085..b4e18635a 100644
--- a/archaeological_operations/import_from_csv.py
+++ b/archaeological_operations/import_from_csv.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Copyright (C) 2012 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+# Copyright (C) 2012-2013 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
@@ -32,13 +32,22 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.db import transaction
-from ishtar_common.models import Town, Person, PersonType
+from ishtar_common.models import Town, Person, PersonType, OrganizationType, \
+ Organization
+from archaeological_files.models import PermitType, File, FileType
from archaeological_operations.models import Operation, OperationType, Period, \
- AdministrativeAct, ActType
+ AdministrativeAct, ActType, OperationSource
+DEFAULT_PERSON = User.objects.order_by('pk').all()[0]
+
class Column(object):
    """Describe how one CSV column is converted and stored.

    The constructor arguments are kept unchanged as attributes:

    - ``col_models``: sequence of target attribute names; the importer reads
      ``col_models[0]`` when it collects the keys of multi-valued columns.
    - ``format``: converter for the raw cell, either a callable or the name
      of a module-level function (non-callables are looked up in
      ``globals()`` by the importer).
    - ``associated_cols``: handed to the importer as the column's extra
      columns; ``None`` when there are none.
    - ``multi``: true when the column feeds a multi-valued attribute.
    """

    def __init__(self, col_models, format, associated_cols=None, multi=False):
        # "format" shadows the builtin, but it is part of the public
        # signature (callers may pass it by keyword), so the name is kept.
        self.col_models = col_models
        self.format = format
        self.associated_cols = associated_cols
        self.multi = multi

    def __repr__(self):
        return "%s(%r, %r, associated_cols=%r, multi=%r)" % (
            type(self).__name__, self.col_models, self.format,
            self.associated_cols, self.multi)
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
# csv.py doesn't do Unicode; encode temporarily as UTF-8:
+
csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
dialect=dialect, **kwargs)
for row in csv_reader:
@@ -57,6 +66,13 @@ for k in settings.ISHTAR_OPE_TYPES.keys():
'preventive':k[0]==u'préventive'})
ope_types[k] = ot
def parse_string(value):
    """Return a cleaned-up version of a raw CSV cell.

    Surrounding whitespace is stripped and the ``#EMPTY`` placeholder is
    turned into an empty string.  ``None`` is also treated as an empty cell,
    so callers do not crash on a missing value.
    """
    if value is None:
        return ''
    value = value.strip()
    if value == '#EMPTY':
        return ''
    # NOTE(review): as it appears here both arguments are a single ordinary
    # space, which makes this call a no-op.  The first argument was probably
    # a double or non-breaking space lost in transit -- confirm against the
    # repository before relying on it.
    return value.replace(' ', ' ')
+
def parse_multivalue(value):
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name)
s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1)
@@ -76,6 +92,7 @@ periods_keys.sort(key=len)
periods_keys.reverse()
def parse_period(value):
+ value = parse_string(value)
value = value[3:] if value.startswith('EUR') else value
while value.endswith('-'):
value = value[:-1]
@@ -92,13 +109,107 @@ def parse_period(value):
break
return period
# Map each configured period's label to its Period instance.
period_names = {}
for k in settings.ISHTAR_PERIODS:
    period = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k])
    period_names[period.label] = period
# Labels ordered from longest to shortest, so that prefix matching in
# parse_period_name tries the most specific label first.
period_names_keys = sorted(period_names, key=len)
period_names_keys.reverse()

+
def parse_period_name(value):
    """Split a cell made of concatenated period labels into Period objects.

    The cleaned cell is consumed from the left: at each step the first label
    of ``period_names_keys`` that prefixes the remaining text is removed and
    its period recorded.  Parsing stops when nothing is left or when no label
    matches any more.  An empty cell yields the single period registered
    under the empty label.
    """
    remaining = parse_string(value)
    if not remaining:
        return [period_names[u'']]
    found = []
    previous = u''
    while remaining and previous != remaining:
        previous = remaining
        for label in period_names_keys:
            if not remaining.startswith(label):
                continue
            found.append(period_names[label])
            remaining = remaining[len(label):]
            break
    return found

+
# Permit types keyed by the CSV value declared in
# settings.ISHTAR_PERMIT_TYPES; missing database rows are created on import
# of this module.
_CACHED_PERMIT_TYPES = {}
for k in settings.ISHTAR_PERMIT_TYPES:
    txt_idx, label = settings.ISHTAR_PERMIT_TYPES[k]
    permit_type, created = PermitType.objects.get_or_create(
        txt_idx=txt_idx,
        defaults={'label': label, 'available': True})
    _CACHED_PERMIT_TYPES[k] = permit_type

+
def parse_permittype(value):
    """Return the cached permit type matching a CSV cell.

    The cell is cleaned and lower-cased before the lookup.  Unknown values
    fall back to the permit type registered under the empty key; when there
    is no such entry the result is ``None``.
    """
    key = parse_string(value).lower()
    if key in _CACHED_PERMIT_TYPES:
        return _CACHED_PERMIT_TYPES[key]
    if "" in _CACHED_PERMIT_TYPES:
        return _CACHED_PERMIT_TYPES[""]
    return None

+
# Administrative act types already fetched or created, keyed by code.
_CACHED_ADMIN_ACT_TYPES = {}


def parse_admin_act_typ(value, code, owner):
    """Return the ActType for a (label, code) pair of cells.

    Both cells are cleaned and lower-cased.  ``None`` is returned when either
    is empty.  Otherwise the act type whose ``txt_idx`` equals the code is
    fetched, or created with the label, and remembered in the module cache.
    ``owner`` is accepted but not used.
    """
    label = parse_string(value).lower()
    idx = parse_string(code).lower()
    if not (label and idx):
        return
    if idx in _CACHED_ADMIN_ACT_TYPES:
        return _CACHED_ADMIN_ACT_TYPES[idx]
    act_type, created = ActType.objects.get_or_create(
        txt_idx=idx, defaults={'label': label})
    _CACHED_ADMIN_ACT_TYPES[idx] = act_type
    return act_type

+
def parse_fileref(value):
    """Extract the numeric file reference from a CSV cell.

    Only the part of the cleaned cell before the first ``/`` is considered.
    The first run of digits found there is returned as an ``int``; ``None``
    is returned when it contains no digit.

    The previous pattern, ``[0-9].[0-9]*``, used an unescaped ``.``: a
    single-digit reference did not match at all, and a non-digit second
    character (e.g. ``1-23``) was captured and made ``int()`` raise.
    """
    head = parse_string(value).split('/')[0]
    match = re.search(r'[0-9]+', head)
    if not match:
        return None
    return int(match.group())

+
def parse_orga(value, alternate_value, owner):
    """Find or create the Organization named in a CSV cell.

    ``alternate_value`` is used when ``value`` is empty after cleaning;
    ``None`` is returned when both are empty.  An organization whose name
    matches case-insensitively is reused.  Otherwise a new one is created
    with ``owner`` as history modifier; its type is the OrganizationType
    whose label matches the name, or the ``undefined`` type as a fallback.
    """
    # NOTE(review): ObjectDoesNotExist is not among the imports visible in
    # this view -- confirm it is imported elsewhere in the module.
    name = parse_string(value) or parse_string(alternate_value)
    if not name:
        return
    matches = Organization.objects.filter(name__iexact=name)
    if matches.count():
        return matches.all()[0]
    try:
        organization_type = OrganizationType.objects.get(label__iexact=name)
    except ObjectDoesNotExist:
        organization_type = OrganizationType.objects.get(txt_idx='undefined')
    return Organization.objects.create(name=name,
                                       organization_type=organization_type,
                                       history_modifier=owner)

+
def parse_bool(value):
    """Convert a yes/no cell (English or French) into a boolean.

    Returns ``True`` for ``yes``/``oui``, ``False`` for ``no``/``non``
    (case-insensitive) and ``None`` for anything else.
    """
    answer = parse_string(value).lower()
    if answer in ('yes', 'oui'):
        return True
    if answer in ('no', 'non'):
        return False
    return None

+
def parse_date(value):
    """Parse a ``dd/mm/YYYY`` date from a CSV cell.

    Anything after the first space of the cleaned cell (e.g. a time part) is
    ignored.  Returns a ``datetime.datetime``, or ``None`` when the text is
    not a valid date in that format.
    """
    day_part = parse_string(value).split(' ')[0]
    try:
        return datetime.datetime.strptime(day_part, '%d/%m/%Y')
    except ValueError:
        # Only a malformed date is expected here; other errors should not
        # be silently hidden as they were by the former bare "except".
        return None
def parse_yearref(value):
    """Extract the numeric year part of a reference cell.

    Only the part of the cleaned cell before the first ``.`` is considered.
    The first run of digits found there is returned as an ``int``; ``None``
    is returned when it contains no digit.

    The previous pattern, ``[0-9].[0-9]*``, used an unescaped ``.``: a
    single digit did not match, and a non-digit second character was
    captured and made ``int()`` raise.
    """
    head = parse_string(value).split('.')[0]
    match = re.search(r'[0-9]+', head)
    if not match:
        return None
    return int(match.group())

+
def parse_surface(value):
+ value = parse_string(value)
value = value.replace(',', '.')
try:
# hectare en metre carrés
@@ -110,6 +221,7 @@ def parse_surface(value):
return None
def parse_year(value):
+ value = parse_string(value)
try:
yr = int(value)
except:
@@ -119,6 +231,7 @@ def parse_year(value):
return yr
def parse_insee(value):
+ value = parse_string(value)
values = []
while len(value) > 4:
values.append(value[:5])
@@ -133,7 +246,8 @@ def parse_insee(value):
continue
return towns
-def parse_patriarche(value):
+def parse_trunc_patriarche(value):
+ value = parse_string(value)
if not value:
return
value = value.replace(' ', '')
@@ -144,6 +258,7 @@ def parse_patriarche(value):
return '18' + unicode(value)
def parse_operation_code(value):
+ value = parse_string(value)
code = value.split('.')[-1]
try:
return int(code)
@@ -151,11 +266,37 @@ def parse_operation_code(value):
return
def parse_title(value):
    """Return the cleaned cell in title case, or ``None`` when it is empty."""
    cleaned = parse_string(value)
    return cleaned.title() if cleaned else None
def parse_name_surname(value):
    """Find or create the Person described by a single "name surname" cell.

    The cleaned cell is split on spaces: the last word is taken as the
    surname and everything before it as the name; a single word is used as
    the name only.  Both parts are title-cased.  ``None`` is returned when
    both are empty.  An existing Person with the same name and surname is
    reused; otherwise one is created with the ``head_scientist`` person type
    and DEFAULT_PERSON as history modifier.
    """
    words = parse_string(value).split(' ')
    if len(words) > 1:
        name, surname = " ".join(words[:-1]), words[-1]
    else:
        name, surname = words[0], ""
    values = {"surname": parse_title(surname),
              "name": parse_title(name)}
    if not (values['surname'] or values['name']):
        return
    existing = Person.objects.filter(**values)
    if existing.count():
        return existing.all()[0]
    defaults = {
        'history_modifier': DEFAULT_PERSON,
        'title': '',
        'person_type': PersonType.objects.get(txt_idx='head_scientist'),
    }
    defaults.update(values)
    return Person.objects.create(**defaults)

+
def parse_person(surname, name, old_ref, owner):
+ value = parse_string(value)
values = {"surname":parse_title(surname),
"name":parse_title(name)}
if not values['surname'] and not values['name']:
@@ -171,8 +312,115 @@ def parse_person(surname, name, old_ref, owner):
defaults.update(values)
p = Person.objects.create(**defaults)
return p
+
def parse_comment_addr_nature(addr, nature, owner):
    """Build a comment text from the planning-nature and address cells.

    Both cells are cleaned with parse_string.  Each non-empty one adds a
    heading line followed by its text, nature first, and the lines are
    joined with newlines.  An empty string is returned when both cells are
    empty.  ``owner`` is accepted but not used.
    """
    addr = parse_string(addr)
    nature = parse_string(nature)
    comments = []
    if nature:
        # The heading was misspelled in the original literal; corrected so
        # the stored comments carry the proper French word.
        comments += [u"Aménagement :", nature]
    if addr:
        comments += [u"Adresse :", addr]
    if not comments:
        return ""
    return u"\n".join(comments)

+
# if there is no start date: January 1st of year
# Operation-type code -> (txt_idx, label, preventive flag); the tuples are
# unpacked in that order by _prepare_ope_types.
ope_types = {
    'AET': ('other_study', 'Autre étude', True),
    'APP': ('assistance_preparation_help',
            'Aide à la préparation de publication', True),
    'DOC': ('documents_study', 'Étude documentaire', True),
    'EV': ('evaluation', "Fouille d'évaluation", True),
    'FOU': ('ancient_excavation', "Fouille ancienne", True),
    'FP': ('prog_excavation', "Fouille programmée", False),
    'MH': ('building_study', "Fouille avant MH", True),
    'OPD': ('arch_diagnostic', "Diagnostic archéologique", True),
    'PAN': ('analysis_program', "Programme d'analyses", False),
    'PCR': ('collective_research_project',
            "Projet collectif de recherche", False),
    'PMS': ('specialized_eqp_prospection',
            "Prospection avec matériel spécialisé", False),
    'PRD': ('diachronic_prospection', "Prospection diachronique", False),
    'PI': ('diachronic_prospection', "Prospection diachronique", False),
    'PRM': ('metal_detector_prospection',
            "Prospection détecteur de métaux", False),
    'PRT': ('thematic_prospection', "Prospection thématique", False),
    'PT': ('thematic_prospection', "Prospection thématique", False),
    'RAR': ('cave_art_record', "Relevé d'art rupestre", False),
    'SD': ('sampling_research', "Sondage", False),
    'SP': ('prev_excavation', "Fouille préventive", True),
    'SU': ('emergency_excavation', "Fouille préventive d'urgence", True),
}
+
# OperationType instances keyed by operation-type code; filled by
# _prepare_ope_types().
_CACHED_OPE_TYPES = {}


def _prepare_ope_types():
    """Ensure every operation type of ``ope_types`` exists and cache it.

    Each entry is fetched, or created with its label and preventive flag,
    through ``get_or_create``.  A code that is already cached keeps its
    current object.
    """
    for code in ope_types:
        txt_idx, label, preventive = ope_types[code]
        ot, _created = OperationType.objects.get_or_create(
            txt_idx=txt_idx,
            defaults={'label': label, 'preventive': preventive})
        # Direct dict membership instead of scanning a list of keys.
        _CACHED_OPE_TYPES.setdefault(code, ot)


def parse_patriarche_operationtype(value):
    """Return the cached OperationType for a code, or ``None``.

    The cache is only populated once _prepare_ope_types() has run; the value
    is looked up as given, without any cleaning.
    """
    return _CACHED_OPE_TYPES.get(value)

+
# Leading "(digits) " prefix of an operation name; raw string so that the
# backslash escapes reach the regex engine untouched.
_dpt_re_filter = re.compile(r'^\([0-9]*\) ')


def parse_ope_name(value):
    """Clean an operation name cell.

    Empty values and the literal ``null`` (any case) give an empty string.
    Otherwise the stripped text is returned without a leading parenthesised
    number followed by a space, e.g. ``(45) ``.
    """
    if not value:
        return ''
    value = value.strip()
    if value.lower() == 'null':
        return ''
    return _dpt_re_filter.sub('', value)

+
def parse_ha(value):
    """Convert a cleaned cell to a ``float``; ``None`` when not numeric."""
    text = parse_string(value)
    try:
        return float(text)
    except ValueError:
        # float() on a string only fails with ValueError; the former bare
        # "except" would also have hidden unrelated errors.
        return None

+
def parse_rapp_index(value):
    """Return the number ending the cleaned cell as an ``int``.

    ``None`` is returned when the cell does not end with digits.
    """
    trailing = re.findall(r'[0-9]+$', parse_string(value))
    if not trailing:
        return None
    return int(trailing[-1])

+
# Source types already fetched or created, keyed by the cleaned cell value.
_CACHED_DOC_TYPES = {}


def parse_doc_types(value):
    """Return the source type matching a document-type cell.

    The cleaned cell must be a key of ``settings.ISHTAR_DOC_TYPES``,
    otherwise ``None`` is returned.  The matching source type is fetched, or
    created with the configured label, then kept in the module cache.
    """
    # NOTE(review): SourceType is not among the imports visible in this
    # view -- confirm it is imported elsewhere in the module.
    key = parse_string(value)
    if key in _CACHED_DOC_TYPES:
        return _CACHED_DOC_TYPES[key]
    if key not in settings.ISHTAR_DOC_TYPES:
        return
    source_type, created = SourceType.objects.get_or_create(
        txt_idx=key,
        defaults={"label": settings.ISHTAR_DOC_TYPES[key]})
    _CACHED_DOC_TYPES[key] = source_type
    return source_type

+
# attrs, convert
DEFAULT_OPE_COLS = [
[], # numéro de dossier ?
@@ -203,11 +451,18 @@ DEFAULT_OPE_COLS = [
(('surface',), parse_surface),
]
-OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \
+_OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \
else DEFAULT_OPE_COLS
-@transaction.commit_manually
-def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
# Wrap every non-empty column definition in a Column; empty definitions
# stay as None so the importer skips those columns.
OPE_COLS = [Column(*cols) if cols else None for cols in _OPE_COLS]
+
+#@transaction.commit_manually
+def import_operations_csv(values, col_defs=OPE_COLS, update=True, person=None,
stdout=None):
default_person = person or User.objects.order_by('pk').all()[0]
# key : (class, default, reverse)
@@ -215,11 +470,17 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
'administrative_act':(AdministrativeAct, {'history_modifier':default_person,
'act_type':ActType.objects.get(
txt_idx='excavation_order')}, 'operation'),
+ 'associated_file':(File, {'history_modifier':default_person,
+ 'file_type':FileType.objects.get(txt_idx='undefined')}, None),
+ 'source':(OperationSource, {}, 'operation')
}
+ _prepare_ope_types()
ope_default = {'history_modifier':default_person}
new_ops = 0
error_ope, error_reversed, error_multis = [], [], []
+ multi_keys = set([column.col_models[0]
+ for column in col_defs if column and column.multi])
for line_idx, vals in enumerate(values):
if stdout:
stdout.write("\r* line %d" % (line_idx))
@@ -229,7 +490,9 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
for col_idx, val in enumerate(vals):
if len(col_defs) <= col_idx or not col_defs[col_idx]:
continue
- attrs, typ, extra_cols = col_defs[col_idx]
+ col_def = col_defs[col_idx]
+ attrs, typ = col_def.col_models, col_def.format
+ extra_cols = col_def.associated_cols
if not callable(typ):
typ = globals()[typ]
c_args = args
@@ -268,39 +531,48 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
args.pop(key+'__month')
args.pop(key+'__day')
reversed_items, multis = [], []
+ attached_models, attached_instance_models = {}, {}
for k in args.keys():
if k in key_classes:
cls, default, reverse = key_classes[k]
default.update(args[k])
if reverse:
- reversed_items.append((cls, default, reverse))
+ reversed_items.append((cls, default.copy(), reverse))
args.pop(k)
continue
- try:
- obj = cls.objects.get(**default)
- except:
- obj = cls.objects.create(**default)
- obj.save()
- transaction.commit()
- args[k] = obj
- elif type(args[k]) == list:
+ args.pop(k)
+ attached_instance_models[k] = default.copy()
+ elif type(args[k]) == list or k in multi_keys:
multis.append((k, args[k]))
args.pop(k)
+ elif '__' in k:
+ mod, value = k.split('__')
+ attached_models[mod] = args.pop(k)
op = None
- if not update and not args['operation_type']:
+ if not update and not args.get('operation_type'):
#print "Pas d'operation_type"
continue
+ #transaction.commit()
+ q = Operation.objects.filter(code_patriarche=args['code_patriarche'])
try:
- op = Operation.objects.get(code_patriarche=args['code_patriarche'])
- if not update:
- #print "Code patriarche existant"
- continue
+ if q.count():
+ if not update:
+ #print "Code patriarche existant"
+ continue
+ op = q.all()[0]
except:
- pass
+ continue
# check
if not args.get('year') and args.get('start_date'):
args['year'] = args['start_date'].year
# creation
+ """
+ print args
+ print reversed_items
+ print multis
+ print attached_models
+ print attached_instance_models
+ """
if not op:
args.update(ope_default)
#if not args.get('operation_code'):
@@ -314,7 +586,7 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
# error_ope.append((line_idx, args))
# transaction.rollback()
# continue
- transaction.commit()
+ #transaction.commit()
else: # mise à jour
try:
for k in args:
@@ -323,28 +595,43 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
setattr(op, k, args[k])
op.save()
except:
- transaction.rollback()
+ #transaction.rollback()
continue
- transaction.commit()
+ #transaction.commit()
try:
for cls, default, reverse in reversed_items:
default[reverse] = op
it = cls(**default).save()
+ #transaction.commit()
except:
- transaction.rollback()
+ #transaction.rollback()
error_reversed.append((line_idx, reversed_items))
continue
- transaction.commit()
try:
for k, vals in multis:
for v in vals:
getattr(op, k).add(v)
op.save()
except:
- transaction.rollback()
+ #transaction.rollback()
error_multis.append((line_idx, multis))
continue
- transaction.commit()
+ for attr in attached_models:
+ setattr(op, attr, attached_models[attr])
+ op.save()
+ #transaction.commit()
+ for attr in attached_instance_models:
+ default = attached_instance_models[attr]
+ obj = getattr(op, attr)
+ if not obj:
+ obj = cls.objects.create(**default)
+ obj.save()
+ setattr(op, attr, obj)
+ else:
+ for k in default:
+ setattr(obj, k, default[k])
+ obj.save()
+ #transaction.commit()
errors = []
if error_ope:
@@ -363,6 +650,7 @@ def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None,
for line_idx, args in error_reversed:
error += "line: " + str(line_idx) + " args: " + str(args) + '\n'
errors.append(error)
+ #transaction.commit()
return new_ops, errors
def import_from_csv(filename, update=False, col_defs=OPE_COLS,