From b1260f85a40905c60f3ab38e3397230efdb15e4b Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Sun, 3 Mar 2013 16:12:10 +0100 Subject: Work on DBF imports --- archaeological_operations/import_from_csv.py | 380 +++++++++++++++++++++ archaeological_operations/import_from_dbf.py | 288 ++++++++++++++++ .../management/commands/import_operations.py | 33 +- archaeological_operations/tests.py | 4 +- archaeological_operations/utils.py | 380 --------------------- example_project/local_settings.py.sample | 3 + 6 files changed, 699 insertions(+), 389 deletions(-) create mode 100644 archaeological_operations/import_from_csv.py create mode 100644 archaeological_operations/import_from_dbf.py delete mode 100644 archaeological_operations/utils.py diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py new file mode 100644 index 000000000..b1f8989ce --- /dev/null +++ b/archaeological_operations/import_from_csv.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2012 Étienne Loks + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# See the file COPYING for details. + +""" +Utils: import archaelogical operation from a CSV file +""" + +DELIMITER = ";" +QUOTECHAR = '"' + +import datetime +import csv, codecs +import re + +from django.conf import settings +from django.contrib.auth.models import User +from django.db import transaction + +from ishtar_common.models import Town, Person, PersonType +from archaeological_operations.models import Operation, OperationType, Period, \ + AdministrativeAct, ActType + + +def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): + # csv.py doesn't do Unicode; encode temporarily as UTF-8: + csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), + dialect=dialect, **kwargs) + for row in csv_reader: + # decode UTF-8 back to Unicode, cell by cell: + yield [unicode(cell, 'utf-8') for cell in row] + +def utf_8_encoder(unicode_csv_data): + for line in unicode_csv_data: + yield line.encode('utf-8') + +ope_types = {} +for k in settings.ISHTAR_OPE_TYPES.keys(): + ot, created = OperationType.objects.get_or_create( + txt_idx=settings.ISHTAR_OPE_TYPES[k][0], + defaults={'label':settings.ISHTAR_OPE_TYPES[k][1], + 'preventive':k[0]==u'préventive'}) + ope_types[k] = ot + +def parse_multivalue(value): + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name) + s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1) + return re.sub('([0-9])([a-z])', r'\1 \2', s1) + +def parse_operationtype(value, preventive, owner): + value = (preventive.strip(), value.strip()) + if value not in ope_types: + return None + return ope_types[value] + +periods = {} +for k in settings.ISHTAR_PERIODS.keys(): + periods[k] = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k]) +periods_keys = periods.keys() +periods_keys.sort(key=len) +periods_keys.reverse() + +def parse_period(value): + value = value[3:] if value.startswith('EUR') else value + while value.endswith('-'): + value = value[:-1] + value = value[3:] if value.startswith('EUR') else value + if not value: + return [periods[u'']] + period, old_val = [], u'' + while value and old_val != value: + old_val = value + for k in periods_keys: + if value.startswith(k): + period.append(periods[k]) + value = value[len(k):] + break + return period + +def parse_date(value): + try: + return datetime.datetime.strptime(value, '%d/%m/%Y') + except: + return None + +def parse_surface(value): + value = value.replace(',', '.') + try: + # hectare en metre carrés + value = float(value) * 10000 + if value: + return value + return None + except: + return None + +def parse_year(value): + try: + yr = int(value) + except: + return None + if yr < 1900 or yr > 2100: + return None + return yr + +def parse_insee(value): + values = [] + while len(value) > 4: + values.append(value[:5]) + value = value[5:] + towns = [] + for value in values: + try: + town = Town.objects.get(numero_insee=value) + towns.append(town) + except: + #sys.stderr.write('Numero INSEE : %s non existant en base' % value) + continue + return towns + +def parse_patriarche(value): + if not value: + return + value = value.replace(' ', '') + try: + int(value) + except: + return + return '18' + unicode(value) + +def parse_operation_code(value): + code = value.split('.')[-1] + try: + return int(code) + except: + return + +def parse_title(value): + if not value: + return + return value.title() + +def parse_person(surname, name, old_ref, owner): + values = {"surname":parse_title(surname), + "name":parse_title(name)} + q = Person.objects.filter(**values) + if q.count(): + return q.all()[0] + else: + defaults = {'history_modifier':owner, + 'title':'', + 'person_type':PersonType.objects.get( + txt_idx='head_scientist')} + defaults.update(values) + p = Person.objects.create(**defaults) + return p +# si pas de start date : premier janvier de year + +# attrs, convert +DEFAULT_OPE_COLS = [ + [], # numéro de dossier ? + (('operation_type',), parse_operationtype), + (('common_name',), unicode), + (('in_charge', 'name'), unicode), + (('in_charge', 'surname'), unicode), + [], # État ? + [], # Adresse ? + [], # origine ? + (('periods',), parse_period), + [], # Programme ? + [], # Rattach PC ? + [], # vide + (('administrative_act', 'ref_sra'), unicode), + (('administrative_act', 'signature_date'), parse_date), + (('start_date',), parse_date), + (('excavation_end_date',), parse_date), + (('year',), parse_year), + [], # identification + (('code_patriarche',), int), + [], # X degré + [], # Y degré + [], # X saisi ? + [], # Y saisi ? + [], # georef + [], # geometrie + (('surface',), parse_surface), +] + +OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \ + else DEFAULT_OPE_COLS + +@transaction.commit_manually +def import_operations_csv(values, col_defs=OPE_COLS, update=False, person=None, + stdout=None): + default_person = person or User.objects.order_by('pk').all()[0] + # key : (class, default, reverse) + key_classes = { + 'administrative_act':(AdministrativeAct, {'history_modifier':default_person, + 'act_type':ActType.objects.get( + txt_idx='excavation_order')}, 'operation'), + } + + ope_default = {'history_modifier':default_person} + new_ops = 0 + error_ope, error_reversed, error_multis = [], [], [] + for line_idx, vals in enumerate(values): + if stdout: + stdout.write("\r* line %d" % (line_idx)) + if not line_idx: + continue # remove header + args = {} + for col_idx, val in enumerate(vals): + if len(col_defs) <= col_idx or not col_defs[col_idx]: + continue + attrs, typ, extra_cols = col_defs[col_idx] + if not callable(typ): + typ = globals()[typ] + c_args = args + for attr in attrs: + if attr not in c_args: + c_args[attr] = {} + c_args = c_args[attr] + try: + if not extra_cols: + v = typ(val) + else: + arguments = [vals[col_number] for col_number in extra_cols] + if not [arg for arg in arguments if arg]: + continue + arguments += [default_person] + v = typ(val, *arguments) + except: + v = None + if len(attrs) == 1: + args[attrs[0]] = v + elif len(attrs) == 2: + args[attrs[0]][attrs[1]] = v + elif len(attrs) == 3: + args[attrs[0]][attrs[1]][attrs[2]] = v + # manage exploded dates + for k in args.keys(): + if '__year' in k: + key = k[:-len('__year')] + try: + v = datetime.datetime(args[k], args[key+'__month'], + args[key+'__day']) + args[key] = v + except: + pass + args.pop(k) + args.pop(key+'__month') + args.pop(key+'__day') + reversed_items, multis = [], [] + for k in args.keys(): + if k in key_classes: + cls, default, reverse = key_classes[k] + default.update(args[k]) + if reverse: + reversed_items.append((cls, default, reverse)) + args.pop(k) + continue + try: + obj = cls.objects.get(**default) + except: + obj = cls.objects.create(**default) + obj.save() + transaction.commit() + args[k] = obj + elif type(args[k]) == list: + multis.append((k, args[k])) + args.pop(k) + op = None + if not update and not args['operation_type']: + #print "Pas d'operation_type" + continue + try: + op = Operation.objects.get(code_patriarche=args['code_patriarche']) + if not update: + #print "Code patriarche existant" + continue + except: + pass + # check + if not args.get('year') and args.get('start_date'): + args['year'] = args['start_date'].year + # creation + if not op: + args.update(ope_default) + #if not args.get('operation_code'): + # args['operation_code'] = Operation.get_available_operation_code( + # args['year']) + #try: + op = Operation.objects.create(**args) + #op.save() + new_ops += 1 + #except: + # error_ope.append((line_idx, args)) + # transaction.rollback() + # continue + transaction.commit() + else: # mise à jour + try: + for k in args: + if getattr(op, k): + continue + setattr(op, k, args[k]) + op.save() + except: + transaction.rollback() + continue + transaction.commit() + try: + for cls, default, reverse in reversed_items: + default[reverse] = op + it = cls(**default).save() + except: + transaction.rollback() + error_reversed.append((line_idx, reversed_items)) + continue + transaction.commit() + try: + for k, vals in multis: + for v in vals: + getattr(op, k).add(v) + op.save() + except: + transaction.rollback() + error_multis.append((line_idx, multis)) + continue + transaction.commit() + + errors = [] + if error_ope: + error = "Error while recording theses operations:\n" + for line_idx, args in error_ope: + error += "line: " + str(line_idx) + " args: " + str(args) + '\n' + errors.append(error) + if error_multis: + error = "Error while recording theses multiples items attached to "\ + "operation:" + for line_idx, args in error_multis: + error += "line: " + str(line_idx) + " args: " + str(args) + '\n' + errors.append(error) + if error_reversed: + error = "Error while recording theses items that depend to operation:" + for line_idx, args in error_reversed: + error += "line: " + str(line_idx) + " args: " + str(args) + '\n' + errors.append(error) + return new_ops, errors + +def import_from_csv(filename, update=False, col_defs=OPE_COLS, + person=None, stdout=None): + """ + Import from a CSV file. + Return number of operation treated and errors. + """ + try: + values = unicode_csv_reader(codecs.open(filename, 'rb', "utf-8"), + delimiter=DELIMITER, quotechar=QUOTECHAR) + except (IOError): + return 0, [u"Incorrect CSV file."] + + new_ops, errors = import_operations_csv(values, col_defs=col_defs, + update=update, person=person, stdout=stdout) + return new_ops, errors diff --git a/archaeological_operations/import_from_dbf.py b/archaeological_operations/import_from_dbf.py new file mode 100644 index 000000000..982e6785f --- /dev/null +++ b/archaeological_operations/import_from_dbf.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2013 Étienne Loks + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# See the file COPYING for details. + +""" +Utils: import archaelogical operation from a DBF file +""" + + +import datetime +import dbf + +from django.contrib.auth.models import User +from django.db import transaction + +from archaeological_operations.import_from_csv import parse_operationtype, \ + parse_multivalue, parse_person as _parse_person, parse_date +from archaeological_operations.models import Operation, OperationType, Period, \ + AdministrativeAct, ActType + +def parse_person(surname, name, owner): + return _parse_person(surname, name, None, owner) + +def parse_ha(value): + try: + val = float(value) + except ValueError: + val = 0 + return val * 10000 + +ope_types = { + 'AET':None, + 'APP':None, + 'DOC':(u'documents_study', + u'Étude documentaire'), + 'EV':(u'evaluation', + u'Évaluation'), + 'FOU':(u'prev_excavation', + u"Fouille archéologique préventive"), + 'FP':(u'prog_excavation', + u"Fouille archéologique programmée"), + 'MH':(u'building_study', u"Étude de bâti (préventif)"), + 'OPD':None, + 'PCR':None, + 'PMS':None, + 'PRD':None, + 'PRT':None, + 'PRM':None, + 'RAR':None, + 'SD':(u'sampling_research', + u"Sondage (préventif)"), + 'SP':(u'prev_excavation', + u"Fouille archéologique préventive"), + 'SU':(u'emergency_excavation', + u"Sauvetage urgent"), +} + + + +def parse_patriarche_operationtype(value): + if value not in ope_types.keys(): + print value + return None + if not ope_types[value]: + return None + return OperationType.objects.get(txt_idx=ope_types[value][0]) + +PATRIARCHE_DBF_OPE_COLS = [ + (('operation_type',), 'parse_patriarche_operationtype', []), + (('common_name',), unicode, []), + [], + (('in_charge',), 'parse_person', [2]), + [], #'etat', + [], #'adresse', + [], #'origine C(3)', + [], # 'chronologi C(12)', + [], #'programme C(254)', + [], # 'rattach_pc C(254)', + [], # 'code_dossi N(8,0)', + (('administrative_act', 'ref_sra'), unicode, []), + (('administrative_act', 'signature_date'), parse_date, []), + (('start_date',), parse_date, []), + (('end_date',), parse_date, []), + (('year',), int, []), + [], # 'identifica C(254)', + (('code_patriarche',), int, []), + [], # 'x_degre N(16,6)', + [], # 'y_degre N(16,6)', + [], # 'x_saisi C(12)', + [], # 'y_saisi C(12)', + [], # 'georeferen C(3)', + [], # 'geometrie C(3)', + (('surface',), parse_ha, []) +] + +DBF_OPE_COLS = PATRIARCHE_DBF_OPE_COLS + +def import_from_dbf(filename, update=False, col_defs=DBF_OPE_COLS, + person=None, stdout=None): + """ + Import from a DBF file. + Return number of operation treated and errors. + """ + try: + table = dbf.Table(filename) + except (dbf.DbfError, TypeError): + return 0, [u"Incorrect DBF file."] + + new_ops, errors = import_operations_dbf(table, col_defs=col_defs, + update=update, person=person, stdout=stdout) + return new_ops, errors + +ERROR_LBLS = {'missing_ope_type':'* Missing operation type: ', + 'missing_patriarche_code':'* Missing patriarche code: '} + +@transaction.commit_manually +def import_operations_dbf(values, col_defs=DBF_OPE_COLS, update=False, + person=None, stdout=None): + default_person = person or User.objects.order_by('pk').all()[0] + # key : (class, default, reverse) + key_classes = { + 'administrative_act':(AdministrativeAct, {'history_modifier':default_person, + 'act_type':ActType.objects.get( + txt_idx='excavation_order')}, 'operation'), + } + ope_default = {'history_modifier':default_person} + current_import = [] + new_ops = 0 + errors_nb = {} + for error in ERROR_LBLS.keys(): + errors_nb[error] = 0 + error_ope, error_reversed, error_multis = [], [], [] + for line_idx, vals in enumerate(values): + if stdout: + stdout.write("\r* line %d" % (line_idx)) + if not line_idx: + continue # remove header + args = {} + for col_idx, val in enumerate(vals): + if len(col_defs) <= col_idx or not col_defs[col_idx]: + continue + attrs, typ, extra_cols = col_defs[col_idx] + if not callable(typ): + typ = globals()[typ] + c_args = args + for attr in attrs: + if attr not in c_args: + c_args[attr] = {} + c_args = c_args[attr] + if not extra_cols: + try: + v = typ(val) + except TypeError: + v = None + else: + arguments = [vals[col_number] for col_number in extra_cols] + if not [arg for arg in arguments if arg]: + continue + arguments += [default_person] + v = typ(val, *arguments) + if len(attrs) == 1: + args[attrs[0]] = v + elif len(attrs) == 2: + args[attrs[0]][attrs[1]] = v + elif len(attrs) == 3: + args[attrs[0]][attrs[1]][attrs[2]] = v + # manage exploded dates + for k in args.keys(): + if '__year' in k: + key = k[:-len('__year')] + try: + v = datetime.datetime(args[k], args[key+'__month'], + args[key+'__day']) + args[key] = v + except: + pass + args.pop(k) + args.pop(key+'__month') + args.pop(key+'__day') + reversed_items, multis = [], [] + for k in args.keys(): + if k in key_classes: + cls, default, reverse = key_classes[k] + default.update(args[k]) + if reverse: + reversed_items.append((cls, default, reverse)) + args.pop(k) + continue + try: + obj = cls.objects.get(**default) + except: + obj = cls.objects.create(**default) + obj.save() + transaction.commit() + args[k] = obj + elif type(args[k]) == list: + multis.append((k, args[k])) + args.pop(k) + op = None + if not update and not args.get('operation_type'): + errors_nb['missing_ope_type'] += 1 + continue + try: + op = Operation.objects.get(code_patriarche=args['code_patriarche']) + if not update and op.pk not in current_import: + errors_nb['already_available_patriarche_code'] += 1 + continue + except: + pass + # check + if not args.get('year') and args.get('start_date'): + args['year'] = args['start_date'].year + # creation + if not op: + args.update(ope_default) + op = Operation.objects.create(**args) + new_ops += 1 + transaction.commit() + current_import.append(op.pk) + else: # mise à jour + try: + for k in args: + if getattr(op, k): + continue + setattr(op, k, args[k]) + op.save() + except: + transaction.rollback() + continue + transaction.commit() + try: + for cls, default, reverse in reversed_items: + default[reverse] = op + it = cls(**default).save() + except: + transaction.rollback() + error_reversed.append((line_idx, reversed_items)) + continue + transaction.commit() + try: + for k, vals in multis: + for v in vals: + getattr(op, k).add(v) + op.save() + except: + transaction.rollback() + error_multis.append((line_idx, multis)) + continue + transaction.commit() + + errors = [] + for error_key in errors_nb: + nb_error = errors_nb[error_key] + if nb_error: + errors.append(ERROR_LBLS[error_key] + str(nb_error)) + if error_ope: + error = "Error while recording theses operations:\n" + for line_idx, args in error_ope: + error += "line: " + str(line_idx) + " args: " + str(args) + '\n' + errors.append(error) + if error_multis: + error = "Error while recording theses multiples items attached to "\ + "operation:" + for line_idx, args in error_multis: + error += "line: " + str(line_idx) + " args: " + str(args) + '\n' + errors.append(error) + if error_reversed: + error = "Error while recording theses items that depend to operation:" + for line_idx, args in error_reversed: + error += "line: " + str(line_idx) + " args: " + str(args) + '\n' + errors.append(error) + return new_ops, errors + diff --git a/archaeological_operations/management/commands/import_operations.py b/archaeological_operations/management/commands/import_operations.py index b43954e16..cdcbff54b 100755 --- a/archaeological_operations/management/commands/import_operations.py +++ b/archaeological_operations/management/commands/import_operations.py @@ -18,18 +18,37 @@ # See the file COPYING for details. from django.core.management.base import BaseCommand, CommandError -from archaeological_operations.utils import import_from_csv +from archaeological_operations.import_from_csv import import_from_csv +from archaeological_operations.import_from_dbf import import_from_dbf + +IMPORTERS = {'csv':import_from_csv, + 'dbf':import_from_dbf, + 'db3':import_from_dbf, + 'fp':import_from_dbf, + 'vfp':import_from_dbf} class Command(BaseCommand): - args = ' []' - help = "Import archaelogical operation from a CSV file." + args = ' [ ]' + help = "Import archaelogical operations" def handle(self, *args, **options): if not args or not args[0]: - raise CommandError("No CSV file provided." % args[0]) - nb_ops, errors = import_from_csv(args[0], - update=len(args) > 1 and args[1], - stdout=self.stdout) + raise CommandError("No file provided." % args[0]) + filename = args[0] + update = len(args) > 1 and args[1] + file_type = len(args) > 1 and args[2] + if not file_type: + suffix = filename.split('.')[-1].lower() + if suffix in IMPORTERS.keys(): + file_type = suffix + else: + raise CommandError("This file extension is not managed. "\ + "Specify manualy the file type.") + elif file_type not in IMPORTERS.keys(): + raise CommandError("This file type is not managed.") + nb_ops, errors = IMPORTERS[file_type](filename, + update=update, + stdout=self.stdout) self.stdout.write('\n* %d operation treated\n' % nb_ops) if errors: self.stderr.write('\n'.join(errors)) diff --git a/archaeological_operations/tests.py b/archaeological_operations/tests.py index 551f1e660..9746dc3f6 100644 --- a/archaeological_operations/tests.py +++ b/archaeological_operations/tests.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (C) 2012 Étienne Loks +# Copyright (C) 2012-2013 Étienne Loks # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -41,4 +41,4 @@ class ImportOperationTest(TestCase): Test operation import """ call_command('import_operations', os.sep.join([os.getcwd(), '..', - 'archaeological_operations', 'tests', 'sample.csv'])) + 'archaeological_operations', 'tests', 'sample.dbf'])) diff --git a/archaeological_operations/utils.py b/archaeological_operations/utils.py deleted file mode 100644 index a244b556e..000000000 --- a/archaeological_operations/utils.py +++ /dev/null @@ -1,380 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2012 Étienne Loks - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -# See the file COPYING for details. - -""" -Utils: import archaelogical operation from a CSV file -""" - -DELIMITER = ";" -QUOTECHAR = '"' - -import datetime -import csv, codecs -import re - -from django.conf import settings -from django.contrib.auth.models import User -from django.db import transaction - -from ishtar_common.models import Town, Person, PersonType -from archaeological_operations.models import Operation, OperationType, Period, \ - AdministrativeAct, ActType - - -def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): - # csv.py doesn't do Unicode; encode temporarily as UTF-8: - csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), - dialect=dialect, **kwargs) - for row in csv_reader: - # decode UTF-8 back to Unicode, cell by cell: - yield [unicode(cell, 'utf-8') for cell in row] - -def utf_8_encoder(unicode_csv_data): - for line in unicode_csv_data: - yield line.encode('utf-8') - -ope_types = {} -for k in settings.ISHTAR_OPE_TYPES.keys(): - ot, created = OperationType.objects.get_or_create( - txt_idx=settings.ISHTAR_OPE_TYPES[k][0], - defaults={'label':settings.ISHTAR_OPE_TYPES[k][1], - 'preventive':k[0]==u'préventive'}) - ope_types[k] = ot - -def parse_multivalue(value): - s1 = re.sub('(.)([A-Z][a-z]+)', r'\1 \2', name) - s1 = re.sub('([a-z0-9])([A-Z])', r'\1 \2', s1) - return re.sub('([0-9])([a-z])', r'\1 \2', s1) - -def parse_operationtype(value, preventive, owner): - value = (preventive.strip(), value.strip()) - if value not in ope_types: - return None - return ope_types[value] - -periods = {} -for k in settings.ISHTAR_PERIODS.keys(): - periods[k] = Period.objects.get(txt_idx=settings.ISHTAR_PERIODS[k]) -periods_keys = periods.keys() -periods_keys.sort(key=len) -periods_keys.reverse() - -def parse_period(value): - value = value[3:] if value.startswith('EUR') else value - while value.endswith('-'): - value = value[:-1] - value = value[3:] if value.startswith('EUR') else value - if not value: - return [periods[u'']] - period, old_val = [], u'' - while value and old_val != value: - old_val = value - for k in periods_keys: - if value.startswith(k): - period.append(periods[k]) - value = value[len(k):] - break - return period - -def parse_date(value): - try: - return datetime.datetime.strptime(value, '%d/%m/%Y') - except: - return None - -def parse_surface(value): - value = value.replace(',', '.') - try: - # hectare en metre carrés - value = float(value) * 10000 - if value: - return value - return None - except: - return None - -def parse_year(value): - try: - yr = int(value) - except: - return None - if yr < 1900 or yr > 2100: - return None - return yr - -def parse_insee(value): - values = [] - while len(value) > 4: - values.append(value[:5]) - value = value[5:] - towns = [] - for value in values: - try: - town = Town.objects.get(numero_insee=value) - towns.append(town) - except: - #sys.stderr.write('Numero INSEE : %s non existant en base' % value) - continue - return towns - -def parse_patriarche(value): - if not value: - return - value = value.replace(' ', '') - try: - int(value) - except: - return - return '18' + unicode(value) - -def parse_operation_code(value): - code = value.split('.')[-1] - try: - return int(code) - except: - return - -def parse_title(value): - if not value: - return - return value.title() - -def parse_person(surname, name, old_ref, owner): - values = {"surname":parse_title(surname), - "name":parse_title(name)} - q = Person.objects.filter(**values) - if q.count(): - return q.all()[0] - else: - defaults = {'history_modifier':owner, - 'title':'', - 'person_type':PersonType.objects.get( - txt_idx='head_scientist')} - defaults.update(values) - p = Person.objects.create(**defaults) - return p -# si pas de start date : premier janvier de year - -# attrs, convert -DEFAULT_OPE_COLS = [ - [], # numéro de dossier ? - (('operation_type',), parse_operationtype), - (('common_name',), unicode), - (('in_charge', 'name'), unicode), - (('in_charge', 'surname'), unicode), - [], # État ? - [], # Adresse ? - [], # origine ? - (('periods',), parse_period), - [], # Programme ? - [], # Rattach PC ? - [], # vide - (('administrative_act', 'ref_sra'), unicode), - (('administrative_act', 'signature_date'), parse_date), - (('start_date',), parse_date), - (('excavation_end_date',), parse_date), - (('year',), parse_year), - [], # identification - (('code_patriarche',), int), - [], # X degré - [], # Y degré - [], # X saisi ? - [], # Y saisi ? - [], # georef - [], # geometrie - (('surface',), parse_surface), -] - -OPE_COLS = settings.ISHTAR_OPE_COL_FORMAT if settings.ISHTAR_OPE_COL_FORMAT \ - else DEFAULT_OPE_COLS - -@transaction.commit_manually -def import_operations(values, col_defs=OPE_COLS, update=False, person=None, - stdout=None): - default_person = person or User.objects.order_by('pk').all()[0] - # key : (class, default, reverse) - key_classes = { - 'administrative_act':(AdministrativeAct, {'history_modifier':default_person, - 'act_type':ActType.objects.get( - txt_idx='excavation_order')}, 'operation'), - } - - ope_default = {'history_modifier':default_person} - new_ops = 0 - error_ope, error_reversed, error_multis = [], [], [] - for line_idx, vals in enumerate(values): - if stdout: - stdout.write("\r* line %d" % (line_idx)) - if not line_idx: - continue # remove header - args = {} - for col_idx, val in enumerate(vals): - if len(col_defs) <= col_idx or not col_defs[col_idx]: - continue - attrs, typ, extra_cols = col_defs[col_idx] - if not callable(typ): - typ = globals()[typ] - c_args = args - for attr in attrs: - if attr not in c_args: - c_args[attr] = {} - c_args = c_args[attr] - try: - if not extra_cols: - v = typ(val) - else: - arguments = [vals[col_number] for col_number in extra_cols] - if not [arg for arg in arguments if arg]: - continue - arguments += [default_person] - v = typ(val, *arguments) - except: - v = None - if len(attrs) == 1: - args[attrs[0]] = v - elif len(attrs) == 2: - args[attrs[0]][attrs[1]] = v - elif len(attrs) == 3: - args[attrs[0]][attrs[1]][attrs[2]] = v - # manage exploded dates - for k in args.keys(): - if '__year' in k: - key = k[:-len('__year')] - try: - v = datetime.datetime(args[k], args[key+'__month'], - args[key+'__day']) - args[key] = v - except: - pass - args.pop(k) - args.pop(key+'__month') - args.pop(key+'__day') - reversed_items, multis = [], [] - for k in args.keys(): - if k in key_classes: - cls, default, reverse = key_classes[k] - default.update(args[k]) - if reverse: - reversed_items.append((cls, default, reverse)) - args.pop(k) - continue - try: - obj = cls.objects.get(**default) - except: - obj = cls.objects.create(**default) - obj.save() - transaction.commit() - args[k] = obj - elif type(args[k]) == list: - multis.append((k, args[k])) - args.pop(k) - op = None - if not update and not args['operation_type']: - #print "Pas d'operation_type" - continue - try: - op = Operation.objects.get(code_patriarche=args['code_patriarche']) - if not update: - #print "Code patriarche existant" - continue - except: - pass - # check - if not args.get('year') and args.get('start_date'): - args['year'] = args['start_date'].year - # creation - if not op: - args.update(ope_default) - #if not args.get('operation_code'): - # args['operation_code'] = Operation.get_available_operation_code( - # args['year']) - #try: - op = Operation.objects.create(**args) - #op.save() - new_ops += 1 - #except: - # error_ope.append((line_idx, args)) - # transaction.rollback() - # continue - transaction.commit() - else: # mise à jour - try: - for k in args: - if getattr(op, k): - continue - setattr(op, k, args[k]) - op.save() - except: - transaction.rollback() - continue - transaction.commit() - try: - for cls, default, reverse in reversed_items: - default[reverse] = op - it = cls(**default).save() - except: - transaction.rollback() - error_reversed.append((line_idx, reversed_items)) - continue - transaction.commit() - try: - for k, vals in multis: - for v in vals: - getattr(op, k).add(v) - op.save() - except: - transaction.rollback() - error_multis.append((line_idx, multis)) - continue - transaction.commit() - - errors = [] - if error_ope: - error = "Error while recording theses operations:\n" - for line_idx, args in error_ope: - error += "line: " + str(line_idx) + " args: " + str(args) + '\n' - errors.append(error) - if error_multis: - error = "Error while recording theses multiples items attached to "\ - "operation:" - for line_idx, args in error_multis: - error += "line: " + str(line_idx) + " args: " + str(args) + '\n' - errors.append(error) - if error_reversed: - error = "Error while recording theses items that depend to operation:" - for line_idx, args in error_reversed: - error += "line: " + str(line_idx) + " args: " + str(args) + '\n' - errors.append(error) - return new_ops, errors - -def import_from_csv(filename, update=False, col_defs=OPE_COLS, - person=None, stdout=None): - """ - Import from a CSV file. - Return number of operation treated and errors. - """ - try: - values = unicode_csv_reader(codecs.open(filename, 'rb', "utf-8"), - delimiter=DELIMITER, quotechar=QUOTECHAR) - except (IOError): - return 0, [u"Incorrect CSV file."] - - new_ops, errors = import_operations(values, col_defs=col_defs, - update=update, person=person, stdout=stdout) - return new_ops, errors diff --git a/example_project/local_settings.py.sample b/example_project/local_settings.py.sample index dd488d52e..28ab60ab7 100644 --- a/example_project/local_settings.py.sample +++ b/example_project/local_settings.py.sample @@ -26,6 +26,9 @@ DATABASES = { ROOT_URLCONF = 'example_project.urls' MEDIA_URL = 'http://mydomain/static/' +# complete with host/domain names that this website can serve +ALLOWED_HOSTS = [] + # if your installation uses a prefix after the domain name modify and uncomment # theses lines #DOMAIN_PREFIX = "/ishtar-test" -- cgit v1.2.3