diff options
Diffstat (limited to 'archaeological_operations/import_from_csv.py')
-rw-r--r-- | archaeological_operations/import_from_csv.py | 439 |
1 files changed, 0 insertions, 439 deletions
diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py deleted file mode 100644 index 5155e9fc6..000000000 --- a/archaeological_operations/import_from_csv.py +++ /dev/null @@ -1,439 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2012-2013 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet> - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -# See the file COPYING for details. 
"""
Utils: import archaeological operation from a CSV file
"""

DELIMITER = ";"
QUOTECHAR = '"'

import datetime
import csv
import codecs
import re

from django.conf import settings
from django.db import transaction
from django.template.defaultfilters import slugify

from archaeological_operations.utils import *

from django.contrib.auth.models import User
from ishtar_common.models import Town, Person, PersonType, OrganizationType, \
    Organization, SourceType
from archaeological_files.models import PermitType, File, FileType
from archaeological_operations.models import Operation, OperationType, Period, \
    AdministrativeAct, ActType, OperationSource, Parcel


class Column:
    """
    Description of how one CSV column is imported.

    col_models -- sequence of keys giving the path where the parsed value is
                  stored in the row data (e.g. ``('in_charge', 'name')``)
    format -- parser applied to the cell: a callable, or the name of one
              (see ``_resolve_parser``)
    associated_cols -- optional indexes of other columns of the same row
                       whose values are passed as extra parser arguments
    multi -- when true, the parsed values are attached to the operation
             through ``getattr(operation, key).add(value)``
    """
    def __init__(self, col_models, format, associated_cols=None, multi=False):
        self.col_models, self.format = col_models, format
        self.associated_cols, self.multi = associated_cols, multi


def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """
    Yield the rows of ``unicode_csv_data`` as lists of unicode strings.

    Extra keyword arguments are passed to ``csv.reader``.
    """
    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # decode UTF-8 back to Unicode, cell by cell:
        yield [unicode(cell, 'utf-8') for cell in row]


def utf_8_encoder(unicode_csv_data):
    """
    Yield each line of ``unicode_csv_data`` encoded as UTF-8.
    """
    for line in unicode_csv_data:
        yield line.encode('utf-8')


# Default column layout: one entry per CSV column, in order. An empty entry
# means the column is ignored, otherwise the entry holds the arguments of
# ``Column`` (attribute path, parser).
DEFAULT_OPE_COLS = [
    [],  # file number?
    (('operation_type',), parse_operationtype),
    (('common_name',), unicode),
    (('in_charge', 'name'), unicode),
    (('in_charge', 'surname'), unicode),
    [],  # state?
    [],  # address?
    [],  # origin?
    (('periods',), parse_period),
    [],  # programme?
    [],  # "Rattach PC"?
    [],  # empty
    (('administrative_act', 'ref_sra'), unicode),
    (('administrative_act', 'signature_date'), parse_date),
    (('start_date',), parse_date),
    (('excavation_end_date',), parse_date),
    (('year',), parse_year),
    [],  # identification
    (('code_patriarche',), unicode),
    [],  # X (degrees)
    [],  # Y (degrees)
    [],  # X as entered?
    [],  # Y as entered?
    [],  # georef
    [],  # geometry
    (('surface',), parse_surface),
]

# A project can override the layout with the ISHTAR_OPE_COL_FORMAT setting;
# a missing or empty setting falls back to the default layout.
_OPE_COLS = getattr(settings, 'ISHTAR_OPE_COL_FORMAT', None) \
    or DEFAULT_OPE_COLS

OPE_COLS = [Column(*cols) if cols else None for cols in _OPE_COLS]


def ope_postimportfix(ope, dct):
    """
    Fill the year of ``ope`` when it is empty.

    The year is taken from the ``start_date`` of ``dct`` or, failing that,
    from its ``end_date``. The operation is saved only when it has been
    changed. Return the operation.
    """
    if ope.year:
        return ope
    date = dct.get('start_date') or dct.get('end_date')
    if date:
        ope.year = date.year
        ope.save()
    return ope


class BreakIt(Exception):
    """
    Control-flow exception (kept for backward compatibility).
    """
    pass


class RelatedClass:
    """
    Describe how items related to an imported object are created.

    key -- key of the row data holding the values of the related item
    cls -- class of the related item (its ``objects`` manager is used)
    default_data -- default values merged with the parsed values
    reverse_key -- name of the attribute of the related item that refers to
                   the imported object; when empty the related item is
                   reached through an attribute of the imported object
    unique_keys -- keys used to look for an already existing related item
    extra_data -- keys copied from the row data to the related item
    mandatory_fields -- keys that must have a value to create the item
    multi -- true when the row data holds a list of related items
    """
    def __init__(self, key, cls, default_data=None, reverse_key=False,
                 unique_keys=None, extra_data=None, mandatory_fields=None,
                 multi=None):
        self.key, self.cls = key, cls
        self.reverse_key, self.multi = reverse_key, multi
        # Private copies: ``create`` appends to ``unique_keys`` so sharing a
        # default value (or the list of the caller) between instances would
        # leak keys from one related class to another.
        self.default_data = dict(default_data) if default_data else {}
        self.unique_keys = list(unique_keys) if unique_keys else []
        self.extra_data = list(extra_data) if extra_data else []
        self.mandatory_fields = list(mandatory_fields) \
            if mandatory_fields else []

    def create_object(self, data):
        """
        Get, update or create the related item described by ``data``.

        The unique keys are removed from ``data``. Return the item, or None
        when a mandatory field has no value or when ``get_or_create`` raises
        a ValueError.
        """
        for mandatory_field in self.mandatory_fields:
            if not data.get(mandatory_field):
                return None
        if not self.unique_keys:
            return self.cls.objects.create(**data)
        unique_data = dict((k, data.pop(k)) for k in self.unique_keys)
        # duplicates are only searched when every unique key has a value
        if all(unique_data.values()):
            q = self.cls.objects.filter(**unique_data)
            if q.count() > 1:
                # ambiguous match: reuse the first item without updating it
                return q.all()[0]
        try:
            obj, created = self.cls.objects.get_or_create(defaults=data,
                                                          **unique_data)
        except ValueError:
            return None
        if not created:
            # existing item: refresh it with the imported values
            for k in data:
                try:
                    setattr(obj, k, data[k])
                except ValueError:
                    continue
            obj.save()
        return obj

    def create(self, item, data, attr=None):
        """
        Create or update the item(s) related to ``item``.

        data -- a dict of values, or a list of dicts when ``multi`` is set
        attr -- attribute of ``item`` holding the related item (only used
                when there is no reverse key)

        Return the related item, or the list of related items when
        ``multi`` is set.
        """
        datas = data if self.multi else [data]
        objs = []
        for current in datas:
            if self.reverse_key:
                current[self.reverse_key] = item
                if self.reverse_key not in self.unique_keys:
                    self.unique_keys.append(self.reverse_key)
                obj = self.create_object(current)
            else:
                obj = getattr(item, attr)
                if not obj:
                    obj = self.create_object(current)
                    setattr(item, attr, obj)
                    item.save()
                else:
                    for k in current:
                        setattr(obj, k, current[k])
                    obj.save()
            objs.append(obj)
        if not self.multi:
            return objs[0]
        return objs


def _resolve_parser(typ):
    """
    Return the parser callable designated by ``typ``.

    ``typ`` is either a callable, a name starting with ``parse_string_``
    and ending with an integer (handled by ``_get_parse_string``) or the
    name of a callable available in this module.
    """
    if callable(typ):
        return typ
    if typ.startswith('parse_string_'):
        return _get_parse_string(int(typ.split('_')[-1]))
    return globals()[typ]


def _parse_row(vals, col_defs, default_person):
    """
    Parse the cells of a CSV row and return the (nested) dict of values.
    """
    args = {}
    for col_idx, val in enumerate(vals):
        if len(col_defs) <= col_idx or not col_defs[col_idx]:
            continue
        col_def = col_defs[col_idx]
        attrs = col_def.col_models
        typ = _resolve_parser(col_def.format)
        extra_cols = col_def.associated_cols
        # make sure every level of the path exists (the last level is
        # overwritten below, once the value is known)
        node = args
        for attr in attrs:
            if attr not in node:
                node[attr] = {}
            node = node[attr]
        try:
            if not extra_cols:
                v = typ(val)
            else:
                arguments = [vals[col_number] for col_number in extra_cols]
                if not any(arguments):
                    # no associated value at all: nothing to parse
                    continue
                arguments += [default_person]
                v = typ(val, *arguments)
        except Exception:
            # a cell that cannot be parsed is imported as an empty value
            v = None
        if attrs:
            parent = args
            for attr in attrs[:-1]:
                parent = parent[attr]
            parent[attrs[-1]] = v
    return args


def _merge_exploded_dates(args):
    """
    Replace ``<key>__year``, ``<key>__month`` and ``<key>__day`` entries of
    ``args`` by a single ``<key>`` datetime entry (in place).
    """
    for k in list(args.keys()):
        if not k.endswith('__year'):
            continue
        key = k[:-len('__year')]
        try:
            args[key] = datetime.datetime(args[k], args[key + '__month'],
                                          args[key + '__day'])
        except Exception:
            # incomplete or invalid date: the date is left unset
            pass
        for suffix in ('__year', '__month', '__day'):
            args.pop(key + suffix, None)


def _dispatch_row_data(args, related_classes, multi_keys):
    """
    Remove from ``args`` the values that are not plain operation fields.

    Return a tuple ``(related_items, multis, attached_models)``:
    - related_items: list of (related class, values, key) to create with
      ``RelatedClass.create``
    - multis: list of (key, values) to add to the operation
    - attached_models: dict of (attribute, sub-attribute) -> value for keys
      of the form ``attribute__sub_attribute``
    """
    related_items, multis, attached_models = [], [], {}
    # iterate on a snapshot: entries are removed from ``args`` on the way
    for k in list(args.keys()):
        if k in related_classes:
            rel_cls = related_classes[k]
            raw = args.pop(k)
            if rel_cls.multi:
                rel_values = []
                for v in raw or []:
                    v.update(rel_cls.default_data)
                    rel_values.append(v)
            else:
                rel_values = rel_cls.default_data.copy()
                rel_values.update(raw or {})
            # a related item is dropped when one of its extra data is empty
            if [extra for extra in rel_cls.extra_data
                    if not args.get(extra)]:
                continue
            for extra in rel_cls.extra_data:
                rel_values[extra] = args[extra]
            related_items.append((rel_cls, rel_values, k))
        elif k in multi_keys:
            multis.append((k, args.pop(k)))
        elif '__' in k:
            mod, value = k.split('__')
            attached_models[(mod, value)] = args.pop(k)
    return related_items, multis, attached_models


def _format_import_errors(header, entries):
    """
    Return ``header`` followed by one line per (line index, args) entry.
    """
    return header + "".join(
        "line: " + str(line_idx) + " args: " + str(args) + '\n'
        for line_idx, args in entries)


def import_operations_csv(values, col_defs=OPE_COLS, update=True, person=None,
                          stdout=None, lines=None):
    """
    Import operations from an iterable of CSV rows.

    values -- iterable of rows (lists of cells); row 0 is the header and is
              always skipped
    col_defs -- list of ``Column`` (or None for ignored columns)
    update -- when false, rows matching an existing operation are skipped
    person -- user recorded as ``history_modifier``; defaults to the user
              with the lowest primary key
    stdout -- optional stream used to report the progress
    lines -- optional restriction of the rows to import: a comma separated
             list of row indexes (``"3,5,8"``) or an inclusive range
             (``"5-10"``)

    Return a tuple (number of operations created, list of error messages).

    NOTE: no explicit transaction management is done here (the former
    ``transaction.commit_manually`` decorator is disabled).
    """
    default_person = person or User.objects.order_by('pk').all()[0]
    related_classes = dict((rel_cls.key, rel_cls) for rel_cls in [
        RelatedClass('administrative_act', AdministrativeAct,
                     default_data={'history_modifier': default_person,
                                   'act_type': ActType.objects.get(
                                       txt_idx='excavation_order')},
                     reverse_key='operation',
                     unique_keys=['ref_sra']),
        RelatedClass('source', OperationSource, reverse_key='operation',
                     unique_keys=['index']),
        RelatedClass('parcels', Parcel, reverse_key='operation',
                     unique_keys=['operation', 'town', 'section',
                                  'parcel_number'],
                     multi=True),
    ])
    _prepare_ope_types()

    ope_default = {'history_modifier': default_person}
    new_ops = 0
    # error_reversed is never filled at the moment; it is kept so that the
    # error report keeps the same structure
    error_ope, error_reversed, error_multis = [], [], []
    multi_keys = set(column.col_models[0]
                     for column in col_defs if column and column.multi)

    restrict_lines, last_line, offset = set(), None, 0
    if lines:
        if '-' not in lines:
            restrict_lines = set(int(line) for line in lines.split(','))
            last_line = max(restrict_lines)
        else:
            start_line, end_line = lines.split('-')
            offset = int(start_line)
            # inclusive range: keep rows start_line..end_line
            values = list(values)[offset:int(end_line) + 1]

    for line_idx, vals in enumerate(values, offset):
        if restrict_lines:
            if line_idx > last_line:
                break
            if line_idx not in restrict_lines:
                continue
        if stdout:
            stdout.write("\r* line %d" % (line_idx))
        if not line_idx:
            continue  # remove header

        args = _parse_row(vals, col_defs, default_person)
        _merge_exploded_dates(args)
        related_items, multis, attached_models = _dispatch_row_data(
            args, related_classes, multi_keys)

        if not args.get('operation_type'):
            continue
        # rows without a numeric "code patriarche" are ignored
        code_patriarche = args.get('code_patriarche')
        try:
            int(code_patriarche)
        except (TypeError, ValueError):
            continue
        op = None
        q = Operation.objects.filter(code_patriarche=code_patriarche)
        if q.count():
            if not update:
                continue
            op = q.all()[0]
        if not args.get('year') and args.get('start_date'):
            args['year'] = args['start_date'].year

        if not op:
            # creation
            args.update(ope_default)
            op = Operation.objects.create(**args)
            new_ops += 1
        else:
            # update: only the fields without value are filled
            try:
                for k in args:
                    if getattr(op, k):
                        continue
                    setattr(op, k, args[k])
                op.save()
            except Exception:
                error_ope.append((line_idx, args))
                continue

        try:
            for k, multi_values in multis:
                if not multi_values:
                    continue
                for v in multi_values:
                    getattr(op, k).add(v)
                    op.save()
        except Exception:
            error_multis.append((line_idx, multis))

        for (attr, attached_attr), attached_value in attached_models.items():
            field = getattr(op, attr)
            if field:
                setattr(field, attached_attr, attached_value)
                field.save()

        for rel_cls, data, attr in related_items:
            rel_cls.create(op, data, attr)
        ope_postimportfix(op, args)

    errors = []
    if error_ope:
        errors.append(_format_import_errors(
            "Error while recording these operations:\n", error_ope))
    if error_multis:
        errors.append(_format_import_errors(
            "Error while recording these multiples items attached to "
            "operation:", error_multis))
    if error_reversed:
        errors.append(_format_import_errors(
            "Error while recording these items that depend to operation:",
            error_reversed))
    return new_ops, errors


def import_from_csv(filename, update=True, col_defs=OPE_COLS,
                    person=None, stdout=None, lines=None):
    """
    Import operations from a CSV file (UTF-8, ``DELIMITER`` separated).

    The other arguments are passed to ``import_operations_csv``.
    Return the number of operations created and the list of errors; when
    the file cannot be opened return 0 and a single error message.
    """
    try:
        csv_file = codecs.open(filename, 'rb', "utf-8")
    except IOError:
        return 0, [u"Incorrect CSV file."]
    try:
        values = unicode_csv_reader(csv_file, delimiter=DELIMITER,
                                    quotechar=QUOTECHAR)
        new_ops, errors = import_operations_csv(
            values, col_defs=col_defs, update=update, person=person,
            stdout=stdout, lines=lines)
    finally:
        # the rows are read lazily: close only once the import is over
        csv_file.close()
    return new_ops, errors