From cadd34a00816a28de6002de7396256b9eaa14531 Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Wed, 18 Feb 2015 22:38:05 +0100 Subject: Improve archaeological files import --- archaeological_files/data_importer.py | 44 ++++---- .../management/commands/import_operations.py | 111 ------------------- .../management/commands/ishtar_imports.py | 117 +++++++++++++++++++++ ishtar_common/data_importer.py | 85 +++++++++++---- 4 files changed, 208 insertions(+), 149 deletions(-) delete mode 100644 archaeological_operations/management/commands/import_operations.py create mode 100644 archaeological_operations/management/commands/ishtar_imports.py diff --git a/archaeological_files/data_importer.py b/archaeological_files/data_importer.py index ae0cf340c..f60e0f5d1 100644 --- a/archaeological_files/data_importer.py +++ b/archaeological_files/data_importer.py @@ -42,6 +42,7 @@ class ImportClosingFormater(ImportFormater): obj.save() class FileImporterSraPdL(Importer): + DESC = u"Exports dossiers SRA PdL : importeur Filemaker dossiers" LINE_FORMAT = [] OBJECT_CLS = models.File DEFAULTS = {('responsible_town_planning_service', 'attached_to'):{ @@ -62,13 +63,13 @@ class FileImporterSraPdL(Importer): tf.town_dct_init() self.line_format = [ None, # A, 1 - ImportFormater(['address', 'postal_code', ['towns', 'parcels__town']], # B, 2 + ImportFormater(['address', 'postal_code', ['main_town', 'parcels__town']], # B, 2 [UnicodeFormater(500, clean=True), UnicodeFormater(5, re_filter=RE_CD_POSTAL_FILTER), tf], regexp=RE_ADD_CD_POSTAL_TOWN, regexp_formater_args=[[0], [1], [2, 1]], required=False, - comment="Dossier - adresse"), + comment=u"Dossier - adresse"), ImportFormater('general_contractor__raw_name', # C, 3 TODO - extraire nom_prenom_titre UnicodeFormater(200), comment=u"Aménageur - nom brut", @@ -83,11 +84,11 @@ class FileImporterSraPdL(Importer): town_dct=tf._town_dct)], regexp=RE_ADD_CD_POSTAL_TOWN, regexp_formater_args=[[0], [1], [2, 1]], required=False, - comment="Aménageur - adresse"), + comment=u"Aménageur - adresse"), ImportFormater("general_contractor__title", # E, 5 StrChoiceFormater(Person.TYPE, cli=True), required=False, - comment="Aménageur - titre"), + comment=u"Aménageur - titre"), None, # F, 6 None, # G, 7 None, # H, 8 @@ -96,19 +97,20 @@ class FileImporterSraPdL(Importer): required=False), ImportParcelFormater('', required=False, post_processing=True), # J, 10 None, # K, 11 - ImportFormater([['towns', 'parcels__town']], # L, 12 + ImportFormater([['main_town', 'parcels__town']], # L, 12 tf, required=False, - comment="Commune (si non définie avant)"), - ImportFormater([['towns', 'parcels__town']], # M, 13 + comment=u"Commune (si non définie avant)"), + ImportFormater([['main_town', 'parcels__town']], # M, 13 tf, required=False, - comment="Commune (si non définie avant)"), + comment=u"Commune (si non définie avant)"), ImportFormater('saisine_type', # N, 14 - StrChoiceFormater(models.SaisineType.get_types(), - model=models.SaisineType, cli=True), + StrChoiceFormater(models.SaisineType.get_types( + empty_first=False), + model=models.SaisineType, cli=True), required=False, - comment="Type de saisine"), + comment=u"Type de saisine"), None, # O, 15 ImportFormater('comment', # P, 16 UnicodeFormater(2000), @@ -127,7 +129,7 @@ class FileImporterSraPdL(Importer): town_dct=tf._town_dct)], regexp=RE_NAME_ADD_CD_POSTAL_TOWN, regexp_formater_args=[[0], [1], [2], [3, 2]], - comment="Aménageur - adresse", + comment=u"Aménageur - adresse", required=False), ImportFormater('comment', # S, 19 UnicodeFormater(2000), @@ -182,7 +184,7 @@ class FileImporterSraPdL(Importer): ImportFormater('permit_reference', # AW, 49 UnicodeFormater(300, clean=True), regexp=RE_PERMIT_REFERENCE, - comment="Réf. du permis de construire", + comment=u"Réf. du permis de construire", required=False), None, # AX, 50 None, # AY, 51 @@ -191,10 +193,10 @@ class FileImporterSraPdL(Importer): None, # BB, 54 None, # BC, 55 None, # BD, 56 - ImportFormater([['towns', 'parcels__town']], # BE, 57 + ImportFormater([['main_town', 'parcels__town']], # BE, 57 TownINSEEFormater(), required=False, - comment="Commune (si non définie avant)"), + comment=u"Commune (si non définie avant)"), ImportFormater('comment', # BF, 58 UnicodeFormater(2000), comment=u"Commentaire", @@ -216,11 +218,12 @@ class FileImporterSraPdL(Importer): 'responsible_town_planning_service__attached_to__name', # BT, 72 service instructeur UnicodeFormater(300, clean=True), regexp=RE_ORGA, - comment="Service instructeur - nom", + comment=u"Service instructeur - nom", required=False), None, # BU, 73 ImportClosingFormater('', StrToBoolean(cli=True), - post_processing=True, required=False), # BV, 74, end date + post_processing=True, required=False, + comment=u'Dossier clos'), # BV, 74, end date ImportClosingFormater('in_charge__raw_name', # BW, 75 responsable UnicodeFormater(200), comment=u"Responsable - nom brut", @@ -241,10 +244,11 @@ class FileImporterSraPdL(Importer): None, # CE, 83 None, # CF, 84 ImportFormater('permit_type', - StrChoiceFormater(models.PermitType.get_types(), - model=models.PermitType, cli=True), + StrChoiceFormater(models.PermitType.get_types( + empty_first=False), + model=models.PermitType, cli=True), required=False, - comment="Type de permis"), # CG, 85 + comment=u"Type de permis"), # CG, 85 None, # CH, 85 ] diff --git a/archaeological_operations/management/commands/import_operations.py b/archaeological_operations/management/commands/import_operations.py deleted file mode 100644 index 09bfe23b6..000000000 --- a/archaeological_operations/management/commands/import_operations.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2015 Étienne Loks - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -# See the file COPYING for details. - -import datetime, unicodecsv - -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError - -from archaeological_operations.data_importer import * - -IMPORTERS = { - 'bibracte-operation':OperationImporterBibracte, - 'bibracte-parcelle':ParcelImporterBibracte, - 'bibracte-docs':DocImporterBibracte, - } - -try: - from archaeological_context_records.data_importer import * - IMPORTERS['bibracte-ue'] = ContextRecordsImporterBibracte - IMPORTERS['bibracte-ue-rel'] = ContextRecordsRelationImporterBibracte -except ImportError: - pass - -try: - from archaeological_finds.data_importer import * - IMPORTERS['bibracte-finds'] = FindsImporterBibracte - IMPORTERS['bibracte-finds-alt'] = FindAltImporterBibracte - IMPORTERS['bibracte-treatments'] = TreatmentImporterBibracte -except ImportError: - pass - -class Command(BaseCommand): - args = ' []' - help = "Import archaeological operations" - - def handle(self, *args, **options): - if not args or not args[0]: - raise CommandError("No file provided.") - if len(args) < 2 or args[1] not in IMPORTERS: - msg = "Bad importer. \nAvailable importers are:\n" - for key in sorted(IMPORTERS.keys()): - msg += "\t* %s: %s\n" % (key, IMPORTERS[key].DESC.encode('utf-8') - or "-") - raise CommandError(msg) - try: - skip_lines = int(args[2]) - except: - skip_lines = 0 - filename = args[0] - importer = IMPORTERS[args[1]](skip_lines=skip_lines, output='cli') - sys.stdout.write("*" * 72 + "\n") - msg = "* Importer - %s" % importer.DESC - if len(msg) < 72: - msg += (71 - len(msg))*" " + "*\n" - sys.stdout.write(msg) - sys.stdout.write("*" * 72 + "\n\n") - sys.stdout.write("Processing...") - with open(filename) as csv_file: - encodings = [settings.ENCODING, settings.ALT_ENCODING, 'utf-8'] - for encoding in encodings: - try: - importer.importation([line for line in - unicodecsv.reader(csv_file, encoding='utf-8')]) - errors = importer.get_csv_errors() - sys.stdout.write("\n") - if errors: - print errors - now = datetime.datetime.now().isoformat('-' - ).replace(':','') - error_file = '.'.join(filename.split('.')[:-1]) \ - + "_errors_%s.csv" % now - sys.stdout.write("Some errors as occured during the ") - sys.stdout.write("import.\n") - try: - with open(error_file, 'w') as fle: - fle.write(errors.encode('utf-8')) - sys.stdout.write("A report has been create in file:"\ - " \"%s\"" % error_file) - except IOError: - sys.stdout.write("Cannot create CSV error file \"%s\"." % - error_file) - sys.stdout.write( - "\n\n* %d item(s) updated, %d item(s) created.\n" % ( - importer.number_updated, importer.number_created)) - break - except ImporterError, e: - if e.type == ImporterError.HEADER and encoding != encodings[-1]: - csv_file.seek(0) - continue - except UnicodeDecodeError: - if encoding != encodings[-1]: - csv_file.seek(0) - continue - sys.stdout.write("\n\n") - diff --git a/archaeological_operations/management/commands/ishtar_imports.py b/archaeological_operations/management/commands/ishtar_imports.py new file mode 100644 index 000000000..23397204b --- /dev/null +++ b/archaeological_operations/management/commands/ishtar_imports.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2015 Étienne Loks + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# See the file COPYING for details. + +import datetime, unicodecsv + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError + +from archaeological_operations.data_importer import * + +IMPORTERS = { + 'bibracte-operation':OperationImporterBibracte, + 'bibracte-parcelle':ParcelImporterBibracte, + 'bibracte-docs':DocImporterBibracte, + } + +try: + from archaeological_files.data_importer import * + IMPORTERS['sra-pdl-files'] = FileImporterSraPdL +except ImportError: + pass + +try: + from archaeological_context_records.data_importer import * + IMPORTERS['bibracte-ue'] = ContextRecordsImporterBibracte + IMPORTERS['bibracte-ue-rel'] = ContextRecordsRelationImporterBibracte +except ImportError: + pass + +try: + from archaeological_finds.data_importer import * + IMPORTERS['bibracte-finds'] = FindsImporterBibracte + IMPORTERS['bibracte-finds-alt'] = FindAltImporterBibracte + IMPORTERS['bibracte-treatments'] = TreatmentImporterBibracte +except ImportError: + pass + +class Command(BaseCommand): + args = ' []' + help = "Import archaeological operations" + + def handle(self, *args, **options): + if not args or not args[0]: + raise CommandError("No file provided.") + if len(args) < 2 or args[1] not in IMPORTERS: + msg = "Bad importer. \nAvailable importers are:\n" + for key in sorted(IMPORTERS.keys()): + msg += "\t* %s: %s\n" % (key, IMPORTERS[key].DESC.encode('utf-8') + or "-") + raise CommandError(msg) + try: + skip_lines = int(args[2]) + except: + skip_lines = 0 + filename = args[0] + importer = IMPORTERS[args[1]](skip_lines=skip_lines, output='cli') + sys.stdout.write("*" * 72 + "\n") + msg = "* Importer - %s" % importer.DESC + if len(msg) < 72: + msg += (71 - len(msg))*" " + "*\n" + sys.stdout.write(msg) + sys.stdout.write("*" * 72 + "\n\n") + sys.stdout.write("Processing...") + with open(filename) as csv_file: + encodings = [settings.ENCODING, settings.ALT_ENCODING, 'utf-8'] + for encoding in encodings: + try: + importer.importation([line for line in + unicodecsv.reader(csv_file, encoding='utf-8')]) + errors = importer.get_csv_errors() + sys.stdout.write("\n") + if errors: + print errors + now = datetime.datetime.now().isoformat('-' + ).replace(':','') + error_file = '.'.join(filename.split('.')[:-1]) \ + + "_errors_%s.csv" % now + sys.stdout.write("Some errors as occured during the ") + sys.stdout.write("import.\n") + try: + with open(error_file, 'w') as fle: + fle.write(errors.encode('utf-8')) + sys.stdout.write("A report has been create in file:"\ + " \"%s\"" % error_file) + except IOError: + sys.stdout.write("Cannot create CSV error file \"%s\"." % + error_file) + sys.stdout.write( + "\n\n* %d item(s) updated, %d item(s) created.\n" % ( + importer.number_updated, importer.number_created)) + break + except ImporterError, e: + if e.type == ImporterError.HEADER and encoding != encodings[-1]: + csv_file.seek(0) + continue + except UnicodeDecodeError: + if encoding != encodings[-1]: + csv_file.seek(0) + continue + sys.stdout.write("\n\n") + diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py index 2963fa680..3db03d916 100644 --- a/ishtar_common/data_importer.py +++ b/ishtar_common/data_importer.py @@ -70,7 +70,7 @@ class ImportFormater(object): lst = [self.formater] for formater in lst: if formater: - formater.check(vals, output) + formater.check(vals, output, self.comment) def post_process(self, obj, context, value, owner=None): raise NotImplemented() @@ -92,9 +92,19 @@ class Formater(object): def format(self, value): return value - def check(self, values, output=None): + def check(self, values, output=None, comment=''): return +class ChoiceChecker(object): + def report_new(self, comment): + if not self.new_keys: + return + msg = u"For \"%s\" these new associations have been made:\n" % comment + sys.stderr.write(msg.encode('utf-8')) + for k in self.new_keys: + msg = u'"%s";"%s"\n' % (k, self.new_keys[k]) + sys.stderr.write(msg.encode('utf-8')) + class UnicodeFormater(Formater): def __init__(self, max_length, clean=False, re_filter=None, notnull=False, db_target=None): @@ -183,7 +193,7 @@ class IntegerFormater(Formater): raise ValueError(_(u"\"%(value)s\" is not an integer") % { 'value':value}) -class StrChoiceFormater(Formater): +class StrChoiceFormater(Formater, ChoiceChecker): def __init__(self, choices, strict=False, equiv_dict={}, model=None, cli=False, many_split='', db_target=None): self.choices = list(choices) @@ -194,6 +204,7 @@ class StrChoiceFormater(Formater): self.db_target = db_target self.create = False self.missings = set() + self.new_keys = {} self.many_split = many_split for key, value in self.choices: value = unicode(value) @@ -219,8 +230,9 @@ class StrChoiceFormater(Formater): def prepare(self, value): return unicode(value).strip() - def _get_choices(self): - msgstr = unicode(_(u"Choice for \"%s\" is not available. "\ + def _get_choices(self, comment=''): + msgstr = comment + u" - " + msgstr += unicode(_(u"Choice for \"%s\" is not available. "\ u"Which one is relevant?\n")) idx = -1 for idx, choice in enumerate(self.choices): @@ -233,7 +245,7 @@ class StrChoiceFormater(Formater): msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n" return msgstr, idx - def check(self, values, output=None): + def check(self, values, output=None, comment=''): if not output or output == 'silent': return if self.many_split: @@ -250,11 +262,13 @@ class StrChoiceFormater(Formater): if output != 'cli': self.missings.add(value) continue - msgstr, idx = self._get_choices() + msgstr, idx = self._get_choices(comment) res = None while res not in range(1, idx+1): - sys.stdout.write(msgstr % value) - res = raw_input(">>> ") + msg = msgstr % value + sys.stdout.write(msg.encode('utf-8')) + sys.stdout.write("\n>>> ") + res = raw_input() try: res = int(res) except ValueError: @@ -266,10 +280,12 @@ class StrChoiceFormater(Formater): v = self.model.objects.get(pk=v) self.equiv_dict[value] = v self.add_key(v, value) + self.new_keys[value] = v elif self.create and res == len(self.choices): self.equiv_dict[value] = self.new(base_value) self.choices.append((self.equiv_dict[value].pk, unicode(self.equiv_dict[value]))) + self.new_keys[value] = unicode(self.equiv_dict[value]) else: self.equiv_dict[value] = None if output == 'db' and self.db_target: @@ -283,6 +299,8 @@ class StrChoiceFormater(Formater): TargetKey.objects.create(**q) except IntegrityError: pass + if output == 'cli': + self.report_new(comment) def new(self, value): return @@ -346,7 +364,7 @@ class DateFormater(Formater): raise ValueError(_(u"\"%(value)s\" is not a valid date") % { 'value':value}) -class StrToBoolean(Formater): +class StrToBoolean(Formater, ChoiceChecker): def __init__(self, choices={}, cli=False, strict=False, db_target=None): self.dct = copy.copy(choices) self.cli = cli @@ -367,6 +385,7 @@ class StrToBoolean(Formater): else: v = None self.dct[value] = v + self.new_keys = {} def prepare(self, value): value = unicode(value).strip() @@ -374,10 +393,11 @@ class StrToBoolean(Formater): value = slugify(value) return value - def check(self, values, output=None): + def check(self, values, output=None, comment=''): if not output or output == 'silent': return - msgstr = unicode(_(u"Choice for \"%s\" is not available. "\ + msgstr = comment + u" - " + msgstr += unicode(_(u"Choice for \"%s\" is not available. "\ u"Which one is relevant?\n")) msgstr += u"1. True\n" msgstr += u"2. False\n" @@ -391,8 +411,10 @@ class StrToBoolean(Formater): continue res = None while res not in range(1, 4): - sys.stdout.write(msgstr % value) - res = raw_input(">>> ") + msg = msgstr % value + sys.stdout.write(msg.encode('utf-8')) + sys.stdout.write("\n>>> ") + res = raw_input() try: res = int(res) except ValueError: @@ -411,6 +433,9 @@ class StrToBoolean(Formater): models.TargetKey.objects.create(**q) except IntegrityError: pass + self.new_keys[value] = unicode(self.dct[value]) + if output == 'cli': + self.report_new(comment) def format(self, value): value = self.prepare(value) @@ -461,6 +486,8 @@ class Importer(object): self._defaults = self.DEFAULTS.copy() self.history_modifier = history_modifier self.output = output + self.result = [] + self.result_cols = [] if not self.history_modifier: if self.import_instance: self.history_modifier = self.import_instance.user @@ -560,16 +587,25 @@ class Importer(object): time_by_item = ellapsed/idx_line if time_by_item: left = ((total - idx_line)*time_by_item).seconds - txt = "\r* %d/%d" % (idx_line+1, total) + txt = u"\r* %d/%d" % (idx_line+1, total) if left: - txt += " (%d seconds left)" % left - sys.stdout.write(txt) + txt += u" (%d seconds left)" % left + sys.stdout.write(txt.encode('utf-8')) sys.stdout.flush() try: self._line_processing(idx_line, line) except ImporterError, msg: self.errors.append((idx_line, None, msg)) + report_name = 'report-%s.csv' % datetime.datetime.now().isoformat() + with open(report_name, 'w') as validity_file: + self.validity_file = UnicodeWriter(validity_file, + delimiter=',', quotechar='"', + quoting=csv.QUOTE_MINIMAL) + self.validity_file.writerow(self.result_cols) + for line in self.result: + self.validity_file.writerow(line) + def _line_processing(self, idx_line, line): if self.skip_lines > idx_line: self.validity.append(line) @@ -669,6 +705,19 @@ class Importer(object): for formater, val in self._post_processing: formater.post_process(obj, data, val, owner=self.history_modifier) + # writing report + self.result.append([]) + for k in data.keys(): + if k not in self.result_cols: + self.result_cols.append(k) + for k in self.result_cols: + if hasattr(obj, k): + self.result[-1].append(unicode(getattr(obj, k))) + elif k not in data or not data[k]: + self.result[-1].append('') + else: + self.result[-1].append(unicode(data[k])) + def _row_processing(self, c_row, idx_col, idx_line, val, data): if idx_col >= len(self.line_format): return @@ -827,7 +876,7 @@ class Importer(object): obj.imports.add(self.import_instance) except IntegrityError as e: raise IntegrityError(e.message) - except: + except cls.MultipleObjectsReturned: created = False obj = cls.objects.filter(**create_dict).all()[0] for attr, value in m2ms: -- cgit v1.2.3