 archaeological_operations/data_importer.py                             |  13 +-
 archaeological_operations/management/commands/import_operations.py     |  91 ++-- (mode 100755 => 100644)
 archaeological_operations/management/commands/import_operations_old.py |  56 ++ (new, mode 100755)
 ishtar_common/data_importer.py                                         | 239 +--
 ishtar_common/models.py                                                |   2 +-
 5 files changed, 151 insertions(+), 250 deletions(-)
diff --git a/archaeological_operations/data_importer.py b/archaeological_operations/data_importer.py
index e02b53df0..ebcc039d0 100644
--- a/archaeological_operations/data_importer.py
+++ b/archaeological_operations/data_importer.py
@@ -24,9 +24,9 @@
 from django.template.defaultfilters import slugify
 from django.utils.translation import ugettext_lazy as _
 
 from ishtar_common.data_importer import *
-from ishtar_common.models import Town
+from ishtar_common.models import Town, OrganizationType
 
-from archaeological_operations.models import Parcel
+from archaeological_operations import models
 from archaeological_operations.utils import parse_parcels
 
 RE_PERMIT_REFERENCE = re.compile('[A-Za-z]*(.*)')
@@ -45,7 +45,7 @@ class ImportParcelFormater(ImportFormater):
         for parcel_dct in parse_parcels(value, owner=owner):
             parcel_dct.update(base_dct)
             try:
-                Parcel.objects.get_or_create(**parcel_dct)
+                models.Parcel.objects.get_or_create(**parcel_dct)
             except IntegrityError:
                 raise ImporterError("Erreur d'import parcelle, contexte : %s" \
                                                     % unicode(parcel_dct))
@@ -148,8 +148,9 @@ RE_ORGA = re.compile("([^,]*)")
 
 class OperationImporterBibracte(Importer):
+    DESC = u"Exports Bibracte : importeur pour l'onglet opération"
     DEFAULTS = {
-                ('in_charge',):{
+                ('operator',):{
                     'organization_type':OrganizationType.objects.get(
                                              txt_idx="operator")},
                }
@@ -171,10 +172,8 @@ class OperationImporterBibracte(Importer):
         # fin
         ImportFormater('excavation_end_date', DateFormater('%Y/%m/%d'),),
         # Chronos
-        ImportFormater('periods', TypeFormater(models.Period, many=True),
-                       many=True),
+        ImportFormater('periods', TypeFormater(models.Period, many_split="&")),
     ]
 
     OBJECT_CLS = models.Operation
-    DEFAULTS = {}
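Note: the "periods" column now uses many_split="&" instead of many=True, so splitting a
multi-valued cell is handled by the formater itself (StrChoiceFormater, further down in
ishtar_common/data_importer.py, compiles many_split as a regular expression). A minimal
sketch of that behaviour, with a hypothetical stand-alone split_cell helper and made-up
cell values:

    import re

    def split_cell(value, many_split):
        # Like StrChoiceFormater: compile many_split as a regexp and use it
        # to split one raw cell into several values (the strip() is added
        # here only for readability of the example).
        return [v.strip() for v in re.compile(many_split).split(value)]

    # Hypothetical "periods" cell from a Bibracte export:
    assert split_cell(u"Bronze ancien & Bronze moyen", "&") == \
        [u"Bronze ancien", u"Bronze moyen"]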
diff --git a/archaeological_operations/management/commands/import_operations.py b/archaeological_operations/management/commands/import_operations.py
old mode 100755
new mode 100644
index a9ecf41c9..fe4afc032
--- a/archaeological_operations/management/commands/import_operations.py
+++ b/archaeological_operations/management/commands/import_operations.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2012-2013  Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+# Copyright (C) 2015  Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -17,40 +17,73 @@
 # See the file COPYING for details.
 
+import datetime, sys, unicodecsv
+
+from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
-from archaeological_operations.import_from_csv import import_from_csv
-from archaeological_operations.import_from_dbf import import_from_dbf
 
-IMPORTERS = {'csv':import_from_csv,
-             'dbf':import_from_dbf,
-             'db3':import_from_dbf,
-             'fp':import_from_dbf,
-             'vfp':import_from_dbf}
+from archaeological_operations.data_importer import *
+
+IMPORTERS = {'bibracte-operation':OperationImporterBibracte}
 
 class Command(BaseCommand):
-    args = '<filename> [<lines>]'
-    help = "Import archaelogical operations"
+    args = '<filename> <importer_name> [<nb lines skipped>]'
+    help = "Import archaeological operations"
 
     def handle(self, *args, **options):
         if not args or not args[0]:
             raise CommandError("No file provided.")
+        if len(args) < 2 or args[1] not in IMPORTERS:
+            msg = "Bad importer.\nAvailable importers are:\n"
+            for key in IMPORTERS:
+                msg += "\t* %s: %s" % (key, IMPORTERS[key].DESC.encode('utf-8')
+                                            or "-")
+            raise CommandError(msg)
+        try:
+            skip_lines = int(args[2])
+        except (IndexError, ValueError):
+            skip_lines = 0
         filename = args[0]
-        update = True
-        file_type = None
-        lines = len(args) > 1 and args[1]
-        if not file_type:
-            suffix = filename.split('.')[-1].lower()
-            if suffix in IMPORTERS.keys():
-                file_type = suffix
-            else:
-                raise CommandError("This file extension is not managed. "\
-                                   "Specify manualy the file type.")
-        elif file_type not in IMPORTERS.keys():
-            raise CommandError("This file type is not managed.")
-        nb_ops, errors = IMPORTERS[file_type](filename,
-                                              update=update,
-                                              stdout=self.stdout,
-                                              lines=lines)
-        self.stdout.write('\n* %d operation treated\n' % nb_ops)
-        if errors:
-            self.stderr.write('\n'.join(errors))
+        importer = IMPORTERS[args[1]](skip_lines=skip_lines, output='cli')
+        sys.stdout.write("*" * 72 + "\n")
+        msg = "* Importer - %s" % importer.DESC
+        if len(msg) < 72:
+            msg += (71 - len(msg)) * " " + "*\n"
+        sys.stdout.write(msg)
+        sys.stdout.write("*" * 72 + "\n\n")
+        sys.stdout.write("Processing...")
+        with open(filename) as csv_file:
+            encodings = [settings.ENCODING, settings.ALT_ENCODING, 'utf-8']
+            for encoding in encodings:
+                try:
+                    importer.importation([line for line in
+                            unicodecsv.reader(csv_file, encoding=encoding)])
+                    errors = importer.get_csv_errors()
+                    sys.stdout.write("\n")
+                    if errors:
+                        print errors
+                        now = datetime.datetime.now().isoformat('-'
+                                                ).replace(':', '')
+                        error_file = '.'.join(filename.split('.')[:-1]) \
+                                     + "_errors_%s.csv" % now
+                        sys.stdout.write("Some errors occurred during the ")
+                        sys.stdout.write("import.\n")
+                        try:
+                            with open(error_file, 'w') as fle:
+                                fle.write(errors.encode('utf-8'))
+                            sys.stdout.write("A report has been created in file:"
+                                             " \"%s\"" % error_file)
+                        except IOError:
+                            sys.stdout.write("Cannot create CSV error file \"%s\"." %
+                                             error_file)
+                    break
+                except ImporterError, e:
+                    if e.type == ImporterError.HEADER and encoding != encodings[-1]:
+                        csv_file.seek(0)
+                        continue
+                    raise
+                except UnicodeDecodeError:
+                    if encoding != encodings[-1]:
+                        csv_file.seek(0)
+                        continue
+                    raise
+        sys.stdout.write("\n\n")
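With the new signature the command takes an importer name and an optional number of
header lines to skip, e.g. "./manage.py import_operations export.csv bibracte-operation 1"
(file name hypothetical). The command tries several encodings before giving up, rewinding
the file between attempts. A minimal self-contained sketch of that pattern, with an
assumed encoding list standing in for settings.ENCODING / settings.ALT_ENCODING:

    import unicodecsv

    ENCODINGS = ['utf-8', 'iso-8859-15']  # assumed candidates

    def read_rows(path):
        # Try each candidate encoding in turn; decoding errors only surface
        # while iterating, hence the list comprehension inside the try.
        with open(path, 'rb') as csv_file:
            for encoding in ENCODINGS:
                try:
                    return [row for row in
                            unicodecsv.reader(csv_file, encoding=encoding)]
                except UnicodeDecodeError:
                    if encoding == ENCODINGS[-1]:
                        raise  # every candidate failed
                    csv_file.seek(0)  # rewind, retry with the next encoding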
diff --git a/archaeological_operations/management/commands/import_operations_old.py b/archaeological_operations/management/commands/import_operations_old.py
new file mode 100755
index 000000000..a9ecf41c9
--- /dev/null
+++ b/archaeological_operations/management/commands/import_operations_old.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2012-2013  Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# See the file COPYING for details.
+
+from django.core.management.base import BaseCommand, CommandError
+from archaeological_operations.import_from_csv import import_from_csv
+from archaeological_operations.import_from_dbf import import_from_dbf
+
+IMPORTERS = {'csv':import_from_csv,
+             'dbf':import_from_dbf,
+             'db3':import_from_dbf,
+             'fp':import_from_dbf,
+             'vfp':import_from_dbf}
+
+class Command(BaseCommand):
+    args = '<filename> [<lines>]'
+    help = "Import archaelogical operations"
+
+    def handle(self, *args, **options):
+        if not args or not args[0]:
+            raise CommandError("No file provided.")
+        filename = args[0]
+        update = True
+        file_type = None
+        lines = len(args) > 1 and args[1]
+        if not file_type:
+            suffix = filename.split('.')[-1].lower()
+            if suffix in IMPORTERS.keys():
+                file_type = suffix
+            else:
+                raise CommandError("This file extension is not managed. "\
+                                   "Specify manualy the file type.")
+        elif file_type not in IMPORTERS.keys():
+            raise CommandError("This file type is not managed.")
+        nb_ops, errors = IMPORTERS[file_type](filename,
+                                              update=update,
+                                              stdout=self.stdout,
+                                              lines=lines)
+        self.stdout.write('\n* %d operation treated\n' % nb_ops)
+        if errors:
+            self.stderr.write('\n'.join(errors))
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py
index 0ce61ba01..87b3a40b0 100644
--- a/ishtar_common/data_importer.py
+++ b/ishtar_common/data_importer.py
@@ -17,7 +17,7 @@
 # See the file COPYING for details.
 
-import copy, csv, datetime, logging, sys
+import copy, csv, datetime, logging, re, sys
 from tempfile import NamedTemporaryFile
 
 from django.contrib.auth.models import User
@@ -216,16 +216,16 @@ class StrChoiceFormater(Formater):
         msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n"
         if self.many_split:
             new_values = []
-            r = re.compile(func.many_split)
+            r = re.compile(self.many_split)
             for value in values:
                 new_values += r.split(value)
             values = new_values
         for value in values:
-            base_value = copy(value)
+            base_value = copy.copy(value)
             value = self.prepare(value)
             if value in self.equiv_dict:
                 continue
-            if not self.cli:
+            if output != 'cli':
                 self.missings.add(value)
                 continue
             res = None
@@ -263,9 +263,12 @@ class StrChoiceFormater(Formater):
 
 class TypeFormater(StrChoiceFormater):
     def __init__(self, model, cli=False, defaults={}, many_split=False):
+        self.create = True
+        self.strict = False
         self.model = model
         self.defaults = defaults
         self.many_split = many_split
+        self.missings = set()
         self.equiv_dict, self.choices = {}, []
         for item in model.objects.all():
             self.choices.append((item.pk, unicode(item)))
@@ -348,6 +351,7 @@ class StrToBoolean(Formater):
 logger = logging.getLogger(__name__)
 
 class Importer(object):
+    DESC = ""
     LINE_FORMAT = []
     OBJECT_CLS = None
     IMPORTED_LINE_FIELD = None
@@ -366,19 +370,18 @@ class Importer(object):
         'regex_not_match':_(u"The regexp doesn't match.")
         }
 
-    def __init__(self, skip_first_line=False, reference_header=None,
+    def __init__(self, skip_lines=0, reference_header=None,
                  check_col_num=False, test=False, check_validity=True,
-                 history_modifier=None, output=None):
+                 history_modifier=None, output='silent'):
         """
-         * skip_first_line must be set to True if the data provided has got
-           an header.
+         * skip_lines must be set to the number of header lines to skip.
          * a reference_header can be provided to perform a data compliance
           check. It can be useful to warn about bad parsing.
         * test doesn't write in the database
         * check_validity rewrite a CSV file to be compared
        """
        self.message = ''
-        self.skip_first_line = skip_first_line
+        self.skip_lines = skip_lines
         self.reference_header = reference_header
         self.test = test
         self.errors = [] # list of (line, col, message)
@@ -408,7 +411,7 @@ class Importer(object):
         assert output in ('silent', 'cli', 'db')
         vals = []
         for idx_line, line in enumerate(table):
-            if (self.skip_first_line and not idx_line):
+            if self.skip_lines > idx_line:
                 continue
             for idx_col, val in enumerate(line):
                 if idx_col >= len(self.line_format):
@@ -424,7 +427,7 @@ class Importer(object):
     def importation(self, table):
         self.validity_file = None
         if not self._initialized:
-            self.initialize(table)
+            self.initialize(table, self.output)
         if self.check_validity:
             with NamedTemporaryFile(delete=False) as validity_file:
                 self.validity_file = UnicodeWriter(validity_file,
@@ -479,7 +482,7 @@ class Importer(object):
         self.min_col_number = len(self.line_format) - idx_last_col
         # check the conformity with the reference header
         if self.reference_header and \
-           self.skip_first_line and \
+           self.skip_lines and \
            self.reference_header != table[0]:
             raise ImporterError(self.ERRORS['header_check'],
                                 type=ImporterError.HEADER)
@@ -507,7 +510,7 @@ class Importer(object):
                 self.errors.append((idx_line, None, msg))
 
     def _line_processing(self, idx_line, line):
-        if (self.skip_first_line and not idx_line):
+        if self.skip_lines > idx_line:
             if self.validity_file:
                 self.validity_file.writerow(line)
             return
@@ -703,201 +706,6 @@ class Importer(object):
         else:
             c_row.append(unicode(c_values))
 
-
-    """
-    def _importation(self, table):
-        table = list(table)
-        if not table or not table[0]:
-            raise ImporterError(self.ERRORS['no_data'], ImporterError.HEADER)
-        if self.check_col_num and len(table[0]) > len(self.line_format):
-            raise ImporterError(self.ERRORS['too_many_cols'] % {
-                     'user_col':len(table[0]), 'ref_col':len(self.line_format)})
-        self.errors = []
-        self.messages = []
-        self.number_imported = 0
-        # index of the last required column
-        for idx_last_col, formater in enumerate(reversed(self.line_format)):
-            if formater.required:
-                break
-        else:
-            idx_last_col += 1
-        # min col number to be filled
-        min_col_number = len(self.line_format) - idx_last_col
-        # check the conformity with the reference header
-        if self.reference_header and \
-           self.skip_first_line and \
-           self.reference_header != table[0]:
-            raise ImporterError(self.ERRORS['header_check'],
-                                type=ImporterError.HEADER)
-        now = datetime.datetime.now()
-        for idx_line, line in enumerate(table):
-            #self._line_processing()
-
-            if (self.skip_first_line and not idx_line):
-                if self.validity_file:
-                    self.validity_file.writerow(line)
-                continue
-            if not line:
-                if self.validity_file:
-                    self.validity_file.writerow([])
-                continue
-            self.throughs = [] # list of (formater, value)
-            self.post_processing = [] # list of (formater, value)
-            data = {}
-
-            # keep in database the raw line for testing purpose
-            if self.IMPORTED_LINE_FIELD:
-                output = io.StringIO()
-                writer = csv.writer(output)
-                writer.writerow(line)
-                data[self.IMPORTED_LINE_FIELD] = output.getvalue()
-
-            n = datetime.datetime.now()
-            logger.debug('%s - Processing line %d' % (unicode(n-now), idx_line))
-            now = n
-            n2 = n
-            self.c_errors = False
-            c_row = []
-            for idx_col, val in enumerate(line):
-                #self._row_processing(self, c_row, idx_col, val):
-
-                if idx_col >= len(self.line_format):
-                    break
-                formater = self.line_format[idx_col]
-                if not formater.field_name:
-                    if self.validity_file:
-                        c_row.append(val)
-                    continue
-                if formater.regexp:
-                    # multiline regexp is a mess...
-                    val = val.replace('\n', NEW_LINE_BREAK)
-                    match = formater.regexp.match(val)
-                    if not match:
-                        if formater.required:
-                            self.errors.append((idx_line+1, idx_col+1,
-                                               self.ERRORS['value_required']))
-                        elif not val.strip():
-                            c_row.append("")
-                            continue
-                        c_errors = True
-                        val = val.replace(NEW_LINE_BREAK, '\n')
-                        self.errors.append((idx_line+1, idx_col+1,
-                                 unicode(self.ERRORS['regex_not_match']) + val))
-                        c_row.append("")
-                        continue
-                    val_group = [v.replace(NEW_LINE_BREAK, '\n')
-                                 for v in match.groups()]
-                else:
-                    val_group = [val]
-                c_values = []
-                for idx_v, v in enumerate(val_group):
-                    self.message = ''
-                    func = formater.formater
-                    if type(func) in (list, tuple):
-                        func = func[idx_v]
-                    if not callable(func) and type(func) in (unicode, str):
-                        func = getattr(self, func)
-                    value = None
-                    try:
-                        if formater.regexp_formater_args:
-                            args = []
-                            for idx in formater.regexp_formater_args[idx_v]:
-                                args.append(val_group[idx])
-                            value = func.format(*args)
-                        else:
-                            value = func.format(v)
-                    except ValueError, e:
-                        c_errors = True
-                        self.errors.append((idx_line+1, idx_col+1, e.message))
-                        c_values.append(None)
-                        continue
-                    if self.message:
-                        self.messages.append(self.message)
-                    c_values.append(value)
-                    if value == None:
-                        if formater.required:
-                            c_errors = True
-                            self.errors.append((idx_line+1, idx_col+1,
-                                               self.ERRORS['value_required']))
-                        continue
-                    field_name = formater.field_name
-                    if type(field_name) in (list, tuple):
-                        field_name = field_name[idx_v]
-                    field_names = [field_name]
-                    if formater.duplicate_field:
-                        duplicate_field = formater.duplicate_field
-                        if type(duplicate_field) in (list, tuple):
-                            duplicate_field = duplicate_field[idx_v]
-                        field_names += [duplicate_field]
-
-
-                    if formater.through:
-                        throughs.append((formater, value))
-                    else:
-                        for field_name in field_names:
-                            self._field_name_to_data_dict(field_name,
-                                                          value, data)
-                if formater.reverse_for_test:
-                    c_row.append(formater.reverse_for_test(**c_values))
-                else:
-                    c_row.append(unicode(c_values))
-
-            if self.validity_file:
-                self.validity_file.writerow(c_row)
-            if not self.c_errors and (idx_col + 1) < min_col_number:
-                self.c_errors = True
-                self.errors.append((idx_line+1, idx_col+1,
-                               self.ERRORS['not_enough_cols'] % min_col_number))
-            if self.c_errors:
-                continue
-            n = datetime.datetime.now()
-            logger.debug('* %s - Cols read' % (unicode(n-n2)))
-            n2 = n
-            if self.test:
-                continue
-            # manage unicity of items (mainly for updates)
-            self.number_imported += 1
-            if self.UNICITY_KEYS:
-                data['defaults'] = {}
-                for k in data.keys():
-                    if k not in self.UNICITY_KEYS \
-                       and k != 'defaults':
-                        data['defaults'][k] = data.pop(k)
-
-            obj, created = self.get_object(self.OBJECT_CLS, data)
-
-            if not created and 'defaults' in data:
-                for k in data['defaults']:
-                    setattr(obj, k, data['defaults'][k])
-                obj.save()
-            n = datetime.datetime.now()
-            logger.debug('* %s - Item saved' % (unicode(n-n2)))
-            n2 = n
-            for formater, value in self.throughs:
-                n = datetime.datetime.now()
-                logger.debug('* %s - Processing formater %s' % (unicode(n-n2),
-                                                        formater.field_name))
-                n2 = n
-                data = {}
-                if formater.through_dict:
-                    data = formater.through_dict.copy()
-                if formater.through_key:
-                    data[formater.through_key] = obj
-                data[formater.field_name] = value
-                through_cls = formater.through
-                if formater.through_unicity_keys:
-                    data['defaults'] = {}
-                    for k in data.keys():
-                        if k not in formater.through_unicity_keys \
-                           and k != 'defaults':
-                            data['defaults'][k] = data.pop(k)
-                t_obj, created = through_cls.objects.get_or_create(**data)
-                if not created and 'defaults' in data:
-                    for k in data['defaults']:
-                        setattr(t_obj, k, data['defaults'][k])
-                    t_obj.save()
-    """
-
     def get_object(self, cls, data, path=[]):
         m2ms = []
         if data and type(data) == dict:
@@ -937,6 +745,8 @@ class Importer(object):
                     dct = create_dict.copy()
                     dct['defaults'] = defaults
                     obj, created = cls.objects.get_or_create(**create_dict)
+                except IntegrityError as e:
+                    raise IntegrityError(e.message)
                 except:
                     created = False
                     obj = cls.objects.filter(**create_dict).all()[0]
@@ -946,16 +756,19 @@ class Importer(object):
                         values = value
                     for v in values:
                         getattr(obj, attr).add(v)
-            except IntegrityError:
-                raise ImporterError("Erreur d'import %s, contexte : %s" \
-                                % (unicode(cls), unicode(data)))
+            except IntegrityError as e:
+                raise ImporterError("Erreur d'import %s, contexte : %s, erreur : %s" \
+                        % (unicode(cls), unicode(data), e.message.decode('utf-8')))
             return obj, created
         return data
 
     def get_csv_errors(self):
-        csv_errors = []
+        if not self.errors:
+            return ""
+        csv_errors = ["line,col,error"]
         for line, col, error in self.errors:
-            csv_errors.append(u'"%d","%d","%s"' % (line or 0, col or 0,
+            csv_errors.append(u'"%s","%s","%s"' % (line and unicode(line) or '-',
+                                                   col and unicode(col) or '-',
                                                    unicode(error)))
         return u"\n".join(csv_errors)
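Two behavioural changes above are easy to miss. First, the skip_first_line boolean
becomes a skip_lines count, so exports with multi-line headers can be skipped. A minimal
illustration of the new test (self.skip_lines > idx_line), with made-up table data:

    table = [['Operation', 'Dates'],          # header, line 1
             ['code', 'start / end'],         # header, line 2
             ['OP-2012-001', '2012/05/14']]   # first data row

    skip_lines = 2  # skip_first_line=True could only ever skip one line
    data_rows = [line for idx_line, line in enumerate(table)
                 if skip_lines <= idx_line]
    assert data_rows == [['OP-2012-001', '2012/05/14']]

Second, get_csv_errors() now returns an empty string when there is nothing to report,
prefixes a "line,col,error" header otherwise, and renders unknown line/column numbers as
"-" instead of 0. A stand-alone rendering of that format, with hypothetical errors:

    errors = [(3, 2, "The regexp doesn't match."),
              (7, None, "Required value is missing.")]  # column unknown

    report_lines = ["line,col,error"]
    for line, col, error in errors:
        report_lines.append('"%s","%s","%s"' % (line or '-', col or '-', error))
    report = "\n".join(report_lines)
    # -> "3","2","The regexp doesn't match."
    #    "7","-","Required value is missing."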
diff --git a/ishtar_common/models.py b/ishtar_common/models.py
index 7ef06ddb7..280376966 100644
--- a/ishtar_common/models.py
+++ b/ishtar_common/models.py
@@ -386,7 +386,7 @@ class GeneralType(models.Model):
         keys = []
         content_type = ContentType.objects.get_for_model(self.__class__)
         for ik in ItemKey.objects.filter(content_type=content_type,
-                                         object_id=ik.pk).all():
+                                         object_id=self.pk).all():
             keys.append(ik.key)
         return keys
