diff options
| author | Étienne Loks <etienne.loks@proxience.com> | 2015-10-20 22:57:54 +0200 | 
|---|---|---|
| committer | Étienne Loks <etienne.loks@proxience.com> | 2015-10-20 22:57:54 +0200 | 
| commit | f2b6af338b34bac870d595c742267a553e194d43 (patch) | |
| tree | dc211417690fe8e14a9c3beb023e7dde9f225aaa /ishtar_common/data_importer.py | |
| parent | 3a83c9d3636f7f3f9f3d2580de43cf47d598d4f6 (diff) | |
| download | Ishtar-f2b6af338b34bac870d595c742267a553e194d43.tar.bz2 Ishtar-f2b6af338b34bac870d595c742267a553e194d43.zip  | |
Imports: match file - fix sra pdl import
Diffstat (limited to 'ishtar_common/data_importer.py')
| -rw-r--r-- | ishtar_common/data_importer.py | 129 | 
1 files changed, 78 insertions, 51 deletions
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py index 972d286ed..065720bbf 100644 --- a/ishtar_common/data_importer.py +++ b/ishtar_common/data_importer.py @@ -29,7 +29,7 @@ import zipfile  from django.conf import settings  from django.contrib.auth.models import User  from django.core.files import File -from django.db import IntegrityError, transaction +from django.db import IntegrityError, DatabaseError, transaction  from django.template.defaultfilters import slugify  from django.utils.translation import ugettext_lazy as _ @@ -64,14 +64,17 @@ class ImportFormater(object):          self.comment = comment          self.force_new = force_new -    def reinit_db_target(self, db_targets): +    def reinit_db_target(self, db_target, nb=0):          if not self.formater:              return -        elif type(self.formater) not in (list, tuple): -            self.formater.db_target = db_targets[0] +        if type(db_target) in (list, tuple): +            db_target = db_target[nb] +        if type(self.formater) not in (list, tuple): +            self.formater.db_target = db_target +            self.formater.init_db_target()          else:              for idx, formater in enumerate(self.formater): -                formater.db_target = db_targets[idx] +                formater.db_target = db_target                  formater.init_db_target()      def init_db_target(self): @@ -249,6 +252,7 @@ class StrChoiceFormater(Formater, ChoiceChecker):          self.create = False          self.missings = set()          self.new_keys = {} +        self.match_table = {}          self.many_split = many_split          for key, value in self.choices:              value = unicode(value) @@ -265,15 +269,18 @@ class StrChoiceFormater(Formater, ChoiceChecker):          if not self.db_target:              return          for target_key in self.db_target.keys.filter(is_set=True).all(): -            value = target_key.value +            key = target_key.key              if not self.strict: -                value = slugify(value) -            if value in self.equiv_dict: +                key = slugify(key) +            if key in self.equiv_dict:                  continue -            v = target_key.key -            if self.model and v: -                v = self.model.objects.get(pk=v) -            self.equiv_dict[value] = v +            v = target_key.value +            if self.model and v and type(v) in (int, unicode): +                try: +                    v = self.model.objects.get(txt_idx=v) +                except: +                    v = self.model.objects.get(pk=v) +            self.equiv_dict[key] = v      def prepare(self, value):          return unicode(value).strip() @@ -304,9 +311,7 @@ class StrChoiceFormater(Formater, ChoiceChecker):              for v in self.equiv_dict:                  q['key'] = v                  value = self.equiv_dict[v] -                if hasattr(value, 'txt_idx'): -                    value = value.txt_idx -                elif hasattr(value, 'pk'): +                if hasattr(value, 'pk'):                      value = value.pk                  q['value'] = value                  with transaction.commit_on_success(): @@ -400,10 +405,12 @@ class StrChoiceFormater(Formater, ChoiceChecker):          return      def format(self, value): +        origin_value = value          value = self.prepare(value)          if not self.strict:              value = slugify(value)          if value in self.equiv_dict: +            self.match_table[origin_value] = self.equiv_dict[value]              return self.equiv_dict[value] @@ -418,6 +425,7 @@ class TypeFormater(StrChoiceFormater):          self.db_target = db_target          self.missings = set()          self.equiv_dict, self.choices = {}, [] +        self.match_table = {}          self.new_keys = {}          for item in model.objects.all():              self.choices.append((item.pk, unicode(item))) @@ -495,6 +503,7 @@ class StrToBoolean(Formater, ChoiceChecker):          self.db_target = db_target          self.missings = set()          self.init_db_target() +        self.match_table = {}          self.new_keys = {}      def init_db_target(self): @@ -564,8 +573,10 @@ class StrToBoolean(Formater, ChoiceChecker):              self.report_new(comment)      def format(self, value): +        origin_value = value          value = self.prepare(value)          if value in self.dct: +            self.match_table[origin_value] = _(self.dct[value])              return self.dct[value]  logger = logging.getLogger(__name__) @@ -700,6 +711,7 @@ class Importer(object):          self.import_instance = import_instance          self.archive = None          self.DB_TARGETS = {} +        self.match_table = {}          if import_instance and import_instance.imported_images:              self.archive = import_instance.imported_images          self._defaults = self.DEFAULTS.copy() @@ -818,6 +830,7 @@ class Importer(object):          return data      def _importation(self, table): +        self.match_table = {}          table = list(table)          if not table or not table[0]:              raise ImporterError(self.ERRORS['no_data'], ImporterError.HEADER) @@ -844,10 +857,10 @@ class Importer(object):          self.now = datetime.datetime.now()          start = datetime.datetime.now()          total = len(table) -        if self.output: +        if self.output == 'cli':              sys.stdout.write("\n")          for idx_line, line in enumerate(table): -            if self.output: +            if self.output == 'cli':                  left = None                  if idx_line > 10:                      ellapsed = datetime.datetime.now() - start @@ -1006,7 +1019,7 @@ class Importer(object):                       unicode(self.ERRORS['regex_not_match']) + val))                  c_row.append("")                  return -            val_group = [v.replace(NEW_LINE_BREAK, '\n') +            val_group = [v.replace(NEW_LINE_BREAK, '\n') if v else ''                           for v in match.groups()]          else:              val_group = [val] @@ -1025,12 +1038,17 @@ class Importer(object):                  values = re.split(func.many_split, values[0])              formated_values = [] +            field_name = formater.field_name +            force_new = formater.force_new +            if type(field_name) in (list, tuple): +                field_name = field_name[idx_v] +            if type(force_new) in (list, tuple): +                force_new = force_new[idx_v] +              if self.DB_TARGETS: -                field_name = formater.field_name -                if type(field_name) in (list, tuple): -                    field_name = field_name[idx_v] -                func.reinit_db_target( -                    self.DB_TARGETS["{}-{}".format(idx_col, field_name)]) +                formater.reinit_db_target( +                    self.DB_TARGETS["{}-{}".format(idx_col + 1, field_name)], +                    idx_v)              for idx, v in enumerate(values):                  value = None                  try: @@ -1051,40 +1069,39 @@ class Importer(object):                      c_values.append('')                      return                  formated_values.append(value) +            if hasattr(func, 'match_table'): +                if field_name not in self.match_table: +                    self.match_table[field_name] = {} +                self.match_table[field_name].update(func.match_table)              value = formated_values              if not many_values:                  value = formated_values[0] -            printed_values = value -            if type(value) not in (list, tuple): -                printed_values = [value] -            try: -                # don't reunicode - unicoded values -                c_values.append(u" ; ".join([v for v in printed_values])) -            except TypeError: -                c_values.append(u" ; ".join([unicode(v) -                                             for v in printed_values])) -            if value is None and formater.required: -                self.c_errors = True -                self.errors.append((idx_line + 1, idx_col + 1, -                                   self.ERRORS['value_required'])) -                return +                printed_values = value +                if type(value) not in (list, tuple): +                    printed_values = [value] +                try: +                    # don't reunicode - unicoded values +                    c_values.append(u" ; ".join([v for v in printed_values])) +                except TypeError: +                    c_values.append(u" ; ".join([unicode(v) +                                                 for v in printed_values])) +                if value is None and formater.required: +                    self.c_errors = True +                    self.errors.append((idx_line + 1, idx_col + 1, +                                       self.ERRORS['value_required'])) +                    return -            field_name = formater.field_name -            force_new = formater.force_new -            if type(field_name) in (list, tuple): -                field_name = field_name[idx_v] -                force_new = force_new[idx_v] -            field_names = [field_name] -            force_news = [force_new] -            for duplicate_field in formater.duplicate_fields: -                if type(duplicate_field[0]) in (list, tuple): -                    duplicate_field, force_new = duplicate_field[idx_v] -                else: -                    duplicate_field, force_new = duplicate_field -                field_names += [duplicate_field] -                force_news += [force_new] +                field_names = [field_name] +                force_news = [force_new] +                for duplicate_field in formater.duplicate_fields: +                    if type(duplicate_field[0]) in (list, tuple): +                        duplicate_field, force_new = duplicate_field[idx_v] +                    else: +                        duplicate_field, force_new = duplicate_field +                    field_names += [duplicate_field] +                    force_news += [force_new]              if formater.through:                  self._throughs.append((formater, value)) @@ -1237,6 +1254,8 @@ class Importer(object):                      raise IntegrityError(e.message)                  except IntegrityError as e:                      raise IntegrityError(e.message) +                except DatabaseError as e: +                    raise IntegrityError(e.message)                  except cls.MultipleObjectsReturned:                      created = False                      obj = cls.objects.filter(**create_dict).all()[0] @@ -1288,6 +1307,14 @@ class Importer(object):      def get_csv_result(self):          return self._get_csv(self.validity) +    def get_csv_matches(self): +        header = [_('field'), _('source'), _('result')] +        values = [] +        for field in self.match_table: +            for source in self.match_table[field]: +                values.append((field, source, self.match_table[field][source])) +        return self._get_csv(values, header=header) +      @classmethod      def choices_check(cls, choices):          def function(value):  | 
