diff options
| author | Étienne Loks <etienne.loks@proxience.com> | 2015-09-23 13:43:19 +0200 | 
|---|---|---|
| committer | Étienne Loks <etienne.loks@proxience.com> | 2015-09-23 13:43:19 +0200 | 
| commit | 10885a31d30fdd592c758ead7530a403ba17bdef (patch) | |
| tree | 2a934d93880fac83661f79accd07ea72f72c367f /ishtar_common/data_importer.py | |
| parent | 37542704dfdb363106e6aed1f4971427fc318394 (diff) | |
| download | Ishtar-10885a31d30fdd592c758ead7530a403ba17bdef.tar.bz2 Ishtar-10885a31d30fdd592c758ead7530a403ba17bdef.zip | |
Flake8
Diffstat (limited to 'ishtar_common/data_importer.py')
| -rw-r--r-- | ishtar_common/data_importer.py | 216 | 
1 files changed, 124 insertions, 92 deletions
| diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py index 992025bbb..a64b908ea 100644 --- a/ishtar_common/data_importer.py +++ b/ishtar_common/data_importer.py @@ -17,25 +17,30 @@  # See the file COPYING for details. -import copy, csv, datetime, logging, re, sys -from tempfile import NamedTemporaryFile +import copy +import csv +import datetime +import io +import logging +import re +import sys  from django.contrib.auth.models import User -from django.db import DatabaseError, IntegrityError, transaction +from django.db import IntegrityError, transaction  from django.template.defaultfilters import slugify  from django.utils.translation import ugettext_lazy as _ -from ishtar_common.unicode_csv import UnicodeWriter -  NEW_LINE_BREAK = '#####@@@#####'  RE_FILTER_CEDEX = re.compile("(.*) *(?: *CEDEX|cedex|Cedex|Cédex|cédex *\d*)") +  class ImportFormater(object):      def __init__(self, field_name, formater=None, required=True, through=None, -                through_key=None, through_dict=None, through_unicity_keys=None, -                duplicate_fields=[], regexp=None, regexp_formater_args=[], -                force_value=None, post_processing=False, concat=False, comment=""): +                 through_key=None, through_dict=None, +                 through_unicity_keys=None, duplicate_fields=[], regexp=None, +                 regexp_formater_args=[], force_value=None, +                 post_processing=False, concat=False, comment=""):          self.field_name = field_name          self.formater = formater          self.required = required @@ -63,7 +68,8 @@ class ImportFormater(object):      def report_error(self, *args):          return -    def init(self, vals, output=None, choose_default=False, import_instance=None): +    def init(self, vals, output=None, choose_default=False, +             import_instance=None):          try:              lst = iter(self.formater)          except TypeError: @@ -77,9 +83,11 @@ class ImportFormater(object):      def post_process(self, obj, context, value, owner=None):          raise NotImplemented() +  class ImporterError(Exception):      STANDARD = 'S'      HEADER = 'H' +      def __init__(self, message, type='S'):          self.msg = message          self.type = type @@ -87,6 +95,7 @@ class ImporterError(Exception):      def __str__(self):          return self.msg +  class Formater(object):      def __init__(self, *args, **kwargs):          self.db_target = kwargs.get('db_target', None) @@ -98,6 +107,7 @@ class Formater(object):                import_instance=None):          return +  class ChoiceChecker(object):      def report_new(self, comment):          if not self.new_keys: @@ -108,6 +118,7 @@ class ChoiceChecker(object):              msg = u'"%s";"%s"\n' % (k, self.new_keys[k])              sys.stderr.write(msg.encode('utf-8')) +  class UnicodeFormater(Formater):      def __init__(self, max_length, clean=False, re_filter=None, notnull=False,                   db_target=None): @@ -135,13 +146,15 @@ class UnicodeFormater(Formater):          except UnicodeDecodeError:              return          if len(value) > self.max_length: -            raise ValueError(_(u"\"%(value)s\" is too long. "\ -                               u"The max length is %(length)d characters." -                               ) % {'value':value, 'length':self.max_length}) +            raise ValueError( +                _(u"\"%(value)s\" is too long. The max length is %(length)d " +                  u"characters.") % {'value': value, +                                     'length': self.max_length})          if self.notnull and not value:              return          return value +  class BooleanFormater(Formater):      def format(self, value):          value = value.strip().upper() @@ -150,7 +163,8 @@ class BooleanFormater(Formater):          if value in ('', '0', 'NON', 'FAUX', 'NO', 'FALSE'):              return False          raise ValueError(_(u"\"%(value)s\" not equal to yes or no") % { -                                                                 'value':value}) +            'value': value}) +  class FloatFormater(Formater):      def format(self, value): @@ -161,7 +175,8 @@ class FloatFormater(Formater):              return float(value)          except ValueError:              raise ValueError(_(u"\"%(value)s\" is not a float") % { -                                                                 'value':value}) +                'value': value}) +  class YearFormater(Formater):      def format(self, value): @@ -173,7 +188,8 @@ class YearFormater(Formater):              assert value > 0 and value < (datetime.date.today().year + 30)          except (ValueError, AssertionError):              raise ValueError(_(u"\"%(value)s\" is not a valid date") % { -                                                                 'value':value}) +                'value': value}) +  class YearNoFuturFormater(Formater):      def format(self, value): @@ -185,7 +201,8 @@ class YearNoFuturFormater(Formater):              assert value > 0 and value < (datetime.date.today().year)          except (ValueError, AssertionError):              raise ValueError(_(u"\"%(value)s\" is not a valid date") % { -                                                                 'value':value}) +                'value': value}) +  class IntegerFormater(Formater):      def format(self, value): @@ -196,7 +213,8 @@ class IntegerFormater(Formater):              return int(value)          except ValueError:              raise ValueError(_(u"\"%(value)s\" is not an integer") % { -                                                                 'value':value}) +                'value': value}) +  class StrChoiceFormater(Formater, ChoiceChecker):      def __init__(self, choices, strict=False, equiv_dict={}, model=None, @@ -237,15 +255,15 @@ class StrChoiceFormater(Formater, ChoiceChecker):      def _get_choices(self, comment=''):          msgstr = comment + u" - " -        msgstr += unicode(_(u"Choice for \"%s\" is not available. "\ -                           u"Which one is relevant?\n")) +        msgstr += unicode(_(u"Choice for \"%s\" is not available. " +                            u"Which one is relevant?\n"))          idx = -1          for idx, choice in enumerate(self.choices): -            msgstr += u"%d. %s\n" % (idx+1, choice[1]) +            msgstr += u"%d. %s\n" % (idx + 1, choice[1])          idx += 2          if self.create:              msgstr += unicode(_(u"%d. None of the above - create new")) % idx \ -                    + u"\n" +                + u"\n"              idx += 1          msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n"          return msgstr, idx @@ -272,7 +290,7 @@ class StrChoiceFormater(Formater, ChoiceChecker):              res = None              if choose_default:                  res = 1 -            while res not in range(1, idx+1): +            while res not in range(1, idx + 1):                  msg = msgstr % value                  sys.stdout.write(msg.encode('utf-8'))                  sys.stdout.write("\n>>> ") @@ -299,8 +317,8 @@ class StrChoiceFormater(Formater, ChoiceChecker):          if output == 'db' and self.db_target:              from ishtar_common.models import TargetKey              for missing in self.missings: -                q = {'target':self.db_target, 'key':missing, -                     'associated_import':import_instance} +                q = {'target': self.db_target, 'key': missing, +                     'associated_import': import_instance}                  if TargetKey.objects.filter(**q).count():                      continue                  with transaction.commit_on_success(): @@ -324,6 +342,7 @@ class StrChoiceFormater(Formater, ChoiceChecker):          if value in self.equiv_dict:              return self.equiv_dict[value] +  class TypeFormater(StrChoiceFormater):      def __init__(self, model, cli=False, defaults={}, many_split=False,                   db_target=None): @@ -359,6 +378,7 @@ class TypeFormater(StrChoiceFormater):              values['order'] = order          return self.model.objects.create(**values) +  class DateFormater(Formater):      def __init__(self, date_formats=["%d/%m/%Y"], db_target=None):          self.date_formats = date_formats @@ -376,13 +396,14 @@ class DateFormater(Formater):              except:                  continue          raise ValueError(_(u"\"%(value)s\" is not a valid date") % { -                                                           'value':value}) +            'value': value}) +  class StrToBoolean(Formater, ChoiceChecker):      def __init__(self, choices={}, cli=False, strict=False, db_target=None):          self.dct = copy.copy(choices)          self.cli = cli -        self.strict= strict +        self.strict = strict          self.db_target = db_target          self.missings = set()          if self.db_target: @@ -412,8 +433,9 @@ class StrToBoolean(Formater, ChoiceChecker):          if (not output or output == 'silent') and not choose_default:              return          msgstr = comment + u" - " -        msgstr += unicode(_(u"Choice for \"%s\" is not available. "\ -                           u"Which one is relevant?\n")) +        msgstr += unicode(_( +            u"Choice for \"%s\" is not available. " +            u"Which one is relevant?\n"))          msgstr += u"1. True\n"          msgstr += u"2. False\n"          msgstr += u"3. Empty\n" @@ -447,8 +469,8 @@ class StrToBoolean(Formater, ChoiceChecker):              from ishtar_common.models import TargetKey              for missing in self.missings:                  try: -                    q = {'target':self.db_target, 'key':missing, -                         'associated_import':import_instance} +                    q = {'target': self.db_target, 'key': missing, +                         'associated_import': import_instance}                      if not TargetKey.objects.filter(**q).count():                          TargetKey.objects.create(**q)                  except IntegrityError: @@ -463,6 +485,7 @@ class StrToBoolean(Formater, ChoiceChecker):  logger = logging.getLogger(__name__) +  def get_object_from_path(obj, path):      for k in path.split('__')[:-1]:          if not hasattr(obj, k): @@ -470,6 +493,7 @@ def get_object_from_path(obj, path):          obj = getattr(obj, k)      return obj +  class Importer(object):      DESC = ""      LINE_FORMAT = [] @@ -480,21 +504,22 @@ class Importer(object):      DEFAULTS = {}      STR_CUT = {}      ERRORS = { -        'header_check':_(u"The given file is not correct. Check the file " -                  u"format. If you use a CSV file: check that column separator " -                  u"and encoding are similar to the ones used by the reference " -                  u"file."), -        'too_many_cols':_(u"Too many cols (%(user_col)d) when " -                          u"maximum is %(ref_col)d"), -        'no_data':_(u"No data provided"), -        'value_required':_(u"Value is required"), -        'not_enough_cols':_(u"At least %d columns must be filled"), -        'regex_not_match':_(u"The regexp doesn't match.") -        } +        'header_check': _( +            u"The given file is not correct. Check the file " +            u"format. If you use a CSV file: check that column separator " +            u"and encoding are similar to the ones used by the reference " +            u"file."), +        'too_many_cols': _(u"Too many cols (%(user_col)d) when " +                           u"maximum is %(ref_col)d"), +        'no_data': _(u"No data provided"), +        'value_required': _(u"Value is required"), +        'not_enough_cols': _(u"At least %d columns must be filled"), +        'regex_not_match': _(u"The regexp doesn't match.") +    }      def __init__(self, skip_lines=0, reference_header=None, -             check_col_num=False, test=False, history_modifier=None, -             output='silent', import_instance=None): +                 check_col_num=False, test=False, history_modifier=None, +                 output='silent', import_instance=None):          """           * skip_line must be set if the data provided has got headers lines.           * a reference_header can be provided to perform a data compliance @@ -504,8 +529,8 @@ class Importer(object):          self.skip_lines = skip_lines          self.reference_header = reference_header          self.test = test -        self.errors = [] # list of (line, col, message) -        self.validity = [] # list of (line, col, message) +        self.errors = []  # list of (line, col, message) +        self.validity = []  # list of (line, col, message)          self.number_updated = 0          self.number_created = 0          self.check_col_num = check_col_num @@ -526,7 +551,7 @@ class Importer(object):              else:                  # import made by the CLI: get the first admin                  self.history_modifier = User.objects.filter( -                                is_superuser=True).order_by('pk')[0] +                    is_superuser=True).order_by('pk')[0]      def post_processing(self, item, data):          return item @@ -537,8 +562,8 @@ class Importer(object):          * output:           - 'silent': no associations           - 'cli': output by command line interface and stocked in the database -         - 'db': output on the database with no interactive association (further -           exploitation by web interface) +         - 'db': output on the database with no interactive association +           (further exploitation by web interface)          """          assert output in ('silent', 'cli', 'db')          vals = [] @@ -571,12 +596,12 @@ class Importer(object):              keys = field_name.split('__')              current_data = data              for idx, key in enumerate(keys): -                if idx == (len(keys) - 1): # last +                if idx == (len(keys) - 1):  # last                      if concat:                          if not value:                              value = ""                          current_data[key] = (current_data[key] + u"\n") or u""\ -                                            + value +                            + value                      elif force_value and value:                          current_data[key] = value                      elif key not in current_data or not current_data[key]: @@ -592,7 +617,7 @@ class Importer(object):              raise ImporterError(self.ERRORS['no_data'], ImporterError.HEADER)          if self.check_col_num and len(table[0]) > len(self.line_format):              raise ImporterError(self.ERRORS['too_many_cols'] % { -                     'user_col':len(table[0]), 'ref_col':len(self.line_format)}) +                'user_col': len(table[0]), 'ref_col': len(self.line_format)})          self.errors = []          self.validity = []          self.number_imported = 0 @@ -620,10 +645,10 @@ class Importer(object):                  left = None                  if idx_line > 10:                      ellapsed = datetime.datetime.now() - start -                    time_by_item = ellapsed/idx_line +                    time_by_item = ellapsed / idx_line                      if time_by_item: -                        left = ((total - idx_line)*time_by_item).seconds -                txt = u"\r* %d/%d" % (idx_line+1, total) +                        left = ((total - idx_line) * time_by_item).seconds +                txt = u"\r* %d/%d" % (idx_line + 1, total)                  if left:                      txt += u" (%d seconds left)" % left                  sys.stdout.write(txt.encode('utf-8')) @@ -640,8 +665,8 @@ class Importer(object):          if not line:              self.validity.append([])              return -        self._throughs = [] # list of (formater, value) -        self._post_processing = [] # list of (formater, value) +        self._throughs = []  # list of (formater, value) +        self._post_processing = []  # list of (formater, value)          data = {}          # keep in database the raw line for testing purpose @@ -652,7 +677,8 @@ class Importer(object):              data[self.IMPORTED_LINE_FIELD] = output.getvalue()          n = datetime.datetime.now() -        logger.debug('%s - Processing line %d' % (unicode(n-self.now), idx_line)) +        logger.debug('%s - Processing line %d' % (unicode(n - self.now), +                                                  idx_line))          self.now = n          n2 = n          self.c_errors = False @@ -666,12 +692,13 @@ class Importer(object):          self.validity.append(c_row)          if not self.c_errors and (idx_col + 1) < self.min_col_number:              self.c_errors = True -            self.errors.append((idx_line+1, idx_col+1, -                          self.ERRORS['not_enough_cols'] % self.min_col_number)) +            self.errors.append(( +                idx_line + 1, idx_col + 1, +                self.ERRORS['not_enough_cols'] % self.min_col_number))          if self.c_errors:              return          n = datetime.datetime.now() -        logger.debug('* %s - Cols read' % (unicode(n-n2))) +        logger.debug('* %s - Cols read' % (unicode(n - n2)))          n2 = n          if self.test:              return @@ -684,7 +711,7 @@ class Importer(object):                      data['defaults'][k] = data.pop(k)          if 'history_modifier' in \ -                       self.OBJECT_CLS._meta.get_all_field_names(): +                self.OBJECT_CLS._meta.get_all_field_names():              data['history_modifier'] = self.history_modifier          obj, created = self.get_object(self.OBJECT_CLS, data) @@ -702,12 +729,12 @@ class Importer(object):                  setattr(obj, k, data['defaults'][k])              obj.save()          n = datetime.datetime.now() -        logger.debug('* %s - Item saved' % (unicode(n-n2))) +        logger.debug('* %s - Item saved' % (unicode(n - n2)))          n2 = n          for formater, value in self._throughs:              n = datetime.datetime.now() -            logger.debug('* %s - Processing formater %s' % (unicode(n-n2), -                                                    formater.field_name)) +            logger.debug('* %s - Processing formater %s' % (unicode(n - n2), +                         formater.field_name))              n2 = n              data = {}              if formater.through_dict: @@ -756,15 +783,17 @@ class Importer(object):              match = formater.regexp.match(val)              if not match:                  if formater.required: -                    self.errors.append((idx_line+1, idx_col+1, -                                       self.ERRORS['value_required'])) +                    self.errors.append( +                        (idx_line + 1, idx_col + 1, +                         self.ERRORS['value_required']))                  elif not val.strip():                      c_row.append("")                      return                  self.c_errors = True                  val = val.replace(NEW_LINE_BREAK, '\n') -                self.errors.append((idx_line+1, idx_col+1, -                         unicode(self.ERRORS['regex_not_match']) + val)) +                self.errors.append( +                    (idx_line + 1, idx_col + 1, +                     unicode(self.ERRORS['regex_not_match']) + val))                  c_row.append("")                  return              val_group = [v.replace(NEW_LINE_BREAK, '\n') @@ -798,7 +827,7 @@ class Importer(object):                  except ValueError, e:                      if formater.required:                          self.c_errors = True -                    self.errors.append((idx_line+1, idx_col+1, e.message)) +                    self.errors.append((idx_line + 1, idx_col + 1, e.message))                      c_values.append('')                      return                  formated_values.append(value) @@ -814,10 +843,11 @@ class Importer(object):                  # don't reunicode - unicoded values                  c_values.append(u" ; ".join([v for v in printed_values]))              except TypeError: -                c_values.append(u" ; ".join([unicode(v) for v in printed_values])) -            if value == None and formater.required: +                c_values.append(u" ; ".join([unicode(v) +                                             for v in printed_values])) +            if value is None and formater.required:                  self.c_errors = True -                self.errors.append((idx_line+1, idx_col+1, +                self.errors.append((idx_line + 1, idx_col + 1,                                     self.ERRORS['value_required']))                  return @@ -834,8 +864,8 @@ class Importer(object):                  self._throughs.append((formater, value))              else:                  for field_name in field_names: -                    self._field_name_to_data_dict(field_name, -                              value, data, formater.force_value) +                    self._field_name_to_data_dict( +                        field_name, value, data, formater.force_value)          c_row.append(u" ; ".join([v for v in c_values]))      def get_object(self, cls, data, path=[]): @@ -846,7 +876,7 @@ class Importer(object):                  if not data[attribute]:                      continue                  field_object, model, direct, m2m = \ -                                    cls._meta.get_field_by_name(attribute) +                    cls._meta.get_field_by_name(attribute)                  if m2m:                      many_values = data.pop(attribute)                      if hasattr(field_object, 'rel'): @@ -865,12 +895,12 @@ class Importer(object):                              # contruct many dict for each values                              default_dict = {} -                            ## init with simple values that will be duplicated +                            # # init with simple values that will be duplicated                              for key in val.keys():                                  if type(val[key]) not in (list, tuple):                                      default_dict[key] = val[key]                              vals.append(default_dict.copy()) -                            ## manage multiple values +                            # # manage multiple values                              for key in val.keys():                                  if type(val[key]) in (list, tuple):                                      for idx, v in enumerate(val[key]): @@ -893,23 +923,23 @@ class Importer(object):                                  if 'history_modifier' in \                                          model._meta.get_all_field_names():                                      v['defaults']['history_modifier'] = \ -                                                          self.history_modifier +                                        self.history_modifier                                      v, created = model.objects.get_or_create( -                                                                            **v) +                                        **v)                                      if self.import_instance \                                         and hasattr(v, 'imports') and created:                                          v.imports.add(self.import_instance)                                      m2ms.append((attribute, v))                  elif hasattr(field_object, 'rel') and field_object.rel and \ -                   type(data[attribute]) == dict: +                        type(data[attribute]) == dict:                      c_path.append(attribute)                      # put history_modifier for every created item                      if 'history_modifier' in \                         field_object.rel.to._meta.get_all_field_names():                          data[attribute]['history_modifier'] = \ -                                                    self.history_modifier +                            self.history_modifier                      data[attribute], created = self.get_object( -                                   field_object.rel.to, data[attribute], c_path) +                        field_object.rel.to, data[attribute], c_path)              # default values              path = tuple(path)              if path in self._defaults: @@ -931,7 +961,8 @@ class Importer(object):              defaults = {}              if 'history_modifier' in create_dict:                  defaults = { -                        'history_modifier':create_dict.pop('history_modifier')} +                    'history_modifier': create_dict.pop('history_modifier') +                }              try:                  try:                      dct = create_dict.copy() @@ -954,7 +985,7 @@ class Importer(object):              except IntegrityError as e:                  message = e.message                  try: -                    message = unicode(e.message.decode('utf-8')) +                    message = e.message.decode('utf-8')                  except (UnicodeDecodeError, UnicodeDecodeError):                      message = ''                  try: @@ -962,8 +993,8 @@ class Importer(object):                  except UnicodeDecodeError:                      data = ''                  raise ImporterError( -                    "Erreur d'import %s, contexte : %s, erreur : %s" \ -                    % (unicode(cls), unicode(data), e.message.decode('utf-8'))) +                    "Erreur d'import %s, contexte : %s, erreur : %s" +                    % (unicode(cls), unicode(data), message))              return obj, created          return data @@ -982,8 +1013,8 @@ class Importer(object):          return u"\n".join(csv_v)      def get_csv_errors(self): -        return self._get_csv(self.errors, -                            header=[_("line"), _("col"), _("error")]) +        return self._get_csv( +            self.errors, header=[_("line"), _("col"), _("error")])      def get_csv_result(self):          return self._get_csv(self.validity) @@ -996,10 +1027,11 @@ class Importer(object):              if not value:                  return              if value not in choices_dct.values(): -                raise ValueError(_(u"\"%(value)s\" not in %(values)s") % { -                    'value':value, -                    'values':u", ".join([val for val in choices_dct.values()]) +                raise ValueError( +                    _(u"\"%(value)s\" not in %(values)s") % { +                        'value': value, +                        'values': u", ".join([val +                                              for val in choices_dct.values()])                      })              return value          return function - | 
