diff options
-rw-r--r-- | ishtar_common/data_importer.py | 216 |
1 files changed, 124 insertions, 92 deletions
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py index 992025bbb..a64b908ea 100644 --- a/ishtar_common/data_importer.py +++ b/ishtar_common/data_importer.py @@ -17,25 +17,30 @@ # See the file COPYING for details. -import copy, csv, datetime, logging, re, sys -from tempfile import NamedTemporaryFile +import copy +import csv +import datetime +import io +import logging +import re +import sys from django.contrib.auth.models import User -from django.db import DatabaseError, IntegrityError, transaction +from django.db import IntegrityError, transaction from django.template.defaultfilters import slugify from django.utils.translation import ugettext_lazy as _ -from ishtar_common.unicode_csv import UnicodeWriter - NEW_LINE_BREAK = '#####@@@#####' RE_FILTER_CEDEX = re.compile("(.*) *(?: *CEDEX|cedex|Cedex|Cédex|cédex *\d*)") + class ImportFormater(object): def __init__(self, field_name, formater=None, required=True, through=None, - through_key=None, through_dict=None, through_unicity_keys=None, - duplicate_fields=[], regexp=None, regexp_formater_args=[], - force_value=None, post_processing=False, concat=False, comment=""): + through_key=None, through_dict=None, + through_unicity_keys=None, duplicate_fields=[], regexp=None, + regexp_formater_args=[], force_value=None, + post_processing=False, concat=False, comment=""): self.field_name = field_name self.formater = formater self.required = required @@ -63,7 +68,8 @@ class ImportFormater(object): def report_error(self, *args): return - def init(self, vals, output=None, choose_default=False, import_instance=None): + def init(self, vals, output=None, choose_default=False, + import_instance=None): try: lst = iter(self.formater) except TypeError: @@ -77,9 +83,11 @@ class ImportFormater(object): def post_process(self, obj, context, value, owner=None): raise NotImplemented() + class ImporterError(Exception): STANDARD = 'S' HEADER = 'H' + def __init__(self, message, type='S'): self.msg = message self.type = type @@ -87,6 +95,7 @@ class ImporterError(Exception): def __str__(self): return self.msg + class Formater(object): def __init__(self, *args, **kwargs): self.db_target = kwargs.get('db_target', None) @@ -98,6 +107,7 @@ class Formater(object): import_instance=None): return + class ChoiceChecker(object): def report_new(self, comment): if not self.new_keys: @@ -108,6 +118,7 @@ class ChoiceChecker(object): msg = u'"%s";"%s"\n' % (k, self.new_keys[k]) sys.stderr.write(msg.encode('utf-8')) + class UnicodeFormater(Formater): def __init__(self, max_length, clean=False, re_filter=None, notnull=False, db_target=None): @@ -135,13 +146,15 @@ class UnicodeFormater(Formater): except UnicodeDecodeError: return if len(value) > self.max_length: - raise ValueError(_(u"\"%(value)s\" is too long. "\ - u"The max length is %(length)d characters." - ) % {'value':value, 'length':self.max_length}) + raise ValueError( + _(u"\"%(value)s\" is too long. The max length is %(length)d " + u"characters.") % {'value': value, + 'length': self.max_length}) if self.notnull and not value: return return value + class BooleanFormater(Formater): def format(self, value): value = value.strip().upper() @@ -150,7 +163,8 @@ class BooleanFormater(Formater): if value in ('', '0', 'NON', 'FAUX', 'NO', 'FALSE'): return False raise ValueError(_(u"\"%(value)s\" not equal to yes or no") % { - 'value':value}) + 'value': value}) + class FloatFormater(Formater): def format(self, value): @@ -161,7 +175,8 @@ class FloatFormater(Formater): return float(value) except ValueError: raise ValueError(_(u"\"%(value)s\" is not a float") % { - 'value':value}) + 'value': value}) + class YearFormater(Formater): def format(self, value): @@ -173,7 +188,8 @@ class YearFormater(Formater): assert value > 0 and value < (datetime.date.today().year + 30) except (ValueError, AssertionError): raise ValueError(_(u"\"%(value)s\" is not a valid date") % { - 'value':value}) + 'value': value}) + class YearNoFuturFormater(Formater): def format(self, value): @@ -185,7 +201,8 @@ class YearNoFuturFormater(Formater): assert value > 0 and value < (datetime.date.today().year) except (ValueError, AssertionError): raise ValueError(_(u"\"%(value)s\" is not a valid date") % { - 'value':value}) + 'value': value}) + class IntegerFormater(Formater): def format(self, value): @@ -196,7 +213,8 @@ class IntegerFormater(Formater): return int(value) except ValueError: raise ValueError(_(u"\"%(value)s\" is not an integer") % { - 'value':value}) + 'value': value}) + class StrChoiceFormater(Formater, ChoiceChecker): def __init__(self, choices, strict=False, equiv_dict={}, model=None, @@ -237,15 +255,15 @@ class StrChoiceFormater(Formater, ChoiceChecker): def _get_choices(self, comment=''): msgstr = comment + u" - " - msgstr += unicode(_(u"Choice for \"%s\" is not available. "\ - u"Which one is relevant?\n")) + msgstr += unicode(_(u"Choice for \"%s\" is not available. " + u"Which one is relevant?\n")) idx = -1 for idx, choice in enumerate(self.choices): - msgstr += u"%d. %s\n" % (idx+1, choice[1]) + msgstr += u"%d. %s\n" % (idx + 1, choice[1]) idx += 2 if self.create: msgstr += unicode(_(u"%d. None of the above - create new")) % idx \ - + u"\n" + + u"\n" idx += 1 msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n" return msgstr, idx @@ -272,7 +290,7 @@ class StrChoiceFormater(Formater, ChoiceChecker): res = None if choose_default: res = 1 - while res not in range(1, idx+1): + while res not in range(1, idx + 1): msg = msgstr % value sys.stdout.write(msg.encode('utf-8')) sys.stdout.write("\n>>> ") @@ -299,8 +317,8 @@ class StrChoiceFormater(Formater, ChoiceChecker): if output == 'db' and self.db_target: from ishtar_common.models import TargetKey for missing in self.missings: - q = {'target':self.db_target, 'key':missing, - 'associated_import':import_instance} + q = {'target': self.db_target, 'key': missing, + 'associated_import': import_instance} if TargetKey.objects.filter(**q).count(): continue with transaction.commit_on_success(): @@ -324,6 +342,7 @@ class StrChoiceFormater(Formater, ChoiceChecker): if value in self.equiv_dict: return self.equiv_dict[value] + class TypeFormater(StrChoiceFormater): def __init__(self, model, cli=False, defaults={}, many_split=False, db_target=None): @@ -359,6 +378,7 @@ class TypeFormater(StrChoiceFormater): values['order'] = order return self.model.objects.create(**values) + class DateFormater(Formater): def __init__(self, date_formats=["%d/%m/%Y"], db_target=None): self.date_formats = date_formats @@ -376,13 +396,14 @@ class DateFormater(Formater): except: continue raise ValueError(_(u"\"%(value)s\" is not a valid date") % { - 'value':value}) + 'value': value}) + class StrToBoolean(Formater, ChoiceChecker): def __init__(self, choices={}, cli=False, strict=False, db_target=None): self.dct = copy.copy(choices) self.cli = cli - self.strict= strict + self.strict = strict self.db_target = db_target self.missings = set() if self.db_target: @@ -412,8 +433,9 @@ class StrToBoolean(Formater, ChoiceChecker): if (not output or output == 'silent') and not choose_default: return msgstr = comment + u" - " - msgstr += unicode(_(u"Choice for \"%s\" is not available. "\ - u"Which one is relevant?\n")) + msgstr += unicode(_( + u"Choice for \"%s\" is not available. " + u"Which one is relevant?\n")) msgstr += u"1. True\n" msgstr += u"2. False\n" msgstr += u"3. Empty\n" @@ -447,8 +469,8 @@ class StrToBoolean(Formater, ChoiceChecker): from ishtar_common.models import TargetKey for missing in self.missings: try: - q = {'target':self.db_target, 'key':missing, - 'associated_import':import_instance} + q = {'target': self.db_target, 'key': missing, + 'associated_import': import_instance} if not TargetKey.objects.filter(**q).count(): TargetKey.objects.create(**q) except IntegrityError: @@ -463,6 +485,7 @@ class StrToBoolean(Formater, ChoiceChecker): logger = logging.getLogger(__name__) + def get_object_from_path(obj, path): for k in path.split('__')[:-1]: if not hasattr(obj, k): @@ -470,6 +493,7 @@ def get_object_from_path(obj, path): obj = getattr(obj, k) return obj + class Importer(object): DESC = "" LINE_FORMAT = [] @@ -480,21 +504,22 @@ class Importer(object): DEFAULTS = {} STR_CUT = {} ERRORS = { - 'header_check':_(u"The given file is not correct. Check the file " - u"format. If you use a CSV file: check that column separator " - u"and encoding are similar to the ones used by the reference " - u"file."), - 'too_many_cols':_(u"Too many cols (%(user_col)d) when " - u"maximum is %(ref_col)d"), - 'no_data':_(u"No data provided"), - 'value_required':_(u"Value is required"), - 'not_enough_cols':_(u"At least %d columns must be filled"), - 'regex_not_match':_(u"The regexp doesn't match.") - } + 'header_check': _( + u"The given file is not correct. Check the file " + u"format. If you use a CSV file: check that column separator " + u"and encoding are similar to the ones used by the reference " + u"file."), + 'too_many_cols': _(u"Too many cols (%(user_col)d) when " + u"maximum is %(ref_col)d"), + 'no_data': _(u"No data provided"), + 'value_required': _(u"Value is required"), + 'not_enough_cols': _(u"At least %d columns must be filled"), + 'regex_not_match': _(u"The regexp doesn't match.") + } def __init__(self, skip_lines=0, reference_header=None, - check_col_num=False, test=False, history_modifier=None, - output='silent', import_instance=None): + check_col_num=False, test=False, history_modifier=None, + output='silent', import_instance=None): """ * skip_line must be set if the data provided has got headers lines. * a reference_header can be provided to perform a data compliance @@ -504,8 +529,8 @@ class Importer(object): self.skip_lines = skip_lines self.reference_header = reference_header self.test = test - self.errors = [] # list of (line, col, message) - self.validity = [] # list of (line, col, message) + self.errors = [] # list of (line, col, message) + self.validity = [] # list of (line, col, message) self.number_updated = 0 self.number_created = 0 self.check_col_num = check_col_num @@ -526,7 +551,7 @@ class Importer(object): else: # import made by the CLI: get the first admin self.history_modifier = User.objects.filter( - is_superuser=True).order_by('pk')[0] + is_superuser=True).order_by('pk')[0] def post_processing(self, item, data): return item @@ -537,8 +562,8 @@ class Importer(object): * output: - 'silent': no associations - 'cli': output by command line interface and stocked in the database - - 'db': output on the database with no interactive association (further - exploitation by web interface) + - 'db': output on the database with no interactive association + (further exploitation by web interface) """ assert output in ('silent', 'cli', 'db') vals = [] @@ -571,12 +596,12 @@ class Importer(object): keys = field_name.split('__') current_data = data for idx, key in enumerate(keys): - if idx == (len(keys) - 1): # last + if idx == (len(keys) - 1): # last if concat: if not value: value = "" current_data[key] = (current_data[key] + u"\n") or u""\ - + value + + value elif force_value and value: current_data[key] = value elif key not in current_data or not current_data[key]: @@ -592,7 +617,7 @@ class Importer(object): raise ImporterError(self.ERRORS['no_data'], ImporterError.HEADER) if self.check_col_num and len(table[0]) > len(self.line_format): raise ImporterError(self.ERRORS['too_many_cols'] % { - 'user_col':len(table[0]), 'ref_col':len(self.line_format)}) + 'user_col': len(table[0]), 'ref_col': len(self.line_format)}) self.errors = [] self.validity = [] self.number_imported = 0 @@ -620,10 +645,10 @@ class Importer(object): left = None if idx_line > 10: ellapsed = datetime.datetime.now() - start - time_by_item = ellapsed/idx_line + time_by_item = ellapsed / idx_line if time_by_item: - left = ((total - idx_line)*time_by_item).seconds - txt = u"\r* %d/%d" % (idx_line+1, total) + left = ((total - idx_line) * time_by_item).seconds + txt = u"\r* %d/%d" % (idx_line + 1, total) if left: txt += u" (%d seconds left)" % left sys.stdout.write(txt.encode('utf-8')) @@ -640,8 +665,8 @@ class Importer(object): if not line: self.validity.append([]) return - self._throughs = [] # list of (formater, value) - self._post_processing = [] # list of (formater, value) + self._throughs = [] # list of (formater, value) + self._post_processing = [] # list of (formater, value) data = {} # keep in database the raw line for testing purpose @@ -652,7 +677,8 @@ class Importer(object): data[self.IMPORTED_LINE_FIELD] = output.getvalue() n = datetime.datetime.now() - logger.debug('%s - Processing line %d' % (unicode(n-self.now), idx_line)) + logger.debug('%s - Processing line %d' % (unicode(n - self.now), + idx_line)) self.now = n n2 = n self.c_errors = False @@ -666,12 +692,13 @@ class Importer(object): self.validity.append(c_row) if not self.c_errors and (idx_col + 1) < self.min_col_number: self.c_errors = True - self.errors.append((idx_line+1, idx_col+1, - self.ERRORS['not_enough_cols'] % self.min_col_number)) + self.errors.append(( + idx_line + 1, idx_col + 1, + self.ERRORS['not_enough_cols'] % self.min_col_number)) if self.c_errors: return n = datetime.datetime.now() - logger.debug('* %s - Cols read' % (unicode(n-n2))) + logger.debug('* %s - Cols read' % (unicode(n - n2))) n2 = n if self.test: return @@ -684,7 +711,7 @@ class Importer(object): data['defaults'][k] = data.pop(k) if 'history_modifier' in \ - self.OBJECT_CLS._meta.get_all_field_names(): + self.OBJECT_CLS._meta.get_all_field_names(): data['history_modifier'] = self.history_modifier obj, created = self.get_object(self.OBJECT_CLS, data) @@ -702,12 +729,12 @@ class Importer(object): setattr(obj, k, data['defaults'][k]) obj.save() n = datetime.datetime.now() - logger.debug('* %s - Item saved' % (unicode(n-n2))) + logger.debug('* %s - Item saved' % (unicode(n - n2))) n2 = n for formater, value in self._throughs: n = datetime.datetime.now() - logger.debug('* %s - Processing formater %s' % (unicode(n-n2), - formater.field_name)) + logger.debug('* %s - Processing formater %s' % (unicode(n - n2), + formater.field_name)) n2 = n data = {} if formater.through_dict: @@ -756,15 +783,17 @@ class Importer(object): match = formater.regexp.match(val) if not match: if formater.required: - self.errors.append((idx_line+1, idx_col+1, - self.ERRORS['value_required'])) + self.errors.append( + (idx_line + 1, idx_col + 1, + self.ERRORS['value_required'])) elif not val.strip(): c_row.append("") return self.c_errors = True val = val.replace(NEW_LINE_BREAK, '\n') - self.errors.append((idx_line+1, idx_col+1, - unicode(self.ERRORS['regex_not_match']) + val)) + self.errors.append( + (idx_line + 1, idx_col + 1, + unicode(self.ERRORS['regex_not_match']) + val)) c_row.append("") return val_group = [v.replace(NEW_LINE_BREAK, '\n') @@ -798,7 +827,7 @@ class Importer(object): except ValueError, e: if formater.required: self.c_errors = True - self.errors.append((idx_line+1, idx_col+1, e.message)) + self.errors.append((idx_line + 1, idx_col + 1, e.message)) c_values.append('') return formated_values.append(value) @@ -814,10 +843,11 @@ class Importer(object): # don't reunicode - unicoded values c_values.append(u" ; ".join([v for v in printed_values])) except TypeError: - c_values.append(u" ; ".join([unicode(v) for v in printed_values])) - if value == None and formater.required: + c_values.append(u" ; ".join([unicode(v) + for v in printed_values])) + if value is None and formater.required: self.c_errors = True - self.errors.append((idx_line+1, idx_col+1, + self.errors.append((idx_line + 1, idx_col + 1, self.ERRORS['value_required'])) return @@ -834,8 +864,8 @@ class Importer(object): self._throughs.append((formater, value)) else: for field_name in field_names: - self._field_name_to_data_dict(field_name, - value, data, formater.force_value) + self._field_name_to_data_dict( + field_name, value, data, formater.force_value) c_row.append(u" ; ".join([v for v in c_values])) def get_object(self, cls, data, path=[]): @@ -846,7 +876,7 @@ class Importer(object): if not data[attribute]: continue field_object, model, direct, m2m = \ - cls._meta.get_field_by_name(attribute) + cls._meta.get_field_by_name(attribute) if m2m: many_values = data.pop(attribute) if hasattr(field_object, 'rel'): @@ -865,12 +895,12 @@ class Importer(object): # contruct many dict for each values default_dict = {} - ## init with simple values that will be duplicated + # # init with simple values that will be duplicated for key in val.keys(): if type(val[key]) not in (list, tuple): default_dict[key] = val[key] vals.append(default_dict.copy()) - ## manage multiple values + # # manage multiple values for key in val.keys(): if type(val[key]) in (list, tuple): for idx, v in enumerate(val[key]): @@ -893,23 +923,23 @@ class Importer(object): if 'history_modifier' in \ model._meta.get_all_field_names(): v['defaults']['history_modifier'] = \ - self.history_modifier + self.history_modifier v, created = model.objects.get_or_create( - **v) + **v) if self.import_instance \ and hasattr(v, 'imports') and created: v.imports.add(self.import_instance) m2ms.append((attribute, v)) elif hasattr(field_object, 'rel') and field_object.rel and \ - type(data[attribute]) == dict: + type(data[attribute]) == dict: c_path.append(attribute) # put history_modifier for every created item if 'history_modifier' in \ field_object.rel.to._meta.get_all_field_names(): data[attribute]['history_modifier'] = \ - self.history_modifier + self.history_modifier data[attribute], created = self.get_object( - field_object.rel.to, data[attribute], c_path) + field_object.rel.to, data[attribute], c_path) # default values path = tuple(path) if path in self._defaults: @@ -931,7 +961,8 @@ class Importer(object): defaults = {} if 'history_modifier' in create_dict: defaults = { - 'history_modifier':create_dict.pop('history_modifier')} + 'history_modifier': create_dict.pop('history_modifier') + } try: try: dct = create_dict.copy() @@ -954,7 +985,7 @@ class Importer(object): except IntegrityError as e: message = e.message try: - message = unicode(e.message.decode('utf-8')) + message = e.message.decode('utf-8') except (UnicodeDecodeError, UnicodeDecodeError): message = '' try: @@ -962,8 +993,8 @@ class Importer(object): except UnicodeDecodeError: data = '' raise ImporterError( - "Erreur d'import %s, contexte : %s, erreur : %s" \ - % (unicode(cls), unicode(data), e.message.decode('utf-8'))) + "Erreur d'import %s, contexte : %s, erreur : %s" + % (unicode(cls), unicode(data), message)) return obj, created return data @@ -982,8 +1013,8 @@ class Importer(object): return u"\n".join(csv_v) def get_csv_errors(self): - return self._get_csv(self.errors, - header=[_("line"), _("col"), _("error")]) + return self._get_csv( + self.errors, header=[_("line"), _("col"), _("error")]) def get_csv_result(self): return self._get_csv(self.validity) @@ -996,10 +1027,11 @@ class Importer(object): if not value: return if value not in choices_dct.values(): - raise ValueError(_(u"\"%(value)s\" not in %(values)s") % { - 'value':value, - 'values':u", ".join([val for val in choices_dct.values()]) + raise ValueError( + _(u"\"%(value)s\" not in %(values)s") % { + 'value': value, + 'values': u", ".join([val + for val in choices_dct.values()]) }) return value return function - |