summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@proxience.com> 2015-09-23 13:43:19 +0200
committer: Étienne Loks <etienne.loks@proxience.com> 2015-09-23 13:43:19 +0200
commit: 9ca517f197978bce8dd8f77aea083166861a83bc (patch)
tree: 2a934d93880fac83661f79accd07ea72f72c367f
parent: 807258a84cbfc4e3545b17165339cd17be495112 (diff)
download: Ishtar-9ca517f197978bce8dd8f77aea083166861a83bc.tar.bz2
Ishtar-9ca517f197978bce8dd8f77aea083166861a83bc.zip
Flake8
-rw-r--r-- ishtar_common/data_importer.py 216
1 files changed, 124 insertions, 92 deletions
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py
index 992025bbb..a64b908ea 100644
--- a/ishtar_common/data_importer.py
+++ b/ishtar_common/data_importer.py
@@ -17,25 +17,30 @@
# See the file COPYING for details.
-import copy, csv, datetime, logging, re, sys
-from tempfile import NamedTemporaryFile
+import copy
+import csv
+import datetime
+import io
+import logging
+import re
+import sys
from django.contrib.auth.models import User
-from django.db import DatabaseError, IntegrityError, transaction
+from django.db import IntegrityError, transaction
from django.template.defaultfilters import slugify
from django.utils.translation import ugettext_lazy as _
-from ishtar_common.unicode_csv import UnicodeWriter
-
NEW_LINE_BREAK = '#####@@@#####'
RE_FILTER_CEDEX = re.compile("(.*) *(?: *CEDEX|cedex|Cedex|Cédex|cédex *\d*)")
+
class ImportFormater(object):
def __init__(self, field_name, formater=None, required=True, through=None,
- through_key=None, through_dict=None, through_unicity_keys=None,
- duplicate_fields=[], regexp=None, regexp_formater_args=[],
- force_value=None, post_processing=False, concat=False, comment=""):
+ through_key=None, through_dict=None,
+ through_unicity_keys=None, duplicate_fields=[], regexp=None,
+ regexp_formater_args=[], force_value=None,
+ post_processing=False, concat=False, comment=""):
self.field_name = field_name
self.formater = formater
self.required = required
@@ -63,7 +68,8 @@ class ImportFormater(object):
def report_error(self, *args):
return
- def init(self, vals, output=None, choose_default=False, import_instance=None):
+ def init(self, vals, output=None, choose_default=False,
+ import_instance=None):
try:
lst = iter(self.formater)
except TypeError:
@@ -77,9 +83,11 @@ class ImportFormater(object):
def post_process(self, obj, context, value, owner=None):
raise NotImplemented()
+
class ImporterError(Exception):
STANDARD = 'S'
HEADER = 'H'
+
def __init__(self, message, type='S'):
self.msg = message
self.type = type
@@ -87,6 +95,7 @@ class ImporterError(Exception):
def __str__(self):
return self.msg
+
class Formater(object):
def __init__(self, *args, **kwargs):
self.db_target = kwargs.get('db_target', None)
@@ -98,6 +107,7 @@ class Formater(object):
import_instance=None):
return
+
class ChoiceChecker(object):
def report_new(self, comment):
if not self.new_keys:
@@ -108,6 +118,7 @@ class ChoiceChecker(object):
msg = u'"%s";"%s"\n' % (k, self.new_keys[k])
sys.stderr.write(msg.encode('utf-8'))
+
class UnicodeFormater(Formater):
def __init__(self, max_length, clean=False, re_filter=None, notnull=False,
db_target=None):
@@ -135,13 +146,15 @@ class UnicodeFormater(Formater):
except UnicodeDecodeError:
return
if len(value) > self.max_length:
- raise ValueError(_(u"\"%(value)s\" is too long. "\
- u"The max length is %(length)d characters."
- ) % {'value':value, 'length':self.max_length})
+ raise ValueError(
+ _(u"\"%(value)s\" is too long. The max length is %(length)d "
+ u"characters.") % {'value': value,
+ 'length': self.max_length})
if self.notnull and not value:
return
return value
+
class BooleanFormater(Formater):
def format(self, value):
value = value.strip().upper()
@@ -150,7 +163,8 @@ class BooleanFormater(Formater):
if value in ('', '0', 'NON', 'FAUX', 'NO', 'FALSE'):
return False
raise ValueError(_(u"\"%(value)s\" not equal to yes or no") % {
- 'value':value})
+ 'value': value})
+
class FloatFormater(Formater):
def format(self, value):
@@ -161,7 +175,8 @@ class FloatFormater(Formater):
return float(value)
except ValueError:
raise ValueError(_(u"\"%(value)s\" is not a float") % {
- 'value':value})
+ 'value': value})
+
class YearFormater(Formater):
def format(self, value):
@@ -173,7 +188,8 @@ class YearFormater(Formater):
assert value > 0 and value < (datetime.date.today().year + 30)
except (ValueError, AssertionError):
raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
- 'value':value})
+ 'value': value})
+
class YearNoFuturFormater(Formater):
def format(self, value):
@@ -185,7 +201,8 @@ class YearNoFuturFormater(Formater):
assert value > 0 and value < (datetime.date.today().year)
except (ValueError, AssertionError):
raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
- 'value':value})
+ 'value': value})
+
class IntegerFormater(Formater):
def format(self, value):
@@ -196,7 +213,8 @@ class IntegerFormater(Formater):
return int(value)
except ValueError:
raise ValueError(_(u"\"%(value)s\" is not an integer") % {
- 'value':value})
+ 'value': value})
+
class StrChoiceFormater(Formater, ChoiceChecker):
def __init__(self, choices, strict=False, equiv_dict={}, model=None,
@@ -237,15 +255,15 @@ class StrChoiceFormater(Formater, ChoiceChecker):
def _get_choices(self, comment=''):
msgstr = comment + u" - "
- msgstr += unicode(_(u"Choice for \"%s\" is not available. "\
- u"Which one is relevant?\n"))
+ msgstr += unicode(_(u"Choice for \"%s\" is not available. "
+ u"Which one is relevant?\n"))
idx = -1
for idx, choice in enumerate(self.choices):
- msgstr += u"%d. %s\n" % (idx+1, choice[1])
+ msgstr += u"%d. %s\n" % (idx + 1, choice[1])
idx += 2
if self.create:
msgstr += unicode(_(u"%d. None of the above - create new")) % idx \
- + u"\n"
+ + u"\n"
idx += 1
msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n"
return msgstr, idx
@@ -272,7 +290,7 @@ class StrChoiceFormater(Formater, ChoiceChecker):
res = None
if choose_default:
res = 1
- while res not in range(1, idx+1):
+ while res not in range(1, idx + 1):
msg = msgstr % value
sys.stdout.write(msg.encode('utf-8'))
sys.stdout.write("\n>>> ")
@@ -299,8 +317,8 @@ class StrChoiceFormater(Formater, ChoiceChecker):
if output == 'db' and self.db_target:
from ishtar_common.models import TargetKey
for missing in self.missings:
- q = {'target':self.db_target, 'key':missing,
- 'associated_import':import_instance}
+ q = {'target': self.db_target, 'key': missing,
+ 'associated_import': import_instance}
if TargetKey.objects.filter(**q).count():
continue
with transaction.commit_on_success():
@@ -324,6 +342,7 @@ class StrChoiceFormater(Formater, ChoiceChecker):
if value in self.equiv_dict:
return self.equiv_dict[value]
+
class TypeFormater(StrChoiceFormater):
def __init__(self, model, cli=False, defaults={}, many_split=False,
db_target=None):
@@ -359,6 +378,7 @@ class TypeFormater(StrChoiceFormater):
values['order'] = order
return self.model.objects.create(**values)
+
class DateFormater(Formater):
def __init__(self, date_formats=["%d/%m/%Y"], db_target=None):
self.date_formats = date_formats
@@ -376,13 +396,14 @@ class DateFormater(Formater):
except:
continue
raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
- 'value':value})
+ 'value': value})
+
class StrToBoolean(Formater, ChoiceChecker):
def __init__(self, choices={}, cli=False, strict=False, db_target=None):
self.dct = copy.copy(choices)
self.cli = cli
- self.strict= strict
+ self.strict = strict
self.db_target = db_target
self.missings = set()
if self.db_target:
@@ -412,8 +433,9 @@ class StrToBoolean(Formater, ChoiceChecker):
if (not output or output == 'silent') and not choose_default:
return
msgstr = comment + u" - "
- msgstr += unicode(_(u"Choice for \"%s\" is not available. "\
- u"Which one is relevant?\n"))
+ msgstr += unicode(_(
+ u"Choice for \"%s\" is not available. "
+ u"Which one is relevant?\n"))
msgstr += u"1. True\n"
msgstr += u"2. False\n"
msgstr += u"3. Empty\n"
@@ -447,8 +469,8 @@ class StrToBoolean(Formater, ChoiceChecker):
from ishtar_common.models import TargetKey
for missing in self.missings:
try:
- q = {'target':self.db_target, 'key':missing,
- 'associated_import':import_instance}
+ q = {'target': self.db_target, 'key': missing,
+ 'associated_import': import_instance}
if not TargetKey.objects.filter(**q).count():
TargetKey.objects.create(**q)
except IntegrityError:
@@ -463,6 +485,7 @@ class StrToBoolean(Formater, ChoiceChecker):
logger = logging.getLogger(__name__)
+
def get_object_from_path(obj, path):
for k in path.split('__')[:-1]:
if not hasattr(obj, k):
@@ -470,6 +493,7 @@ def get_object_from_path(obj, path):
obj = getattr(obj, k)
return obj
+
class Importer(object):
DESC = ""
LINE_FORMAT = []
@@ -480,21 +504,22 @@ class Importer(object):
DEFAULTS = {}
STR_CUT = {}
ERRORS = {
- 'header_check':_(u"The given file is not correct. Check the file "
- u"format. If you use a CSV file: check that column separator "
- u"and encoding are similar to the ones used by the reference "
- u"file."),
- 'too_many_cols':_(u"Too many cols (%(user_col)d) when "
- u"maximum is %(ref_col)d"),
- 'no_data':_(u"No data provided"),
- 'value_required':_(u"Value is required"),
- 'not_enough_cols':_(u"At least %d columns must be filled"),
- 'regex_not_match':_(u"The regexp doesn't match.")
- }
+ 'header_check': _(
+ u"The given file is not correct. Check the file "
+ u"format. If you use a CSV file: check that column separator "
+ u"and encoding are similar to the ones used by the reference "
+ u"file."),
+ 'too_many_cols': _(u"Too many cols (%(user_col)d) when "
+ u"maximum is %(ref_col)d"),
+ 'no_data': _(u"No data provided"),
+ 'value_required': _(u"Value is required"),
+ 'not_enough_cols': _(u"At least %d columns must be filled"),
+ 'regex_not_match': _(u"The regexp doesn't match.")
+ }
def __init__(self, skip_lines=0, reference_header=None,
- check_col_num=False, test=False, history_modifier=None,
- output='silent', import_instance=None):
+ check_col_num=False, test=False, history_modifier=None,
+ output='silent', import_instance=None):
"""
* skip_line must be set if the data provided has got headers lines.
* a reference_header can be provided to perform a data compliance
@@ -504,8 +529,8 @@ class Importer(object):
self.skip_lines = skip_lines
self.reference_header = reference_header
self.test = test
- self.errors = [] # list of (line, col, message)
- self.validity = [] # list of (line, col, message)
+ self.errors = [] # list of (line, col, message)
+ self.validity = [] # list of (line, col, message)
self.number_updated = 0
self.number_created = 0
self.check_col_num = check_col_num
@@ -526,7 +551,7 @@ class Importer(object):
else:
# import made by the CLI: get the first admin
self.history_modifier = User.objects.filter(
- is_superuser=True).order_by('pk')[0]
+ is_superuser=True).order_by('pk')[0]
def post_processing(self, item, data):
return item
@@ -537,8 +562,8 @@ class Importer(object):
* output:
- 'silent': no associations
- 'cli': output by command line interface and stocked in the database
- - 'db': output on the database with no interactive association (further
- exploitation by web interface)
+ - 'db': output on the database with no interactive association
+ (further exploitation by web interface)
"""
assert output in ('silent', 'cli', 'db')
vals = []
@@ -571,12 +596,12 @@ class Importer(object):
keys = field_name.split('__')
current_data = data
for idx, key in enumerate(keys):
- if idx == (len(keys) - 1): # last
+ if idx == (len(keys) - 1): # last
if concat:
if not value:
value = ""
current_data[key] = (current_data[key] + u"\n") or u""\
- + value
+ + value
elif force_value and value:
current_data[key] = value
elif key not in current_data or not current_data[key]:
@@ -592,7 +617,7 @@ class Importer(object):
raise ImporterError(self.ERRORS['no_data'], ImporterError.HEADER)
if self.check_col_num and len(table[0]) > len(self.line_format):
raise ImporterError(self.ERRORS['too_many_cols'] % {
- 'user_col':len(table[0]), 'ref_col':len(self.line_format)})
+ 'user_col': len(table[0]), 'ref_col': len(self.line_format)})
self.errors = []
self.validity = []
self.number_imported = 0
@@ -620,10 +645,10 @@ class Importer(object):
left = None
if idx_line > 10:
ellapsed = datetime.datetime.now() - start
- time_by_item = ellapsed/idx_line
+ time_by_item = ellapsed / idx_line
if time_by_item:
- left = ((total - idx_line)*time_by_item).seconds
- txt = u"\r* %d/%d" % (idx_line+1, total)
+ left = ((total - idx_line) * time_by_item).seconds
+ txt = u"\r* %d/%d" % (idx_line + 1, total)
if left:
txt += u" (%d seconds left)" % left
sys.stdout.write(txt.encode('utf-8'))
@@ -640,8 +665,8 @@ class Importer(object):
if not line:
self.validity.append([])
return
- self._throughs = [] # list of (formater, value)
- self._post_processing = [] # list of (formater, value)
+ self._throughs = [] # list of (formater, value)
+ self._post_processing = [] # list of (formater, value)
data = {}
# keep in database the raw line for testing purpose
@@ -652,7 +677,8 @@ class Importer(object):
data[self.IMPORTED_LINE_FIELD] = output.getvalue()
n = datetime.datetime.now()
- logger.debug('%s - Processing line %d' % (unicode(n-self.now), idx_line))
+ logger.debug('%s - Processing line %d' % (unicode(n - self.now),
+ idx_line))
self.now = n
n2 = n
self.c_errors = False
@@ -666,12 +692,13 @@ class Importer(object):
self.validity.append(c_row)
if not self.c_errors and (idx_col + 1) < self.min_col_number:
self.c_errors = True
- self.errors.append((idx_line+1, idx_col+1,
- self.ERRORS['not_enough_cols'] % self.min_col_number))
+ self.errors.append((
+ idx_line + 1, idx_col + 1,
+ self.ERRORS['not_enough_cols'] % self.min_col_number))
if self.c_errors:
return
n = datetime.datetime.now()
- logger.debug('* %s - Cols read' % (unicode(n-n2)))
+ logger.debug('* %s - Cols read' % (unicode(n - n2)))
n2 = n
if self.test:
return
@@ -684,7 +711,7 @@ class Importer(object):
data['defaults'][k] = data.pop(k)
if 'history_modifier' in \
- self.OBJECT_CLS._meta.get_all_field_names():
+ self.OBJECT_CLS._meta.get_all_field_names():
data['history_modifier'] = self.history_modifier
obj, created = self.get_object(self.OBJECT_CLS, data)
@@ -702,12 +729,12 @@ class Importer(object):
setattr(obj, k, data['defaults'][k])
obj.save()
n = datetime.datetime.now()
- logger.debug('* %s - Item saved' % (unicode(n-n2)))
+ logger.debug('* %s - Item saved' % (unicode(n - n2)))
n2 = n
for formater, value in self._throughs:
n = datetime.datetime.now()
- logger.debug('* %s - Processing formater %s' % (unicode(n-n2),
- formater.field_name))
+ logger.debug('* %s - Processing formater %s' % (unicode(n - n2),
+ formater.field_name))
n2 = n
data = {}
if formater.through_dict:
@@ -756,15 +783,17 @@ class Importer(object):
match = formater.regexp.match(val)
if not match:
if formater.required:
- self.errors.append((idx_line+1, idx_col+1,
- self.ERRORS['value_required']))
+ self.errors.append(
+ (idx_line + 1, idx_col + 1,
+ self.ERRORS['value_required']))
elif not val.strip():
c_row.append("")
return
self.c_errors = True
val = val.replace(NEW_LINE_BREAK, '\n')
- self.errors.append((idx_line+1, idx_col+1,
- unicode(self.ERRORS['regex_not_match']) + val))
+ self.errors.append(
+ (idx_line + 1, idx_col + 1,
+ unicode(self.ERRORS['regex_not_match']) + val))
c_row.append("")
return
val_group = [v.replace(NEW_LINE_BREAK, '\n')
@@ -798,7 +827,7 @@ class Importer(object):
except ValueError, e:
if formater.required:
self.c_errors = True
- self.errors.append((idx_line+1, idx_col+1, e.message))
+ self.errors.append((idx_line + 1, idx_col + 1, e.message))
c_values.append('')
return
formated_values.append(value)
@@ -814,10 +843,11 @@ class Importer(object):
# don't reunicode - unicoded values
c_values.append(u" ; ".join([v for v in printed_values]))
except TypeError:
- c_values.append(u" ; ".join([unicode(v) for v in printed_values]))
- if value == None and formater.required:
+ c_values.append(u" ; ".join([unicode(v)
+ for v in printed_values]))
+ if value is None and formater.required:
self.c_errors = True
- self.errors.append((idx_line+1, idx_col+1,
+ self.errors.append((idx_line + 1, idx_col + 1,
self.ERRORS['value_required']))
return
@@ -834,8 +864,8 @@ class Importer(object):
self._throughs.append((formater, value))
else:
for field_name in field_names:
- self._field_name_to_data_dict(field_name,
- value, data, formater.force_value)
+ self._field_name_to_data_dict(
+ field_name, value, data, formater.force_value)
c_row.append(u" ; ".join([v for v in c_values]))
def get_object(self, cls, data, path=[]):
@@ -846,7 +876,7 @@ class Importer(object):
if not data[attribute]:
continue
field_object, model, direct, m2m = \
- cls._meta.get_field_by_name(attribute)
+ cls._meta.get_field_by_name(attribute)
if m2m:
many_values = data.pop(attribute)
if hasattr(field_object, 'rel'):
@@ -865,12 +895,12 @@ class Importer(object):
# contruct many dict for each values
default_dict = {}
- ## init with simple values that will be duplicated
+ # # init with simple values that will be duplicated
for key in val.keys():
if type(val[key]) not in (list, tuple):
default_dict[key] = val[key]
vals.append(default_dict.copy())
- ## manage multiple values
+ # # manage multiple values
for key in val.keys():
if type(val[key]) in (list, tuple):
for idx, v in enumerate(val[key]):
@@ -893,23 +923,23 @@ class Importer(object):
if 'history_modifier' in \
model._meta.get_all_field_names():
v['defaults']['history_modifier'] = \
- self.history_modifier
+ self.history_modifier
v, created = model.objects.get_or_create(
- **v)
+ **v)
if self.import_instance \
and hasattr(v, 'imports') and created:
v.imports.add(self.import_instance)
m2ms.append((attribute, v))
elif hasattr(field_object, 'rel') and field_object.rel and \
- type(data[attribute]) == dict:
+ type(data[attribute]) == dict:
c_path.append(attribute)
# put history_modifier for every created item
if 'history_modifier' in \
field_object.rel.to._meta.get_all_field_names():
data[attribute]['history_modifier'] = \
- self.history_modifier
+ self.history_modifier
data[attribute], created = self.get_object(
- field_object.rel.to, data[attribute], c_path)
+ field_object.rel.to, data[attribute], c_path)
# default values
path = tuple(path)
if path in self._defaults:
@@ -931,7 +961,8 @@ class Importer(object):
defaults = {}
if 'history_modifier' in create_dict:
defaults = {
- 'history_modifier':create_dict.pop('history_modifier')}
+ 'history_modifier': create_dict.pop('history_modifier')
+ }
try:
try:
dct = create_dict.copy()
@@ -954,7 +985,7 @@ class Importer(object):
except IntegrityError as e:
message = e.message
try:
- message = unicode(e.message.decode('utf-8'))
+ message = e.message.decode('utf-8')
except (UnicodeDecodeError, UnicodeDecodeError):
message = ''
try:
@@ -962,8 +993,8 @@ class Importer(object):
except UnicodeDecodeError:
data = ''
raise ImporterError(
- "Erreur d'import %s, contexte : %s, erreur : %s" \
- % (unicode(cls), unicode(data), e.message.decode('utf-8')))
+ "Erreur d'import %s, contexte : %s, erreur : %s"
+ % (unicode(cls), unicode(data), message))
return obj, created
return data
@@ -982,8 +1013,8 @@ class Importer(object):
return u"\n".join(csv_v)
def get_csv_errors(self):
- return self._get_csv(self.errors,
- header=[_("line"), _("col"), _("error")])
+ return self._get_csv(
+ self.errors, header=[_("line"), _("col"), _("error")])
def get_csv_result(self):
return self._get_csv(self.validity)
@@ -996,10 +1027,11 @@ class Importer(object):
if not value:
return
if value not in choices_dct.values():
- raise ValueError(_(u"\"%(value)s\" not in %(values)s") % {
- 'value':value,
- 'values':u", ".join([val for val in choices_dct.values()])
+ raise ValueError(
+ _(u"\"%(value)s\" not in %(values)s") % {
+ 'value': value,
+ 'values': u", ".join([val
+ for val in choices_dct.values()])
})
return value
return function
-