summaryrefslogtreecommitdiff
path: root/ishtar_common/data_importer.py
diff options
context:
space:
mode:
Diffstat (limited to 'ishtar_common/data_importer.py')
-rw-r--r--ishtar_common/data_importer.py149
1 files changed, 110 insertions, 39 deletions
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py
index 27714458b..992025bbb 100644
--- a/ishtar_common/data_importer.py
+++ b/ishtar_common/data_importer.py
@@ -63,14 +63,15 @@ class ImportFormater(object):
def report_error(self, *args):
return
- def init(self, vals, output=None, import_instance=None):
+ def init(self, vals, output=None, choose_default=False, import_instance=None):
try:
lst = iter(self.formater)
except TypeError:
lst = [self.formater]
for formater in lst:
if formater:
- formater.check(vals, output,
+ formater.check(vals, output, self.comment,
+ choose_default=choose_default,
import_instance=import_instance)
def post_process(self, obj, context, value, owner=None):
@@ -93,9 +94,20 @@ class Formater(object):
def format(self, value):
return value
- def check(self, values, output=None, import_instance=None):
+ def check(self, values, output=None, comment='', choose_default=False,
+ import_instance=None):
return
+class ChoiceChecker(object):
+ def report_new(self, comment):
+ if not self.new_keys:
+ return
+ msg = u"For \"%s\" these new associations have been made:\n" % comment
+ sys.stderr.write(msg.encode('utf-8'))
+ for k in self.new_keys:
+ msg = u'"%s";"%s"\n' % (k, self.new_keys[k])
+ sys.stderr.write(msg.encode('utf-8'))
+
class UnicodeFormater(Formater):
def __init__(self, max_length, clean=False, re_filter=None, notnull=False,
db_target=None):
@@ -186,7 +198,7 @@ class IntegerFormater(Formater):
raise ValueError(_(u"\"%(value)s\" is not an integer") % {
'value':value})
-class StrChoiceFormater(Formater):
+class StrChoiceFormater(Formater, ChoiceChecker):
def __init__(self, choices, strict=False, equiv_dict={}, model=None,
cli=False, many_split='', db_target=None):
self.choices = list(choices)
@@ -197,6 +209,7 @@ class StrChoiceFormater(Formater):
self.db_target = db_target
self.create = False
self.missings = set()
+ self.new_keys = {}
self.many_split = many_split
for key, value in self.choices:
value = unicode(value)
@@ -222,8 +235,9 @@ class StrChoiceFormater(Formater):
def prepare(self, value):
return unicode(value).strip()
- def _get_choices(self):
- msgstr = unicode(_(u"Choice for \"%s\" is not available. "\
+ def _get_choices(self, comment=''):
+ msgstr = comment + u" - "
+ msgstr += unicode(_(u"Choice for \"%s\" is not available. "\
u"Which one is relevant?\n"))
idx = -1
for idx, choice in enumerate(self.choices):
@@ -236,8 +250,9 @@ class StrChoiceFormater(Formater):
msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n"
return msgstr, idx
- def check(self, values, output=None, import_instance=None):
- if not output or output == 'silent':
+ def check(self, values, output=None, comment='', choose_default=False,
+ import_instance=None):
+ if (not output or output == 'silent') and not choose_default:
return
if self.many_split:
new_values = []
@@ -250,14 +265,18 @@ class StrChoiceFormater(Formater):
value = self.prepare(value)
if value in self.equiv_dict:
continue
- if output != 'cli':
+ if output != 'cli' and not choose_default:
self.missings.add(value)
continue
- msgstr, idx = self._get_choices()
+ msgstr, idx = self._get_choices(comment)
res = None
+ if choose_default:
+ res = 1
while res not in range(1, idx+1):
- sys.stdout.write(msgstr % value)
- res = raw_input(">>> ")
+ msg = msgstr % value
+ sys.stdout.write(msg.encode('utf-8'))
+ sys.stdout.write("\n>>> ")
+ res = raw_input()
try:
res = int(res)
except ValueError:
@@ -269,10 +288,12 @@ class StrChoiceFormater(Formater):
v = self.model.objects.get(pk=v)
self.equiv_dict[value] = v
self.add_key(v, value)
+ self.new_keys[value] = v
elif self.create and res == len(self.choices):
self.equiv_dict[value] = self.new(base_value)
self.choices.append((self.equiv_dict[value].pk,
unicode(self.equiv_dict[value])))
+ self.new_keys[value] = unicode(self.equiv_dict[value])
else:
self.equiv_dict[value] = None
if output == 'db' and self.db_target:
@@ -287,6 +308,8 @@ class StrChoiceFormater(Formater):
TargetKey.objects.create(**q)
except IntegrityError:
pass
+ if output == 'cli':
+ self.report_new(comment)
def new(self, value):
return
@@ -312,6 +335,7 @@ class TypeFormater(StrChoiceFormater):
self.db_target = db_target
self.missings = set()
self.equiv_dict, self.choices = {}, []
+ self.new_keys = {}
for item in model.objects.all():
self.choices.append((item.pk, unicode(item)))
for key in item.get_keys():
@@ -336,21 +360,25 @@ class TypeFormater(StrChoiceFormater):
return self.model.objects.create(**values)
class DateFormater(Formater):
- def __init__(self, date_format="%d/%m/%Y", db_target=None):
- self.date_format = date_format
+ def __init__(self, date_formats=["%d/%m/%Y"], db_target=None):
+ self.date_formats = date_formats
+ if type(date_formats) not in (list, tuple):
+ self.date_formats = [self.date_formats]
self.db_target = db_target
def format(self, value):
value = value.strip()
if not value:
return
- try:
- return datetime.datetime.strptime(value, self.date_format).date()
- except:
- raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
+ for date_format in self.date_formats:
+ try:
+ return datetime.datetime.strptime(value, date_format).date()
+ except:
+ continue
+ raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
'value':value})
-class StrToBoolean(Formater):
+class StrToBoolean(Formater, ChoiceChecker):
def __init__(self, choices={}, cli=False, strict=False, db_target=None):
self.dct = copy.copy(choices)
self.cli = cli
@@ -371,6 +399,7 @@ class StrToBoolean(Formater):
else:
v = None
self.dct[value] = v
+ self.new_keys = {}
def prepare(self, value):
value = unicode(value).strip()
@@ -378,10 +407,12 @@ class StrToBoolean(Formater):
value = slugify(value)
return value
- def check(self, values, output=None, import_instance=None):
- if not output or output == 'silent':
+ def check(self, values, output=None, comment='', choose_default=False,
+ import_instance=None):
+ if (not output or output == 'silent') and not choose_default:
return
- msgstr = unicode(_(u"Choice for \"%s\" is not available. "\
+ msgstr = comment + u" - "
+ msgstr += unicode(_(u"Choice for \"%s\" is not available. "\
u"Which one is relevant?\n"))
msgstr += u"1. True\n"
msgstr += u"2. False\n"
@@ -390,13 +421,17 @@ class StrToBoolean(Formater):
value = self.prepare(value)
if value in self.dct:
continue
- if not self.cli:
+ if output != 'cli' and not choose_default:
self.missings.add(value)
continue
res = None
+ if choose_default:
+ res = 1
while res not in range(1, 4):
- sys.stdout.write(msgstr % value)
- res = raw_input(">>> ")
+ msg = msgstr % value
+ sys.stdout.write(msg.encode('utf-8'))
+ sys.stdout.write("\n>>> ")
+ res = raw_input()
try:
res = int(res)
except ValueError:
@@ -407,15 +442,19 @@ class StrToBoolean(Formater):
self.dct[value] = False
else:
self.dct[value] = None
+ self.new_keys[value] = unicode(self.dct[value])
if output == 'db' and self.db_target:
from ishtar_common.models import TargetKey
for missing in self.missings:
try:
q = {'target':self.db_target, 'key':missing,
'associated_import':import_instance}
- TargetKey.objects.create(**q)
+ if not TargetKey.objects.filter(**q).count():
+ TargetKey.objects.create(**q)
except IntegrityError:
pass
+ if output == 'cli':
+ self.report_new(comment)
def format(self, value):
value = self.prepare(value)
@@ -424,13 +463,22 @@ class StrToBoolean(Formater):
logger = logging.getLogger(__name__)
+def get_object_from_path(obj, path):
+ for k in path.split('__')[:-1]:
+ if not hasattr(obj, k):
+ return
+ obj = getattr(obj, k)
+ return obj
+
class Importer(object):
DESC = ""
LINE_FORMAT = []
OBJECT_CLS = None
IMPORTED_LINE_FIELD = None
UNICITY_KEYS = []
+ EXTRA_DEFAULTS = {}
DEFAULTS = {}
+ STR_CUT = {}
ERRORS = {
'header_check':_(u"The given file is not correct. Check the file "
u"format. If you use a CSV file: check that column separator "
@@ -464,6 +512,12 @@ class Importer(object):
self.line_format = copy.copy(self.LINE_FORMAT)
self.import_instance = import_instance
self._defaults = self.DEFAULTS.copy()
+ # EXTRA_DEFAULTS are for multiple inheritance
+ if self.EXTRA_DEFAULTS:
+ for k in self.EXTRA_DEFAULTS:
+ if k not in self._defaults:
+ self._defaults[k] = {}
+ self._defaults[k].update(self.EXTRA_DEFAULTS[k])
self.history_modifier = history_modifier
self.output = output
if not self.history_modifier:
@@ -474,7 +528,10 @@ class Importer(object):
self.history_modifier = User.objects.filter(
is_superuser=True).order_by('pk')[0]
- def initialize(self, table, output='silent'):
+ def post_processing(self, item, data):
+ return item
+
+ def initialize(self, table, output='silent', choose_default=False):
"""
copy vals in columns and initialize formaters
* output:
@@ -496,12 +553,12 @@ class Importer(object):
vals[idx_col].append(val)
for idx, formater in enumerate(self.line_format):
if formater and idx < len(vals):
- formater.init(vals[idx], output,
+ formater.init(vals[idx], output, choose_default=choose_default,
import_instance=self.import_instance)
- def importation(self, table, initialize=True):
+ def importation(self, table, initialize=True, choose_default=False):
if initialize:
- self.initialize(table, self.output)
+ self.initialize(table, self.output, choose_default=choose_default)
self._importation(table)
@classmethod
@@ -566,10 +623,10 @@ class Importer(object):
time_by_item = ellapsed/idx_line
if time_by_item:
left = ((total - idx_line)*time_by_item).seconds
- txt = "\r* %d/%d" % (idx_line+1, total)
+ txt = u"\r* %d/%d" % (idx_line+1, total)
if left:
- txt += " (%d seconds left)" % left
- sys.stdout.write(txt)
+ txt += u" (%d seconds left)" % left
+ sys.stdout.write(txt.encode('utf-8'))
sys.stdout.flush()
try:
self._line_processing(idx_line, line)
@@ -629,8 +686,8 @@ class Importer(object):
if 'history_modifier' in \
self.OBJECT_CLS._meta.get_all_field_names():
data['history_modifier'] = self.history_modifier
- obj, created = self.get_object(self.OBJECT_CLS, data)
+ obj, created = self.get_object(self.OBJECT_CLS, data)
if self.import_instance and hasattr(obj, 'imports') \
and created:
obj.imports.add(self.import_instance)
@@ -677,6 +734,8 @@ class Importer(object):
for formater, val in self._post_processing:
formater.post_process(obj, data, val, owner=self.history_modifier)
+ obj = self.post_processing(obj, data)
+
def _row_processing(self, c_row, idx_col, idx_line, val, data):
if idx_col >= len(self.line_format):
return
@@ -851,12 +910,19 @@ class Importer(object):
self.history_modifier
data[attribute], created = self.get_object(
field_object.rel.to, data[attribute], c_path)
+ # default values
path = tuple(path)
if path in self._defaults:
for k in self._defaults[path]:
if k not in data or not data[k]:
data[k] = self._defaults[path][k]
+ # pre treatment
+ if path in self.STR_CUT:
+ for k in self.STR_CUT[path]:
+ if k in data and data[k]:
+ data[k] = unicode(data[k])[:self.STR_CUT[path][k]]
+
# filter default values
create_dict = copy.deepcopy(data)
for k in create_dict.keys():
@@ -876,11 +942,7 @@ class Importer(object):
obj.imports.add(self.import_instance)
except IntegrityError as e:
raise IntegrityError(e.message)
- except:
- q = cls.objects.filter(**create_dict)
- if not q.count():
- raise ImporterError("Erreur d'import %s, contexte : %s"\
- % (unicode(cls), unicode(data)))
+ except cls.MultipleObjectsReturned:
created = False
obj = cls.objects.filter(**create_dict).all()[0]
for attr, value in m2ms:
@@ -890,6 +952,15 @@ class Importer(object):
for v in values:
getattr(obj, attr).add(v)
except IntegrityError as e:
+ message = e.message
+ try:
+ message = unicode(e.message.decode('utf-8'))
+ except (UnicodeDecodeError, UnicodeDecodeError):
+ message = ''
+ try:
+ data = unicode(data)
+ except UnicodeDecodeError:
+ data = ''
raise ImporterError(
"Erreur d'import %s, contexte : %s, erreur : %s" \
% (unicode(cls), unicode(data), e.message.decode('utf-8')))