summary refs log tree commit diff
path: root/ishtar_common/data_importer.py
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@proxience.com> 2015-10-20 00:54:55 +0200
committer Étienne Loks <etienne.loks@proxience.com> 2015-10-20 00:54:55 +0200
commit 3a83c9d3636f7f3f9f3d2580de43cf47d598d4f6 (patch)
tree 33a236a768d6f6a8082dd9cc20686515867f8a05 /ishtar_common/data_importer.py
parent 87afa869477c69bbb3c36083fea076e47d565d4c (diff)
download Ishtar-3a83c9d3636f7f3f9f3d2580de43cf47d598d4f6.tar.bz2
Ishtar-3a83c9d3636f7f3f9f3d2580de43cf47d598d4f6.zip
Imports: improve data matching
Diffstat (limited to 'ishtar_common/data_importer.py')
-rw-r--r-- ishtar_common/data_importer.py 127
1 files changed, 108 insertions, 19 deletions
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py
index 0a63656ab..972d286ed 100644
--- a/ishtar_common/data_importer.py
+++ b/ishtar_common/data_importer.py
@@ -64,6 +64,19 @@ class ImportFormater(object):
self.comment = comment
self.force_new = force_new
+ def reinit_db_target(self, db_targets):
+ if not self.formater:
+ return
+ elif type(self.formater) not in (list, tuple):
+ self.formater.db_target = db_targets[0]
+ else:
+ for idx, formater in enumerate(self.formater):
+ formater.db_target = db_targets[idx]
+ formater.init_db_target()
+
+ def init_db_target(self):
+ pass
+
def __unicode__(self):
return self.field_name
@@ -112,6 +125,9 @@ class Formater(object):
import_instance=None):
return
+ def init_db_target(self):
+ pass
+
class ChoiceChecker(object):
def report_new(self, comment):
@@ -243,17 +259,21 @@ class StrChoiceFormater(Formater, ChoiceChecker):
if model and v:
v = model.objects.get(pk=v)
self.equiv_dict[value] = v
- if self.db_target:
- for target_key in self.db_target.keys.filter(is_set=True).all():
- value = target_key.value
- if not self.strict:
- value = slugify(value)
- if value in self.equiv_dict:
- continue
- v = target_key.key
- if model and v:
- v = model.objects.get(pk=v)
- self.equiv_dict[value] = v
+ self.init_db_target()
+
+ def init_db_target(self):
+ if not self.db_target:
+ return
+ for target_key in self.db_target.keys.filter(is_set=True).all():
+ value = target_key.value
+ if not self.strict:
+ value = slugify(value)
+ if value in self.equiv_dict:
+ continue
+ v = target_key.key
+ if self.model and v:
+ v = self.model.objects.get(pk=v)
+ self.equiv_dict[value] = v
def prepare(self, value):
return unicode(value).strip()
@@ -275,6 +295,26 @@ class StrChoiceFormater(Formater, ChoiceChecker):
def check(self, values, output=None, comment='', choose_default=False,
import_instance=None):
+ from ishtar_common.models import TargetKey
+ if self.db_target:
+ q = {'target': self.db_target,
+ 'associated_import': import_instance,
+ 'is_set': True
+ }
+ for v in self.equiv_dict:
+ q['key'] = v
+ value = self.equiv_dict[v]
+ if hasattr(value, 'txt_idx'):
+ value = value.txt_idx
+ elif hasattr(value, 'pk'):
+ value = value.pk
+ q['value'] = value
+ with transaction.commit_on_success():
+ try:
+ t, created = TargetKey.objects.get_or_create(**q)
+ except IntegrityError:
+ pass
+
if (not output or output == 'silent') and not choose_default:
return
if self.many_split:
@@ -454,15 +494,19 @@ class StrToBoolean(Formater, ChoiceChecker):
self.strict = strict
self.db_target = db_target
self.missings = set()
- if self.db_target:
- for target_key in self.db_target.keys.filter(is_set=True).all():
- key = self.prepare(target_key.key)
- if key in self.dct:
- continue
- v = target_key.format()
- self.dct[key] = v
+ self.init_db_target()
self.new_keys = {}
+ def init_db_target(self):
+ if not self.db_target:
+ return
+ for target_key in self.db_target.keys.filter(is_set=True).all():
+ key = self.prepare(target_key.key)
+ if key in self.dct:
+ continue
+ v = target_key.format()
+ self.dct[key] = v
+
def prepare(self, value):
value = unicode(value).strip()
if not self.strict:
@@ -631,7 +675,8 @@ class Importer(object):
models.ImportTarget.objects.get_or_create(
column=column, target=target, formater_type=formater_model,
force_new=getattr(formater, 'force_new', False),
- regexp_filter=regexp_filter)
+ regexp_filter=regexp_filter,
+ comment=line.comment)
return True
def __init__(self, skip_lines=0, reference_header=None,
@@ -654,6 +699,7 @@ class Importer(object):
self.line_format = copy.copy(self.LINE_FORMAT)
self.import_instance = import_instance
self.archive = None
+ self.DB_TARGETS = {}
if import_instance and import_instance.imported_images:
self.archive = import_instance.imported_images
self._defaults = self.DEFAULTS.copy()
@@ -699,6 +745,18 @@ class Importer(object):
vals[idx_col].append(val)
for idx, formater in enumerate(self.line_format):
if formater and idx < len(vals):
+
+ if self.DB_TARGETS:
+ field_names = formater.field_name
+ if type(field_names) not in (list, tuple):
+ field_names = [field_names]
+ db_targets = []
+ for field_name in field_names:
+ db_targets.append(
+ self.DB_TARGETS["{}-{}".format(
+ idx + 1, field_name)])
+ formater.reinit_db_target(db_targets)
+
formater.init(vals[idx], output, choose_default=choose_default,
import_instance=self.import_instance)
@@ -707,6 +765,30 @@ class Importer(object):
self.initialize(table, self.output, choose_default=choose_default)
self._importation(table)
+ def _associate_db_target_to_formaters(self):
+ if not self.import_instance:
+ return
+ self.DB_TARGETS = {}
+ from ishtar_common.models import ImporterColumn, ImportTarget
+ for idx, line in enumerate(self.line_format):
+ idx += 1
+ if not line:
+ continue
+ col = ImporterColumn.objects.get(
+ importer_type=self.import_instance.importer_type,
+ col_number=idx)
+ formater = line.formater
+ targets = line.field_name
+ if type(formater) not in (list, tuple):
+ formater = [formater]
+ targets = [targets]
+ for target in targets:
+ tg = target
+ if type(target) == list and type(target[0]) == list:
+ tg = target[0]
+ self.DB_TARGETS["{}-{}".format(idx, tg)] = \
+ ImportTarget.objects.get(column=col, target=tg)
+
@classmethod
def _field_name_to_data_dict(
cls, field_name, value, data, force_value=False, concat=False,
@@ -942,6 +1024,13 @@ class Importer(object):
if many_values:
values = re.split(func.many_split, values[0])
formated_values = []
+
+ if self.DB_TARGETS:
+ field_name = formater.field_name
+ if type(field_name) in (list, tuple):
+ field_name = field_name[idx_v]
+ func.reinit_db_target(
+ self.DB_TARGETS["{}-{}".format(idx_col, field_name)])
for idx, v in enumerate(values):
value = None
try: