summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@iggdrasil.net>2017-08-07 21:21:10 +0200
committerÉtienne Loks <etienne.loks@iggdrasil.net>2017-08-07 21:21:10 +0200
commitdc643fdf32e0e27b99dc560f8b2c4107b6cefc19 (patch)
tree79fa78fb06bcb9fa9a4b076f5e2ed1df06c674c5
parent53f1681fbc71096f8fe7ba951e112ff7ead48b8d (diff)
downloadIshtar-dc643fdf32e0e27b99dc560f8b2c4107b6cefc19.tar.bz2
Ishtar-dc643fdf32e0e27b99dc560f8b2c4107b6cefc19.zip
Imports: refactoring of importer - add post process importers set in database
-rw-r--r--ishtar_common/data_importer.py380
1 files changed, 239 insertions, 141 deletions
diff --git a/ishtar_common/data_importer.py b/ishtar_common/data_importer.py
index ebf85f816..9a4435c27 100644
--- a/ishtar_common/data_importer.py
+++ b/ishtar_common/data_importer.py
@@ -41,6 +41,20 @@ NEW_LINE_BREAK = '#####@@@#####'
RE_FILTER_CEDEX = re.compile("(.*) *(?: *CEDEX|cedex|Cedex|Cédex|cédex *\d*)")
+def post_importer_action(func):
+ def wrapper(self, context, value):
+ return func(self, context, value)
+ wrapper.importer_trigger = 'post'
+ return wrapper
+
+
+def pre_importer_action(func):
+ def wrapper(self, context, value):
+ return func(self, context, value)
+ wrapper.importer_trigger = 'pre'
+ return wrapper
+
+
class ImportFormater(object):
def __init__(self, field_name, formater=None, required=True, through=None,
through_key=None, through_dict=None,
@@ -825,6 +839,8 @@ class Importer(object):
for related_key in item.RELATED_POST_PROCESS:
for related in getattr(item, related_key).all():
related.save()
+ for func, context, value in self._item_post_processing:
+ getattr(item, func)(context, value)
return item
def initialize(self, table, output='silent', choose_default=False):
@@ -990,6 +1006,7 @@ class Importer(object):
return
self._throughs = [] # list of (formater, value)
self._post_processing = [] # list of (formater, value)
+ self._item_post_processing = []
data = {}
# keep in database the raw line for testing purpose
@@ -1096,7 +1113,7 @@ class Importer(object):
for formater, val in self._post_processing:
formater.post_process(obj, data, val, owner=self.history_modifier)
- obj = self.post_processing(obj, data)
+ self.post_processing(obj, data)
def _row_processing(self, c_row, idx_col, idx_line, val, data):
if idx_col >= len(self.line_format):
@@ -1239,7 +1256,194 @@ class Importer(object):
concat_str=concat_str[idx])
c_row.append(u" ; ".join([v for v in c_values]))
+ def _get_field_m2m(self, attribute, data, c_path, new_created,
+ field_object):
+ """
+    Manage an m2m field from raw data
+
+ :param attribute: attribute name
+ :param data: current data dictionary
+ :param c_path: attribute path from the main model point of view
+ :param new_created: dict of forced newly created items to prevent
+ multiple creation
+ :param field_object: django field object for this attribute
+ :return: None
+ """
+ m2ms = []
+
+ many_values = data.pop(attribute)
+ if hasattr(field_object, 'rel'):
+ model = field_object.rel.to
+ elif hasattr(field_object, 'to'):
+ model = field_object.to
+ elif hasattr(field_object, 'model'):
+ model = field_object.model
+ if type(many_values) not in (list, tuple):
+ many_values = [many_values]
+
+ for val in many_values:
+ if val.__class__ == model:
+ # the value is a model instance: it is OK!
+ m2ms.append((attribute, val))
+ continue
+ if type(val) != dict:
+ # value is not a dict, we don't know what to do with it...
+ continue
+ vals = []
+
+            # construct one dict for each value
+ default_dict = {}
+
+ # # get default values
+ p = [attribute]
+ if c_path:
+ p = list(c_path) + p
+ p = tuple(p)
+ if p in self._defaults:
+ for k in self._defaults[p]:
+ default_dict[k] = self._defaults[p][k]
+
+ # # init with simple values that will be duplicated
+ for key in val.keys():
+ if type(val[key]) not in (list, tuple):
+ default_dict[key] = val[key]
+ vals.append(default_dict.copy())
+
+ # # manage multiple values
+ for key in val.keys():
+ if type(val[key]) in (list, tuple):
+ for idx, v in enumerate(val[key]):
+ if len(vals) <= idx:
+ vals.append(default_dict.copy())
+ vals[idx][key] = v
+
+ # check that m2m are not empty
+ notempty = False
+ for dct in vals:
+ for k in dct:
+ if dct[k] not in ("", None):
+ notempty = True
+ break
+ if not notempty:
+ continue
+
+ field_names = model._meta.get_all_field_names()
+ for v in vals:
+ if 'history_modifier' in field_names:
+ if 'defaults' not in v:
+ v['defaults'] = {}
+ v['defaults']['history_modifier'] = \
+ self.history_modifier
+ m2m_m2ms = []
+ c_c_path = c_path[:]
+ for k in v.keys():
+ if k not in field_names:
+ continue
+ self.get_field(model, k, v, m2m_m2ms, c_c_path,
+ new_created)
+ if '__force_new' in v:
+ created = v.pop('__force_new')
+ key = u";".join([u"{}-{}".format(k, v[k])
+ for k in sorted(v.keys())])
+ # only one forced creation
+ if attribute in new_created \
+ and key in new_created[attribute]:
+ continue
+ if attribute not in new_created:
+ new_created[attribute] = []
+ new_created[attribute].append(key)
+ has_values = bool([1 for k in v if v[k]])
+ if has_values:
+ if self.MODEL_CREATION_LIMIT and \
+ model not in self.MODEL_CREATION_LIMIT:
+ raise self._get_improperly_conf_error(model)
+ v = model.objects.create(**v)
+ else:
+ continue
+ else:
+ v['defaults'] = v.get('defaults', {})
+ extra_fields = {}
+ # "File" type is a temp object and can be different
+ # for the same filename - it must be treated
+ # separately
+ for field in model._meta.fields:
+ k = field.name
+ # attr_class is a FileField attribute
+ if hasattr(field, 'attr_class') and k in v:
+ extra_fields[k] = v.pop(k)
+ created = False
+ if not self.MODEL_CREATION_LIMIT or \
+ model in self.MODEL_CREATION_LIMIT:
+ v, created = model.objects.get_or_create(
+ **v)
+ else:
+ get_v = v.copy()
+ if 'defaults' in get_v:
+ get_v.pop('defaults')
+ try:
+ v = model.objects.get(**get_v)
+ except model.DoesNotExist:
+ raise self._get_does_not_exist_in_db_error(
+ model, get_v)
+ changed = False
+ for k in extra_fields.keys():
+ if extra_fields[k]:
+ changed = True
+ setattr(v, k, extra_fields[k])
+ if changed:
+ v.save()
+ for att, objs in m2m_m2ms:
+ if type(objs) not in (list, tuple):
+ objs = [objs]
+ for obj in objs:
+ getattr(v, att).add(obj)
+ if self.import_instance \
+ and hasattr(v, 'imports') and created:
+ v.imports.add(self.import_instance)
+ m2ms.append((attribute, v))
+ return m2ms
+
+ def _set_importer_trigger(self, cls, attribute, data):
+ """
+ An importer trigger is used. Stock it for later execution and remove
+ it from current data dict.
+
+ :param cls: current model
+ :param attribute: attribute name
+ :param data: current data dictionary
+ :return: None
+ """
+ func = getattr(cls, attribute)
+ if func.importer_trigger == 'pre':
+ pass # TODO
+ elif func.importer_trigger == 'post':
+        # store the trigger for execution after the item is saved
+ self._item_post_processing.append([attribute, data,
+ data[attribute]])
+ else:
+        logger.warning("Unknown importer_trigger '{}' for '{}'".format(
+ func.importer_trigger, attribute
+ ))
+ data.pop(attribute)
+
def get_field(self, cls, attribute, data, m2ms, c_path, new_created):
+ """
+ Get field from raw data
+
+ :param cls: current model
+ :param attribute: attribute name
+ :param data: current data dictionary
+ :param m2ms: many to many list of tuple: (m2m key, m2m value)
+ :param c_path: attribute path from the main model point of view
+ :param new_created: dict of forced newly created items to prevent
+ multiple creation
+ :return: None
+ """
+ if hasattr(cls, attribute) and \
+ getattr(getattr(cls, attribute), 'importer_trigger', None):
+ # importer trigger
+ self._set_importer_trigger(cls, attribute, data)
+ return
try:
field_object, model, direct, m2m = \
cls._meta.get_field_by_name(attribute)
@@ -1248,154 +1452,48 @@ class Importer(object):
_(u"Importer configuration error: field \"{}\" does not exist "
u"for {}.")).format(attribute, cls._meta.verbose_name))
if m2m:
- many_values = data.pop(attribute)
- if hasattr(field_object, 'rel'):
- model = field_object.rel.to
- elif hasattr(field_object, 'to'):
- model = field_object.to
- elif hasattr(field_object, 'model'):
- model = field_object.model
- if type(many_values) not in (list, tuple):
- many_values = [many_values]
- for val in many_values:
- if val.__class__ == model:
- m2ms.append((attribute, val))
- elif val.__class__ != model and type(val) == dict:
- vals = []
-
- # contruct many dict for each values
- default_dict = {}
-
- # # get default values
- p = [attribute]
- if c_path:
- p = list(c_path) + p
- p = tuple(p)
- if p in self._defaults:
- for k in self._defaults[p]:
- default_dict[k] = self._defaults[p][k]
- # # init with simple values that will be duplicated
- for key in val.keys():
- if type(val[key]) not in (list, tuple):
- default_dict[key] = val[key]
- vals.append(default_dict.copy())
- # # manage multiple values
- for key in val.keys():
- if type(val[key]) in (list, tuple):
- for idx, v in enumerate(val[key]):
- if len(vals) <= idx:
- vals.append(default_dict.copy())
- vals[idx][key] = v
-
- # check that m2m are not empty
- notempty = False
- for dct in vals:
- for k in dct:
- if dct[k] not in ("", None):
- notempty = True
- break
- if not notempty:
- continue
-
- field_names = model._meta.get_all_field_names()
- for v in vals:
- if 'history_modifier' in field_names:
- if 'defaults' not in v:
- v['defaults'] = {}
- v['defaults']['history_modifier'] = \
- self.history_modifier
- m2m_m2ms = []
- c_c_path = c_path[:]
- for k in v.keys():
- if k not in field_names:
- continue
- self.get_field(model, k, v, m2m_m2ms, c_c_path,
- new_created)
- if '__force_new' in v:
- created = v.pop('__force_new')
- key = u";".join([u"{}-{}".format(k, v[k])
- for k in sorted(v.keys())])
- # only one forced creation
- if attribute in new_created \
- and key in new_created[attribute]:
- continue
- if attribute not in new_created:
- new_created[attribute] = []
- new_created[attribute].append(key)
- has_values = bool([1 for k in v if v[k]])
- if has_values:
- if self.MODEL_CREATION_LIMIT and \
- model not in self.MODEL_CREATION_LIMIT:
- raise self._get_improperly_conf_error(model)
- v = model.objects.create(**v)
- else:
- continue
- else:
- v['defaults'] = v.get('defaults', {})
- extra_fields = {}
- # "File" type is a temp object and can be different
- # for the same filename - it must be treated
- # separately
- for field in model._meta.fields:
- k = field.name
- # attr_class is a FileField attribute
- if hasattr(field, 'attr_class') and k in v:
- extra_fields[k] = v.pop(k)
- created = False
- if not self.MODEL_CREATION_LIMIT or \
- model in self.MODEL_CREATION_LIMIT:
- v, created = model.objects.get_or_create(
- **v)
- else:
- get_v = v.copy()
- if 'defaults' in get_v:
- get_v.pop('defaults')
- try:
- v = model.objects.get(**get_v)
- except model.DoesNotExist:
- raise self._get_does_not_exist_in_db_error(
- model, get_v)
- changed = False
- for k in extra_fields.keys():
- if extra_fields[k]:
- changed = True
- setattr(v, k, extra_fields[k])
- if changed:
- v.save()
- for att, objs in m2m_m2ms:
- if type(objs) not in (list, tuple):
- objs = [objs]
- for obj in objs:
- getattr(v, att).add(obj)
- if self.import_instance \
- and hasattr(v, 'imports') and created:
- v.imports.add(self.import_instance)
- m2ms.append((attribute, v))
- elif hasattr(field_object, 'rel') and field_object.rel:
- if type(data[attribute]) == dict:
- # put history_modifier for every created item
- if 'history_modifier' in \
- field_object.rel.to._meta.get_all_field_names():
- data[attribute]['history_modifier'] = \
- self.history_modifier
- try:
- c_path.append(attribute)
- data[attribute], created = self.get_object(
- field_object.rel.to, data[attribute].copy(), c_path)
- except ImporterError, msg:
- self.errors.append((self.idx_line, None, msg))
- data[attribute] = None
- elif type(data[attribute]) == list:
- data[attribute] = data[attribute][0]
+ m2ms += self._get_field_m2m(attribute, data, c_path,
+ new_created, field_object)
+ return
+ if not hasattr(field_object, 'rel') or not field_object.rel:
+ return
+ if type(data[attribute]) == list:
+ # extract the first item from list
+ # be careful if the list has more than one item this is arbitrary
+ if len(data[attribute]) > 1:
+ logger.warning(
+ 'Import {}: {} has many when only one is expected. Get '
+ 'the first one but it is not OK!'.format(
+ self.import_instance, attribute))
+ data[attribute] = data[attribute][0]
+ return
+ if type(data[attribute]) != dict:
+ # we treat only dict formated values
+ return
+ # put history_modifier for every created item
+ if 'history_modifier' in \
+ field_object.rel.to._meta.get_all_field_names():
+ data[attribute]['history_modifier'] = \
+ self.history_modifier
+ try:
+ c_path.append(attribute)
+ data[attribute], created = self.get_object(
+ field_object.rel.to, data[attribute].copy(), c_path)
+ except ImporterError, msg:
+ self.errors.append((self.idx_line, None, msg))
+ data[attribute] = None
def get_object(self, cls, data, path=[]):
m2ms = []
if type(data) != dict:
+ # if data is not a dict we don't know what to do
return data, False
+
is_empty = not bool(
[k for k in data if k not in ('history_modifier', 'defaults')
and data[k]])
if is_empty:
+ # if no value, no creation
return None, False
c_path = path[:]
@@ -1437,7 +1535,7 @@ class Importer(object):
if type(create_dict[k]) == dict:
create_dict.pop(k)
# File doesn't like deepcopy
- if type(create_dict[k]) == File:
+ elif type(create_dict[k]) == File:
create_dict[k] = copy.copy(data[k])
# default values