diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2017-10-12 13:02:59 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2017-10-12 13:02:59 +0200 |
commit | a2ae7228cae62c4fde1f9554372162d322e69aa9 (patch) | |
tree | 38f7b144f2083901d073dbc3a7c646f70f9e7096 /ishtar_common | |
parent | 81fc327fd0eb353fbf82dd17e23682750ad3d4c2 (diff) | |
download | Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.tar.bz2 Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.zip |
Manage postgres index and vector searches (refs #2912)
Diffstat (limited to 'ishtar_common')
-rw-r--r-- | ishtar_common/migrations/0015_auto_20171011_1644.py | 36 | ||||
-rw-r--r-- | ishtar_common/models.py | 75 | ||||
-rw-r--r-- | ishtar_common/utils.py | 42 |
3 files changed, 150 insertions, 3 deletions
diff --git a/ishtar_common/migrations/0015_auto_20171011_1644.py b/ishtar_common/migrations/0015_auto_20171011_1644.py new file mode 100644 index 000000000..a9f4499c2 --- /dev/null +++ b/ishtar_common/migrations/0015_auto_20171011_1644.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('ishtar_common', '0014_ishtarsiteprofile_preservation'), + ] + + operations = [ + migrations.AddField( + model_name='historicalorganization', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicalperson', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='organization', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='person', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + ] diff --git a/ishtar_common/models.py b/ishtar_common/models.py index 28a24115b..915415416 100644 --- a/ishtar_common/models.py +++ b/ishtar_common/models.py @@ -35,6 +35,7 @@ import tempfile import time from django.conf import settings +from django.contrib.postgres.search import SearchVectorField, SearchVector from django.core.cache import cache from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.files.uploadedfile import SimpleUploadedFile @@ -58,7 +59,7 @@ from simple_history.models import HistoricalRecords as BaseHistoricalRecords from ishtar_common.model_merging import merge_model_objects from ishtar_common.utils import get_cache, disable_for_loaddata, create_slug,\ - get_all_field_names + get_all_field_names, merge_tsvectors from ishtar_common.models_imports import ImporterModel, ImporterType, \ ImporterDefault, ImporterDefaultValues, ImporterColumn, \ @@ -917,9 +918,75 @@ class Imported(models.Model): abstract = True -class BaseHistorizedItem(Imported): +class FullSearch(models.Model): + search_vector = SearchVectorField(_("Search vector"), blank=True, null=True, + help_text=_("Auto filled at save")) + BASE_SEARCH_VECTORS = [] + INT_SEARCH_VECTORS = [] + M2M_SEARCH_VECTORS = [] + + class Meta: + abstract = True + + def update_search_vector(self, save=True): + """ + Update the search vector + :param save: True if you want to save the object immediately + :return: True if modified + """ + if not self.BASE_SEARCH_VECTORS and not self.M2M_SEARCH_VECTORS: + logger.warning("No search_vectors defined for {}".format( + self.__class__)) + return + if getattr(self, '_search_updated', None): + return + self._search_updated = True + + old_search = "" + if self.search_vector: + old_search = self.search_vector[:] + search_vectors = [] + base_q = self.__class__.objects.filter(pk=self.pk) + + # many to many have to be queried one by one otherwise only one is fetch + for M2M_SEARCH_VECTOR in self.M2M_SEARCH_VECTORS: + key = M2M_SEARCH_VECTOR.split('__')[0] + rel_key = getattr(self, key) + for item in rel_key.values('pk').all(): + query_dct = {key + "__pk": item['pk']} + q = copy.copy(base_q).filter(**query_dct) + q = q.annotate( + search=SearchVector( + M2M_SEARCH_VECTOR, + config=settings.ISHTAR_SEARCH_LANGUAGE) + ).values('search') + search_vectors.append(q.all()[0]['search']) + + # int/float are not well managed by the SearchVector + for INT_SEARCH_VECTOR in self.INT_SEARCH_VECTORS: + q = base_q.values(INT_SEARCH_VECTOR) + search_vectors.append( + "'{}':1".format(q.all()[0][INT_SEARCH_VECTOR])) + + # query "simple" fields + q = base_q.annotate( + search=SearchVector( + *self.BASE_SEARCH_VECTORS, + config=settings.ISHTAR_SEARCH_LANGUAGE + )).values('search') + search_vectors.append(q.all()[0]['search']) + self.search_vector = merge_tsvectors(search_vectors) + changed = old_search != self.search_vector + if save and changed: + self.skip_history_when_saving = True + self.save() + return changed + + +class BaseHistorizedItem(FullSearch, Imported): """ - Historized item with external ID management + Historized item with external ID management. + All historized items are searcheable """ IS_BASKET = False EXTERNAL_ID_KEY = '' @@ -1187,6 +1254,7 @@ class LightHistorizedItem(BaseHistorizedItem): super(LightHistorizedItem, self).save(*args, **kwargs) return True + PARSE_FORMULA = re.compile("{([^}]*)}") FORMULA_FILTERS = { @@ -1409,6 +1477,7 @@ def get_current_profile(force=False): def cached_site_changed(sender, **kwargs): get_current_profile(force=True) + post_save.connect(cached_site_changed, sender=IshtarSiteProfile) post_delete.connect(cached_site_changed, sender=IshtarSiteProfile) diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py index c6a4032f0..f3b1a821b 100644 --- a/ishtar_common/utils.py +++ b/ishtar_common/utils.py @@ -104,9 +104,12 @@ def cached_label_changed(sender, **kwargs): setattr(instance, cached_label, lbl) changed = True if changed: + instance._search_updated = False if hasattr(instance, '_cascade_change') and instance._cascade_change: instance.skip_history_when_saving = True instance.save() + if hasattr(instance, 'update_search_vector'): + instance.update_search_vector() updated = False if hasattr(instance, '_cached_labels_bulk_update'): updated = instance._cached_labels_bulk_update() @@ -117,6 +120,7 @@ def cached_label_changed(sender, **kwargs): item.test_obj = instance.test_obj cached_label_changed(item.__class__, instance=item) + SHORTIFY_STR = ugettext(" (...)") @@ -289,3 +293,41 @@ def get_all_related_objects(model): and f.auto_created and not f.concrete ] + +def merge_tsvectors(vectors): + """ + Parse tsvector to merge them in one string + :param vectors: list of tsvector string + :return: merged tsvector + """ + result_dict = {} + for vector in vectors: + if not vector: + continue + + current_position = 0 + if result_dict: + for key in result_dict: + max_position = max(result_dict[key]) + if max_position > current_position: + current_position = max_position + + for dct_member in vector.split(" "): + key, positions = dct_member.split(':') + key = key[1:-1] # remove quotes + positions = [int(pos) + current_position + for pos in positions.split(',')] + if key in result_dict: + result_dict[key] += positions + else: + result_dict[key] = positions + + # {'lamelie': [1, 42, 5]} => {'lamelie': "1,42,5"} + result_dict = {k: ",".join([str(val) for val in result_dict[k]]) + for k in result_dict} + # {'lamelie': "1,5", "hagarde": "2", "regarde": "4"} => + # "'lamelie':1,5 'hagarde':2 'regarde':4" + result = " ".join(["'{}':{}".format(k, result_dict[k]) + for k in result_dict]) + + return result |