Manage postgres index and vector searches (refs #2912)

author: Étienne Loks <etienne.loks@iggdrasil.net> 2017-10-12 13:02:59 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2017-10-12 13:02:59 +0200
commit: a2ae7228cae62c4fde1f9554372162d322e69aa9 (patch)
tree: 38f7b144f2083901d073dbc3a7c646f70f9e7096 /ishtar_common
parent: 81fc327fd0eb353fbf82dd17e23682750ad3d4c2 (diff)
download: Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.tar.bz2
Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.zip
3 files changed, 150 insertions, 3 deletions
diff --git a/ishtar_common/migrations/0015_auto_20171011_1644.py b/ishtar_common/migrations/0015_auto_20171011_1644.py
new file mode 100644
index 000000000..a9f4499c2
--- /dev/null
+++ b/ishtar_common/migrations/0015_auto_20171011_1644.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+# Schema migration adding a nullable ``search_vector`` column
+# (PostgreSQL SearchVectorField) to the organization and person models and
+# to their "historical" counterparts.
+class Migration(migrations.Migration):
+
+    # must be applied after migration 0014 of the same application
+    dependencies = [
+        ('ishtar_common', '0014_ishtarsiteprofile_preservation'),
+    ]
+
+    # the four operations are identical except for the targeted model: the
+    # field is optional (blank/null) so existing rows need no default value
+    operations = [
+        migrations.AddField(
+            model_name='historicalorganization',
+            name='search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+        ),
+        migrations.AddField(
+            model_name='historicalperson',
+            name='search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+        ),
+        migrations.AddField(
+            model_name='organization',
+            name='search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+        ),
+        migrations.AddField(
+            model_name='person',
+            name='search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+        ),
+    ]
diff --git a/ishtar_common/models.py b/ishtar_common/models.py
index 28a24115b..915415416 100644
--- a/ishtar_common/models.py
+++ b/ishtar_common/models.py
@@ -35,6 +35,7 @@ import tempfile
 import time
 
 from django.conf import settings
+from django.contrib.postgres.search import SearchVectorField, SearchVector
 from django.core.cache import cache
 from django.core.exceptions import ObjectDoesNotExist, ValidationError
 from django.core.files.uploadedfile import SimpleUploadedFile
@@ -58,7 +59,7 @@ from simple_history.models import HistoricalRecords as BaseHistoricalRecords
 
 from ishtar_common.model_merging import merge_model_objects
 from ishtar_common.utils import get_cache, disable_for_loaddata, create_slug,\
-    get_all_field_names
+    get_all_field_names, merge_tsvectors
 
 from ishtar_common.models_imports import ImporterModel, ImporterType, \
     ImporterDefault, ImporterDefaultValues, ImporterColumn, \
@@ -917,9 +918,75 @@ class Imported(models.Model):
         abstract = True
 
 
-class BaseHistorizedItem(Imported):
+class FullSearch(models.Model):
+    """
+    Abstract base adding a full text ``search_vector`` column to a model.
+
+    Concrete models declare what is indexed through the three
+    ``*_SEARCH_VECTORS`` lists; ``update_search_vector`` rebuilds the column
+    from them.
+    """
+    search_vector = SearchVectorField(_("Search vector"), blank=True, null=True,
+                                      help_text=_("Auto filled at save"))
+    # field lookups indexed together by a single SearchVector annotation
+    BASE_SEARCH_VECTORS = []
+    # lookups whose value is indexed verbatim as one lexeme (int/float fields)
+    INT_SEARCH_VECTORS = []
+    # lookups whose first component (before "__") is a related manager of
+    # the instance; each related item is queried separately
+    M2M_SEARCH_VECTORS = []
+
+    class Meta:
+        abstract = True
+
+    def update_search_vector(self, save=True):
+        """
+        Rebuild the search vector from the *_SEARCH_VECTORS declarations.
+
+        Nothing is done when no search vector is declared, when the instance
+        has no primary key yet (the indexed values are read back from the
+        database) or when the ``_search_updated`` flag is already set on the
+        instance.
+
+        :param save: True if you want to save the object immediately
+        :return: True if modified, False otherwise
+        """
+        if not (self.BASE_SEARCH_VECTORS or self.INT_SEARCH_VECTORS
+                or self.M2M_SEARCH_VECTORS):
+            logger.warning("No search_vectors defined for {}".format(
+                self.__class__))
+            return False
+        if self.pk is None or getattr(self, '_search_updated', None):
+            return False
+        # set before saving: save() may lead back to this method
+        self._search_updated = True
+
+        old_search = self.search_vector or ""
+        search_vectors = []
+        base_q = self.__class__.objects.filter(pk=self.pk)
+
+        # many to many have to be queried one by one otherwise only one
+        # related item is fetched
+        for M2M_SEARCH_VECTOR in self.M2M_SEARCH_VECTORS:
+            key = M2M_SEARCH_VECTOR.split('__')[0]
+            for item in getattr(self, key).values('pk'):
+                # filter() returns a new queryset: base_q is left untouched
+                q = base_q.filter(**{key + "__pk": item['pk']}).annotate(
+                    search=SearchVector(
+                        M2M_SEARCH_VECTOR,
+                        config=settings.ISHTAR_SEARCH_LANGUAGE)
+                ).values('search')
+                search_vectors.append(q[0]['search'])
+
+        # int/float are not well managed by the SearchVector
+        for INT_SEARCH_VECTOR in self.INT_SEARCH_VECTORS:
+            value = base_q.values(INT_SEARCH_VECTOR)[0][INT_SEARCH_VECTOR]
+            if value is None:
+                # an empty field must not be indexed as the word "None"
+                continue
+            search_vectors.append("'{}':1".format(value))
+
+        # query "simple" fields - a SearchVector built without any
+        # expression would render invalid SQL, so skip the query when empty
+        if self.BASE_SEARCH_VECTORS:
+            q = base_q.annotate(
+                search=SearchVector(
+                    *self.BASE_SEARCH_VECTORS,
+                    config=settings.ISHTAR_SEARCH_LANGUAGE
+                )).values('search')
+            search_vectors.append(q[0]['search'])
+
+        self.search_vector = merge_tsvectors(search_vectors)
+        changed = old_search != self.search_vector
+        if save and changed:
+            # NOTE(review): presumably read by the history machinery so that
+            # this technical save adds no history entry - confirm
+            self.skip_history_when_saving = True
+            self.save()
+        return changed
+
+
+class BaseHistorizedItem(FullSearch, Imported):
     """
-    Historized item with external ID management
+    Historized item with external ID management.
+    All historized items are searchable
     """
     IS_BASKET = False
     EXTERNAL_ID_KEY = ''
@@ -1187,6 +1254,7 @@ class LightHistorizedItem(BaseHistorizedItem):
         super(LightHistorizedItem, self).save(*args, **kwargs)
         return True
 
+
 PARSE_FORMULA = re.compile("{([^}]*)}")
 
 FORMULA_FILTERS = {
@@ -1409,6 +1477,7 @@ def get_current_profile(force=False):
 def cached_site_changed(sender, **kwargs):
     get_current_profile(force=True)
 
+
 post_save.connect(cached_site_changed, sender=IshtarSiteProfile)
 post_delete.connect(cached_site_changed, sender=IshtarSiteProfile)
 
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py
index c6a4032f0..f3b1a821b 100644
--- a/ishtar_common/utils.py
+++ b/ishtar_common/utils.py
@@ -104,9 +104,12 @@ def cached_label_changed(sender, **kwargs):
             setattr(instance, cached_label, lbl)
             changed = True
     if changed:
+        instance._search_updated = False
         if hasattr(instance, '_cascade_change') and instance._cascade_change:
             instance.skip_history_when_saving = True
         instance.save()
+    if hasattr(instance, 'update_search_vector'):
+        instance.update_search_vector()
     updated = False
     if hasattr(instance, '_cached_labels_bulk_update'):
         updated = instance._cached_labels_bulk_update()
@@ -117,6 +120,7 @@ def cached_label_changed(sender, **kwargs):
                 item.test_obj = instance.test_obj
             cached_label_changed(item.__class__, instance=item)
 
+
 SHORTIFY_STR = ugettext(" (...)")
 
 
@@ -289,3 +293,41 @@ def get_all_related_objects(model):
         and f.auto_created and not f.concrete
     ]
 
+
+def merge_tsvectors(vectors):
+    """
+    Merge several PostgreSQL tsvector strings into a single one.
+
+    Each vector is expected in its textual form: ``'lexeme':1,5 'other':2``.
+    Positions of every vector after the first are shifted by the highest
+    position already merged, so the lexemes of a later vector are placed
+    after those of the earlier ones. Lexemes are emitted in first-seen
+    order. Weight labels (``3A``) and position-less entries (``'word'``) are
+    kept as they are.
+
+    :param vectors: list of tsvector strings (None or empty items are
+        ignored)
+    :return: merged tsvector string ("" when there is nothing to merge)
+    """
+    merged = {}  # lexeme -> list of shifted positions, as strings
+    lexemes = []  # first-seen order (plain dicts are unordered on old Pythons)
+    offset = 0  # highest position reached by the vectors already merged
+    for vector in vectors:
+        if not vector:
+            continue
+        highest = offset
+        # split() without argument tolerates repeated or trailing spaces
+        for member in vector.split():
+            # a lexeme may itself contain ":" ('10:30':1): only a colon
+            # placed right after the closing quote introduces positions
+            lexeme, sep, raw_positions = member.rpartition(':')
+            if not sep or not lexeme.endswith("'"):
+                # position-less (stripped) entry
+                lexeme, raw_positions = member, ''
+            key = lexeme[1:-1]  # remove quotes
+            if key not in merged:
+                merged[key] = []
+                lexemes.append(key)
+            for raw_position in raw_positions.split(','):
+                if not raw_position:
+                    continue
+                # a position may be followed by a weight label: "3A"
+                number = raw_position.rstrip('ABCDabcd')
+                weight = raw_position[len(number):]
+                position = int(number) + offset
+                highest = max(highest, position)
+                merged[key].append("{}{}".format(position, weight))
+        # the shift only changes between vectors, never inside one
+        offset = highest
+
+    # {'lamelie': ["1", "5"], "hagarde": ["2"], "regarde": ["4"]} =>
+    # "'lamelie':1,5 'hagarde':2 'regarde':4"
+    members = []
+    for key in lexemes:
+        if merged[key]:
+            members.append("'{}':{}".format(key, ",".join(merged[key])))
+        else:
+            members.append("'{}'".format(key))
+    return " ".join(members)
author	Étienne Loks <etienne.loks@iggdrasil.net>	2017-10-12 13:02:59 +0200
committer	Étienne Loks <etienne.loks@iggdrasil.net>	2017-10-12 13:02:59 +0200
commit	a2ae7228cae62c4fde1f9554372162d322e69aa9 (patch)
tree	38f7b144f2083901d073dbc3a7c646f70f9e7096 /ishtar_common
parent	81fc327fd0eb353fbf82dd17e23682750ad3d4c2 (diff)
download	Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.tar.bz2 Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.zip