summaryrefslogtreecommitdiff
path: root/ishtar_common
diff options
context:
space:
mode:
authorÉtienne Loks <etienne.loks@iggdrasil.net>2017-10-12 13:02:59 +0200
committerÉtienne Loks <etienne.loks@iggdrasil.net>2017-10-12 13:02:59 +0200
commita2ae7228cae62c4fde1f9554372162d322e69aa9 (patch)
tree38f7b144f2083901d073dbc3a7c646f70f9e7096 /ishtar_common
parent81fc327fd0eb353fbf82dd17e23682750ad3d4c2 (diff)
downloadIshtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.tar.bz2
Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.zip
Manage postgres index and vector searches (refs #2912)
Diffstat (limited to 'ishtar_common')
-rw-r--r--ishtar_common/migrations/0015_auto_20171011_1644.py36
-rw-r--r--ishtar_common/models.py75
-rw-r--r--ishtar_common/utils.py42
3 files changed, 150 insertions, 3 deletions
diff --git a/ishtar_common/migrations/0015_auto_20171011_1644.py b/ishtar_common/migrations/0015_auto_20171011_1644.py
new file mode 100644
index 000000000..a9f4499c2
--- /dev/null
+++ b/ishtar_common/migrations/0015_auto_20171011_1644.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('ishtar_common', '0014_ishtarsiteprofile_preservation'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='historicalorganization',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicalperson',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='organization',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='person',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ ]
diff --git a/ishtar_common/models.py b/ishtar_common/models.py
index 28a24115b..915415416 100644
--- a/ishtar_common/models.py
+++ b/ishtar_common/models.py
@@ -35,6 +35,7 @@ import tempfile
import time
from django.conf import settings
+from django.contrib.postgres.search import SearchVectorField, SearchVector
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.files.uploadedfile import SimpleUploadedFile
@@ -58,7 +59,7 @@ from simple_history.models import HistoricalRecords as BaseHistoricalRecords
from ishtar_common.model_merging import merge_model_objects
from ishtar_common.utils import get_cache, disable_for_loaddata, create_slug,\
- get_all_field_names
+ get_all_field_names, merge_tsvectors
from ishtar_common.models_imports import ImporterModel, ImporterType, \
ImporterDefault, ImporterDefaultValues, ImporterColumn, \
@@ -917,9 +918,75 @@ class Imported(models.Model):
abstract = True
-class BaseHistorizedItem(Imported):
+class FullSearch(models.Model):
+ search_vector = SearchVectorField(_("Search vector"), blank=True, null=True,
+ help_text=_("Auto filled at save"))
+ BASE_SEARCH_VECTORS = []
+ INT_SEARCH_VECTORS = []
+ M2M_SEARCH_VECTORS = []
+
+ class Meta:
+ abstract = True
+
+ def update_search_vector(self, save=True):
+ """
+ Update the search vector
+ :param save: True if you want to save the object immediately
+ :return: True if modified
+ """
+ if not self.BASE_SEARCH_VECTORS and not self.M2M_SEARCH_VECTORS:
+ logger.warning("No search_vectors defined for {}".format(
+ self.__class__))
+ return
+ if getattr(self, '_search_updated', None):
+ return
+ self._search_updated = True
+
+ old_search = ""
+ if self.search_vector:
+ old_search = self.search_vector[:]
+ search_vectors = []
+ base_q = self.__class__.objects.filter(pk=self.pk)
+
+ # many to many have to be queried one by one otherwise only one is fetch
+ for M2M_SEARCH_VECTOR in self.M2M_SEARCH_VECTORS:
+ key = M2M_SEARCH_VECTOR.split('__')[0]
+ rel_key = getattr(self, key)
+ for item in rel_key.values('pk').all():
+ query_dct = {key + "__pk": item['pk']}
+ q = copy.copy(base_q).filter(**query_dct)
+ q = q.annotate(
+ search=SearchVector(
+ M2M_SEARCH_VECTOR,
+ config=settings.ISHTAR_SEARCH_LANGUAGE)
+ ).values('search')
+ search_vectors.append(q.all()[0]['search'])
+
+ # int/float are not well managed by the SearchVector
+ for INT_SEARCH_VECTOR in self.INT_SEARCH_VECTORS:
+ q = base_q.values(INT_SEARCH_VECTOR)
+ search_vectors.append(
+ "'{}':1".format(q.all()[0][INT_SEARCH_VECTOR]))
+
+ # query "simple" fields
+ q = base_q.annotate(
+ search=SearchVector(
+ *self.BASE_SEARCH_VECTORS,
+ config=settings.ISHTAR_SEARCH_LANGUAGE
+ )).values('search')
+ search_vectors.append(q.all()[0]['search'])
+ self.search_vector = merge_tsvectors(search_vectors)
+ changed = old_search != self.search_vector
+ if save and changed:
+ self.skip_history_when_saving = True
+ self.save()
+ return changed
+
+
+class BaseHistorizedItem(FullSearch, Imported):
"""
- Historized item with external ID management
+ Historized item with external ID management.
+ All historized items are searcheable
"""
IS_BASKET = False
EXTERNAL_ID_KEY = ''
@@ -1187,6 +1254,7 @@ class LightHistorizedItem(BaseHistorizedItem):
super(LightHistorizedItem, self).save(*args, **kwargs)
return True
+
PARSE_FORMULA = re.compile("{([^}]*)}")
FORMULA_FILTERS = {
@@ -1409,6 +1477,7 @@ def get_current_profile(force=False):
def cached_site_changed(sender, **kwargs):
get_current_profile(force=True)
+
post_save.connect(cached_site_changed, sender=IshtarSiteProfile)
post_delete.connect(cached_site_changed, sender=IshtarSiteProfile)
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py
index c6a4032f0..f3b1a821b 100644
--- a/ishtar_common/utils.py
+++ b/ishtar_common/utils.py
@@ -104,9 +104,12 @@ def cached_label_changed(sender, **kwargs):
setattr(instance, cached_label, lbl)
changed = True
if changed:
+ instance._search_updated = False
if hasattr(instance, '_cascade_change') and instance._cascade_change:
instance.skip_history_when_saving = True
instance.save()
+ if hasattr(instance, 'update_search_vector'):
+ instance.update_search_vector()
updated = False
if hasattr(instance, '_cached_labels_bulk_update'):
updated = instance._cached_labels_bulk_update()
@@ -117,6 +120,7 @@ def cached_label_changed(sender, **kwargs):
item.test_obj = instance.test_obj
cached_label_changed(item.__class__, instance=item)
+
SHORTIFY_STR = ugettext(" (...)")
@@ -289,3 +293,41 @@ def get_all_related_objects(model):
and f.auto_created and not f.concrete
]
+
+def merge_tsvectors(vectors):
+ """
+ Parse tsvector to merge them in one string
+ :param vectors: list of tsvector string
+ :return: merged tsvector
+ """
+ result_dict = {}
+ for vector in vectors:
+ if not vector:
+ continue
+
+ current_position = 0
+ if result_dict:
+ for key in result_dict:
+ max_position = max(result_dict[key])
+ if max_position > current_position:
+ current_position = max_position
+
+ for dct_member in vector.split(" "):
+ key, positions = dct_member.split(':')
+ key = key[1:-1] # remove quotes
+ positions = [int(pos) + current_position
+ for pos in positions.split(',')]
+ if key in result_dict:
+ result_dict[key] += positions
+ else:
+ result_dict[key] = positions
+
+ # {'lamelie': [1, 42, 5]} => {'lamelie': "1,42,5"}
+ result_dict = {k: ",".join([str(val) for val in result_dict[k]])
+ for k in result_dict}
+ # {'lamelie': "1,5", "hagarde": "2", "regarde": "4"} =>
+ # "'lamelie':1,5 'hagarde':2 'regarde':4"
+ result = " ".join(["'{}':{}".format(k, result_dict[k])
+ for k in result_dict])
+
+ return result