summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2017-10-12 13:02:59 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2017-10-12 13:02:59 +0200
commit a2ae7228cae62c4fde1f9554372162d322e69aa9 (patch)
tree 38f7b144f2083901d073dbc3a7c646f70f9e7096
parent 81fc327fd0eb353fbf82dd17e23682750ad3d4c2 (diff)
download Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.tar.bz2
Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.zip
Manage postgres index and vector searches (refs #2912)
-rw-r--r-- archaeological_context_records/migrations/0010_auto_20171011_1644.py 26
-rw-r--r-- archaeological_files/migrations/0008_auto_20171011_1644.py 26
-rw-r--r-- archaeological_finds/migrations/0010_auto_20171011_1644.py 61
-rw-r--r-- archaeological_operations/migrations/0009_auto_20171011_1644.py 51
-rw-r--r-- archaeological_operations/models.py 5
-rw-r--r-- archaeological_operations/tests.py 15
-rw-r--r-- archaeological_warehouse/migrations/0008_auto_20171011_1644.py 36
-rw-r--r-- example_project/settings.py 1
-rw-r--r-- ishtar_common/migrations/0015_auto_20171011_1644.py 36
-rw-r--r-- ishtar_common/models.py 75
-rw-r--r-- ishtar_common/utils.py 42
11 files changed, 371 insertions, 3 deletions
diff --git a/archaeological_context_records/migrations/0010_auto_20171011_1644.py b/archaeological_context_records/migrations/0010_auto_20171011_1644.py
new file mode 100644
index 000000000..379110e44
--- /dev/null
+++ b/archaeological_context_records/migrations/0010_auto_20171011_1644.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Schema migration adding a nullable ``search_vector`` field
+ # (SearchVectorField) to ``contextrecord`` and ``historicalcontextrecord``.
+
+ # Applied on top of the previous migration of this application.
+ dependencies = [
+ ('archaeological_context_records', '0009_auto_20170829_1639'),
+ ]
+
+ # The field definition is identical for both models: optional
+ # (blank/null allowed) with the help text 'Auto filled at save'.
+ operations = [
+ migrations.AddField(
+ model_name='contextrecord',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicalcontextrecord',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ ]
diff --git a/archaeological_files/migrations/0008_auto_20171011_1644.py b/archaeological_files/migrations/0008_auto_20171011_1644.py
new file mode 100644
index 000000000..33dfbf59e
--- /dev/null
+++ b/archaeological_files/migrations/0008_auto_20171011_1644.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Schema migration adding a nullable ``search_vector`` field
+ # (SearchVectorField) to ``file`` and ``historicalfile``.
+
+ # Applied on top of the previous migration of this application.
+ dependencies = [
+ ('archaeological_files', '0007_auto_20170826_1152'),
+ ]
+
+ # The field definition is identical for both models: optional
+ # (blank/null allowed) with the help text 'Auto filled at save'.
+ operations = [
+ migrations.AddField(
+ model_name='file',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicalfile',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ ]
diff --git a/archaeological_finds/migrations/0010_auto_20171011_1644.py b/archaeological_finds/migrations/0010_auto_20171011_1644.py
new file mode 100644
index 000000000..ce892e96d
--- /dev/null
+++ b/archaeological_finds/migrations/0010_auto_20171011_1644.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Schema migration adding a nullable ``search_vector`` field
+ # (SearchVectorField) to every model named in the operations below:
+ # basefind, find, property, treatment, treatmentfile and the
+ # ``historical*`` models of this application.
+
+ # Applied on top of the previous migration of this application.
+ dependencies = [
+ ('archaeological_finds', '0009_auto_20171010_1644'),
+ ]
+
+ # The field definition is identical for all models: optional
+ # (blank/null allowed) with the help text 'Auto filled at save'.
+ operations = [
+ migrations.AddField(
+ model_name='basefind',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='find',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicalbasefind',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicalfind',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicaltreatment',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicaltreatmentfile',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='property',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='treatment',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='treatmentfile',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ ]
diff --git a/archaeological_operations/migrations/0009_auto_20171011_1644.py b/archaeological_operations/migrations/0009_auto_20171011_1644.py
new file mode 100644
index 000000000..18a284a21
--- /dev/null
+++ b/archaeological_operations/migrations/0009_auto_20171011_1644.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Schema migration adding a nullable ``search_vector`` field
+ # (SearchVectorField) to every model named in the operations below:
+ # administrativeact, archaeologicalsite, operation, parcel, parcelowner
+ # and the ``historical*`` models of this application.
+
+ # Applied on top of the previous migration of this application.
+ dependencies = [
+ ('archaeological_operations', '0008_auto_20170829_1639'),
+ ]
+
+ # The field definition is identical for all models: optional
+ # (blank/null allowed) with the help text 'Auto filled at save'.
+ operations = [
+ migrations.AddField(
+ model_name='administrativeact',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='archaeologicalsite',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicaladministrativeact',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicaloperation',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='operation',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='parcel',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='parcelowner',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ ]
diff --git a/archaeological_operations/models.py b/archaeological_operations/models.py
index 54ed96cec..d55a2e689 100644
--- a/archaeological_operations/models.py
+++ b/archaeological_operations/models.py
@@ -248,6 +248,10 @@ class Operation(ClosedItem, BaseHistorizedItem, ImageModel, OwnPerms,
'archaeological_sites__reference': _(u"Archaeological sites ("
u"reference)"),
}
+ BASE_SEARCH_VECTORS = ["scientist__raw_name", "cached_label",
+ "common_name", "comment", "address", "old_code"]
+ INT_SEARCH_VECTORS = ["year"]
+ M2M_SEARCH_VECTORS = ["towns__name"]
# fields definition
creation_date = models.DateField(_(u"Creation date"),
@@ -309,6 +313,7 @@ class Operation(ClosedItem, BaseHistorizedItem, ImageModel, OwnPerms,
code_patriarche = models.TextField(u"Code PATRIARCHE", null=True,
blank=True, unique=True)
TABLE_COLS = ['full_code_patriarche'] + TABLE_COLS
+ BASE_SEARCH_VECTORS = ['code_patriarche'] + BASE_SEARCH_VECTORS
# preventive
fnap_financing = models.FloatField(u"Financement FNAP (%)",
blank=True, null=True)
diff --git a/archaeological_operations/tests.py b/archaeological_operations/tests.py
index 0d6908374..91050dc1f 100644
--- a/archaeological_operations/tests.py
+++ b/archaeological_operations/tests.py
@@ -895,6 +895,21 @@ class OperationTest(TestCase, OperationInitTest):
self.assertEqual(ope_id, 'OP2011-1')
self.assertEqual(town, self.towns[0].name)
+ def test_search_vector_update(self):
+     """
+     After attaching two towns and saving edited fields, the operation
+     search vector must contain every expected lexeme.
+     """
+     operation = self.operations[0]
+     for town_data in ({'numero_insee': '12346', 'name': 'Daisy'},
+                       {'numero_insee': '12347', 'name': 'Dirty old'}):
+         operation.towns.add(self.create_towns(town_data)[-1])
+     # work on a freshly loaded instance before editing and saving it
+     operation = models.Operation.objects.get(pk=operation.pk)
+     operation.comment = u"Zardoz"
+     operation.code_patriarche = u"HUIAAA5"
+     operation.save()
+     expected_lexemes = ('old', 'op2010', 'dirty', 'daisy', "'2010'",
+                         "zardoz", "huiaaa5")
+     for lexeme in expected_lexemes:
+         self.assertIn(lexeme, operation.search_vector)
+
def test_cache_bulk_update(self):
if settings.USE_SPATIALITE_FOR_TESTS:
# using views - can only be tested with postgresql
diff --git a/archaeological_warehouse/migrations/0008_auto_20171011_1644.py b/archaeological_warehouse/migrations/0008_auto_20171011_1644.py
new file mode 100644
index 000000000..82245647d
--- /dev/null
+++ b/archaeological_warehouse/migrations/0008_auto_20171011_1644.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Schema migration adding a nullable ``search_vector`` field
+ # (SearchVectorField) to ``collection``, ``container`` and ``warehouse``,
+ # and altering ``container.index``.
+
+ # Applied on top of the previous migration of this application.
+ dependencies = [
+ ('archaeological_warehouse', '0007_auto_20171004_1125'),
+ ]
+
+ operations = [
+ # search_vector: optional (blank/null allowed), same definition for
+ # the three models
+ migrations.AddField(
+ model_name='collection',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='container',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='warehouse',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ # container.index becomes an IntegerField defaulting to 0, labelled
+ # 'Container ID'
+ migrations.AlterField(
+ model_name='container',
+ name='index',
+ field=models.IntegerField(default=0, verbose_name='Container ID'),
+ ),
+ ]
diff --git a/example_project/settings.py b/example_project/settings.py
index ea50daffb..6ca8cb5fc 100644
--- a/example_project/settings.py
+++ b/example_project/settings.py
@@ -240,6 +240,7 @@ ISHTAR_PERIODS = {}
ISHTAR_PERMIT_TYPES = {}
ISHTAR_DOC_TYPES = {u"undefined": u"Undefined"}
+ISHTAR_SEARCH_LANGUAGE = "french"
ISHTAR_DPTS = []
diff --git a/ishtar_common/migrations/0015_auto_20171011_1644.py b/ishtar_common/migrations/0015_auto_20171011_1644.py
new file mode 100644
index 000000000..a9f4499c2
--- /dev/null
+++ b/ishtar_common/migrations/0015_auto_20171011_1644.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11 on 2017-10-11 16:44
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+ # Schema migration adding a nullable ``search_vector`` field
+ # (SearchVectorField) to ``organization``, ``person`` and their
+ # ``historical*`` models.
+
+ # Applied on top of the previous migration of this application.
+ dependencies = [
+ ('ishtar_common', '0014_ishtarsiteprofile_preservation'),
+ ]
+
+ # The field definition is identical for all models: optional
+ # (blank/null allowed) with the help text 'Auto filled at save'.
+ operations = [
+ migrations.AddField(
+ model_name='historicalorganization',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='historicalperson',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='organization',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ migrations.AddField(
+ model_name='person',
+ name='search_vector',
+ field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
+ ),
+ ]
diff --git a/ishtar_common/models.py b/ishtar_common/models.py
index 28a24115b..915415416 100644
--- a/ishtar_common/models.py
+++ b/ishtar_common/models.py
@@ -35,6 +35,7 @@ import tempfile
import time
from django.conf import settings
+from django.contrib.postgres.search import SearchVectorField, SearchVector
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.files.uploadedfile import SimpleUploadedFile
@@ -58,7 +59,7 @@ from simple_history.models import HistoricalRecords as BaseHistoricalRecords
from ishtar_common.model_merging import merge_model_objects
from ishtar_common.utils import get_cache, disable_for_loaddata, create_slug,\
- get_all_field_names
+ get_all_field_names, merge_tsvectors
from ishtar_common.models_imports import ImporterModel, ImporterType, \
ImporterDefault, ImporterDefaultValues, ImporterColumn, \
@@ -917,9 +918,75 @@ class Imported(models.Model):
abstract = True
-class BaseHistorizedItem(Imported):
+class FullSearch(models.Model):
+    """
+    Abstract model providing a ``search_vector`` field and the method
+    filling it from the fields declared in the ``*_SEARCH_VECTORS`` lists.
+    """
+    search_vector = SearchVectorField(_("Search vector"), blank=True, null=True,
+                                      help_text=_("Auto filled at save"))
+    # field lookups indexed together through a single SearchVector query
+    BASE_SEARCH_VECTORS = []
+    # fields whose raw value is added as a lexeme (no text search parsing)
+    INT_SEARCH_VECTORS = []
+    # "relation__field" lookups, queried once per related item
+    M2M_SEARCH_VECTORS = []
+
+    class Meta:
+        abstract = True
+
+    def update_search_vector(self, save=True):
+        """
+        Update the search vector
+        :param save: True if you want to save the object immediately
+        :return: True if modified, False otherwise (including when nothing
+        is configured, when the object is not saved yet or when the vector
+        has already been refreshed)
+        """
+        if not (self.BASE_SEARCH_VECTORS or self.INT_SEARCH_VECTORS
+                or self.M2M_SEARCH_VECTORS):
+            logger.warning("No search_vectors defined for {}".format(
+                self.__class__))
+            return False
+        if not self.pk:
+            # every value is read back from the database: nothing to index
+            # before the first save
+            return False
+        if getattr(self, '_search_updated', None):
+            # already refreshed since the flag was last reset: do not
+            # compute (and save) again
+            return False
+        self._search_updated = True
+
+        old_search = self.search_vector or ""
+        language = settings.ISHTAR_SEARCH_LANGUAGE
+        base_q = self.__class__.objects.filter(pk=self.pk)
+        search_vectors = []
+
+        # many to many have to be queried one by one otherwise only one is
+        # fetched
+        for m2m_lookup in self.M2M_SEARCH_VECTORS:
+            key = m2m_lookup.split('__')[0]
+            related_pks = getattr(self, key).values_list('pk', flat=True)
+            for related_pk in related_pks:
+                # filter() returns a new queryset: base_q is left untouched
+                q = base_q.filter(**{key + "__pk": related_pk}).annotate(
+                    search=SearchVector(m2m_lookup, config=language))
+                search_vectors += list(
+                    q.values_list('search', flat=True)[:1])
+
+        # int/float are not well managed by the SearchVector
+        for int_field in self.INT_SEARCH_VECTORS:
+            for value in base_q.values_list(int_field, flat=True)[:1]:
+                # a NULL column must not be indexed as the word 'None'
+                if value is not None:
+                    search_vectors.append("'{}':1".format(value))
+
+        # query "simple" fields (SearchVector needs at least one expression)
+        if self.BASE_SEARCH_VECTORS:
+            q = base_q.annotate(
+                search=SearchVector(*self.BASE_SEARCH_VECTORS,
+                                    config=language))
+            search_vectors += list(q.values_list('search', flat=True)[:1])
+
+        self.search_vector = merge_tsvectors(search_vectors)
+        # NOTE(review): textual comparison - the stored value may be
+        # formatted differently from the merged string; confirm this does
+        # not trigger needless saves
+        changed = old_search != self.search_vector
+        if save and changed:
+            self.skip_history_when_saving = True
+            self.save()
+        return changed
+
+
+class BaseHistorizedItem(FullSearch, Imported):
"""
- Historized item with external ID management
+ Historized item with external ID management.
+ All historized items are searchable
"""
IS_BASKET = False
EXTERNAL_ID_KEY = ''
@@ -1187,6 +1254,7 @@ class LightHistorizedItem(BaseHistorizedItem):
super(LightHistorizedItem, self).save(*args, **kwargs)
return True
+
PARSE_FORMULA = re.compile("{([^}]*)}")
FORMULA_FILTERS = {
@@ -1409,6 +1477,7 @@ def get_current_profile(force=False):
def cached_site_changed(sender, **kwargs):
get_current_profile(force=True)
+
post_save.connect(cached_site_changed, sender=IshtarSiteProfile)
post_delete.connect(cached_site_changed, sender=IshtarSiteProfile)
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py
index c6a4032f0..f3b1a821b 100644
--- a/ishtar_common/utils.py
+++ b/ishtar_common/utils.py
@@ -104,9 +104,12 @@ def cached_label_changed(sender, **kwargs):
setattr(instance, cached_label, lbl)
changed = True
if changed:
+ instance._search_updated = False
if hasattr(instance, '_cascade_change') and instance._cascade_change:
instance.skip_history_when_saving = True
instance.save()
+ if hasattr(instance, 'update_search_vector'):
+ instance.update_search_vector()
updated = False
if hasattr(instance, '_cached_labels_bulk_update'):
updated = instance._cached_labels_bulk_update()
@@ -117,6 +120,7 @@ def cached_label_changed(sender, **kwargs):
item.test_obj = instance.test_obj
cached_label_changed(item.__class__, instance=item)
+
SHORTIFY_STR = ugettext(" (...)")
@@ -289,3 +293,41 @@ def get_all_related_objects(model):
and f.auto_created and not f.concrete
]
+
+def merge_tsvectors(vectors):
+    """
+    Parse tsvector strings and merge them into one tsvector string.
+
+    Positions of each vector are shifted past the highest position already
+    merged so that the words of successive vectors follow each other.
+    Lexemes may contain spaces, colons or doubled quotes; positions may
+    carry a weight letter (e.g. ``3A``); lexemes without position are kept.
+
+    :param vectors: list of tsvector string (empty or None items are
+    ignored)
+    :return: merged tsvector, lexemes and positions sorted - an empty string
+    when there is nothing to merge
+    """
+    # local import: the import block of this module is left untouched
+    import re
+
+    # 'lexeme' (inner quotes are doubled) optionally followed by
+    # :position[,position...]
+    entry_re = re.compile(r"'((?:[^']|'')*)'(?::([0-9A-Da-d,]+))?")
+    # a position is a number optionally followed by its weight
+    position_re = re.compile(r"([0-9]+)([A-Da-d]?)")
+
+    merged = {}
+    offset = 0
+    for vector in vectors:
+        if not vector:
+            continue
+        highest = offset
+        for lexeme, raw_positions in entry_re.findall(vector):
+            positions = merged.setdefault(lexeme, [])
+            for number, weight in position_re.findall(raw_positions):
+                shifted = int(number) + offset
+                positions.append((shifted, weight))
+                if shifted > highest:
+                    highest = shifted
+        # the next vector starts after the last position used so far
+        offset = highest
+
+    # {'lamelie': [(1, ''), (5, '')], 'hagarde': [(2, '')]} =>
+    # "'hagarde':2 'lamelie':1,5"
+    # sorted for a deterministic result; plain concatenation (no
+    # str.format) so unicode lexemes are not pushed through the ASCII codec
+    # by Python 2
+    members = []
+    for lexeme in sorted(merged):
+        member = "'" + lexeme + "'"
+        positions = ",".join(
+            str(number) + weight
+            for number, weight in sorted(merged[lexeme]))
+        if positions:
+            member += ":" + positions
+        members.append(member)
+    return " ".join(members)