diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2017-10-12 13:02:59 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2017-10-12 13:02:59 +0200 |
commit | a2ae7228cae62c4fde1f9554372162d322e69aa9 (patch) | |
tree | 38f7b144f2083901d073dbc3a7c646f70f9e7096 | |
parent | 81fc327fd0eb353fbf82dd17e23682750ad3d4c2 (diff) | |
download | Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.tar.bz2 Ishtar-a2ae7228cae62c4fde1f9554372162d322e69aa9.zip |
Manage postgres index and vector searches (refs #2912)
-rw-r--r-- | archaeological_context_records/migrations/0010_auto_20171011_1644.py | 26 | ||||
-rw-r--r-- | archaeological_files/migrations/0008_auto_20171011_1644.py | 26 | ||||
-rw-r--r-- | archaeological_finds/migrations/0010_auto_20171011_1644.py | 61 | ||||
-rw-r--r-- | archaeological_operations/migrations/0009_auto_20171011_1644.py | 51 | ||||
-rw-r--r-- | archaeological_operations/models.py | 5 | ||||
-rw-r--r-- | archaeological_operations/tests.py | 15 | ||||
-rw-r--r-- | archaeological_warehouse/migrations/0008_auto_20171011_1644.py | 36 | ||||
-rw-r--r-- | example_project/settings.py | 1 | ||||
-rw-r--r-- | ishtar_common/migrations/0015_auto_20171011_1644.py | 36 | ||||
-rw-r--r-- | ishtar_common/models.py | 75 | ||||
-rw-r--r-- | ishtar_common/utils.py | 42 |
11 files changed, 371 insertions, 3 deletions
diff --git a/archaeological_context_records/migrations/0010_auto_20171011_1644.py b/archaeological_context_records/migrations/0010_auto_20171011_1644.py new file mode 100644 index 000000000..379110e44 --- /dev/null +++ b/archaeological_context_records/migrations/0010_auto_20171011_1644.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('archaeological_context_records', '0009_auto_20170829_1639'), + ] + + operations = [ + migrations.AddField( + model_name='contextrecord', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicalcontextrecord', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + ] diff --git a/archaeological_files/migrations/0008_auto_20171011_1644.py b/archaeological_files/migrations/0008_auto_20171011_1644.py new file mode 100644 index 000000000..33dfbf59e --- /dev/null +++ b/archaeological_files/migrations/0008_auto_20171011_1644.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('archaeological_files', '0007_auto_20170826_1152'), + ] + + operations = [ + migrations.AddField( + model_name='file', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicalfile', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + ] diff --git a/archaeological_finds/migrations/0010_auto_20171011_1644.py b/archaeological_finds/migrations/0010_auto_20171011_1644.py new file mode 100644 index 000000000..ce892e96d --- /dev/null +++ b/archaeological_finds/migrations/0010_auto_20171011_1644.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('archaeological_finds', '0009_auto_20171010_1644'), + ] + + operations = [ + migrations.AddField( + model_name='basefind', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='find', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicalbasefind', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicalfind', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicaltreatment', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicaltreatmentfile', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='property', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='treatment', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='treatmentfile', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + ] diff --git a/archaeological_operations/migrations/0009_auto_20171011_1644.py b/archaeological_operations/migrations/0009_auto_20171011_1644.py new file mode 100644 index 000000000..18a284a21 --- /dev/null +++ b/archaeological_operations/migrations/0009_auto_20171011_1644.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('archaeological_operations', '0008_auto_20170829_1639'), + ] + + operations = [ + migrations.AddField( + model_name='administrativeact', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='archaeologicalsite', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicaladministrativeact', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicaloperation', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='operation', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='parcel', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='parcelowner', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + ] diff --git a/archaeological_operations/models.py b/archaeological_operations/models.py index 54ed96cec..d55a2e689 100644 --- a/archaeological_operations/models.py +++ b/archaeological_operations/models.py @@ -248,6 +248,10 @@ class Operation(ClosedItem, BaseHistorizedItem, ImageModel, OwnPerms, 'archaeological_sites__reference': _(u"Archaeological sites (" u"reference)"), } + BASE_SEARCH_VECTORS = ["scientist__raw_name", "cached_label", + "common_name", "comment", "address", "old_code"] + INT_SEARCH_VECTORS = ["year"] + M2M_SEARCH_VECTORS = ["towns__name"] # fields definition creation_date = models.DateField(_(u"Creation date"), @@ -309,6 +313,7 @@ class Operation(ClosedItem, BaseHistorizedItem, ImageModel, OwnPerms, code_patriarche = models.TextField(u"Code PATRIARCHE", null=True, blank=True, unique=True) TABLE_COLS = ['full_code_patriarche'] + TABLE_COLS + BASE_SEARCH_VECTORS = ['code_patriarche'] + BASE_SEARCH_VECTORS # preventive fnap_financing = models.FloatField(u"Financement FNAP (%)", blank=True, null=True) diff --git a/archaeological_operations/tests.py b/archaeological_operations/tests.py index 0d6908374..91050dc1f 100644 --- a/archaeological_operations/tests.py +++ b/archaeological_operations/tests.py @@ -895,6 +895,21 @@ class OperationTest(TestCase, OperationInitTest): self.assertEqual(ope_id, 'OP2011-1') self.assertEqual(town, self.towns[0].name) + def test_search_vector_update(self): + operation = self.operations[0] + town = self.create_towns({'numero_insee': '12346', 'name': 'Daisy'})[-1] + operation.towns.add(town) + town = self.create_towns( + {'numero_insee': '12347', 'name': 'Dirty old'})[-1] + operation.towns.add(town) + operation = models.Operation.objects.get(pk=operation.pk) + operation.comment = u"Zardoz" + operation.code_patriarche = u"HUIAAA5" + operation.save() + for key in ('old', 'op2010', 'dirty', 'daisy', "'2010'", "zardoz", + "huiaaa5"): + self.assertIn(key, operation.search_vector) + def test_cache_bulk_update(self): if settings.USE_SPATIALITE_FOR_TESTS: # using views - can only be tested with postgresql diff --git a/archaeological_warehouse/migrations/0008_auto_20171011_1644.py b/archaeological_warehouse/migrations/0008_auto_20171011_1644.py new file mode 100644 index 000000000..82245647d --- /dev/null +++ b/archaeological_warehouse/migrations/0008_auto_20171011_1644.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('archaeological_warehouse', '0007_auto_20171004_1125'), + ] + + operations = [ + migrations.AddField( + model_name='collection', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='container', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='warehouse', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AlterField( + model_name='container', + name='index', + field=models.IntegerField(default=0, verbose_name='Container ID'), + ), + ] diff --git a/example_project/settings.py b/example_project/settings.py index ea50daffb..6ca8cb5fc 100644 --- a/example_project/settings.py +++ b/example_project/settings.py @@ -240,6 +240,7 @@ ISHTAR_PERIODS = {} ISHTAR_PERMIT_TYPES = {} ISHTAR_DOC_TYPES = {u"undefined": u"Undefined"} +ISHTAR_SEARCH_LANGUAGE = "french" ISHTAR_DPTS = [] diff --git a/ishtar_common/migrations/0015_auto_20171011_1644.py b/ishtar_common/migrations/0015_auto_20171011_1644.py new file mode 100644 index 000000000..a9f4499c2 --- /dev/null +++ b/ishtar_common/migrations/0015_auto_20171011_1644.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11 on 2017-10-11 16:44 +from __future__ import unicode_literals + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('ishtar_common', '0014_ishtarsiteprofile_preservation'), + ] + + operations = [ + migrations.AddField( + model_name='historicalorganization', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='historicalperson', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='organization', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + migrations.AddField( + model_name='person', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), + ), + ] diff --git a/ishtar_common/models.py b/ishtar_common/models.py index 28a24115b..915415416 100644 --- a/ishtar_common/models.py +++ b/ishtar_common/models.py @@ -35,6 +35,7 @@ import tempfile import time from django.conf import settings +from django.contrib.postgres.search import SearchVectorField, SearchVector from django.core.cache import cache from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.files.uploadedfile import SimpleUploadedFile @@ -58,7 +59,7 @@ from simple_history.models import HistoricalRecords as BaseHistoricalRecords from ishtar_common.model_merging import merge_model_objects from ishtar_common.utils import get_cache, disable_for_loaddata, create_slug,\ - get_all_field_names + get_all_field_names, merge_tsvectors from ishtar_common.models_imports import ImporterModel, ImporterType, \ ImporterDefault, ImporterDefaultValues, ImporterColumn, \ @@ -917,9 +918,75 @@ class Imported(models.Model): abstract = True -class BaseHistorizedItem(Imported): +class FullSearch(models.Model): + search_vector = SearchVectorField(_("Search vector"), blank=True, null=True, + help_text=_("Auto filled at save")) + BASE_SEARCH_VECTORS = [] + INT_SEARCH_VECTORS = [] + M2M_SEARCH_VECTORS = [] + + class Meta: + abstract = True + + def update_search_vector(self, save=True): + """ + Update the search vector + :param save: True if you want to save the object immediately + :return: True if modified + """ + if not self.BASE_SEARCH_VECTORS and not self.M2M_SEARCH_VECTORS: + logger.warning("No search_vectors defined for {}".format( + self.__class__)) + return + if getattr(self, '_search_updated', None): + return + self._search_updated = True + + old_search = "" + if self.search_vector: + old_search = self.search_vector[:] + search_vectors = [] + base_q = self.__class__.objects.filter(pk=self.pk) + + # many to many have to be queried one by one otherwise only one is fetch + for M2M_SEARCH_VECTOR in self.M2M_SEARCH_VECTORS: + key = M2M_SEARCH_VECTOR.split('__')[0] + rel_key = getattr(self, key) + for item in rel_key.values('pk').all(): + query_dct = {key + "__pk": item['pk']} + q = copy.copy(base_q).filter(**query_dct) + q = q.annotate( + search=SearchVector( + M2M_SEARCH_VECTOR, + config=settings.ISHTAR_SEARCH_LANGUAGE) + ).values('search') + search_vectors.append(q.all()[0]['search']) + + # int/float are not well managed by the SearchVector + for INT_SEARCH_VECTOR in self.INT_SEARCH_VECTORS: + q = base_q.values(INT_SEARCH_VECTOR) + search_vectors.append( + "'{}':1".format(q.all()[0][INT_SEARCH_VECTOR])) + + # query "simple" fields + q = base_q.annotate( + search=SearchVector( + *self.BASE_SEARCH_VECTORS, + config=settings.ISHTAR_SEARCH_LANGUAGE + )).values('search') + search_vectors.append(q.all()[0]['search']) + self.search_vector = merge_tsvectors(search_vectors) + changed = old_search != self.search_vector + if save and changed: + self.skip_history_when_saving = True + self.save() + return changed + + +class BaseHistorizedItem(FullSearch, Imported): """ - Historized item with external ID management + Historized item with external ID management. + All historized items are searcheable """ IS_BASKET = False EXTERNAL_ID_KEY = '' @@ -1187,6 +1254,7 @@ class LightHistorizedItem(BaseHistorizedItem): super(LightHistorizedItem, self).save(*args, **kwargs) return True + PARSE_FORMULA = re.compile("{([^}]*)}") FORMULA_FILTERS = { @@ -1409,6 +1477,7 @@ def get_current_profile(force=False): def cached_site_changed(sender, **kwargs): get_current_profile(force=True) + post_save.connect(cached_site_changed, sender=IshtarSiteProfile) post_delete.connect(cached_site_changed, sender=IshtarSiteProfile) diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py index c6a4032f0..f3b1a821b 100644 --- a/ishtar_common/utils.py +++ b/ishtar_common/utils.py @@ -104,9 +104,12 @@ def cached_label_changed(sender, **kwargs): setattr(instance, cached_label, lbl) changed = True if changed: + instance._search_updated = False if hasattr(instance, '_cascade_change') and instance._cascade_change: instance.skip_history_when_saving = True instance.save() + if hasattr(instance, 'update_search_vector'): + instance.update_search_vector() updated = False if hasattr(instance, '_cached_labels_bulk_update'): updated = instance._cached_labels_bulk_update() @@ -117,6 +120,7 @@ def cached_label_changed(sender, **kwargs): item.test_obj = instance.test_obj cached_label_changed(item.__class__, instance=item) + SHORTIFY_STR = ugettext(" (...)") @@ -289,3 +293,41 @@ def get_all_related_objects(model): and f.auto_created and not f.concrete ] + +def merge_tsvectors(vectors): + """ + Parse tsvector to merge them in one string + :param vectors: list of tsvector string + :return: merged tsvector + """ + result_dict = {} + for vector in vectors: + if not vector: + continue + + current_position = 0 + if result_dict: + for key in result_dict: + max_position = max(result_dict[key]) + if max_position > current_position: + current_position = max_position + + for dct_member in vector.split(" "): + key, positions = dct_member.split(':') + key = key[1:-1] # remove quotes + positions = [int(pos) + current_position + for pos in positions.split(',')] + if key in result_dict: + result_dict[key] += positions + else: + result_dict[key] = positions + + # {'lamelie': [1, 42, 5]} => {'lamelie': "1,42,5"} + result_dict = {k: ",".join([str(val) for val in result_dict[k]]) + for k in result_dict} + # {'lamelie': "1,5", "hagarde": "2", "regarde": "4"} => + # "'lamelie':1,5 'hagarde':2 'regarde':4" + result = " ".join(["'{}':{}".format(k, result_dict[k]) + for k in result_dict]) + + return result |