From c337f48a8bb2941e206d7504b66a3a4f7299d572 Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Fri, 28 Sep 2018 08:57:18 +0200 Subject: Manage search with trigrams - 2 --- chimere/migrations/0019_auto_20180927_1659.py | 44 ----------- chimere/migrations/0019_auto_20180928_0848.py | 44 +++++++++++ chimere/models.py | 103 ++++++++++++++++++-------- settings.py | 1 + 4 files changed, 118 insertions(+), 74 deletions(-) delete mode 100644 chimere/migrations/0019_auto_20180927_1659.py create mode 100644 chimere/migrations/0019_auto_20180928_0848.py diff --git a/chimere/migrations/0019_auto_20180927_1659.py b/chimere/migrations/0019_auto_20180927_1659.py deleted file mode 100644 index e64d396..0000000 --- a/chimere/migrations/0019_auto_20180927_1659.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.5 on 2018-09-27 16:59 -from __future__ import unicode_literals - -from django.db import migrations, models -from django.contrib.postgres.operations import TrigramExtension, \ - UnaccentExtension - - -class Migration(migrations.Migration): - - dependencies = [ - ('chimere', '0018_auto_20180717_1204'), - ] - - operations = [ - TrigramExtension(), - UnaccentExtension(), - migrations.AlterField( - model_name='area', - name='single_click_map', - field=models.BooleanField(default=False, verbose_name='Hide categories after click'), - ), - migrations.AlterField( - model_name='importer', - name='filtr', - field=models.TextField(blank=True, help_text=' ', null=True, verbose_name='Filter'), - ), - migrations.AlterField( - model_name='marker', - name='search_vector', - field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), - ), - migrations.AlterField( - model_name='polygon', - name='search_vector', - field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), - ), - migrations.AlterField( - model_name='route', - name='search_vector', - field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'), - ), - ] diff --git a/chimere/migrations/0019_auto_20180928_0848.py b/chimere/migrations/0019_auto_20180928_0848.py new file mode 100644 index 0000000..bf5186f --- /dev/null +++ b/chimere/migrations/0019_auto_20180928_0848.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.5 on 2018-09-28 08:48 +from __future__ import unicode_literals + +from django.db import migrations, models +from django.contrib.postgres.operations import TrigramExtension, \ + UnaccentExtension + + +class Migration(migrations.Migration): + + dependencies = [ + ('chimere', '0018_auto_20180717_1204'), + ] + + operations = [ + TrigramExtension(), + UnaccentExtension(), + migrations.AddField( + model_name='marker', + name='search_for_trigram', + field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search for trigram'), + ), + migrations.AddField( + model_name='polygon', + name='search_for_trigram', + field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search for trigram'), + ), + migrations.AddField( + model_name='route', + name='search_for_trigram', + field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search for trigram'), + ), + migrations.AlterField( + model_name='area', + name='single_click_map', + field=models.BooleanField(default=False, verbose_name='Hide categories after click'), + ), + migrations.AlterField( + model_name='importer', + name='filtr', + field=models.TextField(blank=True, help_text=' ', null=True, verbose_name='Filter'), + ), + ] diff --git a/chimere/models.py b/chimere/models.py index 7a803dc..a052b66 100644 --- a/chimere/models.py +++ b/chimere/models.py @@ -623,8 +623,10 @@ class GeographicItem(models.Model): _("Normalised weight"), blank=True, null=True, help_text=_("The weight normalised to be between 0 and 1. " "Automatically recalculated.")) - search_vector = models.TextField( - _("Search vector"), blank=True, null=True, + search_vector = SearchVectorField(_("Search vector"), blank=True, null=True, + help_text=_("Auto filled at save")) + search_for_trigram = models.TextField( + _("Search for trigram"), blank=True, null=True, help_text=_("Auto filled at save")) geom_attr = '' @@ -1021,28 +1023,35 @@ class GeographicItem(models.Model): """ subcats = SubCategory.getAvailable(instance=True, area=area) base_query = cls.objects.filter(categories__in=subcats) - search_factors = [('categories__name', 1), - ('categories__category__name', 1), - ('name', 1), - ('keywords', 0.5), - ('categories__keywords', 0.5), - ('search_vector', 0.25)] - - annotate_q = dict( - [("similarity_" + key, TrigramSimilarity(key, query)) - for key, fac in search_factors]) - for limit in [0.3, 0.1]: - filtr, order = None, [] - for key, factor in search_factors: - q = Q(**{"similarity_" + key + "__gt": limit / factor}) - if not filtr: - filtr = q - else: - filtr |= q - order.append("similarity_" + key) - q = base_query.annotate(**annotate_q).filter(filtr).order_by(*order) - if q.count(): - break + if settings.CHIMERE_SEARCH_TECH == 'search_vector': + q = cls.objects.filter(categories__in=subcats, + search_vector=SearchQuery( + query, + config=settings.CHIMERE_SEARCH_LANGUAGE)) + else: + search_factors = [('categories__name', 1), + ('categories__category__name', 1), + ('name', 1), + ('keywords', 0.5), + ('categories__keywords', 0.5), + ('search_for_trigram', 0.25)] + + annotate_q = dict( + [("similarity_" + key, TrigramSimilarity(key, query)) + for key, fac in search_factors]) + for limit in [0.3, 0.1]: + filtr, order = None, [] + for key, factor in search_factors: + q = Q(**{"similarity_" + key + "__gt": limit / factor}) + if not filtr: + filtr = q + else: + filtr |= q + order.append("similarity_" + key) + q = base_query.annotate(**annotate_q).filter(filtr).order_by( + *order) + if q.count(): + break if get_json: return cls.getGeoJSONs(q, slice=slice, check_next=check_next) if slice: @@ -1061,30 +1070,64 @@ class GeographicItem(models.Model): :param save: True if you want to save the object immediately :return: True if modified """ - old_search = "" + old_search, old_trigram = "", "" if self.search_vector: old_search = self.search_vector[:] + if self.search_for_trigram: + old_trigram = self.search_for_trigram[:] search_vectors = [] + search_for_trigram = [] + base_q = self.__class__.objects.filter(pk=self.pk) cat_keys = ['categories__name', 'categories__keywords', 'categories__category__name'] for cat in self.categories.values('pk').all(): + q = copy.copy(base_q).filter(categories__pk=cat['pk']) + q = q.annotate( + search=SearchVector( + 'categories__name', + 'categories__keywords', + 'categories__category__name', + config=settings.CHIMERE_SEARCH_LANGUAGE + )) + search_vectors.append(q.all()[0].search) + q = copy.copy(base_q).filter(categories__pk=cat['pk']) q = q.values(*cat_keys) item = q.all()[0] - search_vectors += [item[key] for key in cat_keys if item[key]] + search_for_trigram += [item[key] for key in cat_keys if item[key]] for pm in self.properties.values('pk').all(): + q = copy.copy(base_q).filter(properties__pk=pm['pk']) + q = q.annotate( + search=SearchVector( + 'properties__search_value', + config=settings.CHIMERE_SEARCH_LANGUAGE + )) + search_vectors.append(q.all()[0].search) + q = copy.copy(base_q).filter(properties__pk=pm['pk']) q = q.values('properties__search_value') item = q.all()[0] if item['properties__search_value']: - search_vectors.append(item['properties__search_value']) + search_for_trigram.append(item['properties__search_value']) + q = copy.copy(base_q).annotate( + search=SearchVector( + 'name', 'description', 'keywords', + config=settings.CHIMERE_SEARCH_LANGUAGE + )) + search_vectors.append(q.all()[0].search) + self.search_vector = utils.merge_tsvectors(search_vectors) + base_keys = ['name', 'description', 'keywords'] q = base_q.values(*base_keys) item = q.all()[0] - search_vectors += [item[key] for key in base_keys if item[key]] - self.search_vector = " ".join(set(search_vectors)) - changed = old_search != self.search_vector + search_for_trigram += [item[key] for key in base_keys if item[key]] + # deduplicate + search_for_trigram = " ".join(set(search_for_trigram)).split(" ") + self.search_for_trigram = " ".join(set(search_for_trigram)) + + changed = (old_search != self.search_vector) or ( + old_trigram != self.search_for_trigram) if save and changed: self.save() return changed diff --git a/settings.py b/settings.py index 878d1cd..ebb64b6 100644 --- a/settings.py +++ b/settings.py @@ -136,6 +136,7 @@ CHIMERE_THUMBS_SCALE_WIDTH = None # search engine CHIMERE_SEARCH_LANGUAGE = 'french' +CHIMERE_SEARCH_TECH = 'search_vector' # "search_vector" or "trigram" HAYSTACK_SEARCH_ENGINE = False HAYSTACK_CONNECTIONS = { -- cgit v1.2.3