diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-09-27 22:43:59 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-09-27 22:43:59 +0200 |
commit | 5fe8cb4248935e7adc62830ebb73c32048367983 (patch) | |
tree | c1bd192f33983de8e3ed8d0c82087daf4dd17d94 /chimere/models.py | |
parent | a324a90b4c8718e8f6133e2ad11c5b78724138d6 (diff) | |
download | Chimère-5fe8cb4248935e7adc62830ebb73c32048367983.tar.bz2 Chimère-5fe8cb4248935e7adc62830ebb73c32048367983.zip |
Manage search with trigrams
Diffstat (limited to 'chimere/models.py')
-rw-r--r-- | chimere/models.py | 69 |
1 file changed, 41 insertions, 28 deletions
```diff
diff --git a/chimere/models.py b/chimere/models.py
index 3e877ed..7a803dc 100644
--- a/chimere/models.py
+++ b/chimere/models.py
@@ -39,7 +39,7 @@ from django.conf import settings
 from django.contrib.auth.models import User, Permission, ContentType, Group
 from django.contrib.gis.db import models
 from django.contrib.postgres.search import SearchVectorField, SearchVector, \
-    SearchQuery
+    SearchQuery, TrigramSimilarity
 from django.core.files import File
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.urlresolvers import reverse, NoReverseMatch
@@ -623,8 +623,9 @@ class GeographicItem(models.Model):
         _("Normalised weight"), blank=True, null=True,
         help_text=_("The weight normalised to be between 0 and 1. "
                     "Automatically recalculated."))
-    search_vector = SearchVectorField(_("Search vector"), blank=True, null=True,
-                                      help_text=_("Auto filled at save"))
+    search_vector = models.TextField(
+        _("Search vector"), blank=True, null=True,
+        help_text=_("Auto filled at save"))
     geom_attr = ''
     default_values = {}
 
@@ -1019,10 +1020,29 @@ class GeographicItem(models.Model):
         also return if next items are available
         """
         subcats = SubCategory.getAvailable(instance=True, area=area)
-        q = cls.objects.filter(categories__in=subcats,
-                               search_vector=SearchQuery(
-                                   query,
-                                   config=settings.CHIMERE_SEARCH_LANGUAGE))
+        base_query = cls.objects.filter(categories__in=subcats)
+        search_factors = [('categories__name', 1),
+                          ('categories__category__name', 1),
+                          ('name', 1),
+                          ('keywords', 0.5),
+                          ('categories__keywords', 0.5),
+                          ('search_vector', 0.25)]
+
+        annotate_q = dict(
+            [("similarity_" + key, TrigramSimilarity(key, query))
+             for key, fac in search_factors])
+        for limit in [0.3, 0.1]:
+            filtr, order = None, []
+            for key, factor in search_factors:
+                q = Q(**{"similarity_" + key + "__gt": limit / factor})
+                if not filtr:
+                    filtr = q
+                else:
+                    filtr |= q
+                order.append("similarity_" + key)
+            q = base_query.annotate(**annotate_q).filter(filtr).order_by(*order)
+            if q.count():
+                break
         if get_json:
             return cls.getGeoJSONs(q, slice=slice, check_next=check_next)
         if slice:
@@ -1046,31 +1066,24 @@ class GeographicItem(models.Model):
         old_search = self.search_vector[:]
         search_vectors = []
         base_q = self.__class__.objects.filter(pk=self.pk)
+        cat_keys = ['categories__name', 'categories__keywords',
+                    'categories__category__name']
         for cat in self.categories.values('pk').all():
             q = copy.copy(base_q).filter(categories__pk=cat['pk'])
-            q = q.annotate(
-                search=SearchVector(
-                    'categories__name',
-                    'categories__keywords',
-                    'categories__category__name',
-                    config=settings.CHIMERE_SEARCH_LANGUAGE
-                ))
-            search_vectors.append(q.all()[0].search)
+            q = q.values(*cat_keys)
+            item = q.all()[0]
+            search_vectors += [item[key] for key in cat_keys if item[key]]
         for pm in self.properties.values('pk').all():
             q = copy.copy(base_q).filter(properties__pk=pm['pk'])
-            q = q.annotate(
-                search=SearchVector(
-                    'properties__search_value',
-                    config=settings.CHIMERE_SEARCH_LANGUAGE
-                ))
-            search_vectors.append(q.all()[0].search)
-        q = base_q.annotate(
-            search=SearchVector(
-                'name', 'description', 'keywords',
-                config=settings.CHIMERE_SEARCH_LANGUAGE
-            ))
-        search_vectors.append(q.all()[0].search)
-        self.search_vector = utils.merge_tsvectors(search_vectors)
+            q = q.values('properties__search_value')
+            item = q.all()[0]
+            if item['properties__search_value']:
+                search_vectors.append(item['properties__search_value'])
+        base_keys = ['name', 'description', 'keywords']
+        q = base_q.values(*base_keys)
+        item = q.all()[0]
+        search_vectors += [item[key] for key in base_keys if item[key]]
+        self.search_vector = " ".join(set(search_vectors))
         changed = old_search != self.search_vector
         if save and changed:
             self.save()
```