diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-09-28 08:57:18 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-09-28 09:05:15 +0200 |
commit | c337f48a8bb2941e206d7504b66a3a4f7299d572 (patch) | |
tree | 6c9dd01b94af1eca3a6e55dafb6d3cd9a5ff92aa /chimere/models.py | |
parent | 5fe8cb4248935e7adc62830ebb73c32048367983 (diff) | |
download | Chimère-c337f48a8bb2941e206d7504b66a3a4f7299d572.tar.bz2 Chimère-c337f48a8bb2941e206d7504b66a3a4f7299d572.zip |
Manage search with trigrams - 2
Diffstat (limited to 'chimere/models.py')
-rw-r--r-- | chimere/models.py | 103 |
1 file changed, 73 insertions, 30 deletions
diff --git a/chimere/models.py b/chimere/models.py index 7a803dc..a052b66 100644 --- a/chimere/models.py +++ b/chimere/models.py @@ -623,8 +623,10 @@ class GeographicItem(models.Model): _("Normalised weight"), blank=True, null=True, help_text=_("The weight normalised to be between 0 and 1. " "Automatically recalculated.")) - search_vector = models.TextField( - _("Search vector"), blank=True, null=True, + search_vector = SearchVectorField(_("Search vector"), blank=True, null=True, + help_text=_("Auto filled at save")) + search_for_trigram = models.TextField( + _("Search for trigram"), blank=True, null=True, help_text=_("Auto filled at save")) geom_attr = '' @@ -1021,28 +1023,35 @@ class GeographicItem(models.Model): """ subcats = SubCategory.getAvailable(instance=True, area=area) base_query = cls.objects.filter(categories__in=subcats) - search_factors = [('categories__name', 1), - ('categories__category__name', 1), - ('name', 1), - ('keywords', 0.5), - ('categories__keywords', 0.5), - ('search_vector', 0.25)] - - annotate_q = dict( - [("similarity_" + key, TrigramSimilarity(key, query)) - for key, fac in search_factors]) - for limit in [0.3, 0.1]: - filtr, order = None, [] - for key, factor in search_factors: - q = Q(**{"similarity_" + key + "__gt": limit / factor}) - if not filtr: - filtr = q - else: - filtr |= q - order.append("similarity_" + key) - q = base_query.annotate(**annotate_q).filter(filtr).order_by(*order) - if q.count(): - break + if settings.CHIMERE_SEARCH_TECH == 'search_vector': + q = cls.objects.filter(categories__in=subcats, + search_vector=SearchQuery( + query, + config=settings.CHIMERE_SEARCH_LANGUAGE)) + else: + search_factors = [('categories__name', 1), + ('categories__category__name', 1), + ('name', 1), + ('keywords', 0.5), + ('categories__keywords', 0.5), + ('search_for_trigram', 0.25)] + + annotate_q = dict( + [("similarity_" + key, TrigramSimilarity(key, query)) + for key, fac in search_factors]) + for limit in [0.3, 0.1]: + filtr, order = 
None, [] + for key, factor in search_factors: + q = Q(**{"similarity_" + key + "__gt": limit / factor}) + if not filtr: + filtr = q + else: + filtr |= q + order.append("similarity_" + key) + q = base_query.annotate(**annotate_q).filter(filtr).order_by( + *order) + if q.count(): + break if get_json: return cls.getGeoJSONs(q, slice=slice, check_next=check_next) if slice: @@ -1061,30 +1070,64 @@ class GeographicItem(models.Model): :param save: True if you want to save the object immediately :return: True if modified """ - old_search = "" + old_search, old_trigram = "", "" if self.search_vector: old_search = self.search_vector[:] + if self.search_for_trigram: + old_trigram = self.search_for_trigram[:] search_vectors = [] + search_for_trigram = [] + base_q = self.__class__.objects.filter(pk=self.pk) cat_keys = ['categories__name', 'categories__keywords', 'categories__category__name'] for cat in self.categories.values('pk').all(): q = copy.copy(base_q).filter(categories__pk=cat['pk']) + q = q.annotate( + search=SearchVector( + 'categories__name', + 'categories__keywords', + 'categories__category__name', + config=settings.CHIMERE_SEARCH_LANGUAGE + )) + search_vectors.append(q.all()[0].search) + + q = copy.copy(base_q).filter(categories__pk=cat['pk']) q = q.values(*cat_keys) item = q.all()[0] - search_vectors += [item[key] for key in cat_keys if item[key]] + search_for_trigram += [item[key] for key in cat_keys if item[key]] for pm in self.properties.values('pk').all(): q = copy.copy(base_q).filter(properties__pk=pm['pk']) + q = q.annotate( + search=SearchVector( + 'properties__search_value', + config=settings.CHIMERE_SEARCH_LANGUAGE + )) + search_vectors.append(q.all()[0].search) + + q = copy.copy(base_q).filter(properties__pk=pm['pk']) q = q.values('properties__search_value') item = q.all()[0] if item['properties__search_value']: - search_vectors.append(item['properties__search_value']) + search_for_trigram.append(item['properties__search_value']) + q = 
copy.copy(base_q).annotate( + search=SearchVector( + 'name', 'description', 'keywords', + config=settings.CHIMERE_SEARCH_LANGUAGE + )) + search_vectors.append(q.all()[0].search) + self.search_vector = utils.merge_tsvectors(search_vectors) + base_keys = ['name', 'description', 'keywords'] q = base_q.values(*base_keys) item = q.all()[0] - search_vectors += [item[key] for key in base_keys if item[key]] - self.search_vector = " ".join(set(search_vectors)) - changed = old_search != self.search_vector + search_for_trigram += [item[key] for key in base_keys if item[key]] + # deduplicate + search_for_trigram = " ".join(set(search_for_trigram)).split(" ") + self.search_for_trigram = " ".join(set(search_for_trigram)) + + changed = (old_search != self.search_vector) or ( + old_trigram != self.search_for_trigram) if save and changed: self.save() return changed |