summary refs log tree commit diff
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2018-09-28 08:57:18 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2018-09-28 09:05:15 +0200
commit c337f48a8bb2941e206d7504b66a3a4f7299d572 (patch)
tree 6c9dd01b94af1eca3a6e55dafb6d3cd9a5ff92aa
parent 5fe8cb4248935e7adc62830ebb73c32048367983 (diff)
download Chimère-c337f48a8bb2941e206d7504b66a3a4f7299d572.tar.bz2
Chimère-c337f48a8bb2941e206d7504b66a3a4f7299d572.zip
Manage search with trigrams - 2
-rw-r--r-- chimere/migrations/0019_auto_20180928_0848.py (renamed from chimere/migrations/0019_auto_20180927_1659.py) 32
-rw-r--r-- chimere/models.py 103
-rw-r--r-- settings.py 1
3 files changed, 90 insertions, 46 deletions
diff --git a/chimere/migrations/0019_auto_20180927_1659.py b/chimere/migrations/0019_auto_20180928_0848.py
index e64d396..bf5186f 100644
--- a/chimere/migrations/0019_auto_20180927_1659.py
+++ b/chimere/migrations/0019_auto_20180928_0848.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Generated by Django 1.11.5 on 2018-09-27 16:59
+# Generated by Django 1.11.5 on 2018-09-28 08:48
from __future__ import unicode_literals
from django.db import migrations, models
@@ -16,6 +16,21 @@ class Migration(migrations.Migration):
operations = [
TrigramExtension(),
UnaccentExtension(),
+ migrations.AddField(
+ model_name='marker',
+ name='search_for_trigram',
+ field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search for trigram'),
+ ),
+ migrations.AddField(
+ model_name='polygon',
+ name='search_for_trigram',
+ field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search for trigram'),
+ ),
+ migrations.AddField(
+ model_name='route',
+ name='search_for_trigram',
+ field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search for trigram'),
+ ),
migrations.AlterField(
model_name='area',
name='single_click_map',
@@ -26,19 +41,4 @@ class Migration(migrations.Migration):
name='filtr',
field=models.TextField(blank=True, help_text=' ', null=True, verbose_name='Filter'),
),
- migrations.AlterField(
- model_name='marker',
- name='search_vector',
- field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
- ),
- migrations.AlterField(
- model_name='polygon',
- name='search_vector',
- field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
- ),
- migrations.AlterField(
- model_name='route',
- name='search_vector',
- field=models.TextField(blank=True, help_text='Auto filled at save', null=True, verbose_name='Search vector'),
- ),
]
diff --git a/chimere/models.py b/chimere/models.py
index 7a803dc..a052b66 100644
--- a/chimere/models.py
+++ b/chimere/models.py
@@ -623,8 +623,10 @@ class GeographicItem(models.Model):
_("Normalised weight"), blank=True, null=True,
help_text=_("The weight normalised to be between 0 and 1. "
"Automatically recalculated."))
- search_vector = models.TextField(
- _("Search vector"), blank=True, null=True,
+ search_vector = SearchVectorField(_("Search vector"), blank=True, null=True,
+ help_text=_("Auto filled at save"))
+ search_for_trigram = models.TextField(
+ _("Search for trigram"), blank=True, null=True,
help_text=_("Auto filled at save"))
geom_attr = ''
@@ -1021,28 +1023,35 @@ class GeographicItem(models.Model):
"""
subcats = SubCategory.getAvailable(instance=True, area=area)
base_query = cls.objects.filter(categories__in=subcats)
- search_factors = [('categories__name', 1),
- ('categories__category__name', 1),
- ('name', 1),
- ('keywords', 0.5),
- ('categories__keywords', 0.5),
- ('search_vector', 0.25)]
-
- annotate_q = dict(
- [("similarity_" + key, TrigramSimilarity(key, query))
- for key, fac in search_factors])
- for limit in [0.3, 0.1]:
- filtr, order = None, []
- for key, factor in search_factors:
- q = Q(**{"similarity_" + key + "__gt": limit / factor})
- if not filtr:
- filtr = q
- else:
- filtr |= q
- order.append("similarity_" + key)
- q = base_query.annotate(**annotate_q).filter(filtr).order_by(*order)
- if q.count():
- break
+ if settings.CHIMERE_SEARCH_TECH == 'search_vector':
+ q = cls.objects.filter(categories__in=subcats,
+ search_vector=SearchQuery(
+ query,
+ config=settings.CHIMERE_SEARCH_LANGUAGE))
+ else:
+ search_factors = [('categories__name', 1),
+ ('categories__category__name', 1),
+ ('name', 1),
+ ('keywords', 0.5),
+ ('categories__keywords', 0.5),
+ ('search_for_trigram', 0.25)]
+
+ annotate_q = dict(
+ [("similarity_" + key, TrigramSimilarity(key, query))
+ for key, fac in search_factors])
+ for limit in [0.3, 0.1]:
+ filtr, order = None, []
+ for key, factor in search_factors:
+ q = Q(**{"similarity_" + key + "__gt": limit / factor})
+ if not filtr:
+ filtr = q
+ else:
+ filtr |= q
+ order.append("similarity_" + key)
+ q = base_query.annotate(**annotate_q).filter(filtr).order_by(
+ *order)
+ if q.count():
+ break
if get_json:
return cls.getGeoJSONs(q, slice=slice, check_next=check_next)
if slice:
@@ -1061,30 +1070,64 @@ class GeographicItem(models.Model):
:param save: True if you want to save the object immediately
:return: True if modified
"""
- old_search = ""
+ old_search, old_trigram = "", ""
if self.search_vector:
old_search = self.search_vector[:]
+ if self.search_for_trigram:
+ old_trigram = self.search_for_trigram[:]
search_vectors = []
+ search_for_trigram = []
+
base_q = self.__class__.objects.filter(pk=self.pk)
cat_keys = ['categories__name', 'categories__keywords',
'categories__category__name']
for cat in self.categories.values('pk').all():
q = copy.copy(base_q).filter(categories__pk=cat['pk'])
+ q = q.annotate(
+ search=SearchVector(
+ 'categories__name',
+ 'categories__keywords',
+ 'categories__category__name',
+ config=settings.CHIMERE_SEARCH_LANGUAGE
+ ))
+ search_vectors.append(q.all()[0].search)
+
+ q = copy.copy(base_q).filter(categories__pk=cat['pk'])
q = q.values(*cat_keys)
item = q.all()[0]
- search_vectors += [item[key] for key in cat_keys if item[key]]
+ search_for_trigram += [item[key] for key in cat_keys if item[key]]
for pm in self.properties.values('pk').all():
q = copy.copy(base_q).filter(properties__pk=pm['pk'])
+ q = q.annotate(
+ search=SearchVector(
+ 'properties__search_value',
+ config=settings.CHIMERE_SEARCH_LANGUAGE
+ ))
+ search_vectors.append(q.all()[0].search)
+
+ q = copy.copy(base_q).filter(properties__pk=pm['pk'])
q = q.values('properties__search_value')
item = q.all()[0]
if item['properties__search_value']:
- search_vectors.append(item['properties__search_value'])
+ search_for_trigram.append(item['properties__search_value'])
+ q = copy.copy(base_q).annotate(
+ search=SearchVector(
+ 'name', 'description', 'keywords',
+ config=settings.CHIMERE_SEARCH_LANGUAGE
+ ))
+ search_vectors.append(q.all()[0].search)
+ self.search_vector = utils.merge_tsvectors(search_vectors)
+
base_keys = ['name', 'description', 'keywords']
q = base_q.values(*base_keys)
item = q.all()[0]
- search_vectors += [item[key] for key in base_keys if item[key]]
- self.search_vector = " ".join(set(search_vectors))
- changed = old_search != self.search_vector
+ search_for_trigram += [item[key] for key in base_keys if item[key]]
+ # deduplicate
+ search_for_trigram = " ".join(set(search_for_trigram)).split(" ")
+ self.search_for_trigram = " ".join(set(search_for_trigram))
+
+ changed = (old_search != self.search_vector) or (
+ old_trigram != self.search_for_trigram)
if save and changed:
self.save()
return changed
diff --git a/settings.py b/settings.py
index 878d1cd..ebb64b6 100644
--- a/settings.py
+++ b/settings.py
@@ -136,6 +136,7 @@ CHIMERE_THUMBS_SCALE_WIDTH = None
# search engine
CHIMERE_SEARCH_LANGUAGE = 'french'
+CHIMERE_SEARCH_TECH = 'search_vector' # "search_vector" or "trigram"
HAYSTACK_SEARCH_ENGINE = False
HAYSTACK_CONNECTIONS = {