summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2023-01-18 23:33:56 +0100
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2023-01-19 18:06:55 +0100
commit: 34e5e81e7afe240b05f2df834d21e08dbb4bcf79 (patch)
tree: d27d1e598f6e665f0bd624cc1523440008ba7697
parent: 803ce58a52cf818e874954cbf89237ba819027bc (diff)
download: Ishtar-34e5e81e7afe240b05f2df834d21e08dbb4bcf79.tar.bz2
download: Ishtar-34e5e81e7afe240b05f2df834d21e08dbb4bcf79.zip
Free search: "raw" index for reference - improve parent only search
-rw-r--r-- CHANGES.md | 5
-rw-r--r-- archaeological_context_records/models.py | 4
-rw-r--r-- archaeological_context_records/tests.py | 5
-rw-r--r-- archaeological_files/models.py | 4
-rw-r--r-- archaeological_finds/models_finds.py | 21
-rw-r--r-- archaeological_operations/models.py | 37
-rw-r--r-- archaeological_warehouse/models.py | 9
-rw-r--r-- ishtar_common/management/commands/ishtar_maintenance.py | 2
-rw-r--r-- ishtar_common/models.py | 6
-rw-r--r-- ishtar_common/models_common.py | 59
-rw-r--r-- ishtar_common/utils.py | 11
-rw-r--r-- ishtar_common/views_item.py | 6
12 files changed, 116 insertions, 53 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 111f1d5cd..d4f072642 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -6,6 +6,11 @@ date: 2023-01-18
Ishtar changelog
================
+### Features/improvements ###
+- Free search:
+ - "raw" index for reference (add in index whole reference and split ref)
+ - improve parent only search index
+
v4.0.35 - 2023-01-18
--------------------
diff --git a/archaeological_context_records/models.py b/archaeological_context_records/models.py
index 72a835cc2..074bc3358 100644
--- a/archaeological_context_records/models.py
+++ b/archaeological_context_records/models.py
@@ -635,10 +635,10 @@ class ContextRecord(
PARENT_ONLY_SEARCH_VECTORS = ["operation", "archaeological_site", "parcel"]
BASE_SEARCH_VECTORS = [
- SearchVectorConfig("cached_label"),
- SearchVectorConfig("label"),
+ SearchVectorConfig("label", "raw"),
SearchVectorConfig("location"),
SearchVectorConfig("town__name"),
+ SearchVectorConfig("town__numero_insee", "raw"),
SearchVectorConfig("interpretation", "local"),
SearchVectorConfig("filling", "local"),
SearchVectorConfig("datings_comment", "local"),
diff --git a/archaeological_context_records/tests.py b/archaeological_context_records/tests.py
index 9842340f3..f0b12e0d6 100644
--- a/archaeological_context_records/tests.py
+++ b/archaeological_context_records/tests.py
@@ -429,8 +429,9 @@ class ContextRecordTest(ContextRecordInit, TestCase):
self.assertIsNotNone(cr.search_vector)
for key in ("label", "heeer"):
self.assertIn(key, cr.search_vector)
- cr.operation.code_patriarche = "PATRIARCHE"
- cr.operation.save()
+ operation = models_ope.Operation.objects.get(pk=cr.operation.pk)
+ operation.code_patriarche = "PATRIARCHE"
+ operation.save()
cr = models.ContextRecord.objects.get(pk=cr.pk)
profile = get_current_profile()
diff --git a/archaeological_files/models.py b/archaeological_files/models.py
index c2992935b..bb31c1c24 100644
--- a/archaeological_files/models.py
+++ b/archaeological_files/models.py
@@ -417,11 +417,11 @@ class File(
}
BASE_SEARCH_VECTORS = [
SearchVectorConfig("name"),
- SearchVectorConfig("internal_reference"),
+ SearchVectorConfig("internal_reference", "raw"),
SearchVectorConfig("file_type__label"),
SearchVectorConfig("saisine_type__label"),
SearchVectorConfig("permit_type__label"),
- SearchVectorConfig("permit_reference"),
+ SearchVectorConfig("permit_reference", "raw"),
SearchVectorConfig("comment", "local"),
SearchVectorConfig("research_comment", "local"),
]
diff --git a/archaeological_finds/models_finds.py b/archaeological_finds/models_finds.py
index 0efb9963e..6be609056 100644
--- a/archaeological_finds/models_finds.py
+++ b/archaeological_finds/models_finds.py
@@ -459,12 +459,12 @@ class BaseFind(
CACHED_COMPLETE_ID = "cache_complete_id"
PARENT_SEARCH_VECTORS = ["context_record"]
BASE_SEARCH_VECTORS = [
- SearchVectorConfig("label"),
+ SearchVectorConfig("label", "raw"),
SearchVectorConfig("description", "local"),
SearchVectorConfig("comment", "local"),
- SearchVectorConfig("cache_short_id"),
- SearchVectorConfig("cache_complete_id"),
- SearchVectorConfig("excavation_id"),
+ SearchVectorConfig("cache_short_id", "raw"),
+ SearchVectorConfig("cache_complete_id", "raw"),
+ SearchVectorConfig("excavation_id", "raw"),
]
DOC_VALUES = [
@@ -1652,19 +1652,18 @@ class Find(
"""
PARENT_SEARCH_VECTORS = ["base_finds"]
+ PARENT_ONLY_SEARCH_VECTORS = ["container"]
BASE_SEARCH_VECTORS = [
- SearchVectorConfig("cached_label"),
- SearchVectorConfig("label"),
+ SearchVectorConfig("cached_label", "raw"),
+ SearchVectorConfig("label", "raw"),
SearchVectorConfig("description", "local"),
- SearchVectorConfig("container__location__name"),
- SearchVectorConfig("container__reference"),
SearchVectorConfig("mark"),
SearchVectorConfig("comment", "local"),
SearchVectorConfig("dating_comment", "local"),
- SearchVectorConfig("previous_id"),
+ SearchVectorConfig("previous_id", "raw"),
SearchVectorConfig("denomination"),
- SearchVectorConfig("museum_id"),
- SearchVectorConfig("laboratory_id"),
+ SearchVectorConfig("museum_id", "raw"),
+ SearchVectorConfig("laboratory_id", "raw"),
SearchVectorConfig("decoration"),
SearchVectorConfig("manufacturing_place"),
]
diff --git a/archaeological_operations/models.py b/archaeological_operations/models.py
index caaff33d9..191918237 100644
--- a/archaeological_operations/models.py
+++ b/archaeological_operations/models.py
@@ -326,19 +326,20 @@ class ArchaeologicalSite(
SearchVectorConfig("locality_ngi", "local"),
SearchVectorConfig("name"),
SearchVectorConfig("oceanographic_service_localisation"),
- SearchVectorConfig("reference"),
- SearchVectorConfig("other_reference"),
- SearchVectorConfig("shipwreck_code"),
+ SearchVectorConfig("reference", "raw"),
+ SearchVectorConfig("other_reference", "raw"),
+ SearchVectorConfig("shipwreck_code", "raw"),
SearchVectorConfig("shipwreck_name"),
- SearchVectorConfig("drassm_number"),
- SearchVectorConfig("affmar_number"),
+ SearchVectorConfig("drassm_number", "raw"),
+ SearchVectorConfig("affmar_number", "raw"),
]
M2M_SEARCH_VECTORS = [
SearchVectorConfig("periods__label", "local"),
SearchVectorConfig("remains__label", "local"),
SearchVectorConfig("towns__name"),
+ SearchVectorConfig("towns__numero_insee", "raw"),
]
- PARENT_SEARCH_VECTORS = ["operations"]
+ PARENT_ONLY_SEARCH_VECTORS = ["operations"]
DATED_FIELDS = BaseHistorizedItem.DATED_FIELDS + ["sinking_date"]
@@ -907,6 +908,15 @@ class ParcelItem:
parcels[key] = p
+def add_oa_prefix(value):
+ if not value:
+ return ""
+ profile = get_current_profile()
+ if not profile.operation_prefix:
+ return ""
+ return profile.operation_prefix + value
+
+
class Operation(
ClosedItem,
DocumentItem,
@@ -1035,25 +1045,26 @@ class Operation(
BASE_SEARCH_VECTORS = [
SearchVectorConfig("abstract", "local"),
SearchVectorConfig("address", "local"),
- SearchVectorConfig("code_patriarche"),
+ SearchVectorConfig("code_patriarche", "raw"),
+ SearchVectorConfig("code_patriarche", "raw", func=add_oa_prefix),
SearchVectorConfig("comment", "local"),
SearchVectorConfig("common_name"),
SearchVectorConfig("common_name", "local"),
SearchVectorConfig("in_charge__cached_label"),
SearchVectorConfig("protagonist__cached_label"),
- SearchVectorConfig("official_report_number"),
- SearchVectorConfig("old_code"),
+ SearchVectorConfig("official_report_number", "raw"),
+ SearchVectorConfig("old_code", "raw"),
SearchVectorConfig("operation_type__label"),
- SearchVectorConfig("operator_reference"),
+ SearchVectorConfig("operator_reference", "raw"),
SearchVectorConfig("operator__cached_label"),
SearchVectorConfig("scientist__cached_label"),
SearchVectorConfig("scientific_documentation_comment", "local"),
SearchVectorConfig("seizure_name"),
- SearchVectorConfig("drassm_code"),
+ SearchVectorConfig("drassm_code", "raw"),
]
PROPERTY_SEARCH_VECTORS = [
- SearchVectorConfig("full_reference"),
- SearchVectorConfig("short_code_patriarche"),
+ SearchVectorConfig("full_reference", "raw"),
+ SearchVectorConfig("short_code_patriarche", "raw"),
]
INT_SEARCH_VECTORS = [
SearchVectorConfig("year"),
diff --git a/archaeological_warehouse/models.py b/archaeological_warehouse/models.py
index 9a8bc86c2..0997d6ea7 100644
--- a/archaeological_warehouse/models.py
+++ b/archaeological_warehouse/models.py
@@ -836,17 +836,18 @@ class Container(
]
IMAGE_PREFIX = "containers/"
BASE_SEARCH_VECTORS = [
- SearchVectorConfig("reference"),
+ SearchVectorConfig("reference", "raw"),
SearchVectorConfig("container_type__label"),
SearchVectorConfig("cached_location"),
- SearchVectorConfig("old_reference"),
+ SearchVectorConfig("old_reference", "raw"),
SearchVectorConfig("comment", "local"),
]
M2M_SEARCH_VECTORS = [
- SearchVectorConfig("division__reference"),
- SearchVectorConfig("division__division__division__label"),
+ SearchVectorConfig("division__reference", "raw"),
+ SearchVectorConfig("division__division__division__label", "raw"),
]
PARENT_SEARCH_VECTORS = ["parent"]
+ PARENT_ONLY_SEARCH_VECTORS = ["finds", "finds_ref"]
STATISTIC_MODALITIES_OPTIONS = OrderedDict(
[
diff --git a/ishtar_common/management/commands/ishtar_maintenance.py b/ishtar_common/management/commands/ishtar_maintenance.py
index c4ad52ab6..4f050088e 100644
--- a/ishtar_common/management/commands/ishtar_maintenance.py
+++ b/ishtar_common/management/commands/ishtar_maintenance.py
@@ -131,7 +131,7 @@ def _end_task(changed_nb, msg, quiet, store_results, log, log_name, csv_cols):
writer.writerow(csv_cols)
writer.writerows(store_results)
if not quiet:
- sys.stdout.write(f"log: {path} written.")
+ sys.stdout.write(f"log: {path} written.\n")
def task_main_image(quiet=False, log=False):
diff --git a/ishtar_common/models.py b/ishtar_common/models.py
index 442e0c687..fc2487d7b 100644
--- a/ishtar_common/models.py
+++ b/ishtar_common/models.py
@@ -3780,9 +3780,9 @@ class Document(
BASE_SEARCH_VECTORS = [
SearchVectorConfig("title"),
SearchVectorConfig("source_type__label"),
- SearchVectorConfig("external_id"),
- SearchVectorConfig("reference"),
- SearchVectorConfig("internal_reference"),
+ SearchVectorConfig("external_id", "raw"),
+ SearchVectorConfig("reference", "raw"),
+ SearchVectorConfig("internal_reference", "raw"),
SearchVectorConfig("description", "local"),
SearchVectorConfig("comment", "local"),
SearchVectorConfig("additional_information", "local"),
diff --git a/ishtar_common/models_common.py b/ishtar_common/models_common.py
index a6f9ac0e8..03fe26f0c 100644
--- a/ishtar_common/models_common.py
+++ b/ishtar_common/models_common.py
@@ -884,8 +884,36 @@ class FullSearch(models.Model):
deactivate()
return query_parameters
+ @classmethod
+ def _update_raw_search_field(cls, value):
+ result = []
+ if not value:
+ value = ""
+ for val in value.split("'"):
+ result.append(f"'{val.lower()}':1")
+ SEPS = [" ", "-", "/"]
+ values = []
+ # split ID terms
+ for idx, sep in enumerate(SEPS):
+ if not idx:
+ values = value.split(sep)
+ continue
+ new_values = []
+ for val in values:
+ new_values += val.split(sep)
+ values = new_values
+ for val in values:
+ if len(val) < 2:
+ continue
+ val = val.replace("'", "").lower()
+ result.append(f"'{val}':1")
+ return result
+
def _update_search_field(self, search_vector_conf, search_vectors, data):
for value in search_vector_conf.format(data):
+ if search_vector_conf.language == "raw":
+ search_vectors += self._update_raw_search_field(value)
+ continue
with connection.cursor() as cursor:
cursor.execute(
"SELECT to_tsvector(%s, %s)", [search_vector_conf.language, value]
@@ -921,7 +949,7 @@ class FullSearch(models.Model):
logger.warning("No search_vectors defined for {}".format(self.__class__))
return
if getattr(self, "_search_updated", None):
- return
+ return self.search_vector
JsonDataField = apps.get_model("ishtar_common", "JsonDataField")
self._search_updated = True
@@ -938,6 +966,12 @@ class FullSearch(models.Model):
for item in rel_key.values("pk").all():
query_dct = {key + "__pk": item["pk"]}
q = copy.copy(base_q).filter(**query_dct)
+ if m2m_search_vector.language == "raw":
+ q = q.values_list(m2m_search_vector.key, flat=True)
+ search_vectors += self._update_raw_search_field(q.all()[0])
+ continue
+ query_dct = {key + "__pk": item["pk"]}
+ q = copy.copy(base_q).filter(**query_dct)
q = q.annotate(
search=SearchVector(
m2m_search_vector.key, config=m2m_search_vector.language
@@ -961,17 +995,17 @@ class FullSearch(models.Model):
elif parent:
search_vectors.append(parent.search_vector)
- for PARENT_ONLY_SEARCH_VECTOR in self.PARENT_ONLY_SEARCH_VECTORS:
- parent = getattr(self, PARENT_ONLY_SEARCH_VECTOR)
- if hasattr(parent, "all"): # m2m
- for p in parent.all():
+ for PARENT_ONLY_SEARCH_VECTOR in self.PARENT_ONLY_SEARCH_VECTORS:
+ parent = getattr(self, PARENT_ONLY_SEARCH_VECTOR)
+ if hasattr(parent, "all"): # m2m
+ for p in parent.all():
+ search_vectors.append(
+ p.update_search_vector(save=False, exclude_parent=True)
+ )
+ elif parent:
search_vectors.append(
- p.update_search_vector(save=False, exclude_parent=True)
+ parent.update_search_vector(save=False, exclude_parent=True)
)
- elif parent:
- search_vectors.append(
- parent.update_search_vector(save=False, exclude_parent=True)
- )
if self.BASE_SEARCH_VECTORS:
# query "simple" fields
@@ -4110,7 +4144,10 @@ class SearchVectorConfig:
value = ""
if not self.func:
return [value]
- return self.func(value)
+ value = self.func(value)
+ if not isinstance(value, list):
+ return [value]
+ return value
class ShortMenuItem:
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py
index 340fb9ee0..1219dd454 100644
--- a/ishtar_common/utils.py
+++ b/ishtar_common/utils.py
@@ -934,6 +934,9 @@ def num2col(n):
return string
+RE_TSVECTOR = re.compile(r"('[^']+':\d+(?:,\d+)*)")
+
+
def merge_tsvectors(vectors):
"""
Parse tsvector to merge them in one string
@@ -952,16 +955,20 @@ def merge_tsvectors(vectors):
if max_position > current_position:
current_position = max_position
- for dct_member in vector.split(" "):
+ for dct_member in RE_TSVECTOR.findall(vector):
splitted = dct_member.split(":")
key = ":".join(splitted[:-1])
- positions = splitted[-1]
key = key[1:-1] # remove quotes
+ result_dict[key] = [1]
+ """
+ # position is not used today - simplify
+ positions = splitted[-1]
positions = [int(pos) + current_position for pos in positions.split(",")]
if key in result_dict:
result_dict[key] += positions
else:
result_dict[key] = positions
+ """
# {'lamelie': [1, 42, 5]} => {'lamelie': "1,42,5"}
result_dict = {
diff --git a/ishtar_common/views_item.py b/ishtar_common/views_item.py
index 821a6635f..f905d677c 100644
--- a/ishtar_common/views_item.py
+++ b/ishtar_common/views_item.py
@@ -835,9 +835,11 @@ def _search_manage_search_vector(
{
"where": [
f"{model._meta.db_table}.search_vector @@ (to_tsquery(%s, %s)) = true OR " +
- f"{model._meta.db_table}.search_vector @@ (to_tsquery('simple', %s)) = true"
+ f"{model._meta.db_table}.search_vector @@ (to_tsquery('simple', %s)) = true OR "
+ f"{model._meta.db_table}.search_vector::tsvector @@ %s::tsquery = true"
],
- "params": [settings.ISHTAR_SEARCH_LANGUAGE, search_query, search_query],
+ "params": [settings.ISHTAR_SEARCH_LANGUAGE, search_query, search_query,
+ search_query],
}
)
return dct, exc_dct, distinct_queries