diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2023-01-18 23:33:56 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2023-01-19 18:06:55 +0100 |
commit | 34e5e81e7afe240b05f2df834d21e08dbb4bcf79 (patch) | |
tree | d27d1e598f6e665f0bd624cc1523440008ba7697 /ishtar_common/utils.py | |
parent | 803ce58a52cf818e874954cbf89237ba819027bc (diff) | |
download | Ishtar-34e5e81e7afe240b05f2df834d21e08dbb4bcf79.tar.bz2 Ishtar-34e5e81e7afe240b05f2df834d21e08dbb4bcf79.zip |
Free search: "raw" index for reference - improve parent only search
Diffstat (limited to 'ishtar_common/utils.py')
-rw-r--r-- | ishtar_common/utils.py | 11 |
1 files changed, 9 insertions, 2 deletions
```diff
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py
index 340fb9ee0..1219dd454 100644
--- a/ishtar_common/utils.py
+++ b/ishtar_common/utils.py
@@ -934,6 +934,9 @@ def num2col(n):
     return string
 
 
+RE_TSVECTOR = re.compile(r"('[^']+':\d+(?:,\d+)*)")
+
+
 def merge_tsvectors(vectors):
     """
     Parse tsvector to merge them in one string
@@ -952,16 +955,20 @@ def merge_tsvectors(vectors):
                 if max_position > current_position:
                     current_position = max_position
 
-        for dct_member in vector.split(" "):
+        for dct_member in RE_TSVECTOR.findall(vector):
             splitted = dct_member.split(":")
             key = ":".join(splitted[:-1])
-            positions = splitted[-1]
             key = key[1:-1]  # remove quotes
+            result_dict[key] = [1]
+            """
+            # position is not used today - simplify
+            positions = splitted[-1]
             positions = [int(pos) + current_position for pos in positions.split(",")]
             if key in result_dict:
                 result_dict[key] += positions
             else:
                 result_dict[key] = positions
+            """
 
     # {'lamelie': [1, 42, 5]} => {'lamelie': "1,42,5"}
     result_dict = {
```