diff options
| author | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-07-02 19:58:25 +0200 | 
|---|---|---|
| committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2018-08-13 18:26:03 +0200 | 
| commit | 6029e4f0e58451848e2c4812d107aae190aa10c7 (patch) | |
| tree | 38143096a878386d25f9d950a8c4248a30187344 /ishtar_common/views_item.py | |
| parent | 9de2c94a7a528e1ae24bc2a0a9bb9354329d0a93 (diff) | |
| download | Ishtar-6029e4f0e58451848e2c4812d107aae190aa10c7.tar.bz2 Ishtar-6029e4f0e58451848e2c4812d107aae190aa10c7.zip | |
Full text search: manage facet search (simple, hierarchic, OR) (refs #4180)
Diffstat (limited to 'ishtar_common/views_item.py')
| -rw-r--r-- | ishtar_common/views_item.py | 153 | 
1 files changed, 128 insertions, 25 deletions
| diff --git a/ishtar_common/views_item.py b/ishtar_common/views_item.py index f5e47a832..a2cc0762c 100644 --- a/ishtar_common/views_item.py +++ b/ishtar_common/views_item.py @@ -312,8 +312,16 @@ FORBIDDEN_CHAR = [u":"]  RESERVED_CHAR = [u"|", u"&"] -def _parse_query_string(string): +def _parse_query_string(string, request_keys, current_dct):      string = string.strip().lower() + +    if u"=" in string: +        splited = string.split(u"=") +        if len(splited) == 2 and splited[0] in request_keys: +            term, query = splited +            term = request_keys[term] +            current_dct[term] = query +            return ""      for reserved_char in FORBIDDEN_CHAR:          string = string.replace(reserved_char, u"")      if len(string) != 1: @@ -326,22 +334,53 @@ def _parse_query_string(string):      return string -def _parse_parentheses_groups(groups): +def _parse_parentheses_groups(groups, request_keys, current_dct=None):      """      Transform parentheses groups to query + +    :param groups: groups to transform (list) +    :param request_keys: request keys for facet search +    :param current_dct: +    :return: query string, query dict      """ +    if not current_dct: +        current_dct = {}      if type(groups) is not list:          string = groups.strip()          # split into many groups if spaces -        if ' ' not in string: -            return _parse_query_string(groups) -        return _parse_parentheses_groups(string.split(u" ")) + +        # do not split inside quotes +        current_index = 0 +        found = string.find('"', current_index) +        SEP = u"?ç;?"  # replace spaces inside quote with this characters +        previous_quote = None +        while found != -1: +            if previous_quote: +                string = string[0:previous_quote] + \ +                         string[previous_quote:found].replace(u' ', SEP) + \ +                         string[found:] +                previous_quote = None +                # SEP is larger than a space +                found = string.find('"', current_index) +            else: +                previous_quote = found +            current_index = found + 1 +            found = string.find('"', current_index) + +        string_groups = [gp.replace(SEP, u" ") for gp in string.split(u" ")] +        if len(string_groups) == 1: +            return _parse_query_string(string_groups[0], request_keys, +                                       current_dct), current_dct +        return _parse_parentheses_groups(string_groups, +                                         request_keys, current_dct)      if not groups:  # empty list -        return "" +        return "", current_dct      query = u"("      previous_sep, has_item = None, False      for item in groups: -        q = _parse_parentheses_groups(item).strip() +        q, current_dct = _parse_parentheses_groups(item, request_keys, +                                                   current_dct) +        q = q.strip()          if not q:              continue          if q in (u"|", u"&"): @@ -358,17 +397,24 @@ def _parse_parentheses_groups(groups):          has_item = True          previous_sep = None      query += u")" -    return unidecode(query) +    if query == u"()": +        query = u"" +    return unidecode(query), current_dct -def _search_manage_search_vector(dct): -    if 'search_vector' in dct: -        parentheses_groups = _parse_parentheses(dct['search_vector'].strip()) -        query = _parse_parentheses_groups(parentheses_groups) +def _search_manage_search_vector(dct, request_keys): +    if 'search_vector' not in dct: +        return dct + +    parentheses_groups = _parse_parentheses(dct['search_vector'].strip()) +    search_query, extra_dct = _parse_parentheses_groups(parentheses_groups, +                                                        request_keys) +    dct.update(extra_dct) +    if search_query:          dct['extras'].append(              {'where': ["search_vector @@ (to_tsquery(%s, %s)) = true"],               'params': [settings.ISHTAR_SEARCH_LANGUAGE, -                        query]} +                        search_query]}          )      return dct @@ -478,6 +524,8 @@ def get_item(model, func_name, default_name, extra_request_keys=[],          else:              my_relation_types_prefix = copy(relation_types_prefix) +        general_types = model.general_types() +          fields = [model._meta.get_field(k)                    for k in get_all_field_names(model)] @@ -599,6 +647,13 @@ def get_item(model, func_name, default_name, extra_request_keys=[],                  dct = request.session[func_name]          else:              request.session[func_name] = dct + +        dct['extras'] = [] +        dct = _search_manage_search_vector(dct, request_keys) +        search_vector = "" +        if 'search_vector' in dct: +            search_vector = dct.pop('search_vector') +          for k in (list(my_bool_fields) + list(my_reversed_bool_fields)):              if k in dct:                  if dct[k] == u"1": @@ -682,19 +737,67 @@ def get_item(model, func_name, default_name, extra_request_keys=[],                      break                  elif req.endswith(k_hr + '__pk'):                      val = dct.pop(req) -                    reqs = Q(**{req: val}) -                    req = req[:-2] + '__' -                    for idx in range(HIERARCHIC_LEVELS): -                        req = req[:-2] + 'parent__pk' -                        q = Q(**{req: val}) -                        reqs |= q -                    and_reqs.append(reqs) + +                    if u";" in val: +                        # OR request +                        values = val.split(u";") +                    else: +                        values = [val] +                    base_req = req[:] +                    reqs = None +                    for val in values: +                        suffix = "pk" +                        req = base_req[:] + +                        if val.startswith(u'"') and val.startswith(u'"'): +                            # manage search text by label +                            if u"%" in val: +                                suffix = "label__icontains" +                            else: +                                suffix = "label__iexact" +                            val = val[1:-1] +                            req = req[:-2] + suffix + +                        if not reqs: +                            reqs = Q(**{req: val}) +                        else: +                            reqs |= Q(**{req: val}) +                        for idx in range(HIERARCHIC_LEVELS): +                            req = req[:-(len(suffix))] + 'parent__' + suffix +                            q = Q(**{req: val}) +                            reqs |= q +                    if reqs: +                        and_reqs.append(reqs)                      break -        dct['extras'] = [] -        dct = _search_manage_search_vector(dct) -        search_vector = "" -        if 'search_vector' in dct: -            search_vector = dct.pop('search_vector') + +        # manage search text by label +        for base_k in general_types: +            if base_k in HIERARCHIC_FIELDS: +                continue +            k = base_k + "__pk" +            if k not in dct or not dct[k].startswith(u'"') \ +                    or not dct[k].startswith(u'"'): +                continue +            val = dct.pop(k) +            if u";" in val: +                # OR request +                values = val.split(u";") +            else: +                values = [val] +            reqs = None +            for val in values: +                if not val.endswith(u'"') or not val.startswith(u""): +                    continue +                query = val[1:-1] +                suffix = "__label__icontains" if u"%" in val else \ +                    "__label__iexact" +                if not reqs: +                    reqs = Q(**{base_k + suffix: query}) +                else: +                    reqs |= Q(**{base_k + suffix: query}) +            if reqs: +                and_reqs.append(reqs) +          extras = dct.pop('extras')          query = Q(**dct) | 
