diff options
Diffstat (limited to 'ishtar_common/utils.py')
-rw-r--r-- | ishtar_common/utils.py | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py index 09c470823..709f020a4 100644 --- a/ishtar_common/utils.py +++ b/ishtar_common/utils.py @@ -1362,6 +1362,98 @@ def create_slug(model, name, slug_attr="slug", max_length=100, pk=None): return final_slug +def _get_parse_string(trunc_number=None): + def parse_string(value): + value = value.strip() + if value == "#EMPTY": + value = "" + value = value.replace(" ", " ") + if trunc_number: + value = value[:trunc_number] + return value + + return parse_string + + +parse_string = _get_parse_string() + + +def parse_insee(value): + value = parse_string(value) + values = [] + while len(value) > 4: + values.append(value[:5]) + value = value[5:] + towns = [] + Town = apps.get_model("ishtar_common", "Town") + for value in values: + try: + town = Town.objects.get(numero_insee=value) + except Town.DoesNotExist: + # sys.stderr.write('Numero INSEE : %s non existant en base' + # % value) + continue + towns.append(town) + return towns + + +PARCEL_YEAR_REGEXP = re.compile(r"^([0-9]{4})[ :]+") +PARCEL_SECTION_REGEXP = re.compile( + r"(?: )*(?:[Ss]ection(?:s)?)?(?: )*([A-Z][A-Z0-9]{0,3})[ :]*" + r"((?:(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*[0-9]+[p]?)+)" +) +PARCEL_NB_RANGE_REGEXP = re.compile(r"([0-9]+[p]?) (?:à|to) ([0-9]+[p]?)") +PARCEL_NB_REGEXP = re.compile(r"(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*([0-9]+[p]?)") + + +def parse_parcels(parcel_str, insee_code=None, owner=None): + parcels, town = [], None + if insee_code: + town = parse_insee(insee_code) + # manage only one town at a time + if len(town) >= 2 or not town: + return parcels + town = town[0] + parcel_str = parcel_str.strip().replace( + "\ufe50", ",").replace("\uff0c", ",").replace("\n", " ") + parcel_str = re.sub(r'\s+', ' ', parcel_str) + parcel_str = parcel_str.replace("à", "_aaaa_").replace("n°", "_nnnn_") + parcel_str = parcel_str.encode("ascii", "ignore").decode("utf-8") + parcel_str = parcel_str.replace("_aaaa_", "à").replace("_nnnn_", "n°") + m = PARCEL_YEAR_REGEXP.match(parcel_str) + year = None + if m: + year = m.groups()[0] + parcel_str = parcel_str[m.span()[1]:] + for parcel in PARCEL_SECTION_REGEXP.findall(parcel_str): + sector, nums = parcel[0], parcel[1] + for num in PARCEL_NB_REGEXP.findall(nums): + if len(str(num)) > 6: + continue + dct = {"year": year, "section": sector, "parcel_number": num} + if town: + dct["town"] = town + if owner: + dct["history_modifier"] = owner + parcels.append(dct) + for parcel_ranges in PARCEL_NB_RANGE_REGEXP.findall(nums): + lower_range, higher_range = parcel_ranges + try: + # the lower range itself has been already kept + lower_range = int(lower_range) + 1 + higher_range = int(higher_range) + except ValueError: + continue + for num in range(lower_range, higher_range): + dct = {"year": year, "section": sector, "parcel_number": str(num)} + if town: + dct["town"] = town + if owner: + dct["history_modifier"] = owner + parcels.append(dct) + return parcels + + def get_all_field_names(model): return list( set( |