#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Étienne Loks

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# See the file COPYING for details.

import re

# Python 2/3 compatibility: the text type and the lazy integer range have
# different names on each major version.
try:
    _text_type = unicode
    _int_range = xrange
except NameError:
    _text_type = str
    _int_range = range

# Parcel numbers longer than this many characters (optional "p" suffix
# included) are ignored, both as single numbers and as range bounds.
_MAX_PARCEL_NB_LENGTH = 6

# Optional leading four-digit year followed by spaces and/or colons.
PARCEL_YEAR_REGEXP = re.compile(r"^([0-9]{4})[ :]+")
# Group 1: section code (one capital letter followed by up to three capital
# letters or digits); group 2: the run of parcel numbers, with their
# separators, that follows the section code.
# The patterns contain no backslash, so plain ``u""`` literals are
# equivalent to raw ones and are valid on both Python 2 and Python 3.
PARCEL_SECTION_REGEXP = re.compile(
    u"(?: )*(?:[Ss]ection(?:s)?)?(?: )*([A-Z][A-Z0-9]{0,3})[ :]*"
    u"((?:(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*[0-9]+[p]?)+)")
# Two parcel numbers joined by " à " or " to " (single spaces).
PARCEL_NB_RANGE_REGEXP = re.compile(u'([0-9]+[p]?) (?:à|to) ([0-9]+[p]?)')
# One parcel number, skipping any separators/keywords that precede it.
PARCEL_NB_REGEXP = re.compile(
    u'(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*([0-9]+[p]?)')


def _build_parcel(year, section, number, town, owner):
    """Return one parcel dict; optional keys are added only when set."""
    parcel = {'year': year, 'section': section, 'parcel_number': number}
    if town:
        parcel['town'] = town
    if owner:
        parcel['history_modifier'] = owner
    return parcel


def parse_parcels(parcel_str, insee_code=None, owner=None):
    """Parse a free-text parcel description into a list of parcel dicts.

    The string may start with a four-digit year, followed by one or more
    "section + parcel numbers" groups, e.g.
    ``"2013: section AB parcelles 10 to 12"``.

    :param parcel_str: the text to parse.
    :param insee_code: optional code handed to ``parse_insee``; when given,
        the single result is stored under the ``'town'`` key and an empty
        result makes the function return an empty list.
    :param owner: optional value stored under ``'history_modifier'``.
    :returns: a list of dicts with the keys ``'year'`` (string or ``None``),
        ``'section'`` and ``'parcel_number'``, plus ``'town'`` and
        ``'history_modifier'`` when available.  For each section, explicitly
        written numbers come first, then the numbers strictly inside each
        "a to b" range.
    :raises AssertionError: if ``parse_insee`` returns more than one item.
    """
    parcels, town = [], None
    if insee_code:
        # NOTE(review): parse_insee is neither defined nor imported in the
        # visible part of this module -- confirm it is in scope at runtime.
        town = parse_insee(insee_code)
        # manage only one town at a time
        assert len(town) < 2
        if not town:
            return parcels
        town = town[0]

    year = None
    year_match = PARCEL_YEAR_REGEXP.match(parcel_str)
    if year_match:
        year = year_match.group(1)
        parcel_str = parcel_str[year_match.end():]

    for section, nums in PARCEL_SECTION_REGEXP.findall(parcel_str):
        for num in PARCEL_NB_REGEXP.findall(nums):
            if len(num) > _MAX_PARCEL_NB_LENGTH:
                continue
            parcels.append(_build_parcel(year, section, num, town, owner))

        for lower, higher in PARCEL_NB_RANGE_REGEXP.findall(nums):
            # Apply the same length limit as for single numbers; otherwise
            # an oversized bound would be dropped above yet still expand
            # here into a huge number of parcels.
            if (len(lower) > _MAX_PARCEL_NB_LENGTH
                    or len(higher) > _MAX_PARCEL_NB_LENGTH):
                continue
            try:
                # both bounds have already been kept by the loop above, so
                # only the numbers strictly between them are added here
                first = int(lower) + 1
                last = int(higher)
            except ValueError:
                # a bound carrying the "p" suffix cannot be expanded
                continue
            for num in _int_range(first, last):
                parcels.append(_build_parcel(
                    year, section, _text_type(num), town, owner))
    return parcels