diff options
Diffstat (limited to 'archaeological_operations')
| -rw-r--r-- | archaeological_operations/import_from_csv.py | 41 | ||||
| -rw-r--r-- | archaeological_operations/utils.py | 68 | 
2 files changed, 70 insertions, 39 deletions
| diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py index 1176aec4c..bd4c1c841 100644 --- a/archaeological_operations/import_from_csv.py +++ b/archaeological_operations/import_from_csv.py @@ -33,6 +33,8 @@ from django.contrib.auth.models import User  from django.db import transaction  from django.template.defaultfilters import slugify +from archaeological_operations.utils import parse_parcels +  from ishtar_common.models import Town, Person, PersonType, OrganizationType, \      Organization, SourceType  from archaeological_files.models import PermitType, File, FileType @@ -426,45 +428,6 @@ def parse_rapp_index(value):      if items:          return int(items[-1]) -PARCEL_YEAR_REGEXP = re.compile(r"^([0-9]{4})[ :]+") -PARCEL_SECTION_REGEXP = re.compile(ur"(?: )*(?:[Ss]ection(?:s)?)?(?: )*([A-Z][A-Z0-9]{0,3})[ :]*((?:(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à)*[0-9]+[p]?)+)") -PARCEL_NB_RANGE_REGEXP = re.compile(ur'([0-9]+[p]?) à ([0-9]+[p]?)') -PARCEL_NB_REGEXP = re.compile(ur'(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à)*([0-9]+[p]?)') - -def parse_parcels(parcel_str, insee_code, owner): -    parcels = [] -    town = parse_insee(insee_code) -    # manage only one town at a time -    assert len(town) < 2 -    if not town: -        return parcels -    town = town[0] -    m = PARCEL_YEAR_REGEXP.match(parcel_str) -    year = None -    if m: -        year = m.groups()[0] -        parcel_str = parcel_str[m.span()[1]:] -    for parcel in PARCEL_SECTION_REGEXP.findall(parcel_str): -        sector, nums = parcel[0], parcel[1] -        for num in PARCEL_NB_REGEXP.findall(nums): -            if len(unicode(num)) > 6: -                continue -            parcels.append({'year':year, 'town':town, 'section':sector, -                            'parcel_number':num, 'history_modifier':owner}) -        for parcel_ranges in PARCEL_NB_RANGE_REGEXP.findall(nums): -            lower_range, higher_range = parcel_ranges -            try: -                # the lower range itself has been already kept -                lower_range = int(lower_range) + 1 -                higher_range = int(higher_range) -            except ValueError: -                continue -            for num in xrange(lower_range, higher_range): -                parcels.append({'year':year, 'town':town, -                        'section':sector, 'parcel_number':unicode(num), -                        'history_modifier':owner}) -    return parcels -  _CACHED_DOC_TYPES = {}  def parse_doc_types(value): diff --git a/archaeological_operations/utils.py b/archaeological_operations/utils.py new file mode 100644 index 000000000..c48ec93e8 --- /dev/null +++ b/archaeological_operations/utils.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2013  Étienne Loks  <etienne.loks_AT_peacefrogsDOTnet> + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. + +# See the file COPYING for details. + +import re + +PARCEL_YEAR_REGEXP = re.compile(r"^([0-9]{4})[ :]+") +PARCEL_SECTION_REGEXP = re.compile(ur"(?: )*(?:[Ss]ection(?:s)?)?(?: )*([A-Z][A-Z0-9]{0,3})[ :]*((?:(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*[0-9]+[p]?)+)") +PARCEL_NB_RANGE_REGEXP = re.compile(ur'([0-9]+[p]?) (?:à|to) ([0-9]+[p]?)') +PARCEL_NB_REGEXP = re.compile(ur'(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*([0-9]+[p]?)') + +def parse_parcels(parcel_str, insee_code=None, owner=None): +    parcels, town = [], None +    if insee_code: +        town = parse_insee(insee_code) +        # manage only one town at a time +        assert len(town) < 2 +        if not town: +            return parcels +        town = town[0] +    m = PARCEL_YEAR_REGEXP.match(parcel_str) +    year = None +    if m: +        year = m.groups()[0] +        parcel_str = parcel_str[m.span()[1]:] +    for parcel in PARCEL_SECTION_REGEXP.findall(parcel_str): +        sector, nums = parcel[0], parcel[1] +        for num in PARCEL_NB_REGEXP.findall(nums): +            if len(unicode(num)) > 6: +                continue +            dct = {'year':year, 'section':sector, 'parcel_number':num} +            if town: +                dct['town'] = town +            if owner: +                dct['history_modifier'] = owner +            parcels.append(dct) +        for parcel_ranges in PARCEL_NB_RANGE_REGEXP.findall(nums): +            lower_range, higher_range = parcel_ranges +            try: +                # the lower range itself has been already kept +                lower_range = int(lower_range) + 1 +                higher_range = int(higher_range) +            except ValueError: +                continue +            for num in xrange(lower_range, higher_range): +                dct = {'year':year, 'section':sector, +                       'parcel_number':unicode(num)} +                if town: +                    dct['town'] = town +                if owner: +                    dct['history_modifier'] = owner +                parcels.append(dct) +    return parcels | 
