diff options
author | Étienne Loks <etienne.loks@peacefrogs.net> | 2013-09-03 01:21:25 +0200 |
---|---|---|
committer | Étienne Loks <etienne.loks@peacefrogs.net> | 2013-09-03 01:24:21 +0200 |
commit | 6e085204448177f638b2c7abf249afdc1b8f6dae (patch) | |
tree | 52ab711d9532c91cff5715c0d565755f4c6a488f /archaeological_operations | |
parent | 795b399c1e235bb071c58e87d75923f4b7e3d98e (diff) | |
download | Ishtar-6e085204448177f638b2c7abf249afdc1b8f6dae.tar.bz2 Ishtar-6e085204448177f638b2c7abf249afdc1b8f6dae.zip |
Refactoring: move parse_parcels to "utils.py" (refs #809)
Diffstat (limited to 'archaeological_operations')
-rw-r--r-- | archaeological_operations/import_from_csv.py | 41 | ||||
-rw-r--r-- | archaeological_operations/utils.py | 68 |
2 files changed, 70 insertions, 39 deletions
diff --git a/archaeological_operations/import_from_csv.py b/archaeological_operations/import_from_csv.py
index 1176aec4c..bd4c1c841 100644
--- a/archaeological_operations/import_from_csv.py
+++ b/archaeological_operations/import_from_csv.py
@@ -33,6 +33,8 @@ from django.contrib.auth.models import User
 from django.db import transaction
 from django.template.defaultfilters import slugify
 
+from archaeological_operations.utils import parse_parcels
+
 from ishtar_common.models import Town, Person, PersonType, OrganizationType, \
     Organization, SourceType
 from archaeological_files.models import PermitType, File, FileType
@@ -426,45 +428,6 @@ def parse_rapp_index(value):
     if items:
         return int(items[-1])
 
-PARCEL_YEAR_REGEXP = re.compile(r"^([0-9]{4})[ :]+")
-PARCEL_SECTION_REGEXP = re.compile(ur"(?: )*(?:[Ss]ection(?:s)?)?(?: )*([A-Z][A-Z0-9]{0,3})[ :]*((?:(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à)*[0-9]+[p]?)+)")
-PARCEL_NB_RANGE_REGEXP = re.compile(ur'([0-9]+[p]?) à ([0-9]+[p]?)')
-PARCEL_NB_REGEXP = re.compile(ur'(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à)*([0-9]+[p]?)')
-
-def parse_parcels(parcel_str, insee_code, owner):
-    parcels = []
-    town = parse_insee(insee_code)
-    # manage only one town at a time
-    assert len(town) < 2
-    if not town:
-        return parcels
-    town = town[0]
-    m = PARCEL_YEAR_REGEXP.match(parcel_str)
-    year = None
-    if m:
-        year = m.groups()[0]
-        parcel_str = parcel_str[m.span()[1]:]
-    for parcel in PARCEL_SECTION_REGEXP.findall(parcel_str):
-        sector, nums = parcel[0], parcel[1]
-        for num in PARCEL_NB_REGEXP.findall(nums):
-            if len(unicode(num)) > 6:
-                continue
-            parcels.append({'year':year, 'town':town, 'section':sector,
-                            'parcel_number':num, 'history_modifier':owner})
-        for parcel_ranges in PARCEL_NB_RANGE_REGEXP.findall(nums):
-            lower_range, higher_range = parcel_ranges
-            try:
-                # the lower range itself has been already kept
-                lower_range = int(lower_range) + 1
-                higher_range = int(higher_range)
-            except ValueError:
-                continue
-            for num in xrange(lower_range, higher_range):
-                parcels.append({'year':year, 'town':town,
-                                'section':sector, 'parcel_number':unicode(num),
-                                'history_modifier':owner})
-    return parcels
-
 _CACHED_DOC_TYPES = {}
 
 def parse_doc_types(value):
diff --git a/archaeological_operations/utils.py b/archaeological_operations/utils.py
new file mode 100644
index 000000000..c48ec93e8
--- /dev/null
+++ b/archaeological_operations/utils.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2013 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# See the file COPYING for details.
+
+import re
+
+PARCEL_YEAR_REGEXP = re.compile(r"^([0-9]{4})[ :]+")
+PARCEL_SECTION_REGEXP = re.compile(ur"(?: )*(?:[Ss]ection(?:s)?)?(?: )*([A-Z][A-Z0-9]{0,3})[ :]*((?:(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*[0-9]+[p]?)+)")
+PARCEL_NB_RANGE_REGEXP = re.compile(ur'([0-9]+[p]?) (?:à|to) ([0-9]+[p]?)')
+PARCEL_NB_REGEXP = re.compile(ur'(?: |;|,|[Pp]arcelle(?:s)?|n°|et|à|to)*([0-9]+[p]?)')
+
+def parse_parcels(parcel_str, insee_code=None, owner=None):
+    parcels, town = [], None
+    if insee_code:
+        town = parse_insee(insee_code)
+        # manage only one town at a time
+        assert len(town) < 2
+        if not town:
+            return parcels
+        town = town[0]
+    m = PARCEL_YEAR_REGEXP.match(parcel_str)
+    year = None
+    if m:
+        year = m.groups()[0]
+        parcel_str = parcel_str[m.span()[1]:]
+    for parcel in PARCEL_SECTION_REGEXP.findall(parcel_str):
+        sector, nums = parcel[0], parcel[1]
+        for num in PARCEL_NB_REGEXP.findall(nums):
+            if len(unicode(num)) > 6:
+                continue
+            dct = {'year':year, 'section':sector, 'parcel_number':num}
+            if town:
+                dct['town'] = town
+            if owner:
+                dct['history_modifier'] = owner
+            parcels.append(dct)
+        for parcel_ranges in PARCEL_NB_RANGE_REGEXP.findall(nums):
+            lower_range, higher_range = parcel_ranges
+            try:
+                # the lower range itself has been already kept
+                lower_range = int(lower_range) + 1
+                higher_range = int(higher_range)
+            except ValueError:
+                continue
+            for num in xrange(lower_range, higher_range):
+                dct = {'year':year, 'section':sector,
+                       'parcel_number':unicode(num)}
+                if town:
+                    dct['town'] = town
+                if owner:
+                    dct['history_modifier'] = owner
+                parcels.append(dct)
+    return parcels