diff options
Diffstat (limited to 'archaeological_operations/data_importer.py')
-rw-r--r-- | archaeological_operations/data_importer.py | 280 |
1 files changed, 0 insertions, 280 deletions
diff --git a/archaeological_operations/data_importer.py b/archaeological_operations/data_importer.py deleted file mode 100644 index b4cd2f0d0..000000000 --- a/archaeological_operations/data_importer.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2015 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet> - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -# See the file COPYING for details. - -import re - -from django.db import IntegrityError -from django.template.defaultfilters import slugify - -from ishtar_common.data_importer import * -from ishtar_common.models import Town, OrganizationType, SourceType, \ - SupportType, Format, AuthorType - -from archaeological_operations import models -from archaeological_operations.forms import OPERATOR -from archaeological_operations.utils import parse_parcels - -RE_PERMIT_REFERENCE = re.compile('[A-Za-z]*(.*)') - - -class ImportParcelFormater(ImportFormater): - NEED = ['town', ] - PARCEL_OWNER_KEY = 'associated_file' - - def post_process(self, obj, context, value, owner=None): - value = value.strip() - base_dct = {self.PARCEL_OWNER_KEY: obj, 'history_modifier': owner} - if 'parcels' in context: - for key in context['parcels']: - if context['parcels'][key]: - base_dct[key] = context['parcels'][key] - for parcel_dct in parse_parcels(value, owner=owner): - parcel_dct.update(base_dct) - try: - models.Parcel.objects.get_or_create(**parcel_dct) - except IntegrityError: - try: - p = unicode(parcel_dct) - except UnicodeDecodeError: - try: - p = str(parcel_dct).decode('utf-8') - except UnicodeDecodeError: - p = u"" - raise ImporterError(u"Erreur d'import parcelle, contexte : %s" - % p) - - -class ImportYearFormater(ImportFormater): - def post_process(self, obj, context, value, owner=None): - value = self.formater.format(value) - if not value: - return - obj.year = value.year - obj.save() - - -class TownFormater(Formater): - def __init__(self, town_full_dct={}, town_dct={}): - self._town_full_dct = town_full_dct - self._town_dct = town_dct - self._initialized = False if not self._town_full_dct else True - - def town_dct_init(self): - for town in Town.objects.all(): - key = (slugify(town.name.strip()), town.numero_insee[:2]) - if key in self._town_full_dct: - # print("Danger! %s is ambiguous with another town on the same" - # " department." % town.name) - continue - self._town_full_dct[key] = town - key = slugify(town.name.strip()) - if key in self._town_dct: - # print("Warning %s is ambiguous with no department provided" % - # town.name) - continue - self._town_dct[key] = town - self._initialized = True - - def format(self, value, extra=None): - if not self._initialized: - self.town_dct_init() - m = RE_FILTER_CEDEX.match(value) - if m: - value = m.groups()[0] - if not value: - return None - if extra: - key = (slugify(value), extra) - if key in self._town_full_dct: - return self._town_full_dct[key] - key = slugify(value) - if key in self._town_dct: - return self._town_dct[key] - - -class TownINSEEFormater(Formater): - def __init__(self): - self._town_dct = {} - - def format(self, value, extra=None): - value = value.strip() - if not value: - return None - if value in self._town_dct: - return self._town_dct[value] - q = Town.objects.filter(numero_insee=value) - if not q.count(): - return - self._town_dct[value] = q.all()[0] - return self._town_dct[value] - - -class SurfaceFormater(Formater): - def test(self): - assert self.format(u"352 123") == 352123 - assert self.format(u"456 789 m²") == 456789 - assert self.format(u"78ha") == 780000 - - def format(self, value, extra=None): - value = value.strip() - if not value: - return None - factor = 1 - if value.endswith(u"m2") or value.endswith(u"m²"): - value = value[:-2] - if value.endswith(u"ha"): - value = value[:-2] - factor = 10000 - try: - return int(value.replace(' ', '')) * factor - except ValueError: - raise ImporterError("Erreur import surface : %s" % unicode(value)) - -# RE_ADD_CD_POSTAL_TOWN = re.compile("(.*)[, ](\d{5}) (.*?) *(?: "\ -# "*CEDEX|cedex|Cedex *\d*)*") - -RE_NAME_ADD_CD_POSTAL_TOWN = re.compile( - "(.+)?[, ]*" + NEW_LINE_BREAK + "(.+)?[, ]*(\d{2} *\d{3})[, ]*(.+)") - -RE_ADD_CD_POSTAL_TOWN = re.compile("(.+)?[, ]*(\d{2} *\d{3})[, ]*(.+)") - -RE_CD_POSTAL_FILTER = re.compile("(\d*) (\d*)") - -RE_ORGA = re.compile("([^,\n]*)") - - -class OperationImporterBibracte(Importer): - OBJECT_CLS = models.Operation - DESC = u"Exports Bibracte : importeur pour l'onglet opération" - DEFAULTS = { - ('operator',): { - 'organization_type': OPERATOR - }, - } - LINE_FORMAT = [ - # CODE OPE - ImportFormater('operation_code', IntegerFormater(),), - # REGION - None, - # TYPE operation - ImportFormater('operation_type', TypeFormater(models.OperationType),), - # NOM - ImportFormater('common_name', UnicodeFormater(120),), - # OPERATEUR - ImportFormater('operator__name', UnicodeFormater(120),), - # resp. lien IMPORT avec personne - ImportFormater('in_charge__raw_name', UnicodeFormater(300),), - # début - ImportFormater('start_date', DateFormater(['%Y/%m/%d']),), - # fin - ImportFormater('excavation_end_date', DateFormater(['%Y/%m/%d']),), - # Chronos - ImportFormater('periods', TypeFormater(models.Period, many_split="&"), - required=False), - ] - -RE_PARCEL_SECT_NUM = re.compile("([A-Za-z]*)([0-9]*)") -RE_NUM_INSEE = re.compile("([0-9]*)") - - -class ParcelImporterBibracte(Importer): - OBJECT_CLS = models.Parcel - DESC = u"Exports Bibracte : importeur pour l'onglet parcelles" - DEFAULTS = { - ('operator',): { - 'organization_type': OrganizationType.objects.get( - txt_idx="operator")}, - } - LINE_FORMAT = [ - # code OA - ImportFormater('operation__operation_code', IntegerFormater(),), - # identifiant parcelle - ImportFormater( - ['section', 'parcel_number'], - [UnicodeFormater(4), UnicodeFormater(6), ], - regexp=RE_PARCEL_SECT_NUM, - regexp_formater_args=[[0], [1]], required=False, - duplicate_fields=[('external_id', False)],), - # numero parcelle - ImportFormater('parcel_number', UnicodeFormater(6), - required=False,), - # section cadastre - ImportFormater('section', UnicodeFormater(4), - required=False,), - # annee cadastre - ImportFormater('year', YearFormater(), required=False,), - # nom commune - None, - # numero INSEE commune - ImportFormater('town__numero_insee', UnicodeFormater(6), - regexp=RE_NUM_INSEE, required=False,), - # nom departement - None, - # lieu dit adresse - ImportFormater('address', UnicodeFormater(500), - required=False,), - ] - -MAIN_AUTHOR, created = AuthorType.objects.get_or_create(txt_idx='main_author') - - -class DocImporterBibracte(Importer): - OBJECT_CLS = models.OperationSource - DEFAULTS = { - ('authors',): {'author_type': MAIN_AUTHOR}, - } - DESC = u"Exports Bibracte : importeur pour l'onglet documentation" - LINE_FORMAT = [ - # code OA - ImportFormater('operation__operation_code', IntegerFormater(),), - # identifiant documentation - ImportFormater('external_id', UnicodeFormater(12),), - # type - ImportFormater('source_type', TypeFormater(SourceType), - required=False), - # nature support - ImportFormater('support_type', TypeFormater(SupportType), - required=False), - # nombre element - ImportFormater('item_number', IntegerFormater(), required=False), - # auteur - ImportFormater('authors__person__raw_name', UnicodeFormater(300), - required=False), - # annee - ImportFormater('creation_date', DateFormater(['%Y']),), - # format - ImportFormater('format_type', TypeFormater(Format), required=False), - # description legende - ImportFormater('description', UnicodeFormater(1000), required=False), - # type contenant - None, - # numero contenant - None, - # commentaire - ImportFormater('comment', UnicodeFormater(1000), required=False), - # echelle - ImportFormater('scale', UnicodeFormater(30), required=False), - # type sous contenant - None, - # numero sous contenant - None, - # informations complementaires - ImportFormater('additional_information', UnicodeFormater(1000), - required=False), - ] |