diff options
Diffstat (limited to 'ishtar_common/scripts')
| -rw-r--r-- | ishtar_common/scripts/__init__.py | 0 | ||||
| -rwxr-xr-x | ishtar_common/scripts/import_from_csv.py | 68 | ||||
| -rwxr-xr-x | ishtar_common/scripts/import_ope_from_csv.py | 411 | ||||
| -rwxr-xr-x | ishtar_common/scripts/import_towns_from_osm.py | 110 | 
4 files changed, 589 insertions, 0 deletions
| diff --git a/ishtar_common/scripts/__init__.py b/ishtar_common/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/ishtar_common/scripts/__init__.py diff --git a/ishtar_common/scripts/import_from_csv.py b/ishtar_common/scripts/import_from_csv.py new file mode 100755 index 000000000..de916be6f --- /dev/null +++ b/ishtar_common/scripts/import_from_csv.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" +Import departements and towns from csv file +""" + +DELIMITER = "," +QUOTECHAR = '"' + +import sys +import csv +sys.path.append('.') + +from django.core.management import setup_environ +import settings + +setup_environ(settings) + +from optparse import OptionParser + +from ishtar_base import models + +def insert_department(value): +    idx, label = value +    if models.Departement.objects.filter(number=idx).count(): +        return +    models.Departement(number=idx, label=label).save() +    print idx, label, u" inserted" + +def insert_town(value): +    idx, label = value +    if models.Town.objects.filter(numero_insee=idx).count(): +        return +    try: +        dpt = models.Departement.objects.get(number=idx[:2]) +    except: +        return +    models.Town(numero_insee=idx, name=label, departement=dpt).save() +    print idx, label, u" inserted" + +tables = {u"department":insert_department, +          u"town":insert_town} + +usage = u"usage: %%prog csv_file.csv table_name\n\n"\ +        u"Table name must be in: %s." % u", ".join(tables.keys()) +parser = OptionParser(usage=usage) + +(options, args) = parser.parse_args() + +try: +    assert len(args) == 2 +except AssertionError: +    parser.error(u"You must provide one csv file and the table name.") + +try: +    assert args[1] in tables.keys() +except AssertionError: +    parser.error(u"Incorrect table name.") + +try: +    values = csv.reader(open(args[0], 'rb'), delimiter=DELIMITER, +                    quotechar=QUOTECHAR) +except (IOError): +    parser.error(u"Incorrect CSV file.") + +for value in values: +    tables[args[1]](value) diff --git a/ishtar_common/scripts/import_ope_from_csv.py b/ishtar_common/scripts/import_ope_from_csv.py new file mode 100755 index 000000000..fd29fd3f6 --- /dev/null +++ b/ishtar_common/scripts/import_ope_from_csv.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (C) 2011  Étienne Loks  <etienne.loks_AT_peacefrogsDOTnet> + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. + +# See the file COPYING for details. + +""" +Scripts +""" + +DELIMITER = ";" +QUOTECHAR = '"' + +import sys +import datetime +import csv, codecs +sys.path.append('.') + +from django.core.management import setup_environ +import settings + +setup_environ(settings) + +from optparse import OptionParser + +from ishtar_base import models + +usage = u"usage: % %prog csv_file.csv update sra_nantes" +parser = OptionParser(usage=usage) + +(options, args) = parser.parse_args() + +try: +    assert len(args) >= 1 +except AssertionError: +    parser.error(u"You must provide one csv file.") + +update = True if len(args) > 1 and args[1] else False + +def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): +    # csv.py doesn't do Unicode; encode temporarily as UTF-8: +    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), +                            dialect=dialect, **kwargs) +    for row in csv_reader: +        # decode UTF-8 back to Unicode, cell by cell: +        yield [unicode(cell, 'utf-8') for cell in row] + +def utf_8_encoder(unicode_csv_data): +    for line in unicode_csv_data: +        yield line.encode('utf-8') + +try: +    values = unicode_csv_reader(codecs.open(args[0], 'rb', "utf-8"), +                    delimiter=DELIMITER, quotechar=QUOTECHAR) +except (IOError): +    parser.error(u"Incorrect CSV file.") + +from django.db import transaction + +from ishtar_base.models import Operation, OperationType, User, Town,\ +                          Person, PersonType, Period, AdministrativeAct, ActType + +# lbl, txt_idx +TYPES = { +    u"SU":(u'emergency_excavation', u"Sauvetage urgent"), +    u"EV":(u'evaluation', u"Évaluation"), +    u"SD":(u'Sampling', u"Sondage"), +    u"FP":(u"prog_excavation", u"Fouille programmée"), +    u"SP":(u"prev_excavation", u"Fouille préventive"), +    u"OPD":(u"arch_diagnostic", u"Diagnostic") +} + +for k in TYPES.keys(): +    try: +        TYPES[k] = OperationType.objects.get(txt_idx=TYPES[k][0]) +    except: +        ot = OperationType(txt_idx=TYPES[k][0], label=TYPES[k][1]) +        ot.save() +        TYPES[k] = ot + +def parse_operationtype(value): +    if value not in TYPES: +        return None +    return TYPES[value] + +# lbl, txt_idx +PERIODS = { +    u'MA':u'middle_age', +    u'IND':u'indetermined', +    u'CON':u'contemporan', +    u'MOD':u'modern', +    u'REC':u'recent_times', +    u'BMA':u'low_middle_age', +    u'MAC':u'classic_middle_age', +    u'HMA':u'high_middle_age', +    u'BAS':u'low_empire', +    u'HAU':u'high-empire', +    u'NRE':u'republic', +    u'GAL':u'gallo-roman', +    u'FE2':u'second_iron_age', +    u'FE1':u'first_iron_age', +    u'BRF':u'final_bronze_age', +    u'BRM':u'middle_bronze_age', +    u'BRA':u'old_bronze_age', +    u'FER':u'iron_age', +    u'BRO':u'bronze_age', +    u'PRO':u'protohistory', +    u'NEF':u'final_neolithic', +    u'NER':u'recent_neolithic', +    u'NEM':u'middle_neolithic', +    u'NEA':u'old_neolithic', +    u'NEO':u'neolithic', +    u'MER':u'recent_mesolithic', +    u'MEM':u'middle_mesolithic', +    u'MEA':u'old_mesolithic', +    u'MES':u'mesolithic', +    u'PAF':u'final_paleolithic', +    u'PAS':u'late_paleolithic', +    u'PAM':u'middle_paleolithic', +    u'PAA':u'ancien_paleolithic', +    u'PAL':u'paleolithic', +    u'':u'not_yet_documented', +} +for k in PERIODS.keys(): +    PERIODS[k] = Period.objects.get(txt_idx=PERIODS[k]) +PERIODS_KEYS = PERIODS.keys() +PERIODS_KEYS.sort(key=len) +PERIODS_KEYS.reverse() + +def parse_period(value): +    value = value[3:] if value.startswith('EUR') else value +    while value.endswith('-'): +        value = value[:-1] +    value = value[3:] if value.startswith('EUR') else value +    if not value: +        return [PERIODS[u'']] +    periods, old_val = [], u'' +    while value and old_val != value: +        old_val = value +        for k in PERIODS_KEYS: +            if value.startswith(k): +                periods.append(PERIODS[k]) +                value = value[len(k):] +                break +    return periods + +def parse_date(value): +    try: +        return datetime.datetime.strptime(value, '%d/%m/%Y') +    except: +        return None + +def parse_surface(value): +    value = value.replace(',', '.') +    try: +        # hectare en metre carrés +        value = float(value) * 10000 +        if value: +            return value +        return None +    except: +        return None + +def parse_year(value): +    try: +        yr = int(value) +    except: +        return +    if yr < 1900 or yr > 2100: +        return +    return yr + +default_person = User.objects.get(pk=2) + + +# si pas de start date : premier janvier de year + +# attrs, convert +COLS = [ + [], # numéro de dossier ? + (('operation_type',), parse_operationtype), + (('common_name',), unicode), + (('in_charge', 'name'), unicode), + (('in_charge', 'surname'), unicode), + [], # État ? + [], # Adresse ? + [], # origine ? + (('periods',), parse_period), + [], # Programme ? + [], # Rattach PC ? + [], # vide + (('administrative_act', 'ref_sra'), unicode), + (('administrative_act', 'signature_date'), parse_date), + (('start_date',), parse_date), + (('excavation_end_date',), parse_date), + (('year',), parse_year), + [], # identification + (('code_patriarche',), int), + [], # X degré + [], # Y degré + [], # X saisi ? + [], # Y saisi ? + [], # georef + [], # geometrie + (('surface',), parse_surface), +] + +def parse_insee(value): +    values = [] +    while len(value) > 4: +        values.append(value[:5]) +        value = value[5:] +    towns = [] +    for value in values: +        try: +            town = Town.objects.get(numero_insee=value) +            towns.append(town) +        except: +            sys.stderr.write('Numero INSEE : %s non existant en base' % value) +    return towns + +def parse_patriarche(value): +    if not value: +        return +    return '18' + unicode(value) + +COLS_SRA_NANTES = [ + [], + [], + [], #(('year',), int), + (('towns',), parse_insee), + [], # Intit TODO + [], # Adresse site... TODO + [], # prog/prev + [], #(('in_charge', 'attached_to', 'name'), unicode), + [], #(('in_charge', 'name'), unicode), + [], #(('in_charge' num + [], #(('in_charge', 'surname'), unicode), + [], # num ope non patriarche + (('start_date__year',), int), + (('excavation_end_date__year',), int), + (('start_date__day',), int), + (('excavation_end_date__day',), int), + (('start_date__month',), int), + (('excavation_end_date__month',), int), + [], + [], + [], + [], + [('code_patriarche',), parse_patriarche] +] + +col_defs = COLS_SRA_NANTES if len(args) > 2 and args[2] else COLS + +ope_default = {'history_modifier':default_person} + +# key : (class, default, reverse, extra) +key_classes = { +'in_charge':(Person, {'history_modifier':default_person, +                      'title':'', +                      'person_type':PersonType.objects.get( +                                    txt_idx='head_scientist')}, False), +'administrative_act':(AdministrativeAct, {'history_modifier':default_person, +                       'act_type':ActType.objects.get( +                                  txt_idx='excavation_order')}, 'operation'), +} + +@transaction.commit_manually +def import_operations(col_defs=COLS, update=False): +    new_ops = 0 +    error_ope, error_reversed, error_multis = [], [], [] +    for line_idx, vals in enumerate(values): +        sys.stdout.write("\r* line %d" % (line_idx)) +        if not line_idx: +            continue # remove header +        args = {} +        for col_idx, val in enumerate(vals): +            if len(col_defs) <= col_idx or not col_defs[col_idx]: +                continue +            attrs, typ = col_defs[col_idx] +            c_args = args +            for attr in attrs: +                if attr not in c_args: +                    c_args[attr] = {} +                c_args = c_args[attr] +            try: +                v = typ(val) +            except: +                v = "" +            if len(attrs) == 1: +                args[attrs[0]] = v +            elif len(attrs) == 2: +                args[attrs[0]][attrs[1]] = v +            elif len(attrs) == 3: +                args[attrs[0]][attrs[1]][attrs[2]] = v +        # manage exploded dates +        for k in args.keys(): +            if '__year' in k: +                key = k[:-len('__year')] +                try: +                    v = datetime.datetime(args[k], args[key+'__month'], +                                          args[key+'__day']) +                    args[key] = v +                except: +                    pass +                args.pop(k) +                args.pop(key+'__month') +                args.pop(key+'__day') +        reversed_items, multis = [], [] +        for k in args.keys(): +            if k in key_classes: +                cls, default, reverse = key_classes[k] +                default.update(args[k]) +                if reverse: +                    reversed_items.append((cls, default, reverse)) +                    args.pop(k) +                    continue +                try: +                    obj = cls.objects.get(**default) +                except: +                    obj = cls.objects.create(**default) +                    obj.save() +                transaction.commit() +                args[k] = obj +            elif type(args[k]) == list: +                multis.append((k, args[k])) +                args.pop(k) +        op = None +        if not update and not args['operation_type']: +            continue +        try: +            op = Operation.objects.get(code_patriarche=args['code_patriarche']) +            if not update: +                continue +        except: +            if update: +                continue +        # creation +        if not op: +            args.update(ope_default) +            args['operation_code'] = Operation.get_available_operation_code( +                                                                args['year']) +            try: +                op = Operation.objects.create(**args) +                op.save() +                new_ops += 1 +            except: +                error_ope.append((line_idx, args)) +                transaction.rollback() +                continue +            transaction.commit() +        else: # mise à jour +            try: +                for k in args: +                    if getattr(op, k): +                        continue +                    setattr(op, k, args[k]) +                op.save() +            except: +                transaction.rollback() +                continue +            transaction.commit() +        try: +            for cls, default, reverse in reversed_items: +                default[reverse] = op +                it = cls(**default).save() +        except: +            transaction.rollback() +            error_reversed.append((line_idx, reversed_items)) +            continue +        transaction.commit() +        try: +            for k, vals in multis: +                for v in vals: +                    getattr(op, k).add(v) +                    op.save() +        except: +            transaction.rollback() +            error_multis.append((line_idx, multis)) +            continue +        transaction.commit() + +    if error_ope: +        sys.stderr.write("\nError while recording theses operations:\n") +        for line_idx, args in error_ope: +            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n') +    if error_multis: +        sys.stderr.write( "\nError while recording theses multiples items attached to operation:") +        for line_idx, args in error_multis: +            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n') +    if error_reversed: +        sys.stderr.write("\nError while recording theses items that depend to operation:") +        for line_idx, args in error_reversed: +            sys.stderr.write("line: " + str(line_idx) + " args: " + str(args) + '\n') + +    sys.stdout.write("\n%d new operations recorded\n" % new_ops) + +import_operations(col_defs=col_defs, update=update) diff --git a/ishtar_common/scripts/import_towns_from_osm.py b/ishtar_common/scripts/import_towns_from_osm.py new file mode 100755 index 000000000..fb301f09f --- /dev/null +++ b/ishtar_common/scripts/import_towns_from_osm.py @@ -0,0 +1,110 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" +Import towns from OpenStreetMap data. +Take an OSM xml file for argument. + +To get an OSM file (with a bounding box adapted to your needs): +curl --location --globoff "http://www.informationfreeway.org/api/0.6/node[place=village|town|city][bbox=-5.53711,41.90228,8.96484,51.50874]" -o city.osm +or from a whole xml/pbf export: +./osmosis --read-pbf ~/france-20110125.osm.pbf --node-key-value keyValueList="place.village,place.town,place.city" --write-xml city.osm +""" + +import sys +sys.path.append('.') + +from django.core.management import setup_environ +from django.core.exceptions import ObjectDoesNotExist +from django.contrib.gis.geos import Point +import settings + +setup_environ(settings) + +from optparse import OptionParser +from xml.parsers import expat + +from ishtar_base import models + +usage = "usage: %prog osm_file.xml" +parser = OptionParser(usage=usage) + +(options, args) = parser.parse_args() + +try: +    assert len(args) == 1 +except AssertionError: +    parser.error("You must provide one XML file") + + +ATTRS = [u"lat", u"lon"] + +# key : (mandatory, [restraint to keys]) +TAGS = {u"place":(True, [u"village", u"town", u"city"]), +        u"ref:INSEE":(True, []), +        u"population":(False, []) +        } + +class TownParser: + +    def __init__(self): +        self._parser = expat.ParserCreate() +        self._parser.returns_unicode = True +        self._parser.StartElementHandler = self.start +        self._parser.EndElementHandler = self.end +        self._parser.CharacterDataHandler = self.data +        self.town = {} +        self.number = 0 + +    def feed(self, data): +        self._parser.ParseFile(data) + +    def close(self): +        self._parser.Parse("", 1) # end of data +        del self._parser # get rid of circular references + +    def start(self, tag, attrs): +        if tag == u"node": +            self.town = {} +            for attr in ATTRS: +                if attr in attrs: +                    self.town[attr] = attrs[attr] +        if tag == u"tag": +            if not u"k" in attrs or not u"v" in attrs: +                return +            if attrs[u"k"] in TAGS: +                limit = TAGS[attrs[u"k"]][1] +                if limit and \ +                   (attrs[u"v"] not in limit or \ +                   (type(limit) == unicode and limit not in attrs[u"v"])): +                    self.town["DEL"] = True +                    return +                self.town[attrs[u"k"]] = attrs[u"v"] + +    def end(self, tag): +        if tag == u"node" and self.town and "DEL" not in self.town: +            for k in TAGS: +                if TAGS[k][0] and k not in self.town: +                    return +            self.number += 1 +            try: +                town = models.Town.objects.get(numero_insee=self.town["ref:INSEE"]) +            except ObjectDoesNotExist: +                return +            town.center = Point(float(self.town['lon']), float(self.town['lat']), +                                srid=4326) +            town.save() +            print town, "updated" + +    def data(self, data): +        pass + +p = TownParser() + +try: +    p.feed(file(args[0])) +    print u"%d towns updated" % p.number +except (IOError, expat.ExpatError): +    parser.error("Incorrect XML file") + + | 
