diff options
author | Étienne Loks <etienne.loks@iggdrasil.net> | 2016-01-18 18:41:39 +0100 |
---|---|---|
committer | Étienne Loks <etienne.loks@iggdrasil.net> | 2016-01-19 00:18:02 +0100 |
commit | e9c377136b8d0329f464749ea9671874d9dde17e (patch) | |
tree | 17e9fbf51fc054d18ddcd7fabc87e8684daf4eb4 /scripts | |
parent | 50124308f8f514e0f0120257a7d6a97523edb160 (diff) | |
download | Ishtar-e9c377136b8d0329f464749ea9671874d9dde17e.tar.bz2 Ishtar-e9c377136b8d0329f464749ea9671874d9dde17e.zip |
Configure for publication on PyPI, descriptions, etc.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/history_duplicate_clean.py | 31 | ||||
-rwxr-xr-x | scripts/import_from_csv.py | 68 | ||||
-rwxr-xr-x | scripts/import_towns_from_osm.py | 110 | ||||
-rw-r--r-- | scripts/pre_import_sra_files.py | 79 | ||||
-rwxr-xr-x | scripts/simple_ooo_replace.py | 79 |
5 files changed, 367 insertions, 0 deletions
diff --git a/scripts/history_duplicate_clean.py b/scripts/history_duplicate_clean.py new file mode 100644 index 000000000..61d358720 --- /dev/null +++ b/scripts/history_duplicate_clean.py @@ -0,0 +1,31 @@ +""" +Clean duplicate in history. +This should be unecessary now. +""" + +import datetime +from archaeological_operations.models import Operation, AdministrativeAct +from archaeological_files.models import File +from archaeological_context_records.models import ContextRecord +from archaeological_finds.models import Find, BaseFind, Treatment + +nb_deleted = {} +to_delete = [] +for model in [Operation, File, ContextRecord, AdministrativeAct, Find, + BaseFind, Treatment]: + nb_deleted[model.__name__] = 0 + for item in model.objects.all()[0:]: + c_user, c_date = None, None + for h in item.history.order_by('-history_modifier_id', '-history_date', + '-history_id').all(): + if c_user and c_date and h.history_modifier_id == c_user and \ + c_date - h.history_date < datetime.timedelta(seconds=5): + to_delete.append(h) + c_user = h.history_modifier_id + c_date = h.history_date + nb_deleted[model.__name__] += len(to_delete) + +for item in to_delete: + item.delete() +for m in nb_deleted: + print "* %d deleted for %s" % (nb_deleted[m], m) diff --git a/scripts/import_from_csv.py b/scripts/import_from_csv.py new file mode 100755 index 000000000..9640f1851 --- /dev/null +++ b/scripts/import_from_csv.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" +Import departements and towns from csv file +""" + +DELIMITER = "," +QUOTECHAR = '"' + +import sys +import csv +sys.path.append('.') + +from django.core.management import setup_environ +import settings + +setup_environ(settings) + +from optparse import OptionParser + +from ishtar_common import models + +def insert_department(value): + idx, label = value + if models.Department.objects.filter(number=idx).count(): + return + models.Department(number=idx, label=label).save() + print idx, label, u" inserted" + +def 
insert_town(value): + idx, label = value + if models.Town.objects.filter(numero_insee=idx).count(): + return + try: + dpt = models.Department.objects.get(number=idx[:2]) + except: + return + models.Town(numero_insee=idx, name=label, departement=dpt).save() + print idx, label, u" inserted" + +tables = {u"department":insert_department, + u"town":insert_town} + +usage = u"usage: %%prog csv_file.csv table_name\n\n"\ + u"Table name must be in: %s." % u", ".join(tables.keys()) +parser = OptionParser(usage=usage) + +(options, args) = parser.parse_args() + +try: + assert len(args) == 2 +except AssertionError: + parser.error(u"You must provide one csv file and the table name.") + +try: + assert args[1] in tables.keys() +except AssertionError: + parser.error(u"Incorrect table name.") + +try: + values = csv.reader(open(args[0], 'rb'), delimiter=DELIMITER, + quotechar=QUOTECHAR) +except (IOError): + parser.error(u"Incorrect CSV file.") + +for value in values: + tables[args[1]](value) diff --git a/scripts/import_towns_from_osm.py b/scripts/import_towns_from_osm.py new file mode 100755 index 000000000..fb301f09f --- /dev/null +++ b/scripts/import_towns_from_osm.py @@ -0,0 +1,110 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" +Import towns from OpenStreetMap data. +Take an OSM xml file for argument. 
+ +To get an OSM file (with a bounding box adapted to your needs): +curl --location --globoff "http://www.informationfreeway.org/api/0.6/node[place=village|town|city][bbox=-5.53711,41.90228,8.96484,51.50874]" -o city.osm +or from a whole xml/pbf export: +./osmosis --read-pbf ~/france-20110125.osm.pbf --node-key-value keyValueList="place.village,place.town,place.city" --write-xml city.osm +""" + +import sys +sys.path.append('.') + +from django.core.management import setup_environ +from django.core.exceptions import ObjectDoesNotExist +from django.contrib.gis.geos import Point +import settings + +setup_environ(settings) + +from optparse import OptionParser +from xml.parsers import expat + +from ishtar_base import models + +usage = "usage: %prog osm_file.xml" +parser = OptionParser(usage=usage) + +(options, args) = parser.parse_args() + +try: + assert len(args) == 1 +except AssertionError: + parser.error("You must provide one XML file") + + +ATTRS = [u"lat", u"lon"] + +# key : (mandatory, [restraint to keys]) +TAGS = {u"place":(True, [u"village", u"town", u"city"]), + u"ref:INSEE":(True, []), + u"population":(False, []) + } + +class TownParser: + + def __init__(self): + self._parser = expat.ParserCreate() + self._parser.returns_unicode = True + self._parser.StartElementHandler = self.start + self._parser.EndElementHandler = self.end + self._parser.CharacterDataHandler = self.data + self.town = {} + self.number = 0 + + def feed(self, data): + self._parser.ParseFile(data) + + def close(self): + self._parser.Parse("", 1) # end of data + del self._parser # get rid of circular references + + def start(self, tag, attrs): + if tag == u"node": + self.town = {} + for attr in ATTRS: + if attr in attrs: + self.town[attr] = attrs[attr] + if tag == u"tag": + if not u"k" in attrs or not u"v" in attrs: + return + if attrs[u"k"] in TAGS: + limit = TAGS[attrs[u"k"]][1] + if limit and \ + (attrs[u"v"] not in limit or \ + (type(limit) == unicode and limit not in attrs[u"v"])): + 
self.town["DEL"] = True + return + self.town[attrs[u"k"]] = attrs[u"v"] + + def end(self, tag): + if tag == u"node" and self.town and "DEL" not in self.town: + for k in TAGS: + if TAGS[k][0] and k not in self.town: + return + self.number += 1 + try: + town = models.Town.objects.get(numero_insee=self.town["ref:INSEE"]) + except ObjectDoesNotExist: + return + town.center = Point(float(self.town['lon']), float(self.town['lat']), + srid=4326) + town.save() + print town, "updated" + + def data(self, data): + pass + +p = TownParser() + +try: + p.feed(file(args[0])) + print u"%d towns updated" % p.number +except (IOError, expat.ExpatError): + parser.error("Incorrect XML file") + + diff --git a/scripts/pre_import_sra_files.py b/scripts/pre_import_sra_files.py new file mode 100644 index 000000000..df00d3ef5 --- /dev/null +++ b/scripts/pre_import_sra_files.py @@ -0,0 +1,79 @@ +import unicodecsv +import datetime + +from django.conf import settings + +from ishtar_common.data_importer import Importer + + +def get_year(value): + try: + for fmt in ['%d/%m/%Y', '%d/%m/%Y']: + return datetime.datetime.strptime(value, fmt).year + except: + pass + +index_list = [] + + +def treatment(data): + internal_ref = data[37].strip() + creation = data[34].strip() + reception = data[19].strip() + yr = get_year(creation) + if not yr: + yr = get_year(reception) + + idx, year = None, None + if '-' in internal_ref: + year, y_idx = internal_ref.split('-') + if len(year) == 4: # 2007-XXXX + try: + year = int(year) + idx = int(y_idx) + except ValueError: + pass + elif '.' in internal_ref: + year, y_idx = internal_ref.split('.') + if len(year) == 4: # 2011.XXXX + try: + year = int(year) + idx = int(y_idx) + except ValueError: + pass + if not idx: + idx = int(internal_ref) + if year and year != yr: + yr = year + assert yr # we should absolutly have a year! 
+ + external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx) + assert (yr, external_id) not in index_list + index_list.append((yr, external_id)) + return yr, idx, external_id + + +new_datas = [] +with open('plouf.csv') as csv_file: + datas = [line for line in unicodecsv.reader(csv_file, + encoding='utf-8')] + for idx, data in enumerate(datas): + if idx < 3: + # headers + data.append('annee') + data.append('identifiant numerique') + data.append('external_id') + new_datas.append(data) + continue + try: + year, idx, external_id = treatment(data) + data.append(year) + data.append(idx) + data.append(external_id) + new_datas.append(data) + except Exception as e: + print("Line {}: {}".format(idx + 1, e)) + +csv = Importer()._get_csv(new_datas, empty=u'') +with open('plouf2.csv', 'w') as fle: + fle.write(csv.encode('utf-8')) diff --git a/scripts/simple_ooo_replace.py b/scripts/simple_ooo_replace.py new file mode 100755 index 000000000..62cbab7a5 --- /dev/null +++ b/scripts/simple_ooo_replace.py @@ -0,0 +1,79 @@ +import os +import shutil +import sys +from zipfile import ZipFile, ZIP_DEFLATED + +rpl_lst = [ + ('adminact_associated_file_general_contractor_attached_to_name', + 'adminact_associated_file_corporation_general_contractor_name'), + ('adminact_associated_file_general_contractor_' + 'attached_to_address', + 'adminact_associated_file_corporation_general_contractor_' + 'address'), + ('adminact_associated_file_general_contractor_' + 'address_complement', + 'adminact_associated_file_corporation_general_contractor_' + 'address_complement '), + ('adminact_associated_file_general_contractor_' + 'attached_to_postal_code', + 'adminact_associated_file_corporation_general_contractor_' + 'postal_code '), + ('adminact_associated_file_general_contractor_attached_to_town', + 'adminact_associated_file_corporation_general_contractor_town', + ), + ('adminact_associated_file_address', + 'adminact_associated_file_get_locality', + ) +] + +context = dict(rpl_lst) + + +def 
value_replace(content): + value = content + modified = False + for key in context: + if key in value: + modified = True + value = value.replace(key, context[key]) + return value, modified + + +def replace(directory, infile): + print("Processing {}".format(infile)) + outfile = "PREPROCESS--" + infile + infile = directory + os.sep + infile + outfile = directory + os.sep + outfile + + inzip = ZipFile(infile, 'r', ZIP_DEFLATED) + outzip = ZipFile(outfile, 'w', ZIP_DEFLATED) + + values = {} + idx = 0 + for xml_file in ('content.xml', 'styles.xml'): + content = inzip.read(xml_file) + values[xml_file], modified = value_replace(content) + if modified: + idx += 1 + + for f in inzip.infolist(): + if f.filename in values: + outzip.writestr(f.filename, values[f.filename]) + else: + outzip.writestr(f, inzip.read(f.filename)) + + inzip.close() + outzip.close() + # replace original by PREPROCESS + shutil.move(outfile, infile) + return idx + +directory = sys.argv[-1] +idx = 0 + + +for fle in os.listdir(directory): + if fle.endswith('.odt'): + idx += replace(directory, fle) + +print("{} modifications".format(idx)) |