Diffstat (limited to 'scripts')
-rw-r--r--  scripts/history_duplicate_clean.py |  31
-rwxr-xr-x  scripts/import_from_csv.py         |  68
-rwxr-xr-x  scripts/import_towns_from_osm.py   | 110
-rw-r--r--  scripts/pre_import_sra_files.py    |  79
-rwxr-xr-x  scripts/simple_ooo_replace.py      |  79
5 files changed, 367 insertions, 0 deletions
diff --git a/scripts/history_duplicate_clean.py b/scripts/history_duplicate_clean.py
new file mode 100644
index 000000000..61d358720
--- /dev/null
+++ b/scripts/history_duplicate_clean.py
@@ -0,0 +1,31 @@
+"""
+Clean duplicates in history records.
+This should be unnecessary now.
+"""
+
+import datetime
+from archaeological_operations.models import Operation, AdministrativeAct
+from archaeological_files.models import File
+from archaeological_context_records.models import ContextRecord
+from archaeological_finds.models import Find, BaseFind, Treatment
+
+nb_deleted = {}
+to_delete = []
+for model in [Operation, File, ContextRecord, AdministrativeAct, Find,
+              BaseFind, Treatment]:
+    already_deleted = len(to_delete)
+    for item in model.objects.all():
+        c_user, c_date = None, None
+        for h in item.history.order_by('-history_modifier_id', '-history_date',
+                                       '-history_id').all():
+            if c_user and c_date and h.history_modifier_id == c_user and \
+               c_date - h.history_date < datetime.timedelta(seconds=5):
+                to_delete.append(h)
+            c_user = h.history_modifier_id
+            c_date = h.history_date
+    nb_deleted[model.__name__] = len(to_delete) - already_deleted
+
+for item in to_delete:
+    item.delete()
+for m in nb_deleted:
+    print "* %d deleted for %s" % (nb_deleted[m], m)
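Note: this script works directly on the ORM, so it has to run with the Django environment loaded. Assuming a standard manage.py at the project root (not part of this commit), one way to run it would be:

    python manage.py shell < scripts/history_duplicate_clean.py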
diff --git a/scripts/import_from_csv.py b/scripts/import_from_csv.py
new file mode 100755
index 000000000..9640f1851
--- /dev/null
+++ b/scripts/import_from_csv.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+Import departments and towns from a CSV file.
+"""
+
+DELIMITER = ","
+QUOTECHAR = '"'
+
+import sys
+import csv
+sys.path.append('.')
+
+from django.core.management import setup_environ
+import settings
+
+setup_environ(settings)
+
+from optparse import OptionParser
+
+from ishtar_common import models
+
+def insert_department(value):
+    idx, label = value
+    if models.Department.objects.filter(number=idx).count():
+        return
+    models.Department(number=idx, label=label).save()
+    print idx, label, u" inserted"
+
+def insert_town(value):
+    idx, label = value
+    if models.Town.objects.filter(numero_insee=idx).count():
+        return
+    try:
+        dpt = models.Department.objects.get(number=idx[:2])
+    except models.Department.DoesNotExist:
+        return
+    models.Town(numero_insee=idx, name=label, departement=dpt).save()
+    print idx, label, u" inserted"
+
+tables = {u"department": insert_department,
+          u"town": insert_town}
+
+usage = u"usage: %%prog csv_file.csv table_name\n\n"\
+        u"Table name must be in: %s." % u", ".join(tables.keys())
+parser = OptionParser(usage=usage)
+
+(options, args) = parser.parse_args()
+
+try:
+    assert len(args) == 2
+except AssertionError:
+    parser.error(u"You must provide one csv file and the table name.")
+
+try:
+    assert args[1] in tables
+except AssertionError:
+    parser.error(u"Incorrect table name.")
+
+try:
+    values = csv.reader(open(args[0], 'rb'), delimiter=DELIMITER,
+                        quotechar=QUOTECHAR)
+except IOError:
+    parser.error(u"Incorrect CSV file.")
+
+for value in values:
+    tables[args[1]](value)
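Usage note: following the usage string above, the table name is either "department" or "town", and each CSV row must carry two columns, code then label (for instance 76,Seine-Maritime for a department). Departments should be imported before towns, since insert_town resolves each town's department from the first two digits of its INSEE code. The file names below are only examples:

    python scripts/import_from_csv.py departements.csv department
    python scripts/import_from_csv.py towns.csv town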
diff --git a/scripts/import_towns_from_osm.py b/scripts/import_towns_from_osm.py
new file mode 100755
index 000000000..fb301f09f
--- /dev/null
+++ b/scripts/import_towns_from_osm.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+Import towns from OpenStreetMap data.
+Takes an OSM XML file as its only argument.
+
+To get an OSM file (with a bounding box adapted to your needs):
+curl --location --globoff "http://www.informationfreeway.org/api/0.6/node[place=village|town|city][bbox=-5.53711,41.90228,8.96484,51.50874]" -o city.osm
+or from a whole xml/pbf export:
+./osmosis --read-pbf ~/france-20110125.osm.pbf --node-key-value keyValueList="place.village,place.town,place.city" --write-xml city.osm
+"""
+
+import sys
+sys.path.append('.')
+
+from django.core.management import setup_environ
+from django.core.exceptions import ObjectDoesNotExist
+from django.contrib.gis.geos import Point
+import settings
+
+setup_environ(settings)
+
+from optparse import OptionParser
+from xml.parsers import expat
+
+from ishtar_base import models
+
+usage = "usage: %prog osm_file.xml"
+parser = OptionParser(usage=usage)
+
+(options, args) = parser.parse_args()
+
+try:
+    assert len(args) == 1
+except AssertionError:
+    parser.error("You must provide one XML file")
+
+
+ATTRS = [u"lat", u"lon"]
+
+# key: (mandatory, [allowed values for this key])
+TAGS = {u"place": (True, [u"village", u"town", u"city"]),
+        u"ref:INSEE": (True, []),
+        u"population": (False, []),
+        }
+
+class TownParser:
+
+    def __init__(self):
+        self._parser = expat.ParserCreate()
+        self._parser.returns_unicode = True
+        self._parser.StartElementHandler = self.start
+        self._parser.EndElementHandler = self.end
+        self._parser.CharacterDataHandler = self.data
+        self.town = {}
+        self.number = 0
+
+    def feed(self, stream):
+        self._parser.ParseFile(stream)
+
+    def close(self):
+        self._parser.Parse("", 1)  # end of data
+        del self._parser  # get rid of circular references
+
+    def start(self, tag, attrs):
+        if tag == u"node":
+            self.town = {}
+            for attr in ATTRS:
+                if attr in attrs:
+                    self.town[attr] = attrs[attr]
+        if tag == u"tag":
+            if u"k" not in attrs or u"v" not in attrs:
+                return
+            if attrs[u"k"] in TAGS:
+                limit = TAGS[attrs[u"k"]][1]
+                # discard nodes whose tag value is not in the allowed list
+                # (e.g. place must be one of village, town or city)
+                if limit and attrs[u"v"] not in limit:
+                    self.town["DEL"] = True
+                    return
+                self.town[attrs[u"k"]] = attrs[u"v"]
+
+    def end(self, tag):
+        if tag == u"node" and self.town and "DEL" not in self.town:
+            for k in TAGS:
+                if TAGS[k][0] and k not in self.town:
+                    return
+            try:
+                town = models.Town.objects.get(numero_insee=self.town["ref:INSEE"])
+            except ObjectDoesNotExist:
+                return
+            town.center = Point(float(self.town['lon']), float(self.town['lat']),
+                                srid=4326)
+            town.save()
+            self.number += 1
+            print town, "updated"
+
+    def data(self, data):
+        pass
+
+p = TownParser()
+
+try:
+    p.feed(open(args[0], 'rb'))
+    print u"%d towns updated" % p.number
+except (IOError, expat.ExpatError):
+    parser.error("Incorrect XML file")
+
+
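Usage note: once the towns exist in the database (for instance after the CSV import above), the script is fed the OSM extract produced by one of the commands quoted in the docstring. Towns are matched on their INSEE code and only their center point is updated; unknown towns are silently skipped:

    python scripts/import_towns_from_osm.py city.osm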
diff --git a/scripts/pre_import_sra_files.py b/scripts/pre_import_sra_files.py
new file mode 100644
index 000000000..df00d3ef5
--- /dev/null
+++ b/scripts/pre_import_sra_files.py
@@ -0,0 +1,79 @@
+import unicodecsv
+import datetime
+
+from django.conf import settings
+
+from ishtar_common.data_importer import Importer
+
+
+def get_year(value):
+    # parse a dd/mm/yyyy date and return its year (None if it cannot be parsed)
+    try:
+        return datetime.datetime.strptime(value, '%d/%m/%Y').year
+    except ValueError:
+        return None
+
+index_list = []
+
+
+def treatment(data):
+    internal_ref = data[37].strip()
+    creation = data[34].strip()
+    reception = data[19].strip()
+    yr = get_year(creation)
+    if not yr:
+        yr = get_year(reception)
+
+    idx, year = None, None
+    if '-' in internal_ref:
+        year, y_idx = internal_ref.split('-')
+        if len(year) == 4:  # 2007-XXXX
+            try:
+                year = int(year)
+                idx = int(y_idx)
+            except ValueError:
+                pass
+    elif '.' in internal_ref:
+        year, y_idx = internal_ref.split('.')
+        if len(year) == 4:  # 2011.XXXX
+            try:
+                year = int(year)
+                idx = int(y_idx)
+            except ValueError:
+                pass
+    if not idx:
+        idx = int(internal_ref)
+    if year and year != yr:
+        yr = year
+    assert yr  # we should absolutely have a year!
+
+    external_id = "{}{}-{}".format(settings.ISHTAR_LOCAL_PREFIX, yr, idx)
+    assert (yr, external_id) not in index_list
+    index_list.append((yr, external_id))
+    return yr, idx, external_id
+
+
+new_datas = []
+with open('plouf.csv') as csv_file:
+    datas = [line for line in unicodecsv.reader(csv_file,
+                                                encoding='utf-8')]
+    for idx, data in enumerate(datas):
+        if idx < 3:
+            # the first three lines are headers
+            data.append('annee')
+            data.append('identifiant numerique')
+            data.append('external_id')
+            new_datas.append(data)
+            continue
+        try:
+            year, num, external_id = treatment(data)
+            data.append(year)
+            data.append(num)
+            data.append(external_id)
+            new_datas.append(data)
+        except Exception as e:
+            print("Line {}: {}".format(idx + 1, e))
+
+csv = Importer()._get_csv(new_datas, empty=u'')
+with open('plouf2.csv', 'w') as fle:
+    fle.write(csv.encode('utf-8'))
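The input and output file names (plouf.csv, plouf2.csv) are hard-coded, and the script expects the Django settings to provide ISHTAR_LOCAL_PREFIX. As a rough sketch of what treatment() computes, assuming the prefix is set to 'sra-' (an arbitrary example value):

    # illustrative only: a row padded to 38 columns with just the fields
    # that treatment() reads
    row = [''] * 38
    row[37], row[34], row[19] = '2012-37', '05/06/2012', ''
    print(treatment(row))  # -> (2012, 37, 'sra-2012-37')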
diff --git a/scripts/simple_ooo_replace.py b/scripts/simple_ooo_replace.py
new file mode 100755
index 000000000..62cbab7a5
--- /dev/null
+++ b/scripts/simple_ooo_replace.py
@@ -0,0 +1,79 @@
+import os
+import shutil
+import sys
+from zipfile import ZipFile, ZIP_DEFLATED
+
+rpl_lst = [
+    ('adminact_associated_file_general_contractor_attached_to_name',
+     'adminact_associated_file_corporation_general_contractor_name'),
+    ('adminact_associated_file_general_contractor_'
+     'attached_to_address',
+     'adminact_associated_file_corporation_general_contractor_'
+     'address'),
+    ('adminact_associated_file_general_contractor_'
+     'address_complement',
+     'adminact_associated_file_corporation_general_contractor_'
+     'address_complement '),
+    ('adminact_associated_file_general_contractor_'
+     'attached_to_postal_code',
+     'adminact_associated_file_corporation_general_contractor_'
+     'postal_code '),
+    ('adminact_associated_file_general_contractor_attached_to_town',
+     'adminact_associated_file_corporation_general_contractor_town',
+     ),
+    ('adminact_associated_file_address',
+     'adminact_associated_file_get_locality',
+     )
+]
+
+context = dict(rpl_lst)
+
+
+def value_replace(content):
+    value = content
+    modified = False
+    for key in context:
+        if key in value:
+            modified = True
+            value = value.replace(key, context[key])
+    return value, modified
+
+
+def replace(directory, infile):
+    print("Processing {}".format(infile))
+    outfile = "PREPROCESS--" + infile
+    infile = directory + os.sep + infile
+    outfile = directory + os.sep + outfile
+
+    inzip = ZipFile(infile, 'r', ZIP_DEFLATED)
+    outzip = ZipFile(outfile, 'w', ZIP_DEFLATED)
+
+    values = {}
+    idx = 0
+    for xml_file in ('content.xml', 'styles.xml'):
+        content = inzip.read(xml_file)
+        values[xml_file], modified = value_replace(content)
+        if modified:
+            idx += 1
+
+    for f in inzip.infolist():
+        if f.filename in values:
+            outzip.writestr(f.filename, values[f.filename])
+        else:
+            outzip.writestr(f, inzip.read(f.filename))
+
+    inzip.close()
+    outzip.close()
+    # replace the original template with the pre-processed copy
+    shutil.move(outfile, infile)
+    return idx
+
+directory = sys.argv[-1]
+idx = 0
+
+
+for fle in os.listdir(directory):
+    if fle.endswith('.odt'):
+        idx += replace(directory, fle)
+
+print("{} modifications".format(idx))
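Usage note: the target directory is taken from the last command-line argument and every .odt template in it is rewritten in place (only content.xml and styles.xml inside each archive are modified). The path below is only an example:

    python scripts/simple_ooo_replace.py /path/to/ooo_templates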