summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Loks <etienne.loks@iggdrasil.net> 2025-05-12 14:59:21 +0200
committer: Étienne Loks <etienne.loks@iggdrasil.net> 2025-05-13 10:40:31 +0200
commit: 92ea61d5bc4713b151eb3c7d513f9e3699ccb2eb (patch)
tree: 7b256e3a950fbf07911a60a3ca6e1c831624a377
parent: f3f0498e7001abe9676b6506baea07e820cbf60b (diff)
download: Ishtar-92ea61d5bc4713b151eb3c7d513f9e3699ccb2eb.tar.bz2
download: Ishtar-92ea61d5bc4713b151eb3c7d513f9e3699ccb2eb.zip
✨ import_geofla_csv, import_insee_comm_csv: logging, better verbose CLI
-rw-r--r-- ishtar_common/management/commands/import_geofla_csv.py 202
-rw-r--r-- ishtar_common/management/commands/import_insee_comm_csv.py 119
-rw-r--r-- ishtar_common/utils.py 28
3 files changed, 210 insertions, 139 deletions
diff --git a/ishtar_common/management/commands/import_geofla_csv.py b/ishtar_common/management/commands/import_geofla_csv.py
index 69a1cad0b..400825092 100644
--- a/ishtar_common/management/commands/import_geofla_csv.py
+++ b/ishtar_common/management/commands/import_geofla_csv.py
@@ -18,6 +18,8 @@
# See the file COPYING for details.
import csv
+import datetime
+import os
import sys
from django.core.management.base import BaseCommand
@@ -25,8 +27,10 @@ from django.contrib.gis.geos import GEOSGeometry, Point
from django.db import transaction
from django.db.utils import DataError
+from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from ishtar_common.models import Town, GeoVectorData, GeoDataType, GeoProviderType
+from ishtar_common.utils import BColors, fast_line_count, get_log_time, get_progress
town_content_type = ContentType.objects.get(app_label="ishtar_common", model="town")
@@ -51,6 +55,9 @@ class Command(BaseCommand):
help='Quiet output')
parser.add_argument(
'--create-only', dest='create_only', action='store_true',
+ help='No update')
+ parser.add_argument(
+ '--create-only-geo', dest='create_only_geo', action='store_true',
help='Create only missing geo')
parser.add_argument(
'--srid', type=int, default=2154, dest='srid',
@@ -72,108 +79,127 @@ class Command(BaseCommand):
@transaction.atomic
def handle(self, *args, **options):
+ log_path = os.sep.join([settings.ROOT_PATH, "logs"])
+ if not os.path.exists(log_path):
+ os.mkdir(log_path, mode=0o770)
+
csv.field_size_limit(sys.maxsize)
csv_file = options['csv_file']
default_year = options['year']
srid = options['srid']
quiet = options['quiet']
create_only = options["create_only"]
+ create_only_geo = options["create_only_geo"]
if not quiet:
- sys.stdout.write('* using year {} as a default\n'.format(
- default_year))
- sys.stdout.write('* Opening file {}\n'.format(csv_file))
- nb_created, nb_changed, nb_error, nb_geo = 0, 0, 0, 0
- with open(csv_file, 'rt') as csvfile:
- header = csvfile.readline()
- geom_colum = header.split(",")[0]
- csvfile.seek(0)
-
- reader = csv.DictReader(csvfile)
- for idx, row in enumerate(reader):
- if not quiet:
- sys.stdout.write('Processing town %d.\r' % (idx + 1))
- sys.stdout.flush()
- num_insee = row['INSEE_COM']
- if len(num_insee) < 5:
- num_insee = '0' + num_insee
- if 'NOM_COM_M' in row:
- name = row['NOM_COM_M']
- elif 'NOM_M' in row:
- name = row['NOM_M']
- elif 'NOM' in row:
- name = row['NOM'].upper()
- else:
- name = row['NOM_COM'].upper()
- town, created = self.get_town(num_insee, name, default_year)
- if created:
- nb_created += 1
- else:
- nb_changed += 1
- if create_only and town.main_geodata:
+ sys.stdout.write(BColors.OKGREEN)
+ sys.stdout.write(f'* using year {default_year} as a default\n')
+ sys.stdout.write(f'* opening file {csv_file}{BColors.ENDC}\n')
+ nb_created, nb_error, nb_geo = 0, 0, 0
+ nb_csv_lines = fast_line_count(csv_file) - 1
+ started = datetime.datetime.now()
+ log_filename = f"import_ign-{get_log_time().replace(':', '')}.csv"
+ log_path = os.sep.join([log_path, log_filename])
+ with open(log_path, "w+") as fle:
+ writer = csv.writer(fle)
+ writer.writerow(["insee", "town", "created", "error"])
+ with open(csv_file, 'rt') as csvfile:
+ header = csvfile.readline()
+ geom_colum = header.split(",")[0]
+ csvfile.seek(0)
+
+ reader = csv.DictReader(csvfile)
+ for idx, row in enumerate(reader):
+ if not quiet:
+ sys.stdout.write(
+ get_progress("processing town", idx, nb_csv_lines, started)
+ )
+ sys.stdout.flush()
+ num_insee = row['INSEE_COM']
+ if len(num_insee) < 5:
+ num_insee = '0' + num_insee
+ if 'NOM_COM_M' in row:
+ name = row['NOM_COM_M']
+ elif 'NOM_M' in row:
+ name = row['NOM_M']
+ elif 'NOM' in row:
+ name = row['NOM'].upper()
+ else:
+ name = row['NOM_COM'].upper()
+ town, created = self.get_town(num_insee, name, default_year)
+ if created:
+ if create_only_geo:
+ continue
+ nb_created += 1
+ elif (create_only or create_only_geo) and town.main_geodata:
continue
- geom = row[geom_colum].upper()
- if 'MULTI' not in geom:
- geom = geom.replace('POLYGON', 'MULTIPOLYGON(') + ')'
- limit = GEOSGeometry(geom, srid=srid)
- if 'X_CENTROID' in row:
- center = Point(
- float(row['X_CENTROID']), float(row['Y_CENTROID']),
- srid=srid)
- else:
- center = None
- values = {}
- values['center'] = None
- if not town.year and default_year:
- values['year'] = default_year
- if 'SUPERFICIE' in row:
- values['surface'] = row['SUPERFICIE']
- else:
- values['surface'] = None
- for k in values:
- setattr(town, k, values[k])
- try:
- with transaction.atomic():
- town.save()
- except DataError:
- nb_error += 1
- town, created = self.get_town(num_insee, name, default_year)
+ geom = row[geom_colum].upper()
+ if 'MULTI' not in geom:
+ geom = geom.replace('POLYGON', 'MULTIPOLYGON(') + ')'
+ limit = GEOSGeometry(geom, srid=srid)
+ if 'X_CENTROID' in row:
+ center = Point(
+ float(row['X_CENTROID']), float(row['Y_CENTROID']),
+ srid=srid)
+ else:
+ center = None
+ values = {}
+ values['center'] = None
+ if not town.year and default_year:
+ values['year'] = default_year
+ if 'SUPERFICIE' in row:
+ values['surface'] = row['SUPERFICIE']
+ else:
+ values['surface'] = None
for k in values:
setattr(town, k, values[k])
- town.save()
- attrs = {
- "name": town._generate_cached_label(),
- "source_content_type": town_content_type,
- "source_id": town.pk,
- "data_type": data_type,
- "provider": provider,
- }
- if limit:
- attrs["multi_polygon"] = limit
- else:
- attrs["point_2d"] = center
- try:
- data, created = GeoVectorData.objects.get_or_create(**attrs)
- except DataError:
+ error = False
+ try:
+ with transaction.atomic():
+ town.save()
+ except DataError:
+ nb_error += 1
+ town, created = self.get_town(num_insee, name, default_year)
+ for k in values:
+ setattr(town, k, values[k])
+ town.save()
+ error = True
+ writer.writerow([town.numero_insee, town.name, created, error])
+ attrs = {
+ "name": town._generate_cached_label(),
+ "source_content_type": town_content_type,
+ "source_id": town.pk,
+ "data_type": data_type,
+ "provider": provider,
+ }
if limit:
- print(f"\nError {town} polygon\n")
- attrs.pop("multi_polygon")
+ attrs["multi_polygon"] = limit
+ else:
attrs["point_2d"] = center
+ try:
data, created = GeoVectorData.objects.get_or_create(**attrs)
- else:
- print(f"\nError {town} center\n")
- town.main_geodata = data
- town._post_save_geo_ok = False
- town.save()
- if created:
- nb_geo += 1
+ except DataError:
+ if limit:
+ print(f"\nError {town} polygon\n")
+ attrs.pop("multi_polygon")
+ attrs["point_2d"] = center
+ data, created = GeoVectorData.objects.get_or_create(**attrs)
+ else:
+ print(f"\nError {town} center\n")
+ town.main_geodata = data
+ town._post_save_geo_ok = False
+ town.save()
+ if created:
+ nb_geo += 1
if quiet:
return
- sys.stdout.write('\n* {} town created'.format(nb_created))
- sys.stdout.write('\n* {} geo created'.format(nb_geo))
- sys.stdout.write('\n* {} town changed'.format(nb_changed))
- sys.stdout.write('\n* {} town with geometry error\n'.format(nb_error))
+ sys.stdout.write(BColors.OKGREEN)
+ if nb_created:
+ sys.stdout.write('\n* {} town created'.format(nb_created))
+ if nb_geo:
+ sys.stdout.write('\n* {} geo created'.format(nb_geo))
+ if nb_error:
+ sys.stdout.write('\n* {} town with geometry error\n'.format(nb_error))
+ sys.stdout.write(f"\n[{get_log_time()}] log file:")
+ sys.stdout.write(f"\n{BColors.WARNING}{log_path}{BColors.ENDC}\n")
sys.stdout.flush()
-
-
-
diff --git a/ishtar_common/management/commands/import_insee_comm_csv.py b/ishtar_common/management/commands/import_insee_comm_csv.py
index 2e990c7ce..4584da890 100644
--- a/ishtar_common/management/commands/import_insee_comm_csv.py
+++ b/ishtar_common/management/commands/import_insee_comm_csv.py
@@ -18,13 +18,17 @@
# See the file COPYING for details.
import csv
+import os
import re
import sys
+from django.conf import settings
from django.core.management.base import BaseCommand
from ishtar_common.models import Town
+from ishtar_common.utils import BColors, get_log_time
+
class Command(BaseCommand):
help = 'Import INSEE csv'
@@ -39,75 +43,88 @@ class Command(BaseCommand):
help='Quiet output')
def handle(self, *args, **options):
+ log_path = os.sep.join([settings.ROOT_PATH, "logs"])
+ if not os.path.exists(log_path):
+ os.mkdir(log_path, mode=0o770)
+
csv_file = options['csv_file']
default_year = options['year']
quiet = options['quiet']
if not quiet:
- sys.stdout.write('* using year {} for new towns\n'.format(
- default_year))
- sys.stdout.write('* opening file {}\n'.format(csv_file))
+ sys.stdout.write(BColors.OKGREEN)
+ sys.stdout.write(f'* Using year {default_year} for new towns\n')
+ sys.stdout.write(f'* Opening file {csv_file}{BColors.ENDC}\n')
r = re.compile(r"(.*)\((.*)\)")
nb_created = 0
nb_link = 0
missing = []
strange = []
linked = set()
- with open(csv_file, 'rt') as csvfile:
- reader = csv.DictReader(csvfile)
- for idx, row in enumerate(reader):
- new_insee = row['DepComN']
- if len(new_insee) < 5:
- new_insee = '0' + new_insee
-
- if not idx: # test if first do not exist
+
+ log_filename = f"import_insee-{get_log_time().replace(':', '')}.csv"
+ log_path = os.sep.join([log_path, log_filename])
+ with open(log_path, "w+") as fle:
+ writer = csv.writer(fle)
+ writer.writerow(["new insee", "new town"])
+ with open(csv_file, 'rt') as csvfile:
+ reader = csv.DictReader(csvfile)
+ for idx, row in enumerate(reader):
+ new_insee = row['DepComN']
+ if len(new_insee) < 5:
+ new_insee = '0' + new_insee
+
+ if not idx: # test if first do not exist
+ q = Town.objects.filter(numero_insee=new_insee,
+ year=default_year)
+ if q.count():
+ sys.stdout.write(
+ f"{BColors.FAIL}First town already exists for this year{BColors.ENDC}\n")
+ return
+
+ if not quiet:
+ sys.stdout.write('Processing town %d.\r' % (idx + 1))
+ sys.stdout.flush()
+
+ old_insee = row['DepComA']
+ if len(old_insee) < 5:
+ old_insee = '0' + old_insee
+ q = Town.objects.filter(numero_insee=old_insee)
+
+ if not q.count():
+ missing.append((old_insee, row['NomCA']))
+ continue
+ if q.count() > 1:
+ q = q.filter(year__lt=default_year).order_by('-year')
+ if not q.count():
+ strange.append((old_insee, row['NomCA']))
+ continue
+ old_town = q.all()[0]
+
q = Town.objects.filter(numero_insee=new_insee,
year=default_year)
- if q.count():
- print("First town already exists for this year....")
- return
-
- if not quiet:
- sys.stdout.write('Processing town %d.\r' % (idx + 1))
- sys.stdout.flush()
-
- old_insee = row['DepComA']
- if len(old_insee) < 5:
- old_insee = '0' + old_insee
- q = Town.objects.filter(numero_insee=old_insee)
-
- if not q.count():
- missing.append((old_insee, row['NomCA']))
- continue
- if q.count() > 1:
- q = q.filter(year__lt=default_year).order_by('-year')
if not q.count():
- strange.append((old_insee, row['NomCA']))
- continue
- old_town = q.all()[0]
-
- q = Town.objects.filter(numero_insee=new_insee,
- year=default_year)
- if not q.count():
- nb_created += 1
- name = row['NomCN'].strip()
- name = r.sub(r"\2 \1", name).strip()
- new_town = Town.objects.create(name=name, year=default_year,
- numero_insee=new_insee)
- else:
- new_town = q.all()[0]
- if new_town in old_town.children.all():
- continue # link already created
- nb_link += 1
- old_town.children.add(new_town)
- linked.add(new_town)
+ nb_created += 1
+ name = row['NomCN'].strip()
+ name = r.sub(r"\2 \1", name).strip()
+ new_town = Town.objects.create(name=name, year=default_year,
+ numero_insee=new_insee)
+ writer.writerow([new_town.numero_insee, new_town.name])
+ else:
+ new_town = q.all()[0]
+ if new_town in old_town.children.all():
+ continue # link already created
+ nb_link += 1
+ old_town.children.add(new_town)
+ linked.add(new_town)
nb_limit = 0
if not quiet:
- sys.stdout.write('\nGenerate limits...'.format(nb_created))
+ sys.stdout.write('\nGenerate limits...')
for town in linked:
if town.generate_geo():
nb_limit += 1
if quiet:
return
+ sys.stdout.write(BColors.OKGREEN)
sys.stdout.write('\n* {} town created\n'.format(nb_created))
sys.stdout.write('* {} link created\n'.format(nb_link))
sys.stdout.write('* {} limit generated\n'.format(nb_limit))
@@ -119,6 +136,6 @@ class Command(BaseCommand):
sys.stdout.write('* these towns have newer version:\n')
for insee, name in strange:
sys.stdout.write('* {} ({})\n'.format(name, insee))
+ sys.stdout.write(f"\n[{get_log_time()}] log file:")
+ sys.stdout.write(f"\n{BColors.WARNING}{log_path}{BColors.ENDC}\n")
sys.stdout.flush()
-
-
diff --git a/ishtar_common/utils.py b/ishtar_common/utils.py
index 23495c2ec..04cda150b 100644
--- a/ishtar_common/utils.py
+++ b/ishtar_common/utils.py
@@ -3158,6 +3158,34 @@ def get_eta(current, total, base_time, current_time):
return f"{int(eta // 3600):02d}:{int(eta % 3600 // 60):02d}:{int(eta % 60):02d}"
+def get_progress(base_lbl, idx, total, ref_time):
+ """
+ Output progress for a long task.
+ - base_lbl: label to display
+ - idx: current item number
+ - total: number of items
+ - ref_time: time the task has been started
+ """
+ lbl = f"\r{BColors.OKBLUE}[{get_percent(idx, total)}] {base_lbl} {idx + 1}/{total}"
+ lbl += f" ({get_eta(idx, total, ref_time, datetime.datetime.now())} left){BColors.ENDC}"
+ return lbl
+
+
+def fast_line_count(filename):
+ """
+ Efficient line counter for a file
+ """
+ CHUNK_SIZE = 1024 * 1024
+ def _count(reader):
+ b = reader(CHUNK_SIZE)
+ while b:
+ yield b
+ b = reader(CHUNK_SIZE)
+ with open(filename, 'rb') as fp:
+ count = sum(buffer.count(b"\n") for buffer in _count(fp.raw.read))
+ return count + 1
+
+
RE_NUMBER = r"[+-]?\d+(?:\.\d*)?"
RE_COORDS = r"(" + RE_NUMBER + r") (" + RE_NUMBER + r")"