summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Étienne Loks <etienne.loks@iggdrasil.net> 2022-05-12 15:40:46 +0200
committer Étienne Loks <etienne.loks@iggdrasil.net> 2022-12-12 12:21:00 +0100
commit c0b5c70f34fe0df39da08818a8649c1f0125e905 (patch)
tree 72841baefc2f7874b6819c603b7e0fba1e6fc3dd
parent 12de821ded4f964c89d1ac758701bcaf4750e7de (diff)
download Ishtar-c0b5c70f34fe0df39da08818a8649c1f0125e905.tar.bz2
Ishtar-c0b5c70f34fe0df39da08818a8649c1f0125e905.zip
Geodata: improve migrate script (multi processing) - fix upper model
-rw-r--r-- archaeological_context_records/models.py 34
-rw-r--r-- archaeological_finds/models_finds.py 1
-rw-r--r-- ishtar_common/management/commands/migrate_to_geo_v4.py 461
3 files changed, 292 insertions, 204 deletions
diff --git a/archaeological_context_records/models.py b/archaeological_context_records/models.py
index 3a89a6ca9..e279c55fa 100644
--- a/archaeological_context_records/models.py
+++ b/archaeological_context_records/models.py
@@ -398,6 +398,8 @@ class CRBulkView(object):
class GeographicSubTownItem(GeoItem):
+ UPPER_GEO = []
+
class Meta:
abstract = True
@@ -433,19 +435,34 @@ class GeographicSubTownItem(GeoItem):
self.main_geodata = None
modified = True
+ for upper_attr in self.UPPER_GEO:
+ upper = getattr(self, upper_attr, None)
+ if upper and upper.main_geodata and \
+ upper.main_geodata_id not in self.geodata.values_list(
+ "id", flat=True):
+ modified = True
+ self.geodata.add(upper.main_geodata)
+ if not self.main_geodata:
+ self.main_geodata = upper.main_geodata
+
if not has_geo_town:
- if modified and save:
- self.skip_history_when_saving = True
- self._no_move = True
- self.save()
+ if modified:
+ if save:
+ self.skip_history_when_saving = True
+ self._no_move = True
+ self.save()
+ else:
+ return True
return
if not q_geodata_current_town.filter(source_id=town.id).count():
self.geodata.add(town.main_geodata)
- if save:
- self.skip_history_when_saving = True
- self._no_move = True
- self.save()
+ if save:
+ self.skip_history_when_saving = True
+ self._no_move = True
+ self.save()
+ else:
+ return True
class ContextRecord(
@@ -671,6 +688,7 @@ class ContextRecord(
"short_label",
"town_label_with_areas",
]
+ UPPER_GEO = ["operation", "archaeological_site"]
history = HistoricalRecords(bases=[HistoryModel])
objects = UUIDModelManager()
diff --git a/archaeological_finds/models_finds.py b/archaeological_finds/models_finds.py
index 7d6d43d6c..962d627ba 100644
--- a/archaeological_finds/models_finds.py
+++ b/archaeological_finds/models_finds.py
@@ -397,6 +397,7 @@ class BaseFind(
SLUG = "basefind"
SERIALIZE_EXCLUDE = ["find"]
SERIALIZE_CALL = {"complete_id": "complete_id", "short_id": "short_id"}
+ UPPER_GEO = ["context_record"]
uuid = models.UUIDField(default=uuid.uuid4)
label = models.TextField(_("Free ID"))
diff --git a/ishtar_common/management/commands/migrate_to_geo_v4.py b/ishtar_common/management/commands/migrate_to_geo_v4.py
index 2a316c2a4..d904b279a 100644
--- a/ishtar_common/management/commands/migrate_to_geo_v4.py
+++ b/ishtar_common/management/commands/migrate_to_geo_v4.py
@@ -3,6 +3,8 @@
import csv
import datetime
+from django.db import connection
+from multiprocessing import Pool, Process
import os
import sys
@@ -24,63 +26,269 @@ if not os.path.exists(log_path):
os.mkdir(log_path, mode=0o770)
-def write_output(model_name, idx, nb, ref_time=None):
- lbl = f"\r[{get_percent(idx, nb)}] Migrate {model_name}s {idx + 1}/{nb}"
+# Shared lookups for the town job. They are module-level statements, so they
+# query the database as soon as this module is imported; _process_town reads
+# them as globals.
+town_content_type = ContentType.objects.get(app_label="ishtar_common", model="town")
+# Geo data type / provider attached to every town record; created on first use.
+data_type, __ = models_common.GeoDataType.objects.get_or_create(
+ txt_idx="town-limit", defaults={"label": "Limites commune"}
+)
+provider, __ = models_common.GeoProviderType.objects.get_or_create(
+ txt_idx="france-ign", defaults={"label": "IGN"}
+)
+
+# Change records appended by the _process_* functions, each one a list
+# [model slug, label, pk, description].
+# NOTE(review): the _process_* functions are run through Pool.apply_async
+# (see launch_job), i.e. in worker processes; appends made there modify the
+# worker's copy of this list - confirm the parent process ever sees them.
+changed = []
+
+
+def _process_town(town_id):
+    """Give one town its main geographic vector record.
+
+    Loads the town by primary key, gets or creates the matching
+    ``GeoVectorData`` (the town outline when ``town.limit`` is set, its
+    centre point otherwise), stores it as ``town.main_geodata`` and saves
+    the town. A change record is appended to ``changed`` only when the
+    vector record was newly created.
+
+    :param town_id: primary key of the ``Town`` to process
+    """
+    # Drop the current DB connection before querying - presumably so each
+    # pool worker opens its own connection; confirm.
+    connection.close()
+    town = models_common.Town.objects.get(pk=town_id)
+    lookup = dict(
+        name=town._generate_cached_label(),
+        source_content_type=town_content_type,
+        source_id=town.pk,
+        data_type=data_type,
+        provider=provider,
+    )
+    # The outline wins over the centre point when both are available.
+    if town.limit:
+        lookup["multi_polygon"] = town.limit
+    else:
+        lookup["point_2d"] = town.center
+    geodata, is_new = models_common.GeoVectorData.objects.get_or_create(**lookup)
+    town.main_geodata = geodata
+    town._post_save_geo_ok = False
+    town.save()
+    if not is_new:
+        return
+    changed.append(["geovectordata", geodata.name, geodata.pk, "Création commune"])
+
+
+# Per-model job context. These are placeholders: migrate() declares them
+# global and rebinds them for each model before launching the jobs, and the
+# _process_site_ope / _process_main functions read them as globals.
+model_slug, model_name, model_full_name, model = None, None, None, None
+model_content_type, data_type_area, data_type_center = None, None, None
+
+# Class name -> labels accepted as "source item" for that class. The
+# _process_* functions only migrate a geometry whose
+# multi_polygon_source_item / point_source_item is one of these labels.
+cls_labels = {
+ "ContextRecord": ["Context Record", "Unité d'Enregistrement"],
+ "BaseFind": ["Base find", "Mobilier d'origine"],
+ "Operation": ["Operation", "Opération"],
+ "ArchaeologicalSite": ["Entité (EA)", "Entité archéologique",
+ "Archaeological site"],
+}
+
+
+def _process_site_ope(obj):
+    """Migrate the geographic data of one operation or archaeological site.
+
+    Reads the module-level ``model_slug``, ``model_name``,
+    ``model_content_type``, ``data_type_area`` and ``data_type_center``
+    that ``migrate()`` sets for the model being processed.
+
+    Steps:
+
+    1. save ``obj`` (history and move handling disabled) and record whether
+       it is linked to one or to several towns having a polygon geodata;
+    2. when the object's own multi-polygon is flagged ``"P"`` and its source
+       item is one of the labels of its class, create a ``GeoVectorData``
+       from it and make it the main geodata;
+    3. likewise for the point: from ``x``/``y``/``z`` when ``x`` and ``y``
+       are set, otherwise from ``point`` (3D) or ``point_2d``.
+
+    When both 2. and 3. apply, the point record ends up as ``main_geodata``
+    because it is assigned last.
+
+    :param obj: the model instance to migrate
+    """
+    # Drop the current DB connection before querying - presumably so each
+    # pool worker opens its own connection; confirm.
+    connection.close()
+    obj._no_move = True
+    obj.skip_history_when_saving = True
+    obj.save()  # auto manage geo town association
+    # Count once: the original issued the same COUNT query twice.
+    nb_towns = obj.towns.filter(main_geodata__multi_polygon__isnull=False).count()
+    if nb_towns > 1:
+        changed.append(
+            [model_slug, str(obj), obj.pk, "Association géo de zone communale"]
+        )
+    elif nb_towns == 1:
+        changed.append(
+            [model_slug, str(obj), obj.pk, "Association géo de commune"]
+        )
+    obj_verbose_names = cls_labels[obj.__class__.__name__]
+    if obj.multi_polygon_source == "P" and obj.multi_polygon \
+            and obj.multi_polygon_source_item in obj_verbose_names:
+        attrs = {
+            "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
+            "source_content_type": model_content_type,
+            "source_id": obj.pk,
+            "multi_polygon": obj.multi_polygon,
+            "data_type": data_type_area,
+        }
+        data = models_common.GeoVectorData.objects.create(**attrs)
+        obj.main_geodata = data
+        obj._post_save_geo_ok = False
+        obj.save()
+        changed.append(
+            [
+                "geovectordata",
+                data.name,
+                data.pk,
+                f"Multi-polygone {model_name}",
+            ]
+        )
+    if obj.point_source == "P" and obj.point_2d \
+            and obj.point_source_item in obj_verbose_names:
+        if obj.x and obj.y:
+            attrs = {
+                "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
+                "source_content_type": model_content_type,
+                "source_id": obj.pk,
+                "data_type": data_type_center,
+                "x": obj.x,
+                "y": obj.y,
+                "z": obj.z,
+            }
+            data = models_common.GeoVectorData.objects.create(**attrs)
+            obj.main_geodata = data
+            # Reset the flag before saving, as every other branch here and
+            # the same branch of _process_main do; it was missing only in
+            # this branch.
+            obj._post_save_geo_ok = False
+            obj.save()
+            changed.append(
+                [
+                    "geovectordata",
+                    data.name,
+                    data.pk,
+                    f"Coordonnées {model_name}",
+                ]
+            )
+        elif obj.point_2d:
+            attrs = {
+                "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
+                "source_content_type": model_content_type,
+                "source_id": obj.pk,
+                "data_type": data_type_center,
+            }
+            # Prefer the 3D point when the object has one.
+            if obj.point:
+                attrs["point_3d"] = obj.point
+            else:
+                attrs["point_2d"] = obj.point_2d
+            data = models_common.GeoVectorData.objects.create(**attrs)
+            obj.main_geodata = data
+            obj._post_save_geo_ok = False
+            obj.save()
+            changed.append(
+                ["geovectordata", data.name, data.pk, f"Point {model_name}"]
+            )
+
+# Placeholder: migrate() declares it global and rebinds it for each model
+# before launching the _process_main jobs.
+data_type_outline = None
+
+
+def _process_main(obj):
+ """Migrate the geographic data of one item handled by the second model loop.
+
+ Saves ``obj`` with history and move handling disabled, records a change
+ when it has a main geodata after that save, then creates a
+ ``GeoVectorData`` from the object's own multi-polygon and/or point when
+ the matching source flag is ``"P"`` and the source item is one of the
+ labels listed for its class in ``cls_labels``.
+
+ Reads the module-level ``model_slug``, ``model_name``,
+ ``model_content_type``, ``data_type_outline`` and ``data_type_center``
+ set by ``migrate()``.
+ """
+ # Drop the current DB connection before querying - presumably so each
+ # pool worker opens its own connection; confirm.
+ connection.close()
+ obj._no_move = True
+ obj.skip_history_when_saving = True
+ obj.save() # auto manage geo town association
+
+ # A main geodata is present once the save above is done: record it.
+ if obj.main_geodata:
+ changed.append(
+ [model_slug, str(obj), obj.pk, "Association géo de zone communale"]
+ )
+ obj_verbose_names = cls_labels[obj.__class__.__name__]
+ # Own outline: stored with the "<slug>-outline" data type.
+ if obj.multi_polygon_source == "P" and obj.multi_polygon \
+ and obj.multi_polygon_source_item in obj_verbose_names:
+ attrs = {
+ "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
+ "source_content_type": model_content_type,
+ "source_id": obj.pk,
+ "multi_polygon": obj.multi_polygon,
+ "data_type": data_type_outline,
+ }
+ data = models_common.GeoVectorData.objects.create(**attrs)
+ obj.main_geodata = data
+ # NOTE(review): flag reset before every save that follows a
+ # main_geodata change - presumably re-enables post-save geo handling.
+ obj._post_save_geo_ok = False
+ obj.save()
+ changed.append(
+ [
+ "geovectordata",
+ data.name,
+ data.pk,
+ f"Multi-polygone {model_name}",
+ ]
+ )
+ # Own point. NOTE(review): this looks like a sibling of the outline test
+ # above, so when both apply the point record replaces the outline as
+ # main_geodata - confirm against the unflattened source.
+ if obj.point_source == "P" and obj.point_2d \
+ and obj.point_source_item in obj_verbose_names:
+ # Raw coordinates available: store x/y/z rather than a geometry.
+ if obj.x and obj.y:
+ attrs = {
+ "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
+ "source_content_type": model_content_type,
+ "source_id": obj.pk,
+ "data_type": data_type_center,
+ "x": obj.x,
+ "y": obj.y,
+ "z": obj.z,
+ }
+ data = models_common.GeoVectorData.objects.create(**attrs)
+ obj.main_geodata = data
+ obj._post_save_geo_ok = False
+ obj.save()
+ changed.append(
+ [
+ "geovectordata",
+ data.name,
+ data.pk,
+ f"Coordonnées {model_name}",
+ ]
+ )
+ elif obj.point_2d:
+ attrs = {
+ "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
+ "source_content_type": model_content_type,
+ "source_id": obj.pk,
+ "data_type": data_type_center,
+ }
+ # Prefer the 3D point when the object has one.
+ if obj.point:
+ attrs["point_3d"] = obj.point
+ else:
+ attrs["point_2d"] = obj.point_2d
+ data = models_common.GeoVectorData.objects.create(**attrs)
+ obj.main_geodata = data
+ obj._post_save_geo_ok = False
+ obj.save()
+ changed.append(
+ ["geovectordata", data.name, data.pk, f"Point {model_name}"]
+ )
+
+
+def _process_simple(obj):
+    """Re-save ``obj`` with the migration flags set.
+
+    Used by ``migrate()`` for the models that only need a plain save
+    (``Warehouse`` and ``Container``).
+
+    :param obj: the model instance to save
+    """
+    # Drop the current DB connection before saving - presumably so each
+    # pool worker opens its own connection; confirm.
+    connection.close()
+    migration_flags = (
+        ("_post_save_geo_ok", False),
+        ("_no_move", True),
+        ("skip_history_when_saving", True),
+    )
+    for flag_name, flag_value in migration_flags:
+        setattr(obj, flag_name, flag_value)
+    obj.save()
+
+
+# Progress state of the running job, reset by launch_job():
+# number of finished items, number of items, start time of the job.
+idx = 0
+total = 0
+ref_time = None
+
+
+def write_output(arg):
+    """Print the progress line after one item has been processed.
+
+    Used as the ``callback`` of ``Pool.apply_async``: it is called with the
+    return value of the worker function, which is ignored. Does nothing
+    when the module-level ``quiet`` flag is set.
+
+    :param arg: return value of the worker function (unused)
+    """
+    global idx, total, model_name, ref_time, quiet
+    if quiet:
+        return
+    # idx becomes the number of items done so far (1-based) ...
+    idx = idx + 1
+    # ... so it is displayed as is: "idx + 1" made the counter start at 2
+    # and end at total + 1.
+    lbl = f"\r[{get_percent(idx, total)}] Migrate {model_name}s {idx}/{total}"
     if ref_time:
-        lbl += f" ({get_eta(idx, nb, ref_time, datetime.datetime.now())} left)"
+        lbl += f" ({get_eta(idx, total, ref_time, datetime.datetime.now())} left)"
     sys.stdout.write(lbl)
     sys.stdout.flush()
-def migrate(quiet=False, log=True):
- changed = []
+def launch_job(lst, name, process_number, process_func):
+    """Run ``process_func`` on every item of ``lst`` in a process pool.
+
+    Resets the module-level progress state (``idx``, ``total``,
+    ``model_name``, ``ref_time``) and blocks until every item has been
+    handled. ``write_output`` is called after each successful item.
+
+    A worker exception is reported on stderr and the job carries on:
+    without an ``error_callback`` the exception is stored in a result
+    object that is never read, so failures were silently dropped.
+
+    :param lst: items to process; must support ``len()`` and iteration
+    :param name: label used in the progress line
+    :param process_number: number of worker processes
+    :param process_func: function called in a worker with one item
+    """
+    global idx, total, model_name, ref_time
+    idx, total, model_name, ref_time = 0, len(lst), name, datetime.datetime.now()
+
+    def report_error(exc):
+        # Called in this (parent) process with the exception raised by
+        # the worker function.
+        sys.stderr.write(f"\n{name}: {process_func.__name__} failed: {exc!r}\n")
+        sys.stderr.flush()
+
+    # The context manager terminates the pool on exit, which also releases
+    # the workers if submitting the jobs raises.
+    with Pool(processes=process_number) as pool:
+        for item in lst:
+            pool.apply_async(
+                process_func,
+                (item,),
+                callback=write_output,
+                error_callback=report_error,
+            )
+        pool.close()
+        pool.join()
+
+
+# Set by Command.handle() from the --quiet option; read by write_output().
+quiet = False
+
+
+def migrate(log=True, process_number=1):
+ global idx
# create towns
q = models_common.Town.objects.exclude(
center__isnull=True, limit__isnull=True
- ).exclude(main_geodata__isnull=False)
- nb = q.count()
- town_content_type = ContentType.objects.get(app_label="ishtar_common", model="town")
- data_type, __ = models_common.GeoDataType.objects.get_or_create(
- txt_idx="town-limit", defaults={"label": "Limites commune"}
- )
- provider, __ = models_common.GeoProviderType.objects.get_or_create(
- txt_idx="france-ign", defaults={"label": "IGN"}
- )
- ref_time = datetime.datetime.now()
- for idx, town in enumerate(q.all()):
- if not quiet:
- write_output("town", idx, nb, ref_time)
- attrs = {
- "name": town._generate_cached_label(),
- "source_content_type": town_content_type,
- "source_id": town.pk,
- "data_type": data_type,
- "provider": provider,
- }
- if town.limit:
- attrs["multi_polygon"] = town.limit
- else:
- attrs["point_2d"] = town.center
- data, created = models_common.GeoVectorData.objects.get_or_create(**attrs)
- if created:
- changed.append(["geovectordata", data.name, data.pk, "Création commune"])
- town.main_geodata = data
- town.save()
- if not quiet and nb:
- sys.stdout.write(f"\r[{get_log_time()}] Towns migrated \n")
- sys.stdout.flush()
+ ).exclude(main_geodata__isnull=False).distinct()
+ town_ids = list(q.values_list("id", flat=True))
+ idx = 0
+ launch_job(town_ids, "town", process_number, _process_town)
model_list = [
("operation", "opération", "de l'opération", Operation),
("archaeologicalsite", "site", "du site", ArchaeologicalSite),
]
+
+ global model_slug, model_name, model_full_name, model
+ global model_content_type, data_type_area, data_type_center, data_type_outline
for model_slug, model_name, model_full_name, model in model_list:
+ connection.close()
# manage operation vector sources
model_content_type = ContentType.objects.get(
app_label="archaeological_operations", model=model_slug
)
- q = model.objects.exclude(main_geodata__isnull=False)
- nb = q.count()
data_type_area, __ = models_common.GeoDataType.objects.get_or_create(
txt_idx=f"{model_slug}-area",
defaults={"label": f"Emprise {model_full_name}"},
@@ -89,82 +297,10 @@ def migrate(quiet=False, log=True):
txt_idx=f"{model_slug}-center",
defaults={"label": f"Centre {model_full_name}"},
)
- ref_time = datetime.datetime.now()
- for idx, obj in enumerate(q.all()):
- if not quiet:
- write_output(model_name, idx, nb, ref_time)
-
- obj._no_move = True
- obj.skip_history_when_saving = True
- obj.save() # auto manage geo town association
- q_towns = obj.towns.filter(main_geodata__multi_polygon__isnull=False)
- if q_towns.count() > 1:
- changed.append(
- [model_slug, str(obj), obj.pk, "Association géo de zone communale"]
- )
- elif q_towns.count() == 1:
- changed.append(
- [model_slug, str(obj), obj.pk, "Association géo de commune"]
- )
- if obj.multi_polygon_source == "P" and obj.multi_polygon:
- attrs = {
- "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
- "source_content_type": model_content_type,
- "source_id": obj.pk,
- "multi_polygon": obj.multi_polygon,
- "data_type": data_type_area,
- }
- data = models_common.GeoVectorData.objects.create(**attrs)
- obj.main_geodata = data
- obj.save()
- changed.append(
- [
- "geovectordata",
- data.name,
- data.pk,
- f"Multi-polygone {model_name}",
- ]
- )
- if obj.point_source == "P" and obj.point_2d:
- if obj.x and obj.y:
- attrs = {
- "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
- "source_content_type": model_content_type,
- "source_id": obj.pk,
- "data_type": data_type_center,
- "x": obj.x,
- "y": obj.y,
- "z": obj.z,
- }
- data = models_common.GeoVectorData.objects.create(**attrs)
- obj.main_geodata = data
- obj.save()
- changed.append(
- [
- "geovectordata",
- data.name,
- data.pk,
- f"Coordonnées {model_name}",
- ]
- )
- elif obj.point_2d:
- attrs = {
- "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
- "source_content_type": model_content_type,
- "source_id": obj.pk,
- "data_type": data_type_center,
- }
- if obj.point:
- attrs["point_3d"] = obj.point
- else:
- attrs["point_2d"] = obj.point_2d
- data = models_common.GeoVectorData.objects.create(**attrs)
- obj.main_geodata = data
- obj.save()
- changed.append(
- ["geovectordata", data.name, data.pk, f"Point {model_name}"]
- )
- if not quiet and nb:
+ q = model.objects.exclude(main_geodata__isnull=False)
+ launch_job(list(q.all()), model_name, process_number, _process_site_ope)
+
+ if not quiet:
sys.stdout.write(
f"\r[{get_log_time()}] {model_name.capitalize()} migrated"
+ " " * 20
@@ -189,9 +325,8 @@ def migrate(quiet=False, log=True):
),
]
for app, model_slug, model_name, model_full_name, model in model_list:
+ connection.close()
model_content_type = ContentType.objects.get(app_label=app, model=model_slug)
- q = model.objects.exclude(main_geodata__isnull=False)
- nb = q.count()
data_type_outline, __ = models_common.GeoDataType.objects.get_or_create(
txt_idx=f"{model_slug}-outline",
defaults={"label": f"Contour d'{model_name}"},
@@ -200,77 +335,10 @@ def migrate(quiet=False, log=True):
txt_idx=f"{model_slug}-center",
defaults={"label": f"Centre {model_full_name}"},
)
- ref_time = datetime.datetime.now()
- for idx, obj in enumerate(q.all()):
- if not quiet:
- write_output(model_name, idx, nb, ref_time)
- obj._no_move = True
- obj.skip_history_when_saving = True
- obj.save() # auto manage geo town association
-
- if obj.main_geodata:
- changed.append(
- [model_slug, str(obj), obj.pk, "Association géo de zone communale"]
- )
- if obj.multi_polygon_source == "P" and obj.multi_polygon:
- attrs = {
- "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
- "source_content_type": model_content_type,
- "source_id": obj.pk,
- "multi_polygon": obj.multi_polygon,
- "data_type": data_type_outline,
- }
- data = models_common.GeoVectorData.objects.create(**attrs)
- obj.main_geodata = data
- obj.save()
- changed.append(
- [
- "geovectordata",
- data.name,
- data.pk,
- f"Multi-polygone {model_name}",
- ]
- )
- if obj.point_source == "P" and obj.point_2d:
- if obj.x and obj.y:
- attrs = {
- "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
- "source_content_type": model_content_type,
- "source_id": obj.pk,
- "data_type": data_type_center,
- "x": obj.x,
- "y": obj.y,
- "z": obj.z,
- }
- data = models_common.GeoVectorData.objects.create(**attrs)
- obj.main_geodata = data
- obj.save()
- changed.append(
- [
- "geovectordata",
- data.name,
- data.pk,
- f"Coordonnées {model_name}",
- ]
- )
- elif obj.point_2d:
- attrs = {
- "name": f"{_(model_name.capitalize())}{_(':')} {str(obj)}",
- "source_content_type": model_content_type,
- "source_id": obj.pk,
- "data_type": data_type_center,
- }
- if obj.point:
- attrs["point_3d"] = obj.point
- else:
- attrs["point_2d"] = obj.point_2d
- data = models_common.GeoVectorData.objects.create(**attrs)
- obj.main_geodata = data
- obj.save()
- changed.append(
- ["geovectordata", data.name, data.pk, f"Point {model_name}"]
- )
- if not quiet and nb:
+ q = model.objects.exclude(main_geodata__isnull=False)
+ launch_job(q.all(), model_name, process_number, _process_main)
+
+ if not quiet:
sys.stdout.write(
f"\r[{get_log_time()}] {model_name.capitalize()} migrated"
+ " " * 20
@@ -279,14 +347,11 @@ def migrate(quiet=False, log=True):
sys.stdout.flush()
model_list = [Warehouse, Container]
for model in model_list:
- ref_time = datetime.datetime.now()
+ connection.close()
q = model.objects.exclude(main_geodata__isnull=False)
- nb = q.count()
- for idx, obj in enumerate(q.all()):
- if not quiet:
- write_output(model.__name__, idx, nb, ref_time)
- obj.save()
- if not quiet and nb:
+ launch_job(q.all(), model.__name__, process_number, _process_simple)
+
+ if not quiet:
sys.stdout.write(
f"\r[{get_log_time()}] {model.__name__.capitalize()} migrated" + " " * 20 + "\n"
)
@@ -309,6 +374,9 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
+ "--process", dest="process", help="Number of process"
+ )
+ parser.add_argument(
"--quiet", dest="quiet", action="store_true", help="Quiet output"
)
parser.add_argument(
@@ -317,10 +385,11 @@ class Command(BaseCommand):
def handle(self, *args, **options):
log = options["log"]
+ global quiet
quiet = options["quiet"]
if not quiet:
sys.stdout.write(f"[{get_log_time()}] Processing migration\n")
- errors = migrate(quiet=quiet, log=log)
+ errors = migrate(log=log, process_number=int(options["process"] or 1))
if not errors:
if not quiet:
sys.stdout.write(f"[{get_log_time()}] Migration finished\n")