From 25f23ed03ad48fb4e5ca2f14527c0bc381dc8c9b Mon Sep 17 00:00:00 2001 From: Étienne Loks Date: Wed, 16 Jun 2021 13:34:38 +0200 Subject: Context records: Optimize record relations - management command - tests --- archaeological_context_records/models.py | 186 +++++++++++++++------ archaeological_context_records/tests.py | 122 ++++++++++++-- .../commands/relations_update_cache_tables.py | 66 +++----- ishtar_common/models.py | 23 +++ 4 files changed, 286 insertions(+), 111 deletions(-) diff --git a/archaeological_context_records/models.py b/archaeological_context_records/models.py index 96ba16935..65145be98 100644 --- a/archaeological_context_records/models.py +++ b/archaeological_context_records/models.py @@ -1366,8 +1366,11 @@ class ContextRecordTree(RelationsViews): REFERENCES {fk_table}(id), CONSTRAINT fk2_{table} FOREIGN KEY(cr_parent_id) REFERENCES {fk_table}(id) - );""".format(table="context_records_tree", - fk_table="archaeological_context_records_contextrecord") + ); + CREATE INDEX {table}_id ON {table} (cr_id); + CREATE INDEX {table}_parent_id ON {table} (cr_parent_id); + """.format(table="context_records_tree", + fk_table="archaeological_context_records_contextrecord") key = models.TextField(primary_key=True) cr = models.ForeignKey( @@ -1388,7 +1391,6 @@ class ContextRecordTree(RelationsViews): @classmethod def _save_tree(cls, tree): keys = [] - print("tree", tree) for idx, parent_id in enumerate(tree[:-1]): for child_id in tree[idx:]: if child_id != parent_id: @@ -1399,85 +1401,161 @@ class ContextRecordTree(RelationsViews): keys.append((child_id, parent_id)) return keys + @classmethod + def _get_base_relations(cls): + return RelationType.objects.filter( + logical_relation__in=('included', 'equal')).values_list("id", flat=True) + + @classmethod + def _get_base_equal_relations(cls): + return RelationType.objects.filter( + logical_relation='equal').values_list("id", flat=True) + + @classmethod + def _get_base_included_relations(cls): + return 
RelationType.objects.filter( + logical_relation='included').values_list("id", flat=True) + + @classmethod + def _get_base_children(cls): + return ContextRecord.objects.values_list("id", flat=True) + @classmethod def _update_child(cls, parent_id, tree, rel_types): whole_tree = set() - childs = RecordRelations.objects.values_list( + children = list(RecordRelations.objects.values_list( "left_record_id", flat=True).filter( - right_record_id=parent_id, relation_type_id__in=rel_types) - for c in childs[:]: + right_record_id=parent_id, relation_type_id__in=rel_types)) + to_be_pop = [] + for idx, c in enumerate(children[:]): if c in tree: # cyclic - childs.pop(c) - #print("childs", parent_id, childs) - if not childs: # last leaf in the tree + to_be_pop.append(idx) + for idx in reversed(to_be_pop): + children.pop(idx) + if not children: # last leaf in the tree return cls._save_tree(tree) - for c in childs: + for c in children: whole_tree.update(cls._update_child(c, tree[:] + [c], rel_types)) return whole_tree @classmethod - def _get_parent_trees(cls, child_id, trees, rel_types): + def _get_parent_trees(cls, child_id, trees, rel_types, deep=0): parents = RecordRelations.objects.values_list( "right_record_id", flat=True).filter( left_record_id=child_id, relation_type_id__in=rel_types) if not parents: return trees new_trees = [] - for p in parents: + for p in set(parents): if p == child_id or any(1 for tree in trees if p in tree): # cyclic continue c_trees = list(map(lambda x: x + [p], trees)) - new_trees += cls._get_parent_trees(p, c_trees, rel_types) + new_trees += cls._get_parent_trees(p, c_trees, rel_types, deep + 1) return new_trees @classmethod - def _update(cls, item_id, cascade=True): - # update the whole tree - rel_types = RelationType.objects.filter( - logical_relation__in=('included', 'equal')).values_list("id", flat=True) + def _get_equals(cls, item_id, equal_rel_types): + equals = list(RecordRelations.objects.values_list( + "right_record_id", flat=True).filter( + 
left_record_id=item_id, relation_type_id__in=equal_rel_types)) + equals += list(RecordRelations.objects.values_list( + "left_record_id", flat=True).filter( + right_record_id=item_id, relation_type_id__in=equal_rel_types)) + return set(equals) + + @classmethod + def _update_equals(cls, item_id, equals): + keys = [] + for equal_id in equals: + if item_id != equal_id: + cls.objects.get_or_create( + key=f"{item_id}_{equal_id}", + cr_id=item_id, cr_parent_id=equal_id + ) + keys.append((item_id, equal_id)) + cls.objects.get_or_create( + key=f"{equal_id}_{item_id}", + cr_id=equal_id, cr_parent_id=item_id + ) + keys.append((equal_id, item_id)) + return keys + + @classmethod + def _update_relations_equals(cls, relations): + equal_rel_types = cls._get_base_equal_relations() + keys = [] + for child_id, parent_id in relations: + equals = set(cls._get_equals(child_id, equal_rel_types)) + keys += cls._update_equals(child_id, equals) + for alt_child in equals: + if alt_child != child_id: + cls.objects.get_or_create( + key=f"{alt_child}_{parent_id}", + cr_id=alt_child, cr_parent_id=parent_id + ) + keys.append((alt_child, parent_id)) + equals = set(cls._get_equals(parent_id, equal_rel_types)) + keys += cls._update_equals(parent_id, equals) + for alt_parent in equals: + if alt_parent != parent_id: + cls.objects.get_or_create( + key=f"{child_id}_{alt_parent}", + cr_id=child_id, cr_parent_id=alt_parent + ) + keys.append((child_id, alt_parent)) + return set(keys) + + @classmethod + def _update(cls, item_id, already_updated=None): + all_relations = set() + # add self relation + cls.objects.get_or_create( + key=f"{item_id}_{item_id}", + cr_id=item_id, cr_parent_id=item_id + ) + all_relations.add((item_id, item_id)) + current_relations_as_child = list( + cls.objects.filter(cr_id=item_id).values_list("cr_parent_id", flat=True) + ) + current_relations_as_parent = list( + cls.objects.filter(cr_parent_id=item_id).values_list("cr_id", flat=True) + ) + + ## update the whole tree + inc_rel_types 
= cls._get_base_included_relations() # get first parents parent_ids = [ - tree[-1] for tree in cls._get_parent_trees(item_id, [[item_id]], rel_types)] - """ - parent_ids = [] - current_ids = [item_id] - while current_ids: - new_ids = [] - for current_id in current_ids: - parents = RecordRelations.objects.values_list( - "right_record_id", flat=True).filter( - left_record_id=current_id, relation_type_id__in=rel_types) - if not parents: - continue - for p in parents[:]: - if p == current_id or p in parent_ids: # cyclic - parents.pop(p) - parent_ids += parents - new_ids += parents - current_ids = new_ids - """ - def get_cr(idx): - return ContextRecord.objects.get(pk=idx) - print(get_cr(item_id)) + tree[-1] for tree in cls._get_parent_trees(item_id, [[item_id]], + inc_rel_types)] if not parent_ids: parent_ids = [item_id] - print("parents", [get_cr(p) for p in parent_ids]) # get all child for parents and save trees - all_relations = set() for parent_id in parent_ids: tree = [parent_id] - all_relations.update(cls._update_child(parent_id, tree, rel_types)) - #print(all_relations) - - # delete old relations - for item_id in set([c for c, __ in all_relations] + - [p for p, __ in all_relations]): - for rel in cls.objects.filter(cr_id=item_id).all(): - if (rel.cr_id, rel.cr_parent_id) not in all_relations: - rel.delete() - for rel in cls.objects.filter(cr_parent_id=item_id).all(): - if (rel.cr_id, rel.cr_parent_id) not in all_relations: - rel.delete() - + all_relations.update(cls._update_child(parent_id, tree, inc_rel_types)) + all_relations.update(cls._update_relations_equals(all_relations)) + if not all_relations: + equal_rel_types = cls._get_base_equal_relations() + equals = set(cls._get_equals(item_id, equal_rel_types)) + all_relations.update(cls._update_equals(item_id, equals)) + + + ## delete old relations + if not already_updated: + already_updated = [item_id] + for parent_id in current_relations_as_child: + if (item_id, parent_id) not in all_relations and \ + parent_id 
not in already_updated: + # disappeared - must regenerate + already_updated.append(parent_id) + cls.objects.filter(key=f"{item_id}_{parent_id}").delete() + cls._update(parent_id, already_updated) + for child_id in current_relations_as_parent: + if (child_id, item_id) not in all_relations and \ + child_id not in already_updated: + # disappeared - must regenerate + already_updated.append(child_id) + cls.objects.filter(key=f"{child_id}_{item_id}").delete() + cls._update(child_id, already_updated) diff --git a/archaeological_context_records/tests.py b/archaeological_context_records/tests.py index 1f919b5d9..553731542 100644 --- a/archaeological_context_records/tests.py +++ b/archaeological_context_records/tests.py @@ -943,8 +943,6 @@ class RecordRelationsTest(ContextRecordInit, TestCase): ) def test_relation_view(self): - ## TODO : branches multiples - ## TODO : cyclique profile = get_current_profile() profile.parent_relations_engine = "V" profile.save() @@ -956,15 +954,15 @@ class RecordRelationsTest(ContextRecordInit, TestCase): logical_relation='included' ) """ - 6 7 8 9 10 + 6 7 8 9 10 = 11 = 12 | | | | | ------- ----- | | - 4 5 + 4 5 = 13 | | --------- | - 3 + 3 = 14 | --------- | | @@ -991,13 +989,113 @@ class RecordRelationsTest(ContextRecordInit, TestCase): self.assertTrue(models.ContextRecordTree.check_engine()) # change to table q = models.ContextRecordTree.objects.filter(cr=crs[0], cr_parent=crs[1]) self.assertEqual(q.count(), 0) # empty table - print("~~~ CR1 - child of all") - models.ContextRecordTree.update(crs[0].id) - print("~~~ CR2") - models.ContextRecordTree.update(crs[1].id) - print("~~~ CR3 - parent of all") - models.ContextRecordTree.update(crs[2].id) - # vérifier cr1 -> cr3 + # verify tree generation + full_trees = [ + [10, 5, 3, 2], + [10, 5, 3, 1], + [9, 5, 3, 2], + [9, 5, 3, 1], + [8, 4, 3, 2], + [8, 4, 3, 1], + [7, 4, 3, 2], + [7, 4, 3, 1], + [6, 4, 3, 2], + [6, 4, 3, 1], + ] + self._test_tree_generation(0, full_trees) + trees = [ + [10, 5, 
3, 2], + [10, 5, 3, 1], + [9, 5, 3, 2], + [9, 5, 3, 1], + [8, 4, 3, 2], + [8, 4, 3, 1], + [7, 4, 3, 2], + [7, 4, 3, 1], + [6, 4, 3, 2], + [6, 4, 3, 1], + ] + self._test_tree_generation(1, trees) + trees = [ + [10, 5, 3, 2], + [10, 5, 3, 1], + [9, 5, 3, 2], + [9, 5, 3, 1], + [8, 4, 3, 2], + [8, 4, 3, 1], + [7, 4, 3, 2], + [7, 4, 3, 1], + [6, 4, 3, 2], + [6, 4, 3, 1], + ] + self._test_tree_generation(2, trees) + trees = [ + [8, 4, 3, 2], + [8, 4, 3, 1], + [7, 4, 3, 2], + [7, 4, 3, 1], + [6, 4, 3, 2], + [6, 4, 3, 1], + ] + self._test_tree_generation(3, trees) + trees = [ + [10, 5, 3, 2], + [10, 5, 3, 1], + [9, 5, 3, 2], + [9, 5, 3, 1], + ] + self._test_tree_generation(4, trees) + trees = [ + [6, 4, 3, 2], + [6, 4, 3, 1], + ] + self._test_tree_generation(5, trees) + trees = [ + [7, 4, 3, 2], + [7, 4, 3, 1], + ] + self._test_tree_generation(6, trees) + trees = [ + [8, 4, 3, 2], + [8, 4, 3, 1], + ] + self._test_tree_generation(7, trees) + trees = [ + [9, 5, 3, 2], + [9, 5, 3, 1], + ] + self._test_tree_generation(8, trees) + trees = [ + [10, 5, 3, 2], + [10, 5, 3, 1], + ] + self._test_tree_generation(9, trees) + models.ContextRecordTree.objects.filter(pk__isnull=False).delete() + # test regenerate all + models.ContextRecordTree.regenerate_all() + self._test_tree_(full_trees, "'FULL GENERATION'") + + # test remove a Node + # test EQUIV + + def _test_tree_(self, test_trees, context_record): + crs = self.context_records + for tree in test_trees: + for tree_idx in range(len(tree) - 1): + q = models.ContextRecordTree.objects.filter( + cr_parent=crs[tree[tree_idx] - 1], cr=crs[tree[tree_idx + 1] - 1]) + self.assertEqual( + q.count(), 1, + msg="Tree relation ({}, {}) is missing for context " + "record {}".format(tree[tree_idx + 1], tree[tree_idx], + context_record) + ) + + def _test_tree_generation(self, cr_idx, test_trees): + crs = self.context_records + models.ContextRecordTree.objects.filter(pk__isnull=False).delete() + models.ContextRecordTree.update(crs[cr_idx].id) + 
self._test_tree_(test_trees, cr_idx + 1) class ContextRecordWizardCreationTest(WizardTest, ContextRecordInit, TestCase): diff --git a/ishtar_common/management/commands/relations_update_cache_tables.py b/ishtar_common/management/commands/relations_update_cache_tables.py index ab7f134ff..3e2dfaef5 100644 --- a/ishtar_common/management/commands/relations_update_cache_tables.py +++ b/ishtar_common/management/commands/relations_update_cache_tables.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (C) 2013-2018 Étienne Loks +# Copyright (C) 2021 Étienne Loks # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -24,60 +24,36 @@ from django.core.management.base import BaseCommand from django.apps import apps -APPS = ['ishtar_common', 'archaeological_operations', - 'archaeological_context_records', 'archaeological_finds', - 'archaeological_warehouse'] +CACHE_TABLES = [ + "archaeological_context_records.ContextRecordTree" +] class Command(BaseCommand): args = '' - help = 'Regenerate geo, cached labels and search vectors' + help = 'Update all relations for cache tables' def add_arguments(self, parser): - parser.add_argument('app_name', nargs='?', default=None, - choices=APPS) - parser.add_argument('model_name', nargs='?', default=None) + parser.add_argument('table', nargs='?', default=None, + choices=CACHE_TABLES) parser.add_argument( '--quiet', dest='quiet', action='store_true', help='Quiet output') def handle(self, *args, **options): quiet = options['quiet'] - limit = options['app_name'] - model_name = options['model_name'] - if model_name: - model_name = model_name.lower() - for app in APPS: - if limit and app != limit: - continue + tables = CACHE_TABLES + if options.get("table", None): + table = options.get("table", None) + if table not in CACHE_TABLES: + sys.stdout.write("{} not a valid cache table\n".format(table)) + return + tables = [table] + for table in 
tables: if not quiet: - print("* app: {}".format(app)) - for model in apps.get_app_config(app).get_models(): - if model_name and model.__name__.lower() != model_name: - continue - if model.__name__.startswith('Historical'): - continue - if not bool( - [k for k in dir(model) - if k.startswith('_generate_') or - k == "search_vector"]): - continue - msg = "-> processing {}: ".format(model._meta.verbose_name) - ln = model.objects.count() - for idx, obj_id in enumerate(model.objects.values('pk').all()): - obj = model.objects.get(pk=obj_id['pk']) - obj.skip_history_when_saving = True - obj._no_move = True - if hasattr(obj, "point_source") and obj.point_source in ( - "M", "T"): - obj.point = None - obj.point_2d = None - obj.x = None - obj.y = None - cmsg = "\r{} {}/{}".format(msg, idx + 1, ln) - if not quiet: - sys.stdout.write(cmsg) - sys.stdout.flush() - obj.save() - if not quiet: - sys.stdout.write("\n") + print("* table: {}".format(table)) + app, tablename = table.split(".") + model = apps.get_app_config(app).get_model(tablename) + model.regenerate_all(quiet=quiet) + if not quiet: + sys.stdout.write("\n") diff --git a/ishtar_common/models.py b/ishtar_common/models.py index fdeba5f26..19c432053 100644 --- a/ishtar_common/models.py +++ b/ishtar_common/models.py @@ -20,6 +20,8 @@ """ Models description """ +import sys + from bs4 import BeautifulSoup import copy import datetime @@ -861,6 +863,27 @@ class RelationsViews(models.Model): ) return relation_view_update.delay(sender, kwargs) + @classmethod + def _get_base_children(cls): + raise NotImplementedError()  # abstract: subclasses must override + + @classmethod + def regenerate_all(cls, quiet=True): + cls.check_engine() + profile = get_current_profile(force=True) + if profile.parent_relations_engine == "V": + return + cls.objects.filter(pk__isnull=False).delete() + base_children = list(cls._get_base_children()) + total = len(base_children) + for idx, cr_id in enumerate(base_children): + if not quiet: + sys.stdout.write(f"Processing: {idx + 1} / 
{total}\t\t{cr_id}\r") + sys.stdout.flush() + cls.update(cr_id) + if not quiet: + sys.stdout.write("\n") + @classmethod def create_table(cls): raise NotImplemented() -- cgit v1.2.3