From d59943e7f6623c948539f1d11b2e40e165a31d13 Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Thu, 13 Feb 2025 16:42:52 +0000 Subject: [PATCH 1/9] feat: add command to remove and rehydrate groups members --- coral/management/commands/migrate_safely.py | 126 ++++++++++++++++++-- 1 file changed, 119 insertions(+), 7 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index e5a099a2d..7212630ae 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -13,22 +13,55 @@ class Command(BaseCommand): """ def add_arguments(self, parser): + parser.add_argument( + "-o", + "--operation", + action="store", + dest="operation", + choices=[ + "migrate", + "rehydrate_members", + "remove_members" + ], + help="Operation Type; " + ) parser.add_argument( "-m", "--model_name", - ) + ) parser.add_argument( "-r", "--reverse", action="store_true", ) + parser.add_argument( + "-s", + "--source", + help="The path to the input file", + ) + parser.add_argument( + "-e", + "--export", + help="The path for the output file", + ) + parser.add_argument( + "-n", + "--new_members", + help="The path to the group file containing the wanted members", + ) def handle(self, *args, **options): - if options["reverse"]: - ScanForDataRisks().reverse_migration(options["model_name"]) - else: - ScanForDataRisks().handle_model_update(options["model_name"]) - pass + if options["operation"] == "rehydrate_members": + GroupTransform().rehydrate_members(options['source'], options['export'], options['new_members']) + if options["operation"] == "remove_members": + GroupTransform().remove_members(options['source'], options['export']) + + if options["operation"] == "migrate": + if options["reverse"]: + ScanForDataRisks().reverse_migration(options["model_name"]) + else: + ScanForDataRisks().handle_model_update(options["model_name"]) + pass class ScanForDataRisks(): """ This class should contain all of the functions needed to determine data migration risks """ @@ -362,4 +395,83 @@ def remove_nodes(self, tile_json: dict, deleted_nodes: dict): def allow_many(self, tile_json, nodeid): tile_json['data'][nodeid] = [tile_json['data'][nodeid]] - return tile_json \ No newline at end of file + return tile_json + +class GroupTransform(): + """ + This class contains functions to transform the Groups + """ + def rehydrate_members(self, input_file_path, output_file_path, group_with_members): + MEMBER_NODE = "bb2f7e1c-7029-11ee-885f-0242ac140008" + try: + # Read the JSON file + with open(input_file_path, 'r') as file: + data = json.load(file) + + group_ids = [] + new_members = [] + + resource_instances = data.get("business_data", {}).get("resources", []) + + for resource in resource_instances: + group_ids.append(resource['resourceinstance']['resourceinstanceid']) + + if group_with_members: + with open(group_with_members, 'r') as new_file: + new_data = json.load(new_file) + + # gets the members from the groups that aren't the groups - will be person models + new_resource_instances = new_data.get("business_data", {}).get("resources", []) + for resource in new_resource_instances: + for tile in resource["tiles"]: + if MEMBER_NODE in tile["data"]: + members = [{'value': member, 'name': resource['resourceinstance']["name"]} for member in (tile["data"].get(MEMBER_NODE)or []) if member['resourceId'] not in group_ids] + new_members.extend(members) + + for resource in resource_instances: + for tile in resource["tiles"]: + if MEMBER_NODE in tile["data"]: + if tile["data"][MEMBER_NODE]: + if len(new_members) > 0: + match = next((item for item in new_members if item['name'] == resource['resourceinstance']["name"]), None) + if match: + tile["data"][MEMBER_NODE].append(match['value']) + + + data['business_data']['resources'] = resource_instances + + # Write the updated JSON to a new file + with open(output_file_path, 'w') as file: + json.dump(data, file, indent=4) + + except Exception as e: + print(f"An error occurred: {e}") + + def remove_members(self, input_file_path, output_file_path): + MEMBER_NODE = "bb2f7e1c-7029-11ee-885f-0242ac140008" + try: + # Read the JSON file + with open(input_file_path, 'r') as file: + data = json.load(file) + + group_ids = [] + + resource_instances = data.get("business_data", {}).get("resources", []) + resource_instances = [resource for resource in resource_instances if resource['resourceinstance']["name"] != "Undefined"] + + for resource in resource_instances: + group_ids.append(resource['resourceinstance']['resourceinstanceid']) + for resource in resource_instances: + for tile in resource["tiles"]: + if MEMBER_NODE in tile["data"]: + if tile["data"][MEMBER_NODE]: + tile["data"][MEMBER_NODE] = [member for member in tile["data"][MEMBER_NODE] if member['resourceId'] in group_ids] + + data['business_data']['resources'] = resource_instances + + # Write the updated JSON to a new file + with open(output_file_path, 'w') as file: + json.dump(data, file, indent=4) + + except Exception as e: + print(f"An error occurred: {e}") \ No newline at end of file From 6889a4708597145413b27f77d51da982937ad8a1 Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Thu, 13 Feb 2025 17:14:23 +0000 Subject: [PATCH 2/9] feat: automatically remove old data and import new file --- coral/management/commands/migrate_safely.py | 37 ++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index 7212630ae..f749fb7e3 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -44,15 +44,10 @@ def add_arguments(self, parser): "--export", help="The path for the output file", ) - parser.add_argument( - "-n", - "--new_members", - help="The path to the group file containing the wanted members", - ) def handle(self, *args, **options): if options["operation"] == "rehydrate_members": - GroupTransform().rehydrate_members(options['source'], options['export'], options['new_members']) + GroupTransform().rehydrate_members(options['source'], options['export']) if options["operation"] == "remove_members": GroupTransform().remove_members(options['source'], options['export']) @@ -401,8 +396,9 @@ class GroupTransform(): """ This class contains functions to transform the Groups """ - def rehydrate_members(self, input_file_path, output_file_path, group_with_members): + def rehydrate_members(self, input_file_path, output_file_path): MEMBER_NODE = "bb2f7e1c-7029-11ee-885f-0242ac140008" + GROUP_ID = "07883c9e-b25c-11e9-975a-a4d18cec433a" try: # Read the JSON file with open(input_file_path, 'r') as file: @@ -416,7 +412,20 @@ def rehydrate_members(self, input_file_path, output_file_path, group_with_member for resource in resource_instances: group_ids.append(resource['resourceinstance']['resourceinstanceid']) - if group_with_members: + + management.call_command("packages", + operation="export_business_data", + graphs=GROUP_ID, + format="json", + dest_dir="." + ) + + files = glob.glob('Group_*.json') + if files: + latest_file = max(files, key=os.path.getmtime) + group_with_members = 'backup_Group_with_members.json' + os.rename(latest_file, group_with_members) + with open(group_with_members, 'r') as new_file: new_data = json.load(new_file) @@ -443,6 +452,18 @@ def rehydrate_members(self, input_file_path, output_file_path, group_with_member # Write the updated JSON to a new file with open(output_file_path, 'w') as file: json.dump(data, file, indent=4) + + graph = Graph.objects.get(pk=GROUP_ID) + + graph.delete_instances() + + management.call_command("packages", + operation="import_business_data", + source=output_file_path, + overwrite="overwrite", + prevent_indexing=False, + escape_function=True + ) except Exception as e: print(f"An error occurred: {e}") From 7a480e2226b9a51790abb4325d1b14521b7aff3c Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Mon, 17 Feb 2025 16:42:21 +0000 Subject: [PATCH 3/9] fix: auto import and export the data when removing groups and rehydrating --- coral/management/commands/migrate_safely.py | 106 +++++++++++++------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index f749fb7e3..f01cd5194 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -6,7 +6,7 @@ from django.core import management from django.core.management.base import BaseCommand import json -import pdb +import datetime class Command(BaseCommand): """Safely Migrate a model that may have conflicting changes. @@ -47,9 +47,9 @@ def add_arguments(self, parser): def handle(self, *args, **options): if options["operation"] == "rehydrate_members": - GroupTransform().rehydrate_members(options['source'], options['export']) + GroupTransform().rehydrate_members(options['source']) if options["operation"] == "remove_members": - GroupTransform().remove_members(options['source'], options['export']) + GroupTransform().remove_members() if options["operation"] == "migrate": if options["reverse"]: @@ -396,9 +396,19 @@ class GroupTransform(): """ This class contains functions to transform the Groups """ - def rehydrate_members(self, input_file_path, output_file_path): - MEMBER_NODE = "bb2f7e1c-7029-11ee-885f-0242ac140008" - GROUP_ID = "07883c9e-b25c-11e9-975a-a4d18cec433a" + + MEMBER_NODE = "bb2f7e1c-7029-11ee-885f-0242ac140008" + GROUP_ID = "07883c9e-b25c-11e9-975a-a4d18cec433a" + + def rehydrate_members(self, input_file_path): + """ + Add the members in the current environment into the updated groups + + This requires importing the updated groups without any members into the current environment. + + Parameters: + input_file_path (str): The path to the updated groups without members + """ try: # Read the JSON file with open(input_file_path, 'r') as file: @@ -409,21 +419,23 @@ def rehydrate_members(self, input_file_path, output_file_path): resource_instances = data.get("business_data", {}).get("resources", []) + # Get id's for the groups for resource in resource_instances: group_ids.append(resource['resourceinstance']['resourceinstanceid']) - + # Export the current group data with the members management.call_command("packages", operation="export_business_data", - graphs=GROUP_ID, + graphs=self.GROUP_ID, format="json", - dest_dir="." + dest_dir="coral/pkg/business_data/files" ) - files = glob.glob('Group_*.json') + files = glob.glob('coral/pkg/business_data/files/Group_*.json') if files: latest_file = max(files, key=os.path.getmtime) - group_with_members = 'backup_Group_with_members.json' + today = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S') + group_with_members = os.path.join(os.path.dirname(latest_file), f"backup_Group_with_members_{today}.json") os.rename(latest_file, group_with_members) with open(group_with_members, 'r') as new_file: @@ -433,27 +445,29 @@ def rehydrate_members(self, input_file_path, output_file_path): new_resource_instances = new_data.get("business_data", {}).get("resources", []) for resource in new_resource_instances: for tile in resource["tiles"]: - if MEMBER_NODE in tile["data"]: - members = [{'value': member, 'name': resource['resourceinstance']["name"]} for member in (tile["data"].get(MEMBER_NODE)or []) if member['resourceId'] not in group_ids] + if self.MEMBER_NODE in tile["data"]: + members = [{'value': member, 'name': resource['resourceinstance']["name"]} for member in (tile["data"].get(self.MEMBER_NODE)or []) if member['resourceId'] not in group_ids] new_members.extend(members) for resource in resource_instances: for tile in resource["tiles"]: - if MEMBER_NODE in tile["data"]: - if tile["data"][MEMBER_NODE]: + if self.MEMBER_NODE in tile["data"]: + if tile["data"][self.MEMBER_NODE]: if len(new_members) > 0: match = next((item for item in new_members if item['name'] == resource['resourceinstance']["name"]), None) if match: - tile["data"][MEMBER_NODE].append(match['value']) + tile["data"][self.MEMBER_NODE].append(match['value']) data['business_data']['resources'] = resource_instances + output_file_path = f"coral/pkg/business_data/files/Updated_Groups_{today}.json" + # Write the updated JSON to a new file with open(output_file_path, 'w') as file: json.dump(data, file, indent=4) - graph = Graph.objects.get(pk=GROUP_ID) + graph = Graph.objects.get(pk=self.GROUP_ID) graph.delete_instances() @@ -468,31 +482,51 @@ def rehydrate_members(self, input_file_path, output_file_path): except Exception as e: print(f"An error occurred: {e}") - def remove_members(self, input_file_path, output_file_path): - MEMBER_NODE = "bb2f7e1c-7029-11ee-885f-0242ac140008" + def remove_members(self): + """ + Remove the Person instances from the current groups + + This will remove all Person instances set in the groups members without removing any link between groups. + """ try: - # Read the JSON file - with open(input_file_path, 'r') as file: - data = json.load(file) + management.call_command("packages", + operation="export_business_data", + graphs=self.GROUP_ID, + format="json", + dest_dir="coral/pkg/business_data/files" + ) - group_ids = [] + files = glob.glob('coral/pkg/business_data/files/Group_*.json') + if files: + latest_file = max(files, key=os.path.getmtime) + today = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S') + group_with_members = os.path.join(os.path.dirname(latest_file), f"backup_Group_with_members_{today}.json") + os.rename(latest_file, group_with_members) - resource_instances = data.get("business_data", {}).get("resources", []) - resource_instances = [resource for resource in resource_instances if resource['resourceinstance']["name"] != "Undefined"] + # Read the JSON file + with open(group_with_members, 'r') as file: + data = json.load(file) - for resource in resource_instances: - group_ids.append(resource['resourceinstance']['resourceinstanceid']) - for resource in resource_instances: - for tile in resource["tiles"]: - if MEMBER_NODE in tile["data"]: - if tile["data"][MEMBER_NODE]: - tile["data"][MEMBER_NODE] = [member for member in tile["data"][MEMBER_NODE] if member['resourceId'] in group_ids] + group_ids = [] - data['business_data']['resources'] = resource_instances + resource_instances = data.get("business_data", {}).get("resources", []) + resource_instances = [resource for resource in resource_instances if resource['resourceinstance']["name"] != "Undefined"] - # Write the updated JSON to a new file - with open(output_file_path, 'w') as file: - json.dump(data, file, indent=4) + for resource in resource_instances: + group_ids.append(resource['resourceinstance']['resourceinstanceid']) + for resource in resource_instances: + for tile in resource["tiles"]: + if self.MEMBER_NODE in tile["data"]: + if tile["data"][self.MEMBER_NODE]: + tile["data"][self.MEMBER_NODE] = [member for member in tile["data"][self.MEMBER_NODE] if member['resourceId'] in group_ids] + + data['business_data']['resources'] = resource_instances + + output_file_path = f"coral/pkg/business_data/files/Empty_Groups_{today}.json" + + # Write the updated JSON to a new file + with open(output_file_path, 'w') as file: + json.dump(data, file, indent=4) except Exception as e: print(f"An error occurred: {e}") \ No newline at end of file From eaaa520f9a672f6fa0b28508647862f1c6551de2 Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Tue, 18 Feb 2025 10:04:19 +0000 Subject: [PATCH 4/9] fix: match person instances to group resource id instead of name --- coral/management/commands/migrate_safely.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index f01cd5194..c1f17df6f 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -435,7 +435,7 @@ def rehydrate_members(self, input_file_path): if files: latest_file = max(files, key=os.path.getmtime) today = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S') - group_with_members = os.path.join(os.path.dirname(latest_file), f"backup_Group_with_members_{today}.json") + group_with_members = os.path.join(os.path.dirname(latest_file), f"backup_Previous_Group_with_members_{today}.json") os.rename(latest_file, group_with_members) with open(group_with_members, 'r') as new_file: @@ -446,7 +446,7 @@ def rehydrate_members(self, input_file_path): for resource in new_resource_instances: for tile in resource["tiles"]: if self.MEMBER_NODE in tile["data"]: - members = [{'value': member, 'name': resource['resourceinstance']["name"]} for member in (tile["data"].get(self.MEMBER_NODE)or []) if member['resourceId'] not in group_ids] + members = [{'value': member, 'groupId': resource['resourceinstance']["resourceinstanceid"]} for member in (tile["data"].get(self.MEMBER_NODE)or []) if member['resourceId'] not in group_ids] new_members.extend(members) for resource in resource_instances: @@ -454,9 +454,10 @@ def rehydrate_members(self, input_file_path): if self.MEMBER_NODE in tile["data"]: if tile["data"][self.MEMBER_NODE]: if len(new_members) > 0: - match = next((item for item in new_members if item['name'] == resource['resourceinstance']["name"]), None) + match = next((item for item in new_members if item['groupId'] == resource['resourceinstance']["resourceinstanceid"]), None) if match: tile["data"][self.MEMBER_NODE].append(match['value']) + data['business_data']['resources'] = resource_instances From 79e92c7b8c542f2cf1ba0cfc2a958fbaa3dee811 Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Tue, 18 Feb 2025 14:29:49 +0000 Subject: [PATCH 5/9] feat: check for changed names and new concepts --- coral/management/commands/migrate_safely.py | 43 ++++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index c1f17df6f..5b5b354f9 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -74,6 +74,8 @@ def compare_nodes(self) -> tuple[list,list,dict]: new_functions = [] deleted_nodes = {} deleted_nodegroups = [] + updated_names = [] + updated_concepts = [] incoming_datatypes = {} current_datatypes = {} datatype_changes = {} @@ -103,6 +105,16 @@ def compare_nodes(self) -> tuple[list,list,dict]: else: current_datatypes[str(node_json.nodeid)] = node_json.datatype + for node in incoming_nodes: + old_node = next(node_json for node_json in current_nodes if str(node_json.nodeid) == node['nodeid']) + if old_node: + if str(old_node.name) != node['name']: + updated_names.append({ + 'nodeid': node['nodeid'], + 'old_name': old_node.name, + 'new_name': node['name'] + }) + for nodegroup in current_nodegroups: if str(nodegroup.nodegroupid) not in list(map(lambda ng: str(ng['nodegroupid']), incoming_nodegroups)): deleted_nodegroups.append(str(nodegroup.nodegroupid)) @@ -135,9 +147,14 @@ def compare_nodes(self) -> tuple[list,list,dict]: if incoming_datatypes[nodeid] == 'concept' or incoming_datatypes[nodeid] == 'concept-list': concept_keys = ("conceptid", "text", "id") collection_id = incoming_node['config']['rdmCollection'] - datatype_changes[nodeid]['concept_options'] = list(map(lambda concept: {concept_keys[i] : concept[i] for i, _ in enumerate(concept)}, Concept().get_child_collections(collection_id))) + datatype_changes[nodeid]['concept_id'] = collection_id + concept_list = list(map(lambda concept: {concept_keys[i] : concept[i] for i, _ in enumerate(concept)}, Concept().get_child_collections(collection_id))) + print(concept_list) + if len(concept_list) == 0: + updated_concepts.append(incoming_node['config']['rdmCollection']) + datatype_changes[nodeid]['concept_options'] = concept_list - return new_nodes, deleted_nodes, deleted_nodegroups, datatype_changes, new_functions + return new_nodes, deleted_nodes, deleted_nodegroups, datatype_changes, new_functions, updated_names, updated_concepts def handle_datatype_changes(self, tile_json, datatype_changes: dict): for node in tile_json['data']: @@ -183,8 +200,10 @@ def handle_deleted_nodegroups(self, tile_json, deleted_nodegroups): if value in deleted_nodegroups: return True return False + + def handle_concept_change(self, tile_json, mapping, updated_concepts) - def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegroups, new_functions): + def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, updated_concepts): nodes = self.graph.nodes.values() # Print the current state with headings @@ -204,6 +223,20 @@ def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegrou else: print("No new functions added") + print("\nUpdated Node Names:") + if updated_names: + for node in updated_names: + print(f"{node['nodeid']}: {node['old_name']} -> {node['new_name']}") + else: + print("No names changed") + + print("\nNew Concepts:") + if updated_concepts: + for concept in updated_concepts: + print(f"{concept}") + else: + print("No new concepts") + print("\nDeleted Nodes:") if deleted_nodes: for alias, node in deleted_nodes.items(): @@ -258,9 +291,9 @@ def handle_model_update(self, model_name): os.rename(glob.glob(f'{sanitised_model_name}*.json')[0], f'stale_data_{sanitised_model_name}.json') - new_nodes, deleted_nodes, deleted_nodegroups, self.datatype_changes, new_functions = self.compare_nodes() + new_nodes, deleted_nodes, deleted_nodegroups, self.datatype_changes, new_functions, updated_names, updated_concepts = self.compare_nodes() - self.handle_data_change_messages(new_nodes, deleted_nodes, deleted_nodegroups, new_functions) + self.handle_data_change_messages(new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, updated_concepts) if self.datatype_changes == {} and len(deleted_nodes) == 0 and len(deleted_nodegroups) == 0: input("\nContinue with deleting and updating the graphs and data?") From 16638fe60200481d289ff91799f883d41b7cec33 Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Tue, 18 Feb 2025 15:33:58 +0000 Subject: [PATCH 6/9] feat: check for change in concepts for existing concept datatypes --- coral/management/commands/migrate_safely.py | 85 ++++++++++++++------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index 5b5b354f9..883d8b5f2 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -3,6 +3,7 @@ from arches.app.models.graph import Graph from arches.app.models.models import FunctionXGraph from arches.app.models.concept import Concept +from arches.app.models.concept import ConceptValue from django.core import management from django.core.management.base import BaseCommand import json @@ -76,6 +77,7 @@ def compare_nodes(self) -> tuple[list,list,dict]: deleted_nodegroups = [] updated_names = [] updated_concepts = [] + new_concepts = [] incoming_datatypes = {} current_datatypes = {} datatype_changes = {} @@ -90,30 +92,53 @@ def compare_nodes(self) -> tuple[list,list,dict]: current_nodes = self.graph.nodes.values() current_nodegroups = self.graph.get_nodegroups() current_functions = FunctionXGraph.objects.filter(graph_id = self.graphid) + + # map the node ids into a dict + current_node_map = {str(n.nodeid): n for n in current_nodes} for node in incoming_nodes: - if node['nodeid'] not in list(map(lambda n : str(n.nodeid), current_nodes)): - new_nodes.append(node['nodeid']) - else: - incoming_datatypes[node['nodeid']] = node['datatype'] - if node['datatype'] == 'domain-value' or node['datatype'] == 'domain-value-list': - incoming_option_ids[node['nodeid']] = list(map(lambda o: o['id'], node['config']['options'])) + node_id = node["nodeid"] + node_name = node["name"] + node_datatype = node["datatype"] + + if node_id not in current_node_map: + new_nodes.append(node_id) + else: + old_node = current_node_map[node_id] + + # Track datatype changes + incoming_datatypes[node_id] = node_datatype + if node_datatype in ("domain-value", "domain-value-list"): + incoming_option_ids[node_id] = [o["id"] for o in node["config"]["options"]] + + # Track name changes + if old_node.name != node_name: + updated_names.append({ + "nodeid": node_id, + "old_name": old_node.name, + "new_name": node_name + }) + + # Track change in concept + if node_datatype in ("concept", "concept-list") and current_node_map[node_id].datatype in ("concept", "concept-list"): + concept = node["config"]["rdmCollection"] + old_concept = current_node_map[node_id].config['rdmCollection'] + if concept != old_concept: + def get_concept_value(concept_id): + return Concept().get(concept_id).get_preflabel().value + + updated_concepts.append({ + "id": concept, + "value": get_concept_value(concept), + "old_id": old_concept, + "old_value": get_concept_value(old_concept), + }) for node_json in current_nodes: if str(node_json.nodeid) not in incoming_datatypes.keys(): deleted_nodes[node_json.alias] = str(node_json.nodeid) else: - current_datatypes[str(node_json.nodeid)] = node_json.datatype - - for node in incoming_nodes: - old_node = next(node_json for node_json in current_nodes if str(node_json.nodeid) == node['nodeid']) - if old_node: - if str(old_node.name) != node['name']: - updated_names.append({ - 'nodeid': node['nodeid'], - 'old_name': old_node.name, - 'new_name': node['name'] - }) + current_datatypes[str(node_json.nodeid)] = node_json.datatype for nodegroup in current_nodegroups: if str(nodegroup.nodegroupid) not in list(map(lambda ng: str(ng['nodegroupid']), incoming_nodegroups)): @@ -149,12 +174,11 @@ def compare_nodes(self) -> tuple[list,list,dict]: collection_id = incoming_node['config']['rdmCollection'] datatype_changes[nodeid]['concept_id'] = collection_id concept_list = list(map(lambda concept: {concept_keys[i] : concept[i] for i, _ in enumerate(concept)}, Concept().get_child_collections(collection_id))) - print(concept_list) if len(concept_list) == 0: - updated_concepts.append(incoming_node['config']['rdmCollection']) + new_concepts.append(incoming_node['config']['rdmCollection']) datatype_changes[nodeid]['concept_options'] = concept_list - return new_nodes, deleted_nodes, deleted_nodegroups, datatype_changes, new_functions, updated_names, updated_concepts + return new_nodes, deleted_nodes, deleted_nodegroups, datatype_changes, new_functions, updated_names, new_concepts, updated_concepts def handle_datatype_changes(self, tile_json, datatype_changes: dict): for node in tile_json['data']: @@ -201,9 +225,9 @@ def handle_deleted_nodegroups(self, tile_json, deleted_nodegroups): return True return False - def handle_concept_change(self, tile_json, mapping, updated_concepts) + # def handle_concept_change(self, tile_json, mapping, updated_concepts) - def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, updated_concepts): + def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, new_concepts, updated_concepts): nodes = self.graph.nodes.values() # Print the current state with headings @@ -223,6 +247,13 @@ def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegrou else: print("No new functions added") + print("\nNew Concepts:") + if new_concepts: + for concept in new_concepts: + print(f"{concept}") + else: + print("No new concepts") + print("\nUpdated Node Names:") if updated_names: for node in updated_names: @@ -230,12 +261,12 @@ def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegrou else: print("No names changed") - print("\nNew Concepts:") + print("\nUpdated Concepts:") if updated_concepts: for concept in updated_concepts: - print(f"{concept}") + print(f"{concept['old_id']} ({concept['old_value']}) -> {concept['id']} ({concept['value']})") else: - print("No new concepts") + print("No updated concepts") print("\nDeleted Nodes:") if deleted_nodes: @@ -291,9 +322,9 @@ def handle_model_update(self, model_name): os.rename(glob.glob(f'{sanitised_model_name}*.json')[0], f'stale_data_{sanitised_model_name}.json') - new_nodes, deleted_nodes, deleted_nodegroups, self.datatype_changes, new_functions, updated_names, updated_concepts = self.compare_nodes() + new_nodes, deleted_nodes, deleted_nodegroups, self.datatype_changes, new_functions, updated_names, new_concepts, updated_concepts = self.compare_nodes() - self.handle_data_change_messages(new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, updated_concepts) + self.handle_data_change_messages(new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, new_concepts, updated_concepts) if self.datatype_changes == {} and len(deleted_nodes) == 0 and len(deleted_nodegroups) == 0: input("\nContinue with deleting and updating the graphs and data?") From 1a27c44520f6afa02cb8905119894fd54ae047cf Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Wed, 19 Feb 2025 17:05:59 +0000 Subject: [PATCH 7/9] feat: add concept to concept conversion function --- coral/management/commands/migrate_safely.py | 57 ++++++++++++++++++--- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index 883d8b5f2..f0b6a284f 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -4,6 +4,7 @@ from arches.app.models.models import FunctionXGraph from arches.app.models.concept import Concept from arches.app.models.concept import ConceptValue +from arches.app.models.models import Value from django.core import management from django.core.management.base import BaseCommand import json @@ -45,6 +46,11 @@ def add_arguments(self, parser): "--export", help="The path for the output file", ) + parser.add_argument( + "-M", + "--mapping", + help="The path for the mapping file to convert node data", + ) def handle(self, *args, **options): if options["operation"] == "rehydrate_members": @@ -56,7 +62,7 @@ def handle(self, *args, **options): if options["reverse"]: ScanForDataRisks().reverse_migration(options["model_name"]) else: - ScanForDataRisks().handle_model_update(options["model_name"]) + ScanForDataRisks().handle_model_update(options["model_name"], options["mapping"]) pass class ScanForDataRisks(): @@ -66,6 +72,7 @@ class ScanForDataRisks(): graphid = "" graph = None datatype_changes = {} + mapping = {} def compare_nodes(self) -> tuple[list,list,dict]: """ @@ -100,6 +107,7 @@ def compare_nodes(self) -> tuple[list,list,dict]: node_id = node["nodeid"] node_name = node["name"] node_datatype = node["datatype"] + nodegroup_id = node["nodegroup_id"] if node_id not in current_node_map: new_nodes.append(node_id) @@ -125,13 +133,21 @@ def compare_nodes(self) -> tuple[list,list,dict]: old_concept = current_node_map[node_id].config['rdmCollection'] if concept != old_concept: def get_concept_value(concept_id): + try: return Concept().get(concept_id).get_preflabel().value + except Exception as e: + print(f"{e}") + new_concepts.append(concept) + return "This concept needs to be registered" updated_concepts.append({ - "id": concept, + "concept_id": concept, "value": get_concept_value(concept), - "old_id": old_concept, + "old_concept_id": old_concept, "old_value": get_concept_value(old_concept), + "node_id": node_id, + "node_name": node_name, + "nodegroup": nodegroup_id }) for node_json in current_nodes: @@ -225,7 +241,14 @@ def handle_deleted_nodegroups(self, tile_json, deleted_nodegroups): return True return False - # def handle_concept_change(self, tile_json, mapping, updated_concepts) + def handle_concept_change(self, tile_json, updated_concepts): + for node in list(tile_json['data'].keys()): + if node in [concept["node_id"] for concept in updated_concepts]: + mapping = next(value for key, value in self.mapping.items() if key == node) + print("tile", tile_json) + print("node", node) + TransformData().concept_to_concept(tile_json, node, mapping) + def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, new_concepts, updated_concepts): nodes = self.graph.nodes.values() @@ -264,7 +287,8 @@ def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegrou print("\nUpdated Concepts:") if updated_concepts: for concept in updated_concepts: - print(f"{concept['old_id']} ({concept['old_value']}) -> {concept['id']} ({concept['value']})") + print(f"\nNode: {concept['node_id']} ({concept['node_name']}):") + print(f"Concept: {concept['old_concept_id']} ({concept['old_value']}) -> {concept['concept_id']} ({concept['value']})") else: print("No updated concepts") @@ -291,7 +315,7 @@ def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegrou else: print("No data changes") - def handle_model_update(self, model_name): + def handle_model_update(self, model_name, mapping=None): model_path = f'coral/pkg/graphs/resource_models/{model_name}.json' with open(model_path) as incoming_json: file_contents = incoming_json.read() @@ -299,6 +323,9 @@ def handle_model_update(self, model_name): self.incoming_json = json.loads(file_contents) self.graphid = self.incoming_json['graph'][0]['graphid'] self.graph = Graph.objects.get(pk=self.graphid) + if mapping: + with open(mapping, 'r') as file: + self.mapping = json.load(file) try: management.call_command("packages", @@ -326,7 +353,7 @@ def handle_model_update(self, model_name): self.handle_data_change_messages(new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, new_concepts, updated_concepts) - if self.datatype_changes == {} and len(deleted_nodes) == 0 and len(deleted_nodegroups) == 0: + if self.datatype_changes == {} and len(deleted_nodes) == 0 and len(deleted_nodegroups) == 0 and len(updated_concepts) == 0: input("\nContinue with deleting and updating the graphs and data?") @@ -359,6 +386,8 @@ def handle_model_update(self, model_name): tile = self.handle_datatype_changes(tile, self.datatype_changes) if len(deleted_nodes) > 0: tile = self.handle_deleted_nodes(tile, deleted_nodes) + if len(updated_concepts) > 0: + tile = self.handle_concept_change(tile, updated_concepts) # filter the tiles to remove the deleted node group tiles if len(deleted_nodegroups) > 0: @@ -456,6 +485,20 @@ def allow_many(self, tile_json, nodeid): tile_json['data'][nodeid] = [tile_json['data'][nodeid]] return tile_json + def concept_to_concept(self, tile_json, node, mapping): + if mapping: + current_value = ConceptValue().get(tile_json['data'][node]).conceptid + new_value = Value.objects.filter(concept_id = mapping[current_value], valuetype = 'prefLabel').first().valueid + # Need to add some error checking when nothing in mapping and for when the new concept hasn't been uploaded + if new_value: + tile_json['data'][node] = str(new_value) + else: + tile_json['data'][node] = None + else: + print("No mapping file provided") + return + + class GroupTransform(): """ This class contains functions to transform the Groups From b4ec8fd8df85dc2ff3efa65c745268fc93d1f506 Mon Sep 17 00:00:00 2001 From: Stuart Marshall Date: Thu, 20 Feb 2025 12:55:32 +0000 Subject: [PATCH 8/9] feat: add error checking for missing concept keys and values --- coral/management/commands/migrate_safely.py | 42 ++++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/coral/management/commands/migrate_safely.py b/coral/management/commands/migrate_safely.py index f0b6a284f..948e5bf27 100644 --- a/coral/management/commands/migrate_safely.py +++ b/coral/management/commands/migrate_safely.py @@ -9,6 +9,9 @@ from django.core.management.base import BaseCommand import json import datetime +import logging + +logging.basicConfig() class Command(BaseCommand): """Safely Migrate a model that may have conflicting changes. @@ -242,12 +245,17 @@ def handle_deleted_nodegroups(self, tile_json, deleted_nodegroups): return False def handle_concept_change(self, tile_json, updated_concepts): + if self.mapping is None: + raise ValueError("No mapping file has been provided for the concept conversion. Use -M to add a file") + for node in list(tile_json['data'].keys()): if node in [concept["node_id"] for concept in updated_concepts]: - mapping = next(value for key, value in self.mapping.items() if key == node) - print("tile", tile_json) - print("node", node) + try: + mapping = next(value for key, value in self.mapping.items() if key == node) + except Exception as e: + raise ValueError(f"No mapping could be found in the file for the node {node}") from e TransformData().concept_to_concept(tile_json, node, mapping) + def handle_data_change_messages(self, new_nodes, deleted_nodes, deleted_nodegroups, new_functions, updated_names, new_concepts, updated_concepts): @@ -323,7 +331,8 @@ def handle_model_update(self, model_name, mapping=None): self.incoming_json = json.loads(file_contents) self.graphid = self.incoming_json['graph'][0]['graphid'] self.graph = Graph.objects.get(pk=self.graphid) - if mapping: + self.mapping = mapping + if self.mapping: with open(mapping, 'r') as file: self.mapping = json.load(file) @@ -486,17 +495,30 @@ def allow_many(self, tile_json, nodeid): return tile_json def concept_to_concept(self, tile_json, node, mapping): - if mapping: current_value = ConceptValue().get(tile_json['data'][node]).conceptid - new_value = Value.objects.filter(concept_id = mapping[current_value], valuetype = 'prefLabel').first().valueid - # Need to add some error checking when nothing in mapping and for when the new concept hasn't been uploaded + + if current_value in mapping: + mapping_value = mapping[current_value] + elif 'default' in mapping: + mapping_value = mapping['default'] + else: + raise KeyError(f"The node {current_value} was not found in the mapping file, and no 'default' key exists.") + + try: + if mapping_value: + new_value = Value.objects.filter(concept_id=mapping_value, valuetype='prefLabel').first().valueid + else: + # Allows for a null value in the mapping + new_value = mapping_value + + except Exception as e: + raise ValueError(f"The concept {mapping_value} was not found. Have you imported your new concept and collection?") from e + if new_value: tile_json['data'][node] = str(new_value) else: tile_json['data'][node] = None - else: - print("No mapping file provided") - return + class GroupTransform(): From 1ed0dcf6a92a9aa7d2ede843270de0e66d5190c4 Mon Sep 17 00:00:00 2001 From: owen Date: Mon, 3 Mar 2025 14:28:09 +0000 Subject: [PATCH 9/9] version: v7.7.29 --- coral/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coral/settings.py b/coral/settings.py index d3bb85af7..693974ad0 100644 --- a/coral/settings.py +++ b/coral/settings.py @@ -22,7 +22,7 @@ pass APP_NAME = 'coral' -APP_VERSION = semantic_version.Version(major=7, minor=6, patch=29) +APP_VERSION = semantic_version.Version(major=7, minor=7, patch=29) GROUPINGS = { "groups": {