From 7dc0f5e54ef611b53b5f46a16e50ff637c7ee136 Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Fri, 17 Mar 2023 10:05:03 +0000 Subject: [PATCH 01/13] make sure tag names from keywords are <= 40 chars --- dataedit/structures.py | 1 + dataedit/views.py | 1 + 2 files changed, 2 insertions(+) diff --git a/dataedit/structures.py b/dataedit/structures.py index 27d76a371..fdf1531c3 100644 --- a/dataedit/structures.py +++ b/dataedit/structures.py @@ -54,6 +54,7 @@ def create_name_normalized(name): name_norm = name_norm.lower() name_norm = re.sub("[^a-z0-9]+", "_", name_norm) name_norm = name_norm.strip("_") + name_norm = name_norm[:40] # max len return name_norm @staticmethod diff --git a/dataedit/views.py b/dataedit/views.py index 0595ee32f..7ef0ae57c 100644 --- a/dataedit/views.py +++ b/dataedit/views.py @@ -1394,6 +1394,7 @@ def get_or_create_tag_by_name(name): .first() ) if tag is None: + name = name[:40] # max len tag = Tag(name=name) session.add(tag) session.flush() From e6d7030760f36c2f6653f897e8bbab71604c6a98 Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Fri, 17 Mar 2023 10:20:28 +0000 Subject: [PATCH 02/13] refactor get_metadata function --- api/actions.py | 6 ++++++ api/views.py | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/api/actions.py b/api/actions.py index b92ef0c96..61f6e035c 100644 --- a/api/actions.py +++ b/api/actions.py @@ -1096,6 +1096,12 @@ def _get_table(schema, table): return Table(table, metadata, autoload=True, autoload_with=engine, schema=schema) +def _get_table_metadata(schema, table): + table_obj = _get_table(schema=schema, table=table) + comment = table_obj.comment + return json.loads(comment) if comment else {} + + def __internal_select(query, context): # engine = _get_engine() context2 = dict(user=context.get("user")) diff --git a/api/views.py b/api/views.py index 7799700d4..ed0bd8ff4 100644 --- a/api/views.py +++ b/api/views.py @@ -250,9 +250,8 @@ def __create_sequence(self, request, schema, sequence, 
jsn): class Metadata(APIView): @api_exception def get(self, request, schema, table): - table_obj = actions._get_table(schema=schema, table=table) - comment = table_obj.comment - return JsonResponse(json.loads(comment) if comment else {}) + metadata = actions._get_table_metadata(schema, table) + return JsonResponse(metadata) @api_exception @require_write_permission From cf816a429aff2ce161958d21ea21e2bc92ad3615 Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Fri, 17 Mar 2023 10:21:37 +0000 Subject: [PATCH 03/13] refactor get_metadata function --- api/actions.py | 2 +- api/views.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/actions.py b/api/actions.py index 61f6e035c..bbbee08e3 100644 --- a/api/actions.py +++ b/api/actions.py @@ -1096,7 +1096,7 @@ def _get_table(schema, table): return Table(table, metadata, autoload=True, autoload_with=engine, schema=schema) -def _get_table_metadata(schema, table): +def get_table_metadata(schema, table): table_obj = _get_table(schema=schema, table=table) comment = table_obj.comment return json.loads(comment) if comment else {} diff --git a/api/views.py b/api/views.py index ed0bd8ff4..a26bf53a1 100644 --- a/api/views.py +++ b/api/views.py @@ -250,7 +250,7 @@ def __create_sequence(self, request, schema, sequence, jsn): class Metadata(APIView): @api_exception def get(self, request, schema, table): - metadata = actions._get_table_metadata(schema, table) + metadata = actions.get_table_metadata(schema, table) return JsonResponse(metadata) @api_exception From 8975e3b2a924e1e0f82bff3f4d4ffc3614769ac5 Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Fri, 17 Mar 2023 10:43:18 +0000 Subject: [PATCH 04/13] created one time migration command --- .../commands/upgrade_metadata_20230317.py | 536 ++++++++++++++++++ requirements.txt | 2 + 2 files changed, 538 insertions(+) create mode 100644 dataedit/management/commands/upgrade_metadata_20230317.py diff --git 
a/dataedit/management/commands/upgrade_metadata_20230317.py b/dataedit/management/commands/upgrade_metadata_20230317.py new file mode 100644 index 000000000..4d39c502e --- /dev/null +++ b/dataedit/management/commands/upgrade_metadata_20230317.py @@ -0,0 +1,536 @@ +""" +NOTE: this is a very ugly ONE TIME script to fix all existing legacy metadata +in the oep to the latest version 1.5 + +After migration, this code should be banished from the code base + +""" + +import json +import logging +import os +import re + +import jsonschema.validators +from django.core.management.base import BaseCommand +from inflection import camelize +from omi.dialects.oep import OEP_V_1_5_Dialect +from omi.dialects.oep.compiler import compile_date_or_none +from omi.dialects.oep.parser import parse_date_or_none + +from api.actions import get_table_metadata, set_table_metadata +from api.connection import _get_engine +from dataedit.models import Table +from dataedit.views import schema_whitelist + +# ------------------------------------------------------------------------------------ +# helper functions +# ------------------------------------------------------------------------------------ + + +def flatten(obj) -> dict: + def _flatten(x, path=""): + if isinstance(x, list): + x = enumerate(x) + elif isinstance(x, dict): + x = x.items() + else: + # assert isinstance(x, str) # TODO remove + yield (path, x) + return + for k, v in x: + yield from _flatten(v, f"{path}{k}/") + + res_l = list(_flatten(obj)) + res_d = dict(res_l) + assert len(res_l) == len(res_d) # TODO remove + return res_d + + +def unflatten(obj: dict): + # recursion end + if len(obj) == 1 and "" in obj: + x = obj[""] + # assert isinstance(x, str) # TODO remove + return x + result = {} # group by first part of path + for path, x in obj.items(): + assert path.endswith("/"), path # TODO REMOVE + prefix, rest_path = path.split("/", maxsplit=1) + if prefix not in result: + result[prefix] = {} + assert rest_path not in result[prefix] # 
TODO remove + result[prefix][rest_path] = x + # recursion + result = dict((p, unflatten(x)) for p, x in result.items()) + # convert to list + if all(x.isnumeric() for x in result): + result = dict((int(k), v) for k, v in result.items()) + max_id = max(result) + lst = [None] * (max_id + 1) + for i, x in result.items(): + lst[i] = x + result = lst + return result + + +def create_json_validator(): + + schema_path = os.path.dirname(__file__) + "/../../static/metaedit/schema.json" + + with open(schema_path, "r", encoding="utf-8") as file: + schema = json.load(file) + + validator_cls = jsonschema.validators.validator_for(schema) + validator_cls.check_schema(schema) + validator = validator_cls(schema) + return validator + + +def omi_dump_and_parse(metadata_obj): + dialect = OEP_V_1_5_Dialect() + + metadata_str = json.dumps(metadata_obj, ensure_ascii=False) + metadata_oep = dialect.parse(metadata_str) + metadata_obj_new = dialect.compile(metadata_oep) + + # omi sometimes creates tuples instead of lists + metadata_obj_new = json.loads(json.dumps(metadata_obj_new)) + metadata_obj_new = remove_nulls(metadata_obj_new) + + return metadata_obj_new + + +def is_date_path(path): + """check if path in object structure contains a date""" + try: + key = path.split("/")[-2] + except IndexError: + return False + + if "date" in key.lower() and "dates" not in key.lower(): + return True + elif key in ["start", "end"]: + return True + else: + return False + + +def fix_date(val): + """fix date strings.""" + + if not isinstance(val, str): + raise Exception(val) + elif re.match("^[0-9-]+T0$", val): + return val[:-2] # remove T0 + elif re.match("^[0-9-]+T00$", val): + return val[:-3] # remove T00 + elif re.match("^[0-9-]+T00:$", val): + return val[:-4] # remove T00 + elif re.match("^[0-9-]+T[0-9]{2}:$", val): + return val[:-4] # (FIXME) + elif re.match("^[0-9]{2}-[0-9]{2}$", val): + # only monath/day add dummy year (FIXME) + return "1000-" + val + elif 
re.match(r"^[0-9]{2}\.[0-9]{2}\.[0-9]{4}$", val): # german date + d, m, y = re.match(r"^([0-9]{2})\.([0-9]{2})\.([0-9]{4})$", val).groups() + return f"{y}-{m}-{d}" + elif re.match( + r"^[0-9]{4}\.[0-9]{2}\.[0-9]{2}$", val + ): # iso date, but . instead of - + y, m, d = re.match(r"^([0-9]{4})\.([0-9]{2})\.([0-9]{2})$", val).groups() + return f"{y}-{m}-{d}" + else: + try: # see if it goes through omi parsing + val_d = parse_date_or_none(val) + val_s = compile_date_or_none(val_d) + return val_s or "" # always return str + except Exception: + logging.warning(f"Not a date: {val}") + return "" # always return str + + +def fix_key(k): + """fix old key names""" + k = k.strip() + k = k.replace(" ", "_") + if k == "": + return k + k = camelize(k, uppercase_first_letter=False) + if k == "uRL": + k = "url" + elif k == "licence": + k = "license" + elif k == "extend": + k = "extent" + elif k == "discription": + k = "description" + elif k == "fromat": + k = "format" + return k + + +def split_list(lst): + assert isinstance(lst, list) + for item in lst: + for x in item.replace(";", ",").split(","): + x = x.strip() + if x: + yield x + + +def fix_keywords(keywords): + result = [] + for k in split_list(keywords): + k = k[:40] + result.append(k) + return result + + +def fix_languages(languages): + return [ + { + "ger": "de-DE", + "eng": "en-US", + "english": "en-US", + "en": "en-US", + "en-g": "en-GB", + "fre": "fr-FR", + "de_de": "de-DE", + }.get(lng.lower(), lng) + for lng in split_list(languages) + ] + + +def remove_nulls(obj): + """recursively remove empty structures""" + if isinstance(obj, list): + # recursion + obj_new = [remove_nulls(x) for x in obj] + # drop None + obj_new = [x for x in obj_new if x is not None] + # if empty list: return None + if not obj_new: + obj_new = None + return obj_new + elif isinstance(obj, dict): + # recursion + obj_new = dict((k, remove_nulls(v)) for k, v in sorted(obj.items())) + # drop None + obj_new = dict((k, v) for k, v in 
sorted(obj_new.items()) if v is not None) + # if empty list: return None + if not obj_new: + obj_new = None + return obj_new + elif obj is None: + return None + elif isinstance(obj, str): + obj_new = obj.strip() + if obj_new.lower() in ["", "null", "none", "..."]: + obj_new = None + return obj_new + else: + raise Exception((obj, type(obj))) + + +def find(meta, pat): + pat = re.compile(pat) + res = [] + for k in meta.keys(): + m = pat.match(k) + if m: + res.append(m) + return res + + +# ------------------------------------------------------------------------------------ +# main function +# ------------------------------------------------------------------------------------ + + +def fix_metadata(metadata, table_name): + """main function to fix metadata + + Args: + + metadata(object): python object with metadata + table_name(str): name of table (fallback for missing mandatory id) + + Returns: + python object with fixed metadata + that should pass through the omi compiler/parser + with minimal data loss + """ + + # recursively clean empty structures + metadata = remove_nulls(metadata) + metadata = flatten(metadata) + + # fix keys + metadata = dict( + ("/".join(fix_key(x) for x in k.split("/")), v) for k, v in metadata.items() + ) + + # check that values all are non empty stings + # for v in metadata.values(): + # assert isinstance(v, str) and v + # check keys not case sensitivity + # assert len(metadata) == len(set(k.lower() for k in metadata.keys())) + + # ---------------------------------------------------------------------------------- + + def rm(val_old): + del metadata[val_old.group()] + + def drop(val_old, val_new): + return val_old + + def merge(val_old, val_new): + return f"{val_old}; {val_new}" + + def err(val_old, val_new): + raise Exception() + + def mov(match, key_new, on_conflict=err): + key_old = match.group() + if not isinstance(key_new, str): # function + key_new = key_new(match) + elif "%" in key_new: + key_new = key_new % match.groups() + + val = 
metadata.pop(key_old) + if key_new in metadata: + try: + val = on_conflict(metadata[key_new], val) + except Exception: + raise Exception(f"{key_old} => {key_new}: {val} in {metadata[key_new]}") + + # logging.info(f"{key_old} => {key_new}: {val}") + metadata[key_new] = val + + idx = "([0-9]+)" + + # ---------------------------------------------------------------------------------- + + for m in find(metadata, "^(metadataVersion|metaVersion)/$"): + rm(m) + for m in find(metadata, "^metaMetadata/.*$"): + rm(m) + for m in find(metadata, f"^resources/{idx}/metaVersion/$"): + rm(m) + for m in find(metadata, "^_comment/.*$"): + rm(m) + + # ---------------------------------------------------------------------------------- + + for m in find(metadata, f"^source/{idx}/(name|url)/$"): + mov(m, "sources/%s/%s/") + for m in find(metadata, f"^source/{idx}/$"): + mov(m, "sources/%s/name/") + for m in find(metadata, "^sources/(copyright|license|name|url)/$"): # NOT array + mov(m, "sources/0/%s/") + for m in find(metadata, "^originalFile/$"): + mov(m, "sources/0/path/") + for m in find(metadata, f"^originalFile/{idx}/$"): + mov(m, "sources/%s/path/") + for m in find(metadata, f"^retrieved/{idx}/$"): + mov(m, "sources/%s/description/", merge) + for m in find(metadata, "^dateOfCollection/$"): + mov(m, "sources/0/description/", merge) + for m in find(metadata, f"^dateOfCollection/{idx}/$"): + mov(m, "sources/%s/description/", merge) + for m in find(metadata, f"^sources/{idx}/copyright/$"): + mov(m, "sources/%s/licenses/0/attribution/") + for m in find(metadata, f"^sources/{idx}/license/$"): + mov(m, "sources/%s/licenses/0/name/") + for m in find(metadata, f"^sources/{idx}/url/$"): + mov(m, "sources/%s/licenses/0/path/") + for m in find(metadata, f"^sources/{idx}/name/$"): + mov(m, "sources/%s/title/") + for m in find(metadata, f"^sources/{idx}/comment/$"): + mov(m, "sources/%s/description/", drop) + + for m in find( + metadata, 
f"^license/{idx}/(copyright|name|url|version|id|instruction)/$" + ): + mov(m, "licenses/%s/%s/") + for m in find(metadata, "^license/(copyright|name|url|version|id|instruction)/$"): + mov(m, "licenses/0/%s/") + for m in find(metadata, f"^license/{idx}/$"): + mov(m, "licenses/%s/name/") + for m in find(metadata, f"^instructionsForProperUse/{idx}/$"): + mov(m, "licenses/0/instruction/") + for m in find(metadata, f"^licenses/{idx}/copyright/$"): + mov(m, "licenses/%s/attribution/") + for m in find(metadata, f"^licenses/{idx}/url/$"): + mov(m, "licenses/%s/path/") + for m in find(metadata, f"^licenses/{idx}/version/$"): + mov(m, "licenses/%s/title/") + for m in find(metadata, f"^licenses/{idx}/id/$"): + mov(m, "licenses/%s/name/", drop) + + for m in find(metadata, "^changes/(comment|date|mail|name)/$"): + mov(m, "contributors/0/%s/") + for m in find(metadata, f"^changes/{idx}/(comment|date|mail|name)/$"): + mov(m, "contributors/%s/%s/") + for m in find(metadata, f"^contributors/{idx}/mail/$"): + mov(m, "contributors/%s/email/") + for m in find(metadata, f"^contributors/{idx}/name/$"): + mov(m, "contributors/%s/title/") + + for m in find(metadata, f"^label/{idx}/$"): + mov(m, "keywords/%s/") + + for m in find(metadata, f"^description/{idx}/$"): + mov(m, "description/", merge) + for m in find(metadata, "^comment/$"): + mov(m, "description/", merge) + for m in find(metadata, "^notes?/$"): + mov(m, "description/", merge) + for m in find(metadata, f"^notes/{idx}/$"): + mov(m, "description/", merge) + for m in find(metadata, "^version/$"): + mov(m, "description/", merge) + for m in find(metadata, f"^toDo/{idx}/$"): + mov(m, "description/", merge) + + for m in find(metadata, "^temporal/timeseries/([^/0-9]+)/$"): + mov(m, "temporal/timeseries/0/%s/") + for m in find(metadata, "^temporal/(start|end|resolution)/$"): + mov(m, "temporal/timeseries/0/%s/") + for m in find(metadata, "^temporal/timestamp/$"): + mov(m, "temporal/timeseries/0/resolution/", drop) + for m in 
find(metadata, "^referenceDate/$"): + mov(m, "temporal/referenceDate/") + for m in find(metadata, f"^referenceDate/{idx}/$"): + mov(m, "temporal/referenceDate/") + + for m in find(metadata, f"^spatial/{idx}/extent/$"): + mov(m, "spatial/extent/") + for m in find(metadata, f"^spatial/{idx}/resolution/$"): + mov(m, "spatial/resolution/") + for m in find(metadata, f"^regionalLevel/{idx}/$"): + mov(m, "spatial/resolution/") + for m in find(metadata, f"^spatialResolution/({idx}/|)$"): + mov(m, "spatial/resolution/", drop) + + for m in find(metadata, f"^resources/{idx}/fields/{idx}/(name|description|unit)/$"): + mov(m, "resources/%s/schema/fields/%s/%s/") + for m in find(metadata, f"^resources/{idx}/fields/{idx}/url/$"): + mov(m, "resources/%s/schema/fields/%s/description/", drop) + for m in find(metadata, f"^column/{idx}/(description|name|unit)/$"): + mov(m, "resources/0/schema/fields/%s/%s/") + for m in find(metadata, f"^fields/{idx}/(name|type|description)/$"): + mov(m, "resources/0/schema/fields/%s/%s/", drop) + for m in find(metadata, f"^tableFields/{idx}/(name|type|description|unit)/$"): + mov(m, "resources/0/schema/fields/%s/%s/") + for m in find(metadata, f"^tableFields/{idx}/descriptionGerman/$"): + mov(m, "resources/0/schema/fields/%s/description/", drop) + + # ---------------------------------------------------------------------------------- + + # id + if not find(metadata, "^id/$"): + metadata["id/"] = table_name + + # fix all dates + for k, v in metadata.items(): + if is_date_path(k): + metadata[k] = fix_date(v) + + metadata = unflatten(metadata) + + # fix keywords + if "keywords" in metadata: + metadata["keywords"] = fix_keywords(metadata["keywords"]) + + # fix language codes + if "language" in metadata: + metadata["language"] = fix_languages(metadata["language"]) + + metadata = remove_nulls(metadata) + + return metadata + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--fix", action="store_true", help="without 
this, only run checks" + ) + + def handle(self, *args, **options): + + logging.basicConfig( + format="[%(asctime)s %(levelname)7s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=logging.INFO, + ) + + json_validator = create_json_validator() + + # DRY RUN: TEST ALL + + logging.info("---------------------------------------------------------------") + logging.info("DRY RUN: check conversion for all tables") + logging.info("---------------------------------------------------------------") + + for t in Table.objects.all(): + table_name = t.name + schema_name = t.schema.name + + whitelisted = schema_name in schema_whitelist + logging.info(f"{schema_name}.{table_name} (whitelist={whitelisted})") + + # load metadata from comment string + metadata_orig = get_table_metadata(schema_name, table_name) + + if not metadata_orig: + logging.info("empty metadata") + continue + + metadata_fixed = fix_metadata(metadata_orig, table_name) + + # validate with json schema + json_validator.validate(metadata_fixed) + + # roundtrip omi to check if migration will work + metadata_omi = omi_dump_and_parse(metadata_fixed) + + # compare (so we know omi round trip does not drop data) + del metadata_omi["_comment"] + del metadata_omi["metaMetadata"] + assert json.dumps(metadata_fixed, sort_keys=True) == json.dumps( + metadata_omi, sort_keys=True + ) + + logging.info("---------------------------------------------------------------") + + if not options["fix"]: + logging.info("use --fix to actually save metadata") + return + + logging.info("FIXING METADATA") + logging.info("---------------------------------------------------------------") + + engine = _get_engine() + + for t in Table.objects.all(): + table_name = t.name + schema_name = t.schema.name + + whitelisted = schema_name in schema_whitelist + logging.info(f"{schema_name}.{table_name} (whitelist={whitelisted})") + + # load metadata from comment string + metadata_orig = get_table_metadata(schema_name, table_name) + + if not metadata_orig: 
+ logging.info("empty metadata") + continue + + metadata_fixed = fix_metadata(metadata_orig, table_name) + + with engine.connect() as con: + cursor = con.connection.cursor() + set_table_metadata(table_name, schema_name, metadata_fixed, cursor) diff --git a/requirements.txt b/requirements.txt index c207f4f8f..90d229f54 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,3 +27,5 @@ sparqlwrapper Pygments<=2.11.2 # latest version fails to convert fenced code blocks pre-commit # only for dev uritemplate # allows python manage.py generateschema +inflection +jsonschema From 073b6259607192baebb0ff6609d157a32a61ef49 Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Fri, 17 Mar 2023 13:53:37 +0000 Subject: [PATCH 05/13] use commit so metadata changes are permanent --- .../commands/upgrade_metadata_20230317.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/dataedit/management/commands/upgrade_metadata_20230317.py b/dataedit/management/commands/upgrade_metadata_20230317.py index 4d39c502e..f64121165 100644 --- a/dataedit/management/commands/upgrade_metadata_20230317.py +++ b/dataedit/management/commands/upgrade_metadata_20230317.py @@ -116,7 +116,7 @@ def is_date_path(path): return False -def fix_date(val): +def fix_date(val, path=None): """fix date strings.""" if not isinstance(val, str): @@ -135,19 +135,22 @@ def fix_date(val): elif re.match(r"^[0-9]{2}\.[0-9]{2}\.[0-9]{4}$", val): # german date d, m, y = re.match(r"^([0-9]{2})\.([0-9]{2})\.([0-9]{4})$", val).groups() return f"{y}-{m}-{d}" - elif re.match( - r"^[0-9]{4}\.[0-9]{2}\.[0-9]{2}$", val - ): # iso date, but . instead of - + elif re.match(r"^[0-9]{4}\.[0-9]{2}\.[0-9]{2}$", val): + # iso date, but . 
instead of - y, m, d = re.match(r"^([0-9]{4})\.([0-9]{2})\.([0-9]{2})$", val).groups() return f"{y}-{m}-{d}" + elif re.match(r"^[0-9]{4}-[0-9]{2}-?$", val): + # missing day + y, m = re.match(r"^([0-9]{4})-([0-9]{2})-?$", val).groups() + return f"{y}-{m}-01" else: try: # see if it goes through omi parsing val_d = parse_date_or_none(val) val_s = compile_date_or_none(val_d) return val_s or "" # always return str except Exception: - logging.warning(f"Not a date: {val}") - return "" # always return str + logging.warning(f"Not a date: {val} ({path})") + return "" # always return str def fix_key(k): @@ -436,7 +439,7 @@ def mov(match, key_new, on_conflict=err): # fix all dates for k, v in metadata.items(): if is_date_path(k): - metadata[k] = fix_date(v) + metadata[k] = fix_date(v, k) metadata = unflatten(metadata) @@ -462,8 +465,7 @@ def add_arguments(self, parser): def handle(self, *args, **options): logging.basicConfig( - format="[%(asctime)s %(levelname)7s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", + format="[%(levelname)7s] %(message)s", level=logging.INFO, ) @@ -480,9 +482,11 @@ def handle(self, *args, **options): schema_name = t.schema.name whitelisted = schema_name in schema_whitelist + + logging.info("") # empty line logging.info(f"{schema_name}.{table_name} (whitelist={whitelisted})") - # load metadata from comment string + # load metadata metadata_orig = get_table_metadata(schema_name, table_name) if not metadata_orig: @@ -532,5 +536,7 @@ def handle(self, *args, **options): metadata_fixed = fix_metadata(metadata_orig, table_name) with engine.connect() as con: - cursor = con.connection.cursor() - set_table_metadata(table_name, schema_name, metadata_fixed, cursor) + with con: + cursor = con.connection.cursor() + set_table_metadata(table_name, schema_name, metadata_fixed, cursor) + con.connection.commit() From d064337bb22466eb2f46d596290129f3bf596d9c Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Sat, 18 Mar 2023 07:25:11 +0000 Subject: [PATCH 06/13] minor 
change to license fix --- dataedit/management/commands/upgrade_metadata_20230317.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataedit/management/commands/upgrade_metadata_20230317.py b/dataedit/management/commands/upgrade_metadata_20230317.py index f64121165..01ed266b9 100644 --- a/dataedit/management/commands/upgrade_metadata_20230317.py +++ b/dataedit/management/commands/upgrade_metadata_20230317.py @@ -367,10 +367,10 @@ def mov(match, key_new, on_conflict=err): mov(m, "licenses/%s/attribution/") for m in find(metadata, f"^licenses/{idx}/url/$"): mov(m, "licenses/%s/path/") - for m in find(metadata, f"^licenses/{idx}/version/$"): - mov(m, "licenses/%s/title/") for m in find(metadata, f"^licenses/{idx}/id/$"): - mov(m, "licenses/%s/name/", drop) + mov(m, "licenses/%s/title/", drop) + for m in find(metadata, f"^licenses/{idx}/version/$"): + mov(m, "licenses/%s/title/", drop) for m in find(metadata, "^changes/(comment|date|mail|name)/$"): mov(m, "contributors/0/%s/") From 6532a1e01b2b8278a19e1f0bfbcdbcc3466a42eb Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Sat, 18 Mar 2023 13:39:02 +0000 Subject: [PATCH 07/13] changed tutorial link text #1174 --- base/templates/base/index.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/base/templates/base/index.html b/base/templates/base/index.html index 85145a10a..fffa981d9 100644 --- a/base/templates/base/index.html +++ b/base/templates/base/index.html @@ -70,7 +70,10 @@

Ontology

Academy

- There is something you'd like to do on the platform, but you don't know how? Check out the tutorials! They are available for all levels, and in the form of texts, videos and markdowns. + The Open Energy Academy (OEA) provides courses as well as dedicated tutorials + covering important topics around the Open Energy Family (OEF) tools + and the Open Energy Platform (OEP). + You will also find short answers to urgent questions.

From a855246455164a546a05d532697cbc859a77dece Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Mon, 20 Mar 2023 18:39:09 +0100 Subject: [PATCH 08/13] fore correct table id if metaBuilder is initialized ... will fix false id after manual fix script #1154 --- dataedit/static/metaedit/metaedit.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataedit/static/metaedit/metaedit.js b/dataedit/static/metaedit/metaedit.js index eaf0f6492..edb8c2d7d 100644 --- a/dataedit/static/metaedit/metaedit.js +++ b/dataedit/static/metaedit/metaedit.js @@ -114,7 +114,7 @@ var MetaEdit = function (config) { // MUST have ID - json["id"] = json["id"] || config["url_table_id"]; + json["id"] = config["url_table_id"]; // MUST have one resource with name == id == tablename json["resources"] = json["resources"] || [{}]; From 398dc6a14f300b24ec166fd8757b92b7fd0c17ac Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Thu, 23 Mar 2023 13:45:13 +0000 Subject: [PATCH 09/13] one more rule for the metadata fix --- dataedit/management/commands/upgrade_metadata_20230317.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dataedit/management/commands/upgrade_metadata_20230317.py b/dataedit/management/commands/upgrade_metadata_20230317.py index 01ed266b9..2508fbb6b 100644 --- a/dataedit/management/commands/upgrade_metadata_20230317.py +++ b/dataedit/management/commands/upgrade_metadata_20230317.py @@ -397,6 +397,9 @@ def mov(match, key_new, on_conflict=err): for m in find(metadata, f"^toDo/{idx}/$"): mov(m, "description/", merge) + for m in find(metadata, f"^name/{idx}/$"): + mov(m, "name/", drop) + for m in find(metadata, "^temporal/timeseries/([^/0-9]+)/$"): mov(m, "temporal/timeseries/0/%s/") for m in find(metadata, "^temporal/(start|end|resolution)/$"): From b7b6afbb0c1310f3cba7487cea3db6d983598430 Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Thu, 23 Mar 2023 13:58:36 +0000 Subject: [PATCH 10/13] changelog --- versions/changelogs/current.md | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/versions/changelogs/current.md b/versions/changelogs/current.md index cf36cf198..9ee2b017d 100644 --- a/versions/changelogs/current.md +++ b/versions/changelogs/current.md @@ -2,4 +2,8 @@ ### Features +- one time migration script to upgrade legacy meta data + ### Bugs + +- truncate keywords/tags to 40 characters From ea34062beb82b35020bb11838bd7d17d9152316a Mon Sep 17 00:00:00 2001 From: Christian Winger Date: Thu, 23 Mar 2023 15:01:07 +0000 Subject: [PATCH 11/13] replaced dependency inflection --- .../commands/upgrade_metadata_20230317.py | 50 ++++++++++++++----- requirements.txt | 1 - 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/dataedit/management/commands/upgrade_metadata_20230317.py b/dataedit/management/commands/upgrade_metadata_20230317.py index 2508fbb6b..810b46b8c 100644 --- a/dataedit/management/commands/upgrade_metadata_20230317.py +++ b/dataedit/management/commands/upgrade_metadata_20230317.py @@ -13,7 +13,6 @@ import jsonschema.validators from django.core.management.base import BaseCommand -from inflection import camelize from omi.dialects.oep import OEP_V_1_5_Dialect from omi.dialects.oep.compiler import compile_date_or_none from omi.dialects.oep.parser import parse_date_or_none @@ -157,19 +156,46 @@ def fix_key(k): """fix old key names""" k = k.strip() k = k.replace(" ", "_") + k = k.lower() if k == "": return k - k = camelize(k, uppercase_first_letter=False) - if k == "uRL": - k = "url" - elif k == "licence": - k = "license" - elif k == "extend": - k = "extent" - elif k == "discription": - k = "description" - elif k == "fromat": - k = "format" + + k = { + "reference_date": "referenceDate", + "licence": "license", + "extend": "extent", + "discription": "description", + "fromat": "format", + "metadata_version": "metadataVersion", + "original_file": "originalFile", + "date_of_collection": "dateOfCollection", + "metadatalicense": "metadataLicense", + "metadataversion": "metadataVersion", + "metametadata": 
"metaMetadata", + "spatial_resolution": "spatialResolution", + "sourcecode": "sourceCode", + "fundingagency": "fundingAgency", + "grantno": "grantNo", + "foreignkeys": "foreignKeys", + "primarykey": "primaryKey", + "decimalseparator": "decimalSeparator", + "publicationdate": "publicationDate", + "referencedate": "referenceDate", + "fundingagencylogo": "fundingAgencyLogo", + "publisherlogo": "publisherLogo", + "aggregationtype": "aggregationType", + "table_fields": "tableFields", + "regional_level": "regionalLevel", + "instructions_for_proper_use": "instructionsForProperUse", + "todo": "toDo", + "description_german": "descriptionGerman", + "isabout": "isAbout", + "meta_version": "metaVersion", + "valuereference": "valueReference", + "value_reference": "valueReference", + "is_about": "isAbout", + }.get(k, k) + return k diff --git a/requirements.txt b/requirements.txt index 90d229f54..b3394ea1f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,5 +27,4 @@ sparqlwrapper Pygments<=2.11.2 # latest version fails to convert fenced code blocks pre-commit # only for dev uritemplate # allows python manage.py generateschema -inflection jsonschema From 37d613141bb845497f57e1758985a5b301c41c7a Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Thu, 23 Mar 2023 16:28:31 +0100 Subject: [PATCH 12/13] bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 34a83616b..a31499ef5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.12.1 +0.12.12 From da845315befd79ca0f1694aa79072702b8f4e47b Mon Sep 17 00:00:00 2001 From: jh-RLI Date: Thu, 23 Mar 2023 16:32:21 +0100 Subject: [PATCH 13/13] update changelogs --- versions/changelogs/0_12_2.md | 9 +++++++++ versions/changelogs/current.md | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 versions/changelogs/0_12_2.md diff --git a/versions/changelogs/0_12_2.md b/versions/changelogs/0_12_2.md new file mode 100644 index 000000000..dea8db353 --- /dev/null +++ 
b/versions/changelogs/0_12_2.md @@ -0,0 +1,9 @@ +### Changes + +### Features + +- one time migration script to upgrade legacy meta data (#1182) + +### Bugs + +- truncate keywords/tags to 40 characters (#1182) diff --git a/versions/changelogs/current.md b/versions/changelogs/current.md index 9ee2b017d..3c331ce65 100644 --- a/versions/changelogs/current.md +++ b/versions/changelogs/current.md @@ -2,8 +2,5 @@ ### Features -- one time migration script to upgrade legacy meta data - ### Bugs -- truncate keywords/tags to 40 characters