From 2d2947597d0815f85f52928a353d7e063fdca05b Mon Sep 17 00:00:00 2001 From: juk0de <5322305+juk0de@users.noreply.github.com> Date: Sun, 29 Sep 2024 11:43:41 +0200 Subject: [PATCH] mtf2json: added option '--statistics' --- mtf2json/__init__.py | 2 +- mtf2json/cli.py | 45 ++++++++++++++++++++++++++++++++++++++++++-- mtf2json/mtf2json.py | 29 +++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/mtf2json/__init__.py b/mtf2json/__init__.py index 37c80f5..0506abf 100644 --- a/mtf2json/__init__.py +++ b/mtf2json/__init__.py @@ -1,3 +1,3 @@ # this enables direct import from 'mtf2json' (instead of 'mtf2json.mtf2json') -from .mtf2json import read_mtf, write_json, version, mm_commit # noqa +from .mtf2json import read_mtf, write_json, version, mm_commit, statistics # noqa from .error import ConversionError # noqa diff --git a/mtf2json/cli.py b/mtf2json/cli.py index c3fd546..3c4f4db 100644 --- a/mtf2json/cli.py +++ b/mtf2json/cli.py @@ -19,7 +19,14 @@ import argparse from pathlib import Path import os -from .mtf2json import read_mtf, write_json, ConversionError, version, mm_commit +from .mtf2json import ( + read_mtf, + write_json, + ConversionError, + version, + mm_commit, + statistics, +) from typing import Optional, List, Tuple @@ -78,6 +85,12 @@ def create_parser() -> argparse.ArgumentParser: action="store_true", help="Recursively convert MTF files in subdirectories.", ) + parser.add_argument( + "--statistics", + "-s", + action="store_true", + help="Print statistics after the conversion.", + ) parser.add_argument( "--ignore-errors", "-i", @@ -147,6 +160,29 @@ def convert_dir( return 1 if error_occured else 0 +def print_statistics() -> None: + """ + Print conversion statistics. + """ + + def do_print(category_dict: dict[str, list[str]]) -> None: + if len(category_dict) == 0: + print(" NONE") + else: + for key, filenames in category_dict.items(): + print(f"> '{key}':") + for filename in filenames: + print(f" {filename}") + + print("=== STATISTICS ===") + print("= Unknown keys =") + do_print(statistics["unknown_keys"]) + print("\n= Keys with empty values =") + do_print(statistics["empty_value_keys"]) + print("\n= Lines without keys (except known special cases) =") + do_print(statistics["no_key_lines"]) + + def main() -> None: parser = create_parser() args = parser.parse_args() @@ -208,12 +244,17 @@ def main() -> None: sys.exit(1) else: print(json.dumps(data)) + if args.statistics: + print_statistics() # convert all MTF files in given directory if args.mtf_dir: mtf_dir = Path(args.mtf_dir) json_dir = Path(args.json_dir) if args.json_dir else None - sys.exit(convert_dir(mtf_dir, json_dir, args.recursive, args.ignore_errors)) + res = convert_dir(mtf_dir, json_dir, args.recursive, args.ignore_errors) + if args.statistics: + print_statistics() + sys.exit(res) if __name__ == "__main__": diff --git a/mtf2json/mtf2json.py b/mtf2json/mtf2json.py index d916bfa..e706c68 100755 --- a/mtf2json/mtf2json.py +++ b/mtf2json/mtf2json.py @@ -119,6 +119,23 @@ # even if they can sometimes be numbers string_keys = ["model"] +# dict for the '--statistics' option +statistics: dict[str, dict[str, list[str]]] = { + "unknown_keys": {}, + "empty_value_keys": {}, + "no_key_lines": {}, +} + + +def __add_statistics(category: str, key: str, file: str): + """ + Add given entry and file to the given statistics category. + """ + if key not in statistics[category]: + statistics[category][key] = [] + if file not in statistics[category][key]: + statistics[category][key].append(file) + # decoder that catches utf8 decoding errors and switches to cp1252 def mixed_decoder(error: UnicodeError) -> tuple[str, int]: @@ -201,7 +218,9 @@ def __check_compat(file: TextIO) -> None: file.seek(0) -def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str, str]]: +def __read_line( + file: TextIO, filename: str, verbose: bool = False +) -> Iterator[tuple[str, str, str]]: """ A generator that reads the next line and returns (key, value, section). The value may be empty. This can be because of an empty value in the MTF file, @@ -249,7 +268,8 @@ def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str, # -> fixes #14 and similar issues if not __key_is_known(key): if verbose: - print(f"> detected line with unkown key '{key}', skipping it") + print(f"> detected line with unknown key '{key}', skipping it") + __add_statistics("unknown_keys", key, filename) continue elif key == "armor" or key in armor_location_keys: section = "armor" @@ -271,6 +291,8 @@ def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str, print( f"> detected key, value and section: ['{key}', '{value}', '{section}']" ) + if value == "" and section not in ["weapons", "armor", "critical_slots"]: + __add_statistics("empty_value_keys", key, filename) yield (key, value, section) continue else: @@ -294,6 +316,7 @@ def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str, print( "> line contains no key and is no known special case, skipping it" ) + __add_statistics("no_key_lines", line, filename) continue return None @@ -307,7 +330,7 @@ def read_mtf(path: Path, verbose: bool = False) -> dict[str, Any]: with open(path, "r", encoding="utf8", errors="mixed") as file: __check_compat(file) mech_data["mtf2json"] = version - for key, value, section in __read_line(file, verbose): + for key, value, section in __read_line(file, path.name, verbose): # = rules level = if key == "rules_level": add_rules_level(value, mech_data)