From 2d2947597d0815f85f52928a353d7e063fdca05b Mon Sep 17 00:00:00 2001
From: juk0de <5322305+juk0de@users.noreply.github.com>
Date: Sun, 29 Sep 2024 11:43:41 +0200
Subject: [PATCH] mtf2json: added option '--statistics'

---
 mtf2json/__init__.py |  2 +-
 mtf2json/cli.py      | 45 ++++++++++++++++++++++++++++++++++++++++++--
 mtf2json/mtf2json.py | 29 +++++++++++++++++++++++++---
 3 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/mtf2json/__init__.py b/mtf2json/__init__.py
index 37c80f5..0506abf 100644
--- a/mtf2json/__init__.py
+++ b/mtf2json/__init__.py
@@ -1,3 +1,3 @@
 # this enables direct import from 'mtf2json' (instead of 'mtf2json.mtf2json')
-from .mtf2json import read_mtf, write_json, version, mm_commit  # noqa
+from .mtf2json import read_mtf, write_json, version, mm_commit, statistics  # noqa
 from .error import ConversionError  # noqa
diff --git a/mtf2json/cli.py b/mtf2json/cli.py
index c3fd546..3c4f4db 100644
--- a/mtf2json/cli.py
+++ b/mtf2json/cli.py
@@ -19,7 +19,14 @@
 import argparse
 from pathlib import Path
 import os
-from .mtf2json import read_mtf, write_json, ConversionError, version, mm_commit
+from .mtf2json import (
+    read_mtf,
+    write_json,
+    ConversionError,
+    version,
+    mm_commit,
+    statistics,
+)
 from typing import Optional, List, Tuple
 
 
@@ -78,6 +85,12 @@ def create_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Recursively convert MTF files in subdirectories.",
     )
+    parser.add_argument(
+        "--statistics",
+        "-s",
+        action="store_true",
+        help="Print statistics after the conversion.",
+    )
     parser.add_argument(
         "--ignore-errors",
         "-i",
@@ -147,6 +160,29 @@ def convert_dir(
     return 1 if error_occured else 0
 
 
+def print_statistics() -> None:
+    """
+    Print the collected conversion statistics, one section per category.
+    """
+
+    def do_print(category_dict: dict[str, list[str]]) -> None:
+        if not category_dict:
+            print("  NONE")
+            return
+        for entry, files in category_dict.items():
+            print(f"> '{entry}':")
+            for name in files:
+                print(f"  {name}")
+
+    print("=== STATISTICS ===")
+    print("= Unknown keys =")
+    do_print(statistics["unknown_keys"])
+    print("\n= Keys with empty values =")
+    do_print(statistics["empty_value_keys"])
+    print("\n= Lines without keys (except known special cases) =")
+    do_print(statistics["no_key_lines"])
+
+
 def main() -> None:
     parser = create_parser()
     args = parser.parse_args()
@@ -208,12 +244,17 @@ def main() -> None:
                     sys.exit(1)
             else:
                 print(json.dumps(data))
+        if args.statistics:
+            print_statistics()
 
     # convert all MTF files in given directory
     if args.mtf_dir:
         mtf_dir = Path(args.mtf_dir)
         json_dir = Path(args.json_dir) if args.json_dir else None
-        sys.exit(convert_dir(mtf_dir, json_dir, args.recursive, args.ignore_errors))
+        res = convert_dir(mtf_dir, json_dir, args.recursive, args.ignore_errors)
+        if args.statistics:
+            print_statistics()
+        sys.exit(res)
 
 
 if __name__ == "__main__":
diff --git a/mtf2json/mtf2json.py b/mtf2json/mtf2json.py
index d916bfa..e706c68 100755
--- a/mtf2json/mtf2json.py
+++ b/mtf2json/mtf2json.py
@@ -119,6 +119,23 @@
 # even if they can sometimes be numbers
 string_keys = ["model"]
 
+# Data collected for the '--statistics' option. Each category maps an
+# offending key (or, for 'no_key_lines', the line itself) to the names of
+# the files it was found in.
+statistics: dict[str, dict[str, list[str]]] = {
+    category: {} for category in ("unknown_keys", "empty_value_keys", "no_key_lines")
+}
+
+
+def __add_statistics(category: str, key: str, file: str):
+    """
+    Record that 'key' was seen in 'file' under the given statistics category.
+    """
+    files = statistics[category].setdefault(key, [])
+    # list each file only once per key
+    if file not in files:
+        files.append(file)
+
 
 # decoder that catches utf8 decoding errors and switches to cp1252
 def mixed_decoder(error: UnicodeError) -> tuple[str, int]:
@@ -201,7 +218,9 @@ def __check_compat(file: TextIO) -> None:
     file.seek(0)
 
 
-def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str, str]]:
+def __read_line(
+    file: TextIO, filename: str, verbose: bool = False
+) -> Iterator[tuple[str, str, str]]:
     """
     A generator that reads the next line and returns (key, value, section).
     The value may be empty. This can be because of an empty value in the MTF file,
@@ -249,7 +268,8 @@ def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str,
             # -> fixes #14 and similar issues
             if not __key_is_known(key):
                 if verbose:
-                    print(f"> detected line with unkown key '{key}', skipping it")
+                    print(f"> detected line with unknown key '{key}', skipping it")
+                __add_statistics("unknown_keys", key, filename)
                 continue
             elif key == "armor" or key in armor_location_keys:
                 section = "armor"
@@ -271,6 +291,8 @@ def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str,
                 print(
                     f"> detected key, value and section: ['{key}', '{value}', '{section}']"
                 )
+            if value == "" and section not in ["weapons", "armor", "critical_slots"]:
+                __add_statistics("empty_value_keys", key, filename)
             yield (key, value, section)
             continue
         else:
@@ -294,6 +316,7 @@ def __read_line(file: TextIO, verbose: bool = False) -> Iterator[tuple[str, str,
                     print(
                         "> line contains no key and is no known special case, skipping it"
                     )
+                __add_statistics("no_key_lines", line, filename)
                 continue
     return None
 
@@ -307,7 +330,7 @@ def read_mtf(path: Path, verbose: bool = False) -> dict[str, Any]:
     with open(path, "r", encoding="utf8", errors="mixed") as file:
         __check_compat(file)
         mech_data["mtf2json"] = version
-        for key, value, section in __read_line(file, verbose):
+        for key, value, section in __read_line(file, path.name, verbose):
             # = rules level =
             if key == "rules_level":
                 add_rules_level(value, mech_data)