Commit

Changes to runner for dev builds, minor result scripts
ashwinprasadme committed Jul 27, 2023
1 parent baf1444 commit c1dee69
Showing 5 changed files with 116 additions and 53 deletions.
4 changes: 4 additions & 0 deletions scripts/benchmark_count.py
@@ -8,13 +8,17 @@

total_annotations = 0
total_types = 0
+total_col = 0
for json_file in json_files:
with open(json_file, "r") as f:
data = json.load(f)
total_annotations += len(data)
for _t in data:
total_types += len(_t["type"])
+        if _t.get("col_offset"):
+            total_col += 1

print(f"Total Python files: {len(python_files)}")
print(f"Total annotations: {total_annotations}")
print(f"Total types in annotations: {total_types}")
print(f"Total col_offset in annotations: {total_col}/{total_types}")
79 changes: 60 additions & 19 deletions scripts/get_identifiers.py
@@ -3,6 +3,46 @@
from pathlib import Path


+class NodeVisitor(ast.NodeVisitor):
+    def visit_FunctionDef(self, node):
+        print(
+            f"Function: {node.name} -> Line: {node.lineno}, Column:"
+            f" {node.col_offset + 5}"
+        )
+        for arg in node.args.args:
+            if arg.arg == "self":
+                continue
+            print(
+                f"Parameter: {arg.arg} -> Line: {arg.lineno}, Column:"
+                f" {arg.col_offset + 1}"
+            )
+
+        self.generic_visit(node)
+
+    def visit_Assign(self, node):
+        for target in node.targets:
+            if isinstance(target, ast.Name):
+                print(
+                    f"Local variable: {target.id} -> Line: {target.lineno}, Column:"
+                    f" {target.col_offset + 1}"
+                )
+        self.generic_visit(node)
+
+    def visit_Lambda(self, node):
+        print(f"Lambda function -> Line: {node.lineno}, Column: {node.col_offset + 1}")
+        for arg in node.args.args:
+            print(
+                f"Parameter: {arg.arg} -> Line: {arg.lineno}, Column:"
+                f" {arg.col_offset+1}"
+            )
+        self.generic_visit(node)
+
+
+def parse_python_code(code):
+    tree = ast.parse(code)
+    NodeVisitor().visit(tree)
+
+
def describe_symtable(st, recursive=True, indent=0):
def print_d(s, *args):
prefix = " " * indent
@@ -17,28 +57,29 @@ def print_d(s, *args):
describe_symtable(child_st, recursive, indent + 5)


in_py_path = "../micro-benchmark/python_features/functions"
in_py_path = "../micro-benchmark/python_features"
for _file in sorted(Path(in_py_path).rglob("*.py")):
# Create a dictionary to store line numbers for identifiers
-    identifiers = {}
+    # identifiers = {}
print(_file.parts[-4:])
-    describe_symtable(
-        symtable.symtable(_file.read_text(), _file.name, compile_type="exec")
-    )
-
-    # Parse the code into an AST (Abstract Syntax Tree)
-    tree = ast.parse(_file.read_text())
-
-    # Traverse the AST and extract line numbers for identifiers
-    for node in ast.walk(tree):
-        if isinstance(node, ast.Name):
-            identifier = node.id
-            lineno = node.lineno
-            if identifier in identifiers:
-                identifiers[identifier].append(lineno)
-            else:
-                identifiers[identifier] = [lineno]
+    # describe_symtable(
+    #     symtable.symtable(_file.read_text(), _file.name, compile_type="exec")
+    # )
+
+    # # Parse the code into an AST (Abstract Syntax Tree)
+    # tree = ast.parse(_file.read_text())
+
+    # # Traverse the AST and extract line numbers for identifiers
+    # for node in ast.walk(tree):
+    #     if isinstance(node, ast.Name):
+    #         identifier = node.id
+    #         lineno = node.lineno
+    #         if identifier in identifiers:
+    #             identifiers[identifier].append(lineno)
+    #         else:
+    #             identifiers[identifier] = [lineno]

print("\n")
-    print(identifiers)
+    parse_python_code(_file.read_text())
+    # print(identifiers)
print("\n\n###########################\n\n")
42 changes: 32 additions & 10 deletions src/main_runner.py
@@ -56,31 +56,38 @@ def _list_python_files(self, directory):


class TypeEvalPyRunner:
-    def __init__(self, tool_name, dockerfile_path, host_results_path):
+    def __init__(
+        self,
+        tool_name,
+        dockerfile_path,
+        host_results_path,
+        dockerfile_name="Dockerfile",
+        volumes={},
+    ):
self.docker_client = docker.from_env()
self.tool_name = tool_name
self.dockerfile_path = dockerfile_path
-        self.dockerfile_name = tool_name
+        self.dockerfile_name = dockerfile_name
self.test_runner_script_path = f"/tmp/src/runner.py"
self.host_results_path = host_results_path
+        self.volumes = volumes

if not os.path.exists(self.host_results_path):
os.makedirs(self.host_results_path)

def _build_docker_image(self):
logger.info("Building image")
image, _ = self.docker_client.images.build(
-            path=self.dockerfile_path, tag=self.dockerfile_name
+            path=self.dockerfile_path,
+            tag=self.tool_name,
+            dockerfile=self.dockerfile_name,
)
return image

def spawn_docker_instance(self):
logger.info("Creating container")
container = self.docker_client.containers.run(
-            self.dockerfile_name,
-            detach=True,
-            stdin_open=True,
-            tty=True,
+            self.tool_name, detach=True, stdin_open=True, tty=True, volumes=self.volumes
)
return container

@@ -164,8 +171,21 @@ def run_tool_test(self):


class HeaderGenRunner(TypeEvalPyRunner):
-    def __init__(self, host_results_path):
-        super().__init__("headergen", "./target_tools/headergen", host_results_path)
+    def __init__(self, host_results_path, debug=False):
+        if debug:
+            super().__init__(
+                "headergen_dev",
+                "./target_tools/headergen",
+                host_results_path,
+                volumes={
+                    "/mnt/Projects/PhD/Research/HeaderGen/git_sources/HeaderGen_github/": {
+                        "bind": "/app/HeaderGen",
+                        "mode": "ro",
+                    }
+                },
+            )
+        else:
+            super().__init__("headergen", "./target_tools/headergen", host_results_path)

def run_tool_test(self):
self._run_test_in_session()
@@ -194,7 +214,7 @@ def spawn_docker_instance(self):
def main():
host_results_path = f"./results_{datetime.now().strftime('%d-%m %H:%M')}"

-    runner = HeaderGenRunner(host_results_path)
+    runner = HeaderGenRunner(host_results_path, debug=1)
runner.run_tool_test()

# runner = Type4pyRunner(host_results_path)
@@ -218,6 +238,8 @@ def main():
# runner = HityperRunner(host_results_path)
# runner.run_tool_test()

os.rename("main_runner.log", f"{str(host_results_path)}/main_runner.log")


if __name__ == "__main__":
main()
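The new volumes argument is handed straight to the Docker SDK for Python, so the debug path bind-mounts a local HeaderGen checkout read-only into the container, the equivalent of docker run -v <host-path>:/app/HeaderGen:ro. A minimal standalone sketch of the same call (host path is illustrative):

import docker

client = docker.from_env()
container = client.containers.run(
    "headergen_dev",  # image tag built from the dev Dockerfile
    detach=True,
    stdin_open=True,
    tty=True,
    volumes={
        # host checkout -> mount point inside the container, read-only
        "/path/to/HeaderGen": {"bind": "/app/HeaderGen", "mode": "ro"},
    },
)

This lets the container pick up local HeaderGen edits without rebuilding the image.
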
40 changes: 18 additions & 22 deletions src/result_analysis_scripts/main_analyze_results.py
@@ -15,12 +15,18 @@
file_handler = logging.FileHandler("results_analysis.log")
file_handler.setLevel(logging.DEBUG)

+file_handler_info = logging.FileHandler("results_analysis_info.log")
+file_handler_info.setLevel(logging.INFO)
+
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)
+file_handler_info.setFormatter(formatter)
+
console_handler.setFormatter(formatter)
logger.addHandler(file_handler)
+logger.addHandler(file_handler_info)
logger.addHandler(console_handler)

logger.info("Result Analysis Started\n")
@@ -50,29 +56,16 @@ def compare_json_files(expected, out):
out_fact_mismatch = None
for fact_out in data_out:
# Get full matches
-            if fact_expected == fact_out:
+            if utils.check_match(expected=fact_expected, out=fact_out):
total_matches += 1
out_fact_matched = True
break
# Check if everything else matches except "type"
-            elif (fact_expected.keys() == fact_out.keys()) and all(
-                [
-                    fact_expected.get(x) == fact_out.get(x)
-                    for x in fact_expected.keys()
-                    if x != "type"
-                ]
+            elif utils.check_match(
+                expected=fact_expected, out=fact_out, partial_match=True
):
-                for _type in fact_expected.get("type", []):
-                    if _type in fact_out.get("type", []):
-                        partial_matches += 1
-                        partial_matches_list.append(fact_expected)
-                        break
-
-                if "any" in fact_out.get("type", []):
-                    marked_as_any += 1
-                else:
-                    mismatch += 1
-                    # logger.info("Total mismatch?")
+                partial_matches += 1
+                partial_matches_list.append(fact_expected)

out_fact_mismatch_list.append(fact_expected)
out_fact_mismatch = fact_out.get("type", [])
@@ -175,8 +168,8 @@ def process_cat_dir(cat_dir, tool_name=None):
cat_recall_results_grouped = {}

for root, dirs, files in os.walk(cat_dir):
-        # logger.info(files)
test_files = [x.split(".py")[0] for x in files if x.endswith(".py")]
+        logger.debug(root)
for test in test_files:
if f"{test}_gt.json" in files:
file_count += 1
@@ -216,8 +209,8 @@ def process_cat_dir(cat_dir, tool_name=None):
f"{os.path.basename(os.path.dirname(gt_file))}:{test}"
] = cat_recall_grouped

# logger.debug("Missing Matches:")
# logger.debug(json.dumps(results["missing_matches"], indent=4))
logger.debug("Missing Matches:")
logger.debug(json.dumps(results["missing_matches"], indent=4))

dir_path = os.path.relpath(os.path.dirname(gt_file), cat_dir)
file_name = dir_path + "/" + os.path.basename(gt_file)
@@ -559,8 +552,8 @@ def generate_top_n_performance(test_suite_dir, tool_name=None):


if __name__ == "__main__":
# results_dir = Path("./results_analysis_tests")
results_dir = None
# results_dir = Path("./results_26-07 22:49")
if results_dir is None:
dir_path = Path("../")
directories = [
@@ -607,3 +600,6 @@ def generate_top_n_performance(test_suite_dir, tool_name=None):

# Move logs
os.rename("results_analysis.log", f"{str(results_dir)}/results_analysis.log")
+    os.rename(
+        "results_analysis_info.log", f"{str(results_dir)}/results_analysis_info.log"
+    )
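utils.check_match itself is not part of this commit; the following is a hypothetical reconstruction from the inline logic it replaces above (the real helper may differ):

def check_match(expected, out, partial_match=False):
    # Full match: the two fact dicts are identical.
    if not partial_match:
        return expected == out
    # Partial match: same keys, all non-"type" fields agree, and at least
    # one expected type appears among the output types.
    if expected.keys() != out.keys():
        return False
    if any(expected.get(k) != out.get(k) for k in expected if k != "type"):
        return False
    return any(t in out.get("type", []) for t in expected.get("type", []))
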
4 changes: 2 additions & 2 deletions src/target_tools/headergen/src/runner.py
@@ -30,7 +30,7 @@ def list_python_files(folder_path):


def process_file(file_path):
-    analysis_meta = headergen.start_headergen(file_path, "/tmp", debug_mode=True)
+    analysis_meta = headergen.get_analysis_output(file_path, "/tmp")
return analysis_meta


Expand All @@ -48,7 +48,7 @@ def main_runner(args):
json_file_path = str(file).replace(".py", "_result.json")

with open(json_file_path, "w") as json_file:
inferred_serializable = inferred["analysis_info"]["types_formatted"]
inferred_serializable = inferred["types_formatted"]
json.dump(inferred_serializable, json_file, sort_keys=True, indent=4)

except Exception as e:
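Per this diff, the runner now calls headergen.get_analysis_output instead of start_headergen, and types_formatted moves to the top level of the returned dict. A sketch of the updated call path (file path is illustrative):

analysis_meta = headergen.get_analysis_output("example.py", "/tmp")
types = analysis_meta["types_formatted"]  # previously under ["analysis_info"]["types_formatted"]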
