OpenLiberty · shin19991207 · Jul 24, 2024 · Jul 25, 2024 · Aug 9, 2024 · Sep 12, 2024
diff --git a/pr-checker/rules.json → pr-checker/adoc_rules.json b/pr-checker/rules.json → pr-checker/adoc_rules.json
diff --git a/pr-checker/checker.py b/pr-checker/checker.py
@@ -1,6 +1,7 @@
 import argparse
 import sys
 from datetime import date, datetime
+from lxml import etree
 import re
 import json
 import os
@@ -28,10 +29,10 @@ def adoc_checker(file, valid_tags, rules):
 
     release_date_re = re.compile(
         ":page-releasedate:[ ]*([0-9]{4}[-][0-9]{2}[-][0-9]{2})")
-    tags_re = re.compile(":page-tags: *\[(.*)\]")
+    tags_re = re.compile(r":page-tags: *\[(.*)\]")
     list_re = re.compile("^- ")
     file_tags_re = re.compile("^.*(hide_tags=).*(tags=).*$")
-    hotspot_re = re.compile("\[(hotspot(=[^ =\n]+)? ?)+( file(=[0-9]+)?)?\]`[^`\n]*`")
+    hotspot_re = re.compile(r"\[(hotspot(=[^ =\n]+)? ?)+( file(=[0-9]+)?)?\]`[^`\n]*`")
 
     skip_list = os.environ.get('SKIP_LIST')
     print(f"SKIP_LIST={skip_list}\n");
@@ -133,6 +134,128 @@ def check_vocabulary(file, deny_list, warning_list):
     return output
 
 
+def pom_xml_checker(file, guide_id, rules):
+    """
+    Checks if the artifactId in pom.xml starts with the guide's ID
+    """
+    guide_id = "guide-" + guide_id[len("guide-"):]
+    output = ''
+
+    path_parts = file.split(os.sep)  # Split the path by directory separator
+
+    # Check if it's a direct pom.xml in the root of finish/start
+    if path_parts[-2] in rules['main_directory']:
+        # Base module (finish/pom.xml or start/pom.xml)
+        expected_artifact_id = guide_id
+    elif path_parts[-3] in rules['main_directory']:
+        # module (finish/xxx/pom.xml or start/xxx/pom.xml)
+        module_name = path_parts[-2]
+        expected_artifact_id = f"{guide_id}-{module_name}"
+    else:
+        output += f"[ERROR] Unrecognized path structure for {file}.\n"
+        return output
+
+    try:
+        tree = etree.parse(file)
+        root = tree.getroot()
+        namespace = root.nsmap.get(None)
+        namespaces = {'m': namespace} if namespace else {}
+        artifact_id_element = root.find('m:artifactId', namespaces=namespaces)
+        if artifact_id_element is None or artifact_id_element.text is None:
+            output += f"[ERROR] Project's artifactId not found.\n"
+            return output
+
+        artifact_id = artifact_id_element.text.strip()
+        line_number = artifact_id_element.sourceline
+        if artifact_id != expected_artifact_id:
+            output += f"[ERROR] [LINE {line_number}] The project's artifactId '{artifact_id}' does not match the expected '{expected_artifact_id}'.\n"
+
+    except:
+        e = sys.exc_info()[0]
+        print(f"something went wrong with {file} parsing", e)
+
+    return output
+
+
+def find_corresponding_pom_xml(server_file):
+    """
+    Given a server.xml path, finds the corresponding pom.xml path.
+    """    
+    path_parts = server_file.split(os.sep)
+
+    try:
+        src_index = path_parts.index('src')
+    except ValueError:
+        # 'src' not found in path; cannot determine pom.xml location
+        return None
+
+    module_root_dir = os.sep.join(path_parts[:src_index])
+    pom_xml_path = os.path.join(module_root_dir, 'pom.xml')
+    if os.path.exists(pom_xml_path):
+        return pom_xml_path
+    else:
+        return None
+
+
+def extract_artifact_id(pom_file):
+    """
+    Extracts the artifactId from the given pom.xml file.
+    """
+    try:
+        tree = etree.parse(pom_file)
+        root = tree.getroot()
+        namespace = root.nsmap.get(None)
+        namespaces = {'m': namespace} if namespace else {}
+
+        artifact_id_element = root.find('m:artifactId', namespaces=namespaces)
+        if artifact_id_element is not None and artifact_id_element.text:
+            return artifact_id_element.text.strip()
+    except:
+        e = sys.exc_info()[0]
+        print(f"something went wrong with {pom_file} parsing", e)
+
+    return None
+
+
+def server_xml_checker(file):
+    """
+    Checks if the location attribute in server.xml's webApplication element
+    matches the artifactId from pom.xml.
+    """
+    output = ''
+
+    try:
+        tree = etree.parse(file)
+        root = tree.getroot()
+
+        # Find the webApplication element
+        web_app_element = root.find(".//webApplication")
+        if web_app_element is None:
+            output += f"[ERROR] webApplication element not found.\n"
+            return output
+
+        # Get the location attribute
+        location = web_app_element.get("location")
+        if location is None:
+            line_number = web_app_element.sourceline
+            output += f"[ERROR] [LINE {line_number}] location attribute not found in the webApplication element.\n"
+            return output
+
+        pom_file = find_corresponding_pom_xml(file)
+        artifact_id = extract_artifact_id(pom_file)
+        # Expected location is the artifactId with .war extension
+        expected_location = f"{artifact_id}.war"
+
+        if location != expected_location:
+            line_number = web_app_element.sourceline
+            output += f"[ERROR] [LINE {line_number}] webApplication location '{location}' does not match the expected '{expected_location}'.\n"
+    except:
+        e = sys.exc_info()[0]
+        print(f"something went wrong with {file} parsing", e)
+
+    return output
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--deny', nargs=1,
@@ -141,7 +264,9 @@ def check_vocabulary(file, deny_list, warning_list):
                         type=argparse.FileType('r'))
     parser.add_argument('--tags', nargs=1,
                         type=argparse.FileType('r'))
-    parser.add_argument('--rules', nargs=1,
+    parser.add_argument('--adoc-rules', nargs=1,
+                        type=argparse.FileType('r'))
+    parser.add_argument('--xml-rules', nargs=1,
                         type=argparse.FileType('r'))
     parser.add_argument('--repo', nargs=1, type=str)
     parser.add_argument('infile', nargs='*',
@@ -171,32 +296,60 @@ def check_vocabulary(file, deny_list, warning_list):
             e = sys.exc_info()[0]
             print("something went wrong with tags parsing", e)
             tags = []
-    if args.rules is not None and args.repo is not None:
+    if args.adoc_rules is not None and args.repo is not None:
         try:
             repo = args.repo[0].split('/')[-1]
-            rules = dict(map(lambda rule: (rule[0], {'check': repo not in rule[1]['exception'], 'log-level': rule[1]['log-level']}),
-                             json.loads(args.rules[0].read()).items()))
+            adoc_rules = dict(map(lambda adoc_rules: (adoc_rules[0], {'check': repo not in adoc_rules[1]['exception'], 'log-level': adoc_rules[1]['log-level']}),
+                             json.loads(args.adoc_rules[0].read()).items()))
         except:
             e = sys.exc_info()[0]
-            print("something went wrong with repo and rule parsing", e)
-            repo = ''
-            rules = {
+            print("something went wrong with repo and adoc-rules parsing", e)
+            adoc_rules = {
                 "license": {'check': True, 'log-level': 'ERROR'},
                 "release_date": {'check': True, 'log-level': 'ERROR'},
                 "page_tags": {'check': True, 'log-level': 'ERROR'},
                 "-": {'check': True, 'log-level': 'ERROR'},
                 "line-length": {'check': True, 'log-level': 'WARNING'},
                 "file_tags": {'check': True, 'log-level': 'ERROR'},
             }
+    if args.xml_rules is not None and args.repo is not None:
+        try:
+            repo = args.repo[0].split('/')[-1]
+            xml_rules = {"check": repo not in (xml_data := json.loads(args.xml_rules[0].read()))['skip'], 'main_directory': xml_data['main_directory']}
+        except:
+            e = sys.exc_info()[0]
+            print("something went wrong with repo and xml-rules parsing", e)
+            xml_rules = { "check" : False }
 
     file_extensions = map(lambda f: f.name.split(
         '/')[-1].split('.')[-1], args.infile)
     output = ''
 
     for i, f in enumerate(file_extensions):
         if f == 'adoc':
-            output += adoc_checker(args.infile[i], tags, rules)
-            output += check_vocabulary(args.infile[i], deny_list, warning_list)
+            result = adoc_checker(args.infile[i], tags, adoc_rules) + check_vocabulary(args.infile[i], deny_list, warning_list)
+            if result != '':
+                output += f"[INFO] File:[{args.infile[i].name}]\n{result}"
+
+    for i, file in enumerate(args.infile):
+        if os.path.basename(file.name) == 'pom.xml' and xml_rules["check"]:
+            result = pom_xml_checker(file.name, repo, xml_rules)
+            if result:
+                output += f"[INFO] File:[{file.name}]\n{result}"
+
+        if os.path.basename(file.name) == 'server.xml' and xml_rules["check"]:
+            path_parts = file.name.split(os.sep)
+            if path_parts[-2] == 'staging' or path_parts[-3] == 'staging':
+                continue
+            pom_xml_path = find_corresponding_pom_xml(file.name)
+            pom_xml_result = pom_xml_checker(pom_xml_path, repo, xml_rules)
+            if pom_xml_result:
+                output += f"[INFO] File:[{pom_xml_path}]\n{pom_xml_result}"
+                continue
+            server_xml_result = server_xml_checker(file.name)
+            if server_xml_result:
+                output += f"[INFO] File:[{file.name}]\n{server_xml_result}"
+
     if output != '':
         print(output.rstrip())
         if 'ERROR' in output:

diff --git a/pr-checker/checker.sh b/pr-checker/checker.sh
@@ -1,6 +1,8 @@
 #!/bin/sh
 set -o pipefail
 
+python3 -m pip install lxml
+
 repo=$1
 pr_number=$2
 
@@ -33,4 +35,4 @@ else
     echo "::set-output name=canSkip::false"
 fi
 
-python3 "$SCRIPTPATH"/checker.py --deny "$SCRIPTPATH"/deny_list.json --warn "$SCRIPTPATH"/warning_list.json --tags "$SCRIPTPATH"/../guide_tags.json --repo "$repo" --rules "$SCRIPTPATH"/rules.json $(echo $UPDATED_FILES | jq '.[]' | tr -d '"')
+python3 "$SCRIPTPATH"/checker.py --deny "$SCRIPTPATH"/deny_list.json --warn "$SCRIPTPATH"/warning_list.json --tags "$SCRIPTPATH"/../guide_tags.json --repo "$repo" --adoc-rules "$SCRIPTPATH"/adoc_rules.json --xml-rules "$SCRIPTPATH"/xml_rules.json $(echo $UPDATED_FILES | jq '.[]' | tr -d '"')
diff --git a/pr-checker/xml_rules.json b/pr-checker/xml_rules.json
@@ -0,0 +1,12 @@
+{
+    "main_directory": [
+        "start",
+        "finish",
+        "ibmmq"
+    ],
+    "skip": [
+        "guide-liberty-deep-dive",
+        "guide-liberty-deep-dive-gradle"
+    ]
+}
+