Regular-expression (RE) solution for image detection, thanks to Gaurav Nelson

openshift · Jan 22, 2025 · 353c558 · 353c558
1 parent adaf15d
commit 353c558
Showing 1 changed file with 27 additions and 19 deletions.
diff --git a/build_for_portal.py b/build_for_portal.py
@@ -627,28 +627,36 @@ def detect_images(content, imagefiles):
     Adds the filenames to the imagefiles set
     Does NOT control for false positives such as commented out content,
         because "false negatives" are worse
-    """
-    for content_str in content:
-        workstr = content_str
-        pos = workstr.find("image:")
-
-        while pos>=0:
-            # discard everything until the end of the substring
-            workstr = workstr[pos+6:] # 6 is the length of "image:"
-            workstr = workstr.strip(":") # remove any additional : such as the one in image:: ; any trailing : is lost but that is unimportant
 
-            # if "[" is not found, then this is not an image reference,
-            # because an image reference always has "[" right after the file name
-            bracket_pos = workstr.find("[")
-            if bracket_pos<0: break # if there are no more [ characters, there are no more images
-
-            image_candidate = workstr[:bracket_pos]
-            workstr = workstr[bracket_pos:]
+    TEMPORARY: RE detection is active; the earlier procedural detection is kept below, commented out, for reference
+    """
+    image_pattern = re.compile(r'image::?([^\s\[]+)\[.*?\]')
 
-            if image_candidate.find(" ")<0: # if the candidate contains a space it's not an image file
-                imagefiles.add(os.path.basename(image_candidate))
+    for content_str in content:
+        imagefiles.update({os.path.basename(f) for f in image_pattern.findall(content_str)})
+
+        # NON RE SOLUTION COMMENTED OUT
+        # workstr = content_str
+        # pos = workstr.find("image:")
+        #
+        # while pos>=0:
+        #     # discard everything until the end of the substring
+        #     workstr = workstr[pos+6:] # 6 is the length of "image:"
+        #     workstr = workstr.strip(":") # remove any additional : such as the one in image:: ; any trailing : is lost but that is unimportant
+        #
+        #     # if "[" is not found, then this is not an image reference,
+        #     # because an image reference always has "[" right after the file name
+        #     bracket_pos = workstr.find("[")
+        #     if bracket_pos<0: break # if there are no more [ characters, there are no more images
+        #
+        #     image_candidate = workstr[:bracket_pos]
+        #     workstr = workstr[bracket_pos:]
+        #
+        #     if image_candidate.find(" ")<0: # if the candidate contains a space it's not an image file
+        #         imagefiles.add(os.path.basename(image_candidate))
+        #         string_results.add(image_candidate)
+        #     pos = workstr.find("image:")
 
-            pos = workstr.find("images/")