Skip to content

Commit

Permalink
RE solution thanks to Gaurav Nelson
Browse files Browse the repository at this point in the history
  • Loading branch information
mramendi committed Jan 22, 2025
1 parent adaf15d commit 353c558
Showing 1 changed file with 27 additions and 19 deletions.
46 changes: 27 additions & 19 deletions build_for_portal.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,28 +627,36 @@ def detect_images(content, imagefiles):
Adds the filenames to the imagefiles set
Does NOT control for false positives such as commented out content,
because "false negatives" are worse
"""
for content_str in content:
workstr = content_str
pos = workstr.find("image:")

while pos>=0:
# discard everything until the end of the substring
workstr = workstr[pos+6:] # 6 is the length of "image:"
workstr = workstr.strip(":") # remove any additional : such as the one in image:: ; any trailing : is lost but that is unimportant
# if "[" is not found, then this is not an image reference,
# because an image reference always has "[" right after the file name
bracket_pos = workstr.find("[")
if bracket_pos<0: break # if there are no more [ characters, there are no more images

image_candidate = workstr[:bracket_pos]
workstr = workstr[bracket_pos:]
Image references ("image:" / "image::") are detected with a regular expression;
the earlier procedural detection is kept below, commented out, for reference
"""
image_pattern = re.compile(r'image::?([^\s\[]+)\[.*?\]')

if image_candidate.find(" ")<0: # if the candidate contains a space it's not an image file
imagefiles.add(os.path.basename(image_candidate))
for content_str in content:
imagefiles.update({os.path.basename(f) for f in image_pattern.findall(content_str)})

# Previous non-regex (procedural) solution, kept commented out for reference
# workstr = content_str
# pos = workstr.find("image:")
#
# while pos>=0:
# # discard everything until the end of the substring
# workstr = workstr[pos+6:] # 6 is the length of "image:"
# workstr = workstr.strip(":") # remove any additional : such as the one in image:: ; any trailing : is lost but that is unimportant
#
# # if "[" is not found, then this is not an image reference,
# # because an image reference always has "[" right after the file name
# bracket_pos = workstr.find("[")
# if bracket_pos<0: break # if there are no more [ characters, there are no more images
#
# image_candidate = workstr[:bracket_pos]
# workstr = workstr[bracket_pos:]
#
# if image_candidate.find(" ")<0: # if the candidate contains a space it's not an image file
# imagefiles.add(os.path.basename(image_candidate))
# string_results.add(image_candidate)
# pos = workstr.find("image:")

pos = workstr.find("images/")



Expand Down

0 comments on commit 353c558

Please sign in to comment.