Skip to content

Commit

Permalink
[py-tx] Fixes facebook#1659: Convert image mode I;16 to RGB (facebook…
Browse files Browse the repository at this point in the history
  • Loading branch information
haianhng31 authored Nov 1, 2024
1 parent ce85474 commit 9e067ec
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _convert_image_to_correct_array_dimension(image: Image.Image) -> np.ndarray:
"""
Handle possible image format conversion so that the resulting array is compatible with pdqhash.
"""
if image.mode == "LA":
if image.mode == "LA" or image.mode == "I;16":
# LA images (luminance with alpha) return a 3 dimensional ndarray
# which is incompatible with pdqhash; I;16 images (16-bit grayscale)
# are converted to RGB here as well
image = image.convert("RGB")
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,51 @@


class PDQHasherModuleUnitTest(unittest.TestCase):
def setUp(self):
    """Register the sample images with their expected PDQ hash and quality.

    ``self.test_files`` maps a short format name to a dict holding the
    image ``path``, the ``expected_pdq`` hex string and the
    ``expected_quality`` score.
    """
    fixtures = (
        # Grayscale with alpha channel
        (
            "la",
            "threatexchange/tests/hashing/resources/LA.png",
            "accb6d39648035f8125c8ce6ba65007de7b54c67a2d93ef7b8f33b0611306715",
            100,
        ),
        # 16-bit grayscale
        (
            "i16",
            "threatexchange/tests/hashing/resources/I16.png",
            "de2ef0e99ecdfc1d248a0eb055f023d1d61e79c3920cbb55d561c02accab1763",
            36,
        ),
        # Standard RGB test
        (
            "rgb",
            "threatexchange/tests/hashing/resources/rgb.jpeg",
            "fb4eed46cb8a6c78819ca06b756c541f7b07ef6d02c82fccd00f862166272cda",
            100,
        ),
    )
    self.test_files = {
        name: {
            "path": image_path,
            "expected_pdq": pdq_hex,
            "expected_quality": quality,
        }
        for name, image_path, pdq_hex, quality in fixtures
    }

def test_pdq_from_file_different_formats(self):
    """Test PDQ hash computation from files of different formats.

    Every entry of ``self.test_files`` is hashed with
    ``pdq_hasher.pdq_from_file`` inside its own subtest, and both the
    returned hash and quality are compared with the recorded expectations.
    A fixture that is not present on disk is reported as skipped instead of
    being silently counted as a pass.
    """
    for format_name, test_data in self.test_files.items():
        with self.subTest(format=format_name):
            file_path = pathlib.Path(test_data["path"])
            if not file_path.exists():
                # The fixture paths are relative, so they only resolve from
                # the expected working directory; make the miss visible.
                self.skipTest(f"test image not found: {file_path}")
            pdq_hash, pdq_quality = pdq_hasher.pdq_from_file(file_path)
            # unittest assertions are not stripped under ``python -O`` and
            # report both values on failure.
            self.assertEqual(pdq_hash, test_data["expected_pdq"])
            self.assertEqual(pdq_quality, test_data["expected_quality"])

def test_pdq_from_bytes_different_formats(self):
    """Test PDQ hash computation from bytes of different formats.

    Every entry of ``self.test_files`` is read into memory and hashed with
    ``pdq_hasher.pdq_from_bytes`` inside its own subtest, and both the
    returned hash and quality are compared with the recorded expectations.
    A fixture that is not present on disk is reported as skipped instead of
    being silently counted as a pass.
    """
    for format_name, test_data in self.test_files.items():
        with self.subTest(format=format_name):
            file_path = pathlib.Path(test_data["path"])
            if not file_path.exists():
                # The fixture paths are relative, so they only resolve from
                # the expected working directory; make the miss visible.
                self.skipTest(f"test image not found: {file_path}")
            bytes_data = file_path.read_bytes()
            pdq_hash, pdq_quality = pdq_hasher.pdq_from_bytes(bytes_data)
            # unittest assertions are not stripped under ``python -O`` and
            # report both values on failure.
            self.assertEqual(pdq_hash, test_data["expected_pdq"])
            self.assertEqual(pdq_quality, test_data["expected_quality"])

def test_pdq_from_file(self):
"""Writes a few bytes to a file and runs the pdq hasher on it."""
with tempfile.NamedTemporaryFile("w+b") as f:
Expand All @@ -63,11 +108,3 @@ def test_pdq_from_bytes(self):
bytes_ = base64.b64decode(RANDOM_IMAGE_BASE64)
pdq_hash = pdq_hasher.pdq_from_bytes(bytes_)[0]
assert pdq_hash == RANDOM_IMAGE_PDQ

def test_pdq_from_file_la_png(self):
    """Hash the LA-mode PNG fixture and compare with the recorded result."""
    # NOTE(review): this expected hash differs from the "la" entry used by
    # setUp's fixture table -- confirm which revision of LA.png it matches.
    expected_hash = "5dbc6c369dc4476538bcf307e61d80994c5ed0a5790efaf999bc499ad3b02421"
    expected_quality = 100
    la_png = pathlib.Path("threatexchange/tests/hashing/resources/LA.png")
    actual = pdq_hasher.pdq_from_file(la_png)
    assert actual == (expected_hash, expected_quality)

0 comments on commit 9e067ec

Please sign in to comment.