diff --git a/src/common/strings.py b/src/common/strings.py index 8d466c8..c63f175 100644 --- a/src/common/strings.py +++ b/src/common/strings.py @@ -9,3 +9,8 @@ def get_alphabetical_value(string: str) -> int: Example: 'COLIN' -> 53 = 3 + 15 + 12 + 9 + 14 """ return sum([(ord(char) - ord('a') + 1) for char in string.lower()]) + + +def sum_ascii_values(text: str) -> int: + """Sum the ASCII values of the given text `text`.""" + return sum(ord(character) for character in text) diff --git a/src/p059_xor_decryption/p059_cipher.txt b/src/p059_xor_decryption/p059_cipher.txt new file mode 100644 index 0000000..b3b3247 --- /dev/null +++ b/src/p059_xor_decryption/p059_cipher.txt @@ -0,0 +1 @@ +36,22,80,0,0,4,23,25,19,17,88,4,4,19,21,11,88,22,23,23,29,69,12,24,0,88,25,11,12,2,10,28,5,6,12,25,10,22,80,10,30,80,10,22,21,69,23,22,69,61,5,9,29,2,66,11,80,8,23,3,17,88,19,0,20,21,7,10,17,17,29,20,69,8,17,21,29,2,22,84,80,71,60,21,69,11,5,8,21,25,22,88,3,0,10,25,0,10,5,8,88,2,0,27,25,21,10,31,6,25,2,16,21,82,69,35,63,11,88,4,13,29,80,22,13,29,22,88,31,3,88,3,0,10,25,0,11,80,10,30,80,23,29,19,12,8,2,10,27,17,9,11,45,95,88,57,69,16,17,19,29,80,23,29,19,0,22,4,9,1,80,3,23,5,11,28,92,69,9,5,12,12,21,69,13,30,0,0,0,0,27,4,0,28,28,28,84,80,4,22,80,0,20,21,2,25,30,17,88,21,29,8,2,0,11,3,12,23,30,69,30,31,23,88,4,13,29,80,0,22,4,12,10,21,69,11,5,8,88,31,3,88,4,13,17,3,69,11,21,23,17,21,22,88,65,69,83,80,84,87,68,69,83,80,84,87,73,69,83,80,84,87,65,83,88,91,69,29,4,6,86,92,69,15,24,12,27,24,69,28,21,21,29,30,1,11,80,10,22,80,17,16,21,69,9,5,4,28,2,4,12,5,23,29,80,10,30,80,17,16,21,69,27,25,23,27,28,0,84,80,22,23,80,17,16,17,17,88,25,3,88,4,13,29,80,17,10,5,0,88,3,16,21,80,10,30,80,17,16,25,22,88,3,0,10,25,0,11,80,12,11,80,10,26,4,4,17,30,0,28,92,69,30,2,10,21,80,12,12,80,4,12,80,10,22,19,0,88,4,13,29,80,20,13,17,1,10,17,17,13,2,0,88,31,3,88,4,13,29,80,6,17,2,6,20,21,69,30,31,9,20,31,18,11,94,69,54,17,8,29,28,28,84,80,44,88,24,4,14,21,69,30,31,16,22,20,69,12,24,4,12,80,17,16,21,69,11,5,8,88,31,
3,88,4,13,17,3,69,11,21,23,17,21,22,88,25,22,88,17,69,11,25,29,12,24,69,8,17,23,12,80,10,30,80,17,16,21,69,11,1,16,25,2,0,88,31,3,88,4,13,29,80,21,29,2,12,21,21,17,29,2,69,23,22,69,12,24,0,88,19,12,10,19,9,29,80,18,16,31,22,29,80,1,17,17,8,29,4,0,10,80,12,11,80,84,67,80,10,10,80,7,1,80,21,13,4,17,17,30,2,88,4,13,29,80,22,13,29,69,23,22,69,12,24,12,11,80,22,29,2,12,29,3,69,29,1,16,25,28,69,12,31,69,11,92,69,17,4,69,16,17,22,88,4,13,29,80,23,25,4,12,23,80,22,9,2,17,80,70,76,88,29,16,20,4,12,8,28,12,29,20,69,26,9,69,11,80,17,23,80,84,88,31,3,88,4,13,29,80,21,29,2,12,21,21,17,29,2,69,12,31,69,12,24,0,88,20,12,25,29,0,12,21,23,86,80,44,88,7,12,20,28,69,11,31,10,22,80,22,16,31,18,88,4,13,25,4,69,12,24,0,88,3,16,21,80,10,30,80,17,16,25,22,88,3,0,10,25,0,11,80,17,23,80,7,29,80,4,8,0,23,23,8,12,21,17,17,29,28,28,88,65,75,78,68,81,65,67,81,72,70,83,64,68,87,74,70,81,75,70,81,67,80,4,22,20,69,30,2,10,21,80,8,13,28,17,17,0,9,1,25,11,31,80,17,16,25,22,88,30,16,21,18,0,10,80,7,1,80,22,17,8,73,88,17,11,28,80,17,16,21,11,88,4,4,19,25,11,31,80,17,16,21,69,11,1,16,25,2,0,88,2,10,23,4,73,88,4,13,29,80,11,13,29,7,29,2,69,75,94,84,76,65,80,65,66,83,77,67,80,64,73,82,65,67,87,75,72,69,17,3,69,17,30,1,29,21,1,88,0,23,23,20,16,27,21,1,84,80,18,16,25,6,16,80,0,0,0,23,29,3,22,29,3,69,12,24,0,88,0,0,10,25,8,29,4,0,10,80,10,30,80,4,88,19,12,10,19,9,29,80,18,16,31,22,29,80,1,17,17,8,29,4,0,10,80,12,11,80,84,86,80,35,23,28,9,23,7,12,22,23,69,25,23,4,17,30,69,12,24,0,88,3,4,21,21,69,11,4,0,8,3,69,26,9,69,15,24,12,27,24,69,49,80,13,25,20,69,25,2,23,17,6,0,28,80,4,12,80,17,16,25,22,88,3,16,21,92,69,49,80,13,25,6,0,88,20,12,11,19,10,14,21,23,29,20,69,12,24,4,12,80,17,16,21,69,11,5,8,88,31,3,88,4,13,29,80,22,29,2,12,29,3,69,73,80,78,88,65,74,73,70,69,83,80,84,87,72,84,88,91,69,73,95,87,77,70,69,83,80,84,87,70,87,77,80,78,88,21,17,27,94,69,25,28,22,23,80,1,29,0,0,22,20,22,88,31,11,88,4,13,29,80,20,13,17,1,10,17,17,13,2,0,88,31,3,88,4,13,29,80,6,17,2,6,20,21,75,88,62,4,21,21,9,1,92,69,12,24,0,88,3,16,2
1,80,10,30,80,17,16,25,22,88,29,16,20,4,12,8,28,12,29,20,69,26,9,69,65,64,69,31,25,19,29,3,69,12,24,0,88,18,12,9,5,4,28,2,4,12,21,69,80,22,10,13,2,17,16,80,21,23,7,0,10,89,69,23,22,69,12,24,0,88,19,12,10,19,16,21,22,0,10,21,11,27,21,69,23,22,69,12,24,0,88,0,0,10,25,8,29,4,0,10,80,10,30,80,4,88,19,12,10,19,9,29,80,18,16,31,22,29,80,1,17,17,8,29,4,0,10,80,12,11,80,84,86,80,36,22,20,69,26,9,69,11,25,8,17,28,4,10,80,23,29,17,22,23,30,12,22,23,69,49,80,13,25,6,0,88,28,12,19,21,18,17,3,0,88,18,0,29,30,69,25,18,9,29,80,17,23,80,1,29,4,0,10,29,12,22,21,69,12,24,0,88,3,16,21,3,69,23,22,69,12,24,0,88,3,16,26,3,0,9,5,0,22,4,69,11,21,23,17,21,22,88,25,11,88,7,13,17,19,13,88,4,13,29,80,0,0,0,10,22,21,11,12,3,69,25,2,0,88,21,19,29,30,69,22,5,8,26,21,23,11,94 \ No newline at end of file diff --git a/src/p059_xor_decryption/p059_xor_decryption.py b/src/p059_xor_decryption/p059_xor_decryption.py new file mode 100644 index 0000000..73264db --- /dev/null +++ b/src/p059_xor_decryption/p059_xor_decryption.py @@ -0,0 +1,120 @@ +""" +Problem 59: XOR decryption +https://projecteuler.net/problem=59 + +Each character on a computer is assigned a unique code and the preferred standard is +ASCII (American Standard Code for Information Interchange). +For example, uppercase A = 65, asterisk (*) = 42, and lowercase k = 107. + +A modern encryption method is to take a text file, convert the bytes to ASCII, then XOR each byte +with a given value, taken from a secret key. +The advantage with the XOR function is that using the same encryption key on the cipher text, +restores the plain text; for example, 65 XOR 42 = 107, then 107 XOR 42 = 65. + +For unbreakable encryption, the key is the same length as the plain text message, +and the key is made up of random bytes. The user would keep the encrypted message and +the encryption key in different locations, and without both "halves", +it is impossible to decrypt the message. 
+
+Unfortunately, this method is impractical for most users, so the modified method is to use
+a password as a key. If the password is shorter than the message, which is likely,
+the key is repeated cyclically throughout the message.
+The balance for this method is using a sufficiently long password key for security,
+but short enough to be memorable.
+
+Your task has been made easy, as the encryption key consists of three lower case characters.
+Using p059_cipher.txt (right click and 'Save Link/Target As...'), a file containing
+the encrypted ASCII codes, and the knowledge that the plain text must contain common
+English words, decrypt the message and find the sum of the ASCII values in the original text.
+"""
+
+from typing import Iterable, List, Optional, Tuple
+from itertools import cycle, product
+
+from src.common.files import get_items_from_file
+from src.common.strings import sum_ascii_values
+
+
+MOST_COMMON_ENGLISH_WORDS = {
+    'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'i', 'it', 'for', 'not', 'on',
+    'with', 'he', 'as', 'you', 'do', 'at', 'this', 'but', 'his', 'by', 'from', 'they', 'we',
+    'say', 'her', 'she', 'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there', 'their',
+    'what', 'so', 'up', 'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me', 'when', 'make',
+    'can', 'like', 'time', 'no', 'just', 'him', 'know', 'take', 'people', 'into', 'year', 'your',
+    'good', 'some', 'could', 'them', 'see', 'other', 'than', 'then', 'now', 'look', 'only',
+    'come', 'its', 'over', 'think', 'also', 'back', 'after', 'use', 'two', 'how', 'our', 'work',
+    'first', 'well', 'way', 'even', 'new', 'want', 'because', 'any', 'these', 'give', 'day',
+    'most', 'us',
+}
+
+
+def read_file(file_name: str) -> List[int]:
+    """Read the cipher file `file_name` and return its items converted to integers."""
+    return [int(character) for character in get_items_from_file(file_name)]
+
+
+def get_possible_encryption_keys(
+        length: int = 3,
+        current_encryption_key: Optional[List[int]] = None
+) -> Iterable[List[int]]:
+    """
+    Get all possible encryption keys consisting of `length` lower case characters.
+
+    Keys are lists of character codes, yielded in lexicographic order. The optional
+    `current_encryption_key` is a prefix that is put in front of every generated key.
+    """
+    prefix = list(current_encryption_key or [])
+    lower_case_codes = range(ord('a'), ord('z') + 1)
+    for suffix in product(lower_case_codes, repeat=length):
+        yield prefix + list(suffix)
+
+
+def decrypt_text(encrypted_text: Iterable[int], encryption_key: List[int]) -> str:
+    """
+    Decrypt the character codes `encrypted_text` by XOR-ing them with the key
+    `encryption_key`, which is repeated cyclically over the whole text.
+    """
+    return ''.join(
+        chr(character ^ encryption_char)
+        for character, encryption_char in zip(encrypted_text, cycle(encryption_key))
+    )
+
+
+def score_decrypted_text(decrypted_text: str) -> int:
+    """
+    Score `decrypted_text` by summing the lengths of its words that are common English words.
+    Only '.' is treated as punctuation; the comparison against the word list ignores case.
+    """
+    score = 0
+    for word in decrypted_text.replace('.', ' ').split():
+        if word.lower() in MOST_COMMON_ENGLISH_WORDS:
+            score += len(word)
+    return score
+
+
+def bruteforce_decrypt_file(file_name: str) -> Tuple[str, List[int]]:
+    """
+    Decrypt the given file with file name `file_name` by trying all possible encryption keys.
+    Return the tuple `(decrypted_text, encryption_key)` for the best-scoring key;
+    if several keys share the best score, the first one generated wins.
+    """
+    text = read_file(file_name)
+    encryption_key = max(
+        get_possible_encryption_keys(3),
+        key=lambda key: score_decrypted_text(decrypt_text(text, key)),
+    )
+    return decrypt_text(text, encryption_key), encryption_key
+
+
+def main() -> None:
+    """Main function."""
+    file_name = 'src/p059_xor_decryption/p059_cipher.txt'
+    decrypted_text, encryption_key = bruteforce_decrypt_file(file_name)
+    ascii_sum = sum_ascii_values(decrypted_text)
+    print(f'The sum of the ASCII values in the original text is {ascii_sum:,}.')
+    print(f'The decrypted text begins with "{decrypted_text[:100]}".')
+    print(f'The encryption key is {encryption_key}.')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/test/common/test_strings.py b/test/common/test_strings.py
index 8beb608..54f135d 100644
--- a/test/common/test_strings.py
+++ b/test/common/test_strings.py
@@ -2,6 +2,8 @@
 String utility functions.
 """
 
+import pytest
+
 
 def test_get_alphabetical_value():
     # arrange
@@ -13,3 +15,18 @@ def test_get_alphabetical_value():
     # assert
     expected_result = 53
     assert actual_result == expected_result
+
+
+@pytest.mark.parametrize('test_input_text,expected_result', [
+    ('', 0),
+    ('A*k', 214),
+])
+def test_sum_ascii_values(test_input_text, expected_result):
+    # arrange
+    from src.common.strings import sum_ascii_values
+
+    # act
+    actual_result = sum_ascii_values(test_input_text)
+
+    # assert
+    assert actual_result == expected_result
diff --git a/test/test_p059_xor_decryption/test_p059_xor_decryption.py b/test/test_p059_xor_decryption/test_p059_xor_decryption.py
new file mode 100644
index 0000000..d0b8e40
--- /dev/null
+++ b/test/test_p059_xor_decryption/test_p059_xor_decryption.py
@@ -0,0 +1,99 @@
+"""
+Problem 59: XOR decryption
+https://projecteuler.net/problem=59
+
+Each character on a computer is assigned a unique code and the preferred standard is
+ASCII (American Standard Code for Information Interchange).
+For example, uppercase A = 65, asterisk (*) = 42, and lowercase k = 107.
+
+A modern encryption method is to take a text file, convert the bytes to ASCII, then XOR each byte
+with a given value, taken from a secret key.
+The advantage with the XOR function is that using the same encryption key on the cipher text,
+restores the plain text; for example, 65 XOR 42 = 107, then 107 XOR 42 = 65.
+
+For unbreakable encryption, the key is the same length as the plain text message,
+and the key is made up of random bytes. The user would keep the encrypted message and
+the encryption key in different locations, and without both "halves",
+it is impossible to decrypt the message.
+
+Unfortunately, this method is impractical for most users, so the modified method is to use
+a password as a key. If the password is shorter than the message, which is likely,
+the key is repeated cyclically throughout the message.
+The balance for this method is using a sufficiently long password key for security,
+but short enough to be memorable.
+
+Your task has been made easy, as the encryption key consists of three lower case characters.
+Using p059_cipher.txt (right click and 'Save Link/Target As...'), a file containing
+the encrypted ASCII codes, and the knowledge that the plain text must contain common
+English words, decrypt the message and find the sum of the ASCII values in the original text.
+"""
+
+import pytest
+
+
+def test_get_possible_encryption_keys_length_1():
+    # arrange
+    from src.p059_xor_decryption.p059_xor_decryption import get_possible_encryption_keys
+
+    # act
+    generated_keys = list(get_possible_encryption_keys(1))
+
+    # assert: one single-character key per lower case letter, in alphabetical order
+    every_single_letter_key = [[code] for code in range(ord('a'), ord('z') + 1)]
+    assert generated_keys == every_single_letter_key
+
+
+def test_get_possible_encryption_keys_length_3():
+    # arrange
+    from src.p059_xor_decryption.p059_xor_decryption import get_possible_encryption_keys
+
+    # act
+    generated_keys = list(get_possible_encryption_keys(3))
+
+    # assert: 26^3 keys, running from 'aaa' up to 'zzz'
+    first, last = ord('a'), ord('z')
+    expected_key_count = 26 ** 3
+    leading_keys = [
+        [first, first, first],
+        [first, first, first + 1],
+        [first, first, first + 2],
+        [first, first, first + 3],
+    ]
+    trailing_keys = [
+        [last, last, last - 3],
+        [last, last, last - 2],
+        [last, last, last - 1],
+        [last, last, last],
+    ]
+    assert len(generated_keys) == expected_key_count
+    assert generated_keys[:4] == leading_keys
+    assert generated_keys[-4:] == trailing_keys
+
+
+def test_decrypt_text():
+    # arrange
+    from src.p059_xor_decryption.p059_xor_decryption import decrypt_text
+
+    cipher_codes = [ord('A'), ord('k')]
+
+    # act
+    plain_text = decrypt_text(cipher_codes, [ord('*')])
+
+    # assert: 65 XOR 42 = 107 ('k') and 107 XOR 42 = 65 ('A')
+    swapped_text = 'kA'
+    assert plain_text == swapped_text
+
+
+@pytest.mark.parametrize('test_input_decrypted_text,expected_result', [
+    ('This text consists of common english words.', 6),
+    ('hrnskhqing82jdnak 39gsjd la39gnda49', 0),
+])
+def test_score_decrypted_text(test_input_decrypted_text, expected_result):
+    # arrange
+    from src.p059_xor_decryption.p059_xor_decryption import score_decrypted_text
+
+    # act
+    score = score_decrypted_text(test_input_decrypted_text)
+
+    # assert
+    assert score == expected_result