Skip to content

Commit

Permalink
fix further tests
Browse files: browse the repository at this point in the history
  • Loading branch information
andreaskoepf committed Mar 4, 2025
1 parent e1d0491 commit acfd892
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 7 deletions.
4 changes: 2 additions & 2 deletions tests/test_knight_swap.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ def test_score_calculation():

# Test invalid answers
assert dataset.score_answer(None, puzzle) == 0.0
assert dataset.score_answer("", puzzle) == 0.01
assert dataset.score_answer("Invalid", puzzle) == 0.01
assert dataset.score_answer("", puzzle) == 0.0
assert dataset.score_answer("Invalid", puzzle) == 0.0

# Test correct answer
assert dataset.score_answer(puzzle["answer"], puzzle) == 1.0
3 changes: 1 addition & 2 deletions tests/test_knights_knaves.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,7 @@ def test_score_answer():
assert dataset.score_answer(correct_answer, problem) == 1.0
assert abs(dataset.score_answer(half_answer, problem) - 0.65) < 1e-10
assert dataset.score_answer(modified_answer, problem) == 1.0
assert dataset.score_answer(wrong_answer, problem) == 0.01
print("flipped")
assert dataset.score_answer(wrong_answer, problem) == 0.0
assert dataset.score_answer(flipped_answer, problem) == 1.0


Expand Down
2 changes: 1 addition & 1 deletion tests/test_manipulate_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test_manipulate_matrix_score_answer():

# incorrect answer
answer = "1 2 3\n4 5 6\n7 8 8"
assert dataset.score_answer(answer, entry) == 0.01
assert dataset.score_answer(answer, entry) == 0.0

# answer is none
answer = None
Expand Down
2 changes: 1 addition & 1 deletion tests/test_n_queens.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_nqueens_score_answer():

# Test invalid answer gets score 0.0
invalid_answer = "_ _ _ _\n_ _ _ _\n_ _ _ _\n_ _ _ _"
assert dataset.score_answer(invalid_answer, item) == 0.01
assert dataset.score_answer(invalid_answer, item) == 0.0

# Test None answer gets score 0.0
assert dataset.score_answer(None, item) == 0.0
Expand Down
2 changes: 1 addition & 1 deletion tests/test_number_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_number_format_answer():

# Incorrect answer (diff larger than 1e-2)
model_answer = "54245.9"
assert dataset.score_answer(model_answer, entry) == 0.01
assert dataset.score_answer(model_answer, entry) == 0.0

# Answer is null
model_answer = None
Expand Down

0 comments on commit acfd892

Please sign in to comment.