Fix evaluation

tongyx361 · Sep 19, 2024 · a8fe7cc
1 parent 8f462ae
commit a8fe7cc
Show file tree

Hide file tree

Showing 5 changed files with 322 additions and 298 deletions.
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ pip install "git+https://github.com/tongyx361/symeval.git"
 ``` python
 from symeval import *
 
-math_evaluator = EvaluatorMathBatch()
+evaluator = EvaluatorMathBatch()
 ```
 
 `symeval` provides elaborate answer extraction and correctness judgement
@@ -41,7 +41,7 @@ to evaluate in batch with **timeout** but still efficiently.
 
 ``` python
 test_eq(
-    math_evaluator.batch_eq(ref_answers=["1/2", "1/2"], pred_answers=["0.5", "2/4"]),
+    evaluator.batch_eq(ref_answers=["1/2", "1/2"], pred_answers=["0.5", "2/4"]),
     [True] * 2,
 )
 ```
@@ -90,37 +90,50 @@ can:
 
 ``` python
 # MATH-style boxed answer
-math_evaluator.extract_ans("Therefore, $1+1=\\boxed{2}$.")
+evaluator.extract_ans("Therefore, $1+1=\\boxed{2}$.")
 ```
 
 ``` python
 # Answer around "answer"
-math_evaluator.extract_ans(
+evaluator.extract_ans(
     "Both $1$ and $11$ divide $11,$ so $\\boxed{11}=2$, and since $1,$ $2,$ $4,$ $5,$ $10,$ and $20$ divide $20,$ then $\\boxed{20}=6$. The inner expression, $\\boxed{11}\\times\\boxed{20}=2\\times6=12$. Finally, $\\boxed{12}=6$ because $1,$ $2,$ $3,$ $4,$ $6,$ and $12$ divide $12.$\n\nTherefore, $6$ is our answer. Please note that we have not boxed the correct answer as we normally do, as that would be especially confusing for this problem."
 )
 ```
 
 ``` python
 # Use the last number by default
-math_evaluator.extract_ans(
+evaluator.extract_ans(
     'First, we need to count the total number of letters in the word "CIRCLE". There are 6 letters.\n\nNext, we need to count the number of distinct letters. There are 6 distinct letters in the word "CIRCLE": C, I, R, L, E, and G.\n\nNow, let\'s consider the arrangements of the distinct letters. The number of ways to arrange n distinct items is n factorial (n!). So, we have 6! = 6 × 5 × 4 × 3 × 2 × 1 = 720 ways to arrange the distinct letters.\n\nHowever, the word "CIRCLE" has one letter that repeats (the letter \'C\' repeats twice). We have over-counted the number of distinct arrangements by including arrangements that are just rotations of each other (for example, "CIRCLE" and "LCIRCE" are considered different arrangements here, but they are the same word when read).\n\nTo correct for this, we divide the total number of arrangements by the number of ways to arrange the repeated letters. The number of ways to arrange 2 identical items is 2! = 2 × 1 = 2. So, we divide the total number of arrangements by 2 to get the correct number of distinct arrangements.\n\nTherefore, the number of ways to arrange the letters of the word "CIRCLE" is 720 ÷ 2 = 360.'
 )
 # More cases ...
 ```
 
 ``` python
 # Normalize fraction
-math_evaluator.extract_ans("The answer is 1/2")
+evaluator.extract_ans("The answer is 1/2")
 ```
 
 ``` python
 # Normalize pmatrix
-math_evaluator.extract_ans(
+evaluator.extract_ans(
     "The answer is \\begin{pmatrix} 3 \\\\ \\frac{\\pi}{2} \\end{pmatrix}"
 )
 # More cases ...
 ```
 
+More test cases:
+
+<details class="code-fold">
+<summary>Code</summary>
+
+``` python
+test_eq(evaluator.norm_ans_str("864 \\mbox{ inches}^2"), "864")
+test_eq(evaluator.norm_ans_str("\\frac{270}7\\text{ degrees}"), "\\frac{270}7")
+test_eq(evaluator.norm_ans_str(".0000672"), "0.0000672")
+```
+
+</details>
+
 #### Correctly Processing Various Mathematical Objects / Special Text
 
 [`EvaluatorMath`](https://tongyx361.github.io/symeval/core.html#evaluatormath),
@@ -133,26 +146,26 @@ calculation, is able to correctly process
   times.
 
 ``` python
-math_evaluator.eq("x+y", "y+x") == True  # Expression
+evaluator.eq("x+y", "y+x") == True  # Expression
 ```
 
 ``` python
-math_evaluator.eq("\\frac{1}{2}", "0.5") == True  # LaTeX
+evaluator.eq("\\frac{1}{2}", "0.5") == True  # LaTeX
 ```
 
 ``` python
-math_evaluator.eq(
+evaluator.eq(
     "\\begin{array}1\\\\2\\end{array}",
     "1,2",
 )  # Matrix (Vector)
 ```
 
 ``` python
-math_evaluator.eq("{1,2}", "{2,1}", compare_sets=True)  # Set
+evaluator.eq("{1,2}", "{2,1}", compare_sets=True)  # Set
 ```
 
 ``` python
-math_evaluator.eq("no", "false")  # Bool
+evaluator.eq("no", "false")  # Bool
 # More mathematical objects and special texts ...
 ```
 
@@ -162,47 +175,52 @@ More test cases:
 <summary>Code</summary>
 
 ``` python
-test_eq(math_evaluator.eq("251,7\\\\ \\noindent", "0"), False)
-test_eq(math_evaluator.eq("3.54*10^{-7}", "3.54e-07"), True)
-test_eq(math_evaluator.eq(r"\frac{1}{2}", "0.5"), True)
-test_eq(math_evaluator.eq("1", "100"), False)
-test_eq(math_evaluator.eq("100", "1"), False)
-test_eq(math_evaluator.eq("3.04", "0.0304", False), True)
-test_eq(math_evaluator.eq(["0.0304", 0.0304], "3.04"), True)
-test_eq(math_evaluator.eq("x<-1", "x>3"), False)
+test_eq(evaluator.eq("251,7\\\\ \\noindent", "0"), False)
+test_eq(evaluator.eq("3.54*10^{-7}", "3.54e-07"), True)
+test_eq(evaluator.eq(r"\frac{1}{2}", "0.5"), True)
+test_eq(evaluator.eq("1", "100"), False)
+test_eq(evaluator.eq("100", "1"), False)
+test_eq(evaluator.eq("3.04", "0.0304", False), True)
+test_eq(evaluator.eq(["0.0304", 0.0304], "3.04"), True)
+test_eq(evaluator.eq("x<-1", "x>3"), False)
 test_eq(
-    math_evaluator.eq("(-\\infty,0)\\cup(0,\\infty)", "(-\\infty,0)\\cup(0,\\infty)"),
+    evaluator.eq("(-\\infty,0)\\cup(0,\\infty)", "(-\\infty,0)\\cup(0,\\infty)"),
     True,
 )
-test_eq(math_evaluator.eq("1+2,2+1", "2+1,1+2"), True)
-test_eq(math_evaluator.eq("5", "5"), True)
-test_eq(math_evaluator.eq("0.1 + 0.2", "0.3"), True)  # `0.1 + 0.2 == 0.3` is `False`
-test_eq(math_evaluator.eq("x + y", "y + x"), True)
-test_eq(math_evaluator.eq("C", "C"), True)
-test_eq(math_evaluator.eq("1,234", "1234"), True)
-test_eq(math_evaluator.eq("12,34", "(12,34)"), True)
-
-test_eq(math_evaluator.eq("\\$ 5", "5"), True)
-test_eq(math_evaluator.eq("3 * \\sqrt{13}", "3\\sqrt{13}"), True)
-test_eq(math_evaluator.eq("\\pi/2", "\\frac{\\pi}{2}"), True)
-test_eq(math_evaluator.eq("(3,\\pi/2)", "(3,\\frac{\\pi}{2})"), True)
-test_eq(math_evaluator.eq("23000", "\\$23{,}000"), True)
-test_eq(
-    math_evaluator.eq(r"\left(1,2\right)", r"\left(2,1\right)", compare_sets=True), True
-)
-test_eq(math_evaluator.eq("White", "white"), True)
-test_eq(math_evaluator.eq("[0,3)", "[0,1]"), False)
-test_eq(math_evaluator.eq("[0,1]", "[0,3)"), False)
-test_eq(math_evaluator.eq("1001.5", "1001"), False)
-test_eq(math_evaluator.eq("\\frac{2003}{2}", "1001"), False)
+test_eq(evaluator.eq("1+2,2+1", "2+1,1+2"), True)
+test_eq(evaluator.eq("5", "5"), True)
+test_eq(evaluator.eq("0.1 + 0.2", "0.3"), True)  # `0.1 + 0.2 == 0.3` is `False`
+test_eq(evaluator.eq("x + y", "y + x"), True)
+test_eq(evaluator.eq("C", "C"), True)
+test_eq(evaluator.eq("1,234", "1234"), True)
+test_eq(evaluator.eq("12,34", "(12,34)"), True)
+
+test_eq(evaluator.eq("\\$ 5", "5"), True)
+test_eq(evaluator.eq("3 * \\sqrt{13}", "3\\sqrt{13}"), True)
+test_eq(evaluator.eq("\\pi/2", "\\frac{\\pi}{2}"), True)
+test_eq(evaluator.eq("(3,\\pi/2)", "(3,\\frac{\\pi}{2})"), True)
+test_eq(evaluator.eq("23000", "\\$23{,}000"), True)
+test_eq(evaluator.eq(r"\left(1,2\right)", r"\left(2,1\right)", compare_sets=True), True)
+test_eq(evaluator.eq("White", "white"), True)
+test_eq(evaluator.eq("[0,3)", "[0,1]"), False)
+test_eq(evaluator.eq("[0,1]", "[0,3)"), False)
+test_eq(evaluator.eq("1001.5", "1001"), False)
+test_eq(evaluator.eq("\\frac{2003}{2}", "1001"), False)
 ```
 
 </details>
 
+``` python
+test_eq(evaluator.eq("-2,1", "1,-2", compare_sets=True), True)
+```
+
 #### Normalized Majority Voting
 
 ``` python
-math_evaluator.get_maj_answers(["", "", "1", "2", "2", "3", "3", "3"])
+maj_answers_list, norm_answers_list = evaluator.batch_get_maj_answers(
+    [["", "", "1", "2", "2", "3", "3", "3"]]
+)
+print(f"{maj_answers_list = } <- {norm_answers_list = }")
 ```
 
 ### Parsing LaTeX
@@ -230,51 +248,51 @@ latex2sympy_interval("(a+b,b]")
 ``` python
 from symeval import EvaluatorMathBatch
 
-math_evaluator = EvaluatorMathBatch()
+evaluator = EvaluatorMathBatch()
 ```
 
 ``` python
-math_evaluator.latex2matrix(r"\sqrt{400\cos^2(9\pi/44)},\frac{\pi}{4}")
+evaluator.latex2matrix(r"\sqrt{400\cos^2(9\pi/44)},\frac{\pi}{4}")
 ```
 
 ``` python
-math_evaluator.latex2matrix(
+evaluator.latex2matrix(
     r"\begin{pmatrix} \frac{1}{2} & 0 & -\frac{\sqrt{3}}{2} \\ 0 & 1 & 0 \\ \frac{\sqrt{3}}{2} & 0 & \frac{1}{2} \end{pmatrix}"
 )
 ```
 
 ``` python
 test_eq(
-    math_evaluator.latex2matrix("\\begin{pmatrix}-18\\\\-49\\\\96\\end{pmatrix}"),
+    evaluator.latex2matrix("\\begin{pmatrix}-18\\\\-49\\\\96\\end{pmatrix}"),
     Matrix([[-18, -49, 96]]),
 )
 test_eq(
-    math_evaluator.latex2matrix("\\begin{pmatrix} 2 & 3 \\\\ 0 & -2 \\end{pmatrix}"),
+    evaluator.latex2matrix("\\begin{pmatrix} 2 & 3 \\\\ 0 & -2 \\end{pmatrix}"),
     Matrix([[2, 3], [0, -2]]),
 )
 ```
 
 ### Normalization
 
 ``` python
-test_eq(math_evaluator.norm_math_str("251,7\\\\ \\noindent"), "251,7")
+test_eq(evaluator.norm_math_str("251,7\\\\ \\noindent"), "251,7")
 ```
 
 ``` python
 test_eq(fix_a_slash_b("(3/4)\\sqrt{3}"), "(\\frac{3}{4})\\sqrt{3}")
 ```
 
 ``` python
-test_eq(math_evaluator.norm_pm("x\\pmy"), "x-y,x+y")
-test_eq(math_evaluator.norm_pm("a\\mpb"), "a-b,a+b")
-test_eq(math_evaluator.norm_pm("1\\pm\\sqrt{19}"), "1-\\sqrt{19},1+\\sqrt{19}")
-test_eq(math_evaluator.norm_pm(r"\{1\pm\sqrt{5},-2\}"), "1-\\sqrt{5},1+\\sqrt{5},-2")
+test_eq(evaluator.norm_pm("x\\pmy"), "x-y,x+y")
+test_eq(evaluator.norm_pm("a\\mpb"), "a-b,a+b")
+test_eq(evaluator.norm_pm("1\\pm\\sqrt{19}"), "1-\\sqrt{19},1+\\sqrt{19}")
+test_eq(evaluator.norm_pm(r"\{1\pm\sqrt{5},-2\}"), "1-\\sqrt{5},1+\\sqrt{5},-2")
 test_eq(
-    math_evaluator.norm_pm("\\(\\frac{1\\pm\\sqrt{17}}{4}\\)"),
+    evaluator.norm_pm("\\(\\frac{1\\pm\\sqrt{17}}{4}\\)"),
     "\\frac{1-\\sqrt{17}}{4},\\frac{1+\\sqrt{17}}{4}",
 )
 test_eq(
-    math_evaluator.norm_pm(r"\frac{1\pm\sqrt{1-\frac{2}{\sqrt{3}}}}{1}"),
+    evaluator.norm_pm(r"\frac{1\pm\sqrt{1-\frac{2}{\sqrt{3}}}}{1}"),
     "\\frac{1-\\sqrt{1-\\frac{2}{\\sqrt{3}}}}{1},\\frac{1+\\sqrt{1-\\frac{2}{\\sqrt{3}}}}{1}",
 )
 ```
@@ -285,14 +303,14 @@ test_eq(norm_deg(r"\sin 20^\circ"), r"\sin {20*\frac{\pi}{180}}")
 ```
 
 ``` python
-test_eq(math_evaluator.norm_basic_fn(r"sinx"), r"\sin^{1}x")
-test_eq(math_evaluator.norm_basic_fn(r"\sin^2x"), r"\sin^{2}x")
+test_eq(evaluator.norm_basic_fn(r"sinx"), r"\sin^{1}x")
+test_eq(evaluator.norm_basic_fn(r"\sin^2x"), r"\sin^{2}x")
 ```
 
 ### Processing Sets
 
 ``` python
-test_eq(math_evaluator.extract_set("{2,1}"), ["1", "2"])
+test_eq(evaluator.extract_set("{2,1}"), ["1", "2"])
 ```
 
 ``` python
@@ -305,7 +323,7 @@ test_eq(is_set("(3/4)sqrt(3)"), False)
 ### Manipulating Strings
 
 ``` python
-test_eq(math_evaluator.remove_first_paren_pair("{white}", "{"), "white")
+test_eq(evaluator.remove_first_paren_pair("{white}", "{"), "white")
 ```
 
 ## Contribution Guidelines

diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb
@@ -62,15 +62,6 @@
     "show_doc(EvaluatorMath)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_doc(EvaluatorBatchBase)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,