From f4c2b466713166ba082dff4080a50a518d8b89dc Mon Sep 17 00:00:00 2001 From: Muhammad Adeel Tajamul <77053848+muhammadadeeltajamul@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:56:40 +0500 Subject: [PATCH 1/2] chore: updated notification preference url in email digest (#36101) --- openedx/core/djangoapps/notifications/email/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/notifications/email/utils.py b/openedx/core/djangoapps/notifications/email/utils.py index 34c245308785..ad8b8b85dfd8 100644 --- a/openedx/core/djangoapps/notifications/email/utils.py +++ b/openedx/core/djangoapps/notifications/email/utils.py @@ -100,7 +100,7 @@ def create_email_template_context(username): "mailing_address": settings.CONTACT_MAILING_ADDRESS, "logo_url": get_logo_url_for_email(), "social_media": social_media_info, - "notification_settings_url": f"{settings.ACCOUNT_MICROFRONTEND_URL}/notifications", + "notification_settings_url": f"{settings.ACCOUNT_MICROFRONTEND_URL}/#notifications", "unsubscribe_url": get_unsubscribe_link(username, patch) } From cd9b90fc213a535487f909a07c094b18d5466cac Mon Sep 17 00:00:00 2001 From: Navin Karkera Date: Mon, 13 Jan 2025 23:39:47 +0530 Subject: [PATCH 2/2] feat: show math in plain text in library cards (#36055) Converts mathjax equations to unicode to be rendered as plain text in library card previews --- .../djangoapps/content/search/documents.py | 3 +- .../content/search/plain_text_math.py | 161 ++++++++++++++++++ .../content/search/tests/test_documents.py | 118 +++++++++++++ requirements/edx/base.txt | 2 + requirements/edx/development.txt | 4 + requirements/edx/doc.txt | 2 + requirements/edx/kernel.in | 1 + requirements/edx/testing.txt | 2 + 8 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 openedx/core/djangoapps/content/search/plain_text_math.py diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 
"""
Helper class to convert mathjax equations to plain text.
"""

import re


class InvalidMathEquation(Exception):
    """Raised when mathjax equation is invalid. This is used to skip all transformations."""


class EqnPatternNotFound(Exception):
    """Raised when a pattern is not found in equation. This is used to skip a specific transformation."""


class PlainTextMath:
    """
    Converts mathjax equations to plain text using unicodeit and some preprocessing.

    The preprocessing strips markup that has no plain-text meaning (``\\left``,
    ``\\right``, bold wrappers), drops the backslash of common trigonometric
    function names and rewrites ``\\frac{x}{y}`` as ``(x/y)``. The remaining
    text is handed to ``unicodeit.replace``.
    """
    # One capture group per supported delimiter pair; for any match at most one
    # group is non-empty.
    equation_pattern = re.compile(
        r'\[mathjaxinline\](.*?)\[\/mathjaxinline\]|\[mathjax\](.*?)\[\/mathjax\]|\\\((.*?)\\\)|\\\[(.*?)\\\]'
    )
    # Plain substring replacements, applied in order.
    eqn_replacements = (
        # just remove prefix `\`
        ("\\sin", "sin"),
        ("\\cos", "cos"),
        ("\\tan", "tan"),
        ("\\arcsin", "arcsin"),
        ("\\arccos", "arccos"),
        ("\\arctan", "arctan"),
        ("\\cot", "cot"),
        ("\\sec", "sec"),
        ("\\csc", "csc"),
    )
    # Regex replacements, applied in order after the substring replacements.
    regex_replacements = (
        # `\left` / `\right` only size brackets in mathjax, so they are not required in plain text.
        # The lookahead keeps longer commands such as `\leftarrow`, `\rightarrow`
        # and `\leftrightarrow` intact; a plain substring replace would turn
        # them into `arrow` / `rightarrow`.
        (re.compile(r'\\(?:left|right)(?![A-Za-z])'), ""),
        # Makes text bold, so not required in plain text.
        (re.compile(r'{\\bf (.*?)}'), r"\1"),
    )
    extract_inner_texts = (
        # Replaces any eqn: `\name{inner_text}` with `inner_text`
        "\\mathbf{",
        "\\bm{",
    )
    # Closing brace of a numerator, optional whitespace, opening brace of the denominator.
    frac_open_close_pattern = re.compile(r"}\s*{")

    @staticmethod
    def _nested_bracket_matcher(equation: str, opening_pattern: str) -> tuple[int, int, int, int]:
        r"""
        Locates the first `opening_pattern` in `equation` and its matching closing `}`.

        Every `{` and `}` is counted, so escaped braces (`\{`, `\}`) are treated
        like ordinary ones.

        Args:
            equation: string
            opening_pattern: for example, `\mathbf{`; it is expected to end with `{`

        Returns:
            Tuple of indexes `(start, inner_start, inner_end, end)`. In the example
            below `|` denotes each index position in turn:
            `|\mathbf{`, `\mathbf{|`, `\mathbf{some_text|}`, `\mathbf{some_text}|`

        Raises:
            EqnPatternNotFound: `opening_pattern` does not occur in `equation`.
            InvalidMathEquation: the opening bracket is never closed.
        """
        start = equation.find(opening_pattern)
        if start == -1:
            raise EqnPatternNotFound()
        inner_start = start + len(opening_pattern)
        depth = 0
        for offset, char in enumerate(equation[inner_start:]):
            if char == "{":
                depth += 1
            elif char == "}":
                if depth == 0:
                    inner_end = inner_start + offset
                    return (start, inner_start, inner_end, inner_end + 1)
                depth -= 1
        raise InvalidMathEquation()

    def _fraction_handler(self, equation: str) -> str:
        r"""
        Converts every `\frac{x}{y}` to `(x/y)` while handling nested `{}`.

        For example: `\frac{2}{\sqrt{1+y}}` is converted to `(2/\sqrt{1+y})` and
        `\frac{1}{2} + \frac{3}{4}` is converted to `(1/2) + (3/4)`.

        A `\frac{x}` whose closing brace is not followed (ignoring whitespace) by
        an opening `{` has no denominator; it is kept verbatim and scanning
        continues after it.

        Args:
            equation: string

        Returns:
            String with `\frac` replaced by normal `/` symbol.

        Raises:
            InvalidMathEquation: a numerator or denominator bracket is never closed.
        """
        converted = []
        rest = equation
        # Iterate over sibling fractions; recursion is only used for nesting.
        while True:
            try:
                n_start, n_inner_start, n_inner_end, n_end = self._nested_bracket_matcher(rest, "\\frac{")
            except EqnPatternNotFound:
                break

            # The denominator must directly follow the numerator: match `}\s*{`
            # starting at the numerator's closing brace.
            gap = self.frac_open_close_pattern.match(rest, n_end - 1)
            if gap is None:
                converted.append(rest[:n_end])
                rest = rest[n_end:]
                continue

            d_open = gap.end() - 1  # index of the denominator's opening brace
            _, d_inner_start, d_inner_end, d_end = self._nested_bracket_matcher(rest[d_open:], "{")

            # Handle nested fractions
            numerator = self._fraction_handler(rest[n_inner_start:n_inner_end])
            denominator = self._fraction_handler(rest[d_open + d_inner_start:d_open + d_inner_end])

            # Now re-create this part of the equation with `(numerator/denominator)`
            converted.append(f"{rest[:n_start]}({numerator}/{denominator})")
            rest = rest[d_open + d_end:]

        converted.append(rest)
        return "".join(converted)

    def _nested_text_extractor(self, equation: str, pattern: str) -> str:
        r"""
        Replaces every `pattern...}` in equation with its inner text.

        Nested occurrences are unwrapped recursively and sibling occurrences one
        after another, e.g. with pattern `\mathbf{` the equation
        `\mathbf{a} \cdot \mathbf{b}` becomes `a \cdot b`.

        Args:
            equation: string
            pattern: opening pattern ending with `{`, for example `\mathbf{`

        Returns:
            String without any `pattern` wrapper.

        Raises:
            InvalidMathEquation: a wrapper is opened but never closed.
        """
        extracted = []
        rest = equation
        while True:
            try:
                start, inner_start, inner_end, end = self._nested_bracket_matcher(rest, pattern)
            except EqnPatternNotFound:
                break
            extracted.append(rest[:start])
            extracted.append(self._nested_text_extractor(rest[inner_start:inner_end], pattern))
            rest = rest[end:]
        extracted.append(rest)
        return "".join(extracted)

    def _handle_replacements(self, equation: str) -> str:
        """
        Makes a bunch of replacements in equation string.

        Order: plain substring replacements, removal of wrappers listed in
        `extract_inner_texts`, then regex replacements.
        """
        for q, replacement in self.eqn_replacements:
            equation = equation.replace(q, replacement)
        for pattern in self.extract_inner_texts:
            equation = self._nested_text_extractor(equation, pattern)
        for pattern, replacement in self.regex_replacements:
            equation = pattern.sub(replacement, equation)
        return equation

    def run(self, eqn_matches: re.Match) -> str:
        """
        Takes re.Match object and runs conversion process on the matched equation.

        Intended to be used as the replacement callable for `equation_pattern`.

        Returns:
            The plain text version of the first non-empty group, that group
            unchanged if any step of the conversion fails, or an empty string
            when the equation is empty.
        """
        # Imported here rather than at module level so that the string-only
        # helpers above stay usable without the unicodeit package. Kept outside
        # the try block so a missing package is not silently swallowed.
        import unicodeit  # pylint: disable=import-outside-toplevel

        for group in eqn_matches.groups():
            if not group:
                continue
            try:
                converted = self._handle_replacements(group)
                converted = self._fraction_handler(converted)
                return unicodeit.replace(converted)
            except Exception:  # pylint: disable=broad-except
                # Best effort: an equation that cannot be converted is shown as is.
                return group
        return ""


processor = PlainTextMath()


def process_mathjax(content: str) -> str:
    """
    Replaces every mathjax equation found in content with its plain text version.

    Equations are recognised by `PlainTextMath.equation_pattern`; the delimiters
    themselves are dropped from the result.
    """
    return processor.equation_pattern.sub(processor.run, content)
+ """ + # pylint: disable=line-too-long + eqns = [ + # (input, expected output) + ('Simple addition: \\( 2 + 3 \\)', 'Simple addition: 2 + 3'), + ('Simple subtraction: \\( 5 - 2 \\)', 'Simple subtraction: 5 − 2'), + ('Simple multiplication: \\( 4 * 6 \\)', 'Simple multiplication: 4 * 6'), + ('Simple division: \\( 8 / 2 \\)', 'Simple division: 8 / 2'), + ('Mixed arithmetic: \\( 2 + 3 4 \\)', 'Mixed arithmetic: 2 + 3 4'), + ('Simple exponentiation: \\[ 2^3 \\]', 'Simple exponentiation: 2³'), + ('Root extraction: \\[ 16^{1/2} \\]', 'Root extraction: 16¹^/²'), + ('Exponent with multiple terms: \\[ (2 + 3)^2 \\]', 'Exponent with multiple terms: (2 + 3)²'), + ('Nested exponents: \\[ 2^(3^2) \\]', 'Nested exponents: 2⁽3²)'), + ('Mixed roots: \\[ 8^{1/2} 3^2 \\]', 'Mixed roots: 8¹^/² 3²'), + ('Simple fraction: [mathjaxinline] 3/4 [/mathjaxinline]', 'Simple fraction: 3/4'), + ( + 'Decimal to fraction conversion: [mathjaxinline] 0.75 = 3/4 [/mathjaxinline]', + 'Decimal to fraction conversion: 0.75 = 3/4', + ), + ('Mixed fractions: [mathjaxinline] 1 1/2 = 3/2 [/mathjaxinline]', 'Mixed fractions: 1 1/2 = 3/2'), + ( + 'Converting decimals to mixed fractions: [mathjaxinline] 2.5 = 5/2 [/mathjaxinline]', + 'Converting decimals to mixed fractions: 2.5 = 5/2', + ), + ( + 'Trig identities: [mathjaxinline] \\sin(x + y) = \\sin(x) \\cos(y) + \\cos(x) \\sin(y) [/mathjaxinline]', + 'Trig identities: sin(x + y) = sin(x) cos(y) + cos(x) sin(y)', + ), + ( + 'Sine, cosine, and tangent: [mathjaxinline] \\sin(x) [/mathjaxinline] [mathjaxinline] \\cos(x) [/mathjaxinline] [mathjaxinline] \\tan(x) [/mathjaxinline]', + 'Sine, cosine, and tangent: sin(x) cos(x) tan(x)', + ), + ( + 'Hyperbolic trig functions: [mathjaxinline] \\sinh(x) [/mathjaxinline] [mathjaxinline] \\cosh(x) [/mathjaxinline]', + 'Hyperbolic trig functions: sinh(x) cosh(x)', + ), + ( + "Simple derivative: [mathjax] f(x) = x^2, f'(x) = 2x [/mathjax]", + "Simple derivative: f(x) = x², f'(x) = 2x", + ), + ('Double integral: [mathjax] 
int\\int (x + y) dxdy [/mathjax]', 'Double integral: int∫ (x + y) dxdy'), + ( + 'Partial derivatives: [mathjax] f(x,y) = xy, \\frac{\\partial f}{\\partial x} = y [/mathjax] [mathjax] \\frac{\\partial f}{\\partial y} = x [/mathjax]', + 'Partial derivatives: f(x,y) = xy, (∂ f/∂ x) = y (∂ f/∂ y) = x', + ), + ( + 'Mean and standard deviation: [mathjax] mu = 2, \\sigma = 1 [/mathjax]', + 'Mean and standard deviation: mu = 2, σ = 1', + ), + ( + 'Binomial probability: [mathjax] P(X = k) = (\\binom{n}{k} p^k (1-p)^{n-k}) [/mathjax]', + 'Binomial probability: P(X = k) = (\\binom{n}{k} pᵏ (1−p)ⁿ⁻ᵏ)', + ), + ('Gaussian distribution: [mathjax] N(\\mu, \\sigma^2) [/mathjax]', 'Gaussian distribution: N(μ, σ²)'), + ( + 'Greek letters: [mathjaxinline] \\alpha [/mathjaxinline] [mathjaxinline] \\beta [/mathjaxinline] [mathjaxinline] \\gamma [/mathjaxinline]', + 'Greek letters: α β γ', + ), + ( + 'Subscripted variables: [mathjaxinline] x_i [/mathjaxinline] [mathjaxinline] y_j [/mathjaxinline]', + 'Subscripted variables: xᵢ yⱼ', + ), + ('Superscripted variables: [mathjaxinline] x^{i} [/mathjaxinline]', 'Superscripted variables: xⁱ'), + ( + 'Not supported: \\( \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I \\)', + 'Not supported: \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I', + ), + ( + 'Bold text: \\( {\\bf a} \\cdot {\\bf b} = |{\\bf a}| |{\\bf b}| \\cos(\\theta) \\)', + 'Bold text: a ⋅ b = |a| |b| cos(θ)', + ), + ('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'), + ('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'), + ( + 'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)', + 'Nested Bold text 2: a ⋅ (a × b)' + ), + ( + 'Nested Bold text 3: \\( \\mathbf{a \\cdot (a \\bm{\\times} b)} \\)', + 'Nested Bold text 3: a ⋅ (a × b)' + ), + ('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'), + ('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'), + ('Sqrt test 3: 
[mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'), + ('Fraction test 1: \\( \\frac{2} {3} \\)', 'Fraction test 1: (2/3)'), + ('Fraction test 2: \\( \\frac{2}{3} \\)', 'Fraction test 2: (2/3)'), + ('Fraction test 3: \\( \\frac{\\frac{2}{3}}{4} \\)', 'Fraction test 3: ((2/3)/4)'), + ('Fraction test 4: \\( \\frac{\\frac{2} {3}}{4} \\)', 'Fraction test 4: ((2/3)/4)'), + ('Fraction test 5: \\( \\frac{\\frac{2} {3}}{\\frac{4}{3}} \\)', 'Fraction test 5: ((2/3)/(4/3))'), + # Invalid equations. + ('Fraction error: \\( \\frac{2} \\)', 'Fraction error: \\frac{2}'), + ('Fraction error 2: \\( \\frac{\\frac{2}{3}{4} \\)', 'Fraction error 2: \\frac{\\frac{2}{3}{4}'), + ('Unclosed: [mathjaxinline]x^2', 'Unclosed: [mathjaxinline]x^2'), + ( + 'Missing closing bracket: \\( \\frac{\\frac{2} {3}{\\frac{4}{3}} \\)', + 'Missing closing bracket: \\frac{\\frac{2} {3}{\\frac{4}{3}}' + ), + ('No equation: normal text', 'No equation: normal text'), + ] + # pylint: enable=line-too-long + block = BlockFactory.create( + parent_location=self.toy_course.location, + category="html", + display_name="Non-default HTML Block", + editor="raw", + use_latex_compiler=True, + data="|||".join(e[0] for e in eqns), + ) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) + result = doc['description'].split('|||') + for i, eqn in enumerate(result): + assert eqn.strip() == eqns[i][1] diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index d73ac89b2a0a..a6f79ba3cd7d 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -1208,6 +1208,8 @@ unicodecsv==0.14.1 # via # -r requirements/edx/kernel.in # edx-enterprise +unicodeit==0.7.5 + # via -r requirements/edx/kernel.in uritemplate==4.1.1 # via # drf-spectacular diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index a153f18be7a3..e353c37e9541 100644 --- a/requirements/edx/development.txt +++ 
b/requirements/edx/development.txt @@ -2160,6 +2160,10 @@ unicodecsv==0.14.1 # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # edx-enterprise +unicodeit==0.7.5 + # via + # -r requirements/edx/doc.txt + # -r requirements/edx/testing.txt unidiff==0.7.5 # via -r requirements/edx/testing.txt uritemplate==4.1.1 diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index f715e876d7f6..68cebb2d3e6d 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -1521,6 +1521,8 @@ unicodecsv==0.14.1 # via # -r requirements/edx/base.txt # edx-enterprise +unicodeit==0.7.5 + # via -r requirements/edx/base.txt uritemplate==4.1.1 # via # -r requirements/edx/base.txt diff --git a/requirements/edx/kernel.in b/requirements/edx/kernel.in index d2ec04314801..d1a132778133 100644 --- a/requirements/edx/kernel.in +++ b/requirements/edx/kernel.in @@ -163,3 +163,4 @@ web-fragments # Provides the ability to render fragments o wrapt # Better functools.wrapped. TODO: functools has since improved, maybe we can switch? XBlock[django] # Courseware component architecture xss-utils # https://github.com/openedx/edx-platform/pull/20633 Fix XSS via Translations +unicodeit # Converts mathjax equation to plain text by using unicode symbols diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index f6ad62bf0d5d..e27fb1195b95 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -1605,6 +1605,8 @@ unicodecsv==0.14.1 # via # -r requirements/edx/base.txt # edx-enterprise +unicodeit==0.7.5 + # via -r requirements/edx/base.txt unidiff==0.7.5 # via -r requirements/edx/testing.in uritemplate==4.1.1