Skip to content

Commit

Permalink
Preliminarily write functions of `34_markdown.obsidian.machine_learni…
Browse files Browse the repository at this point in the history
…ng.notation_linking.ipynb` and modify `parse_notation_note` to return a list instead of a MarkdownFile for the bulleted list of notation note links
  • Loading branch information
hyunjongkimmath committed May 31, 2024
1 parent 395f28c commit 5db0911
Show file tree
Hide file tree
Showing 8 changed files with 1,067 additions and 93 deletions.
54 changes: 50 additions & 4 deletions nbs/00_helper.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,15 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['', 'hello', ' asdf', ' ', 'asdf']\n"
]
}
],
"source": [
"text = 'hello asdf asdf'\n",
"sample_output = separate_indices_from_str(text, [(0,5), (10,11)])\n",
Expand Down Expand Up @@ -993,6 +1001,39 @@
"#### Removing HTML tags in a text and obtaining the data of the tags."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"markup = '<b>Hello</b>'\n",
"soup = BeautifulSoup(markup, 'html.parser')\n",
"tag = soup.b\n",
"new_str = soup.new_string(' World')\n",
"tag.append(new_str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' World'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_str"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -1064,8 +1105,13 @@
" return position + len(content)\n",
" replacement_text = _select_replacement_text(\n",
" content, replace_with_attributes, definitely_replace)\n",
" replaced_content = content.replace_with(\n",
" parsed_soup.new_string(replacement_text))\n",
" \n",
" try:\n",
" replaced_content = content.replace_with(\n",
" parsed_soup.new_string(replacement_text))\n",
" except TypeError as e:\n",
" raise e\n",
"\n",
" replaced_contents.append((\n",
" replaced_content,\n",
" position,\n",
Expand Down Expand Up @@ -2266,7 +2312,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2024-03-24T14:35\n"
"2024-05-27T21:24\n"
]
}
],
Expand Down
86 changes: 54 additions & 32 deletions nbs/20_markdown.obsidian.personal.notation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"from trouver.helper import latex_indices, latex_to_path_accepted_string, notation_asterisk_indices, remove_html_tags_in_text\n",
"from trouver.markdown.markdown.file import MarkdownFile, MarkdownLineEnum\n",
"from trouver.markdown.obsidian.links import (\n",
" find_links_in_markdown_text, LinkType, ObsidianLink, MARKDOWNLINK_PATTERN, WIKILINK_PATTERN\n",
" find_links_in_markdown_text, LinkType, ObsidianLink, MARKDOWNLINK_PATTERN, MARKDOWNLINK_CAPTURE_PATTERN, WIKILINK_PATTERN\n",
")\n",
"from trouver.markdown.obsidian.personal.information_notes import bulleted_links_of_type_in_section\n",
"from trouver.markdown.obsidian.personal.note_type import (\n",
Expand Down Expand Up @@ -194,9 +194,10 @@
"#| export\n",
"def _divide_bulleted_list_mf_at_end(\n",
" mf: MarkdownFile\n",
" ) -> tuple[MarkdownFile, Union[MarkdownFile, None]]: # The first MarkdownFile contains the main content. The second MarkdonwFile contains the bulleted list at the end; if no such bulleted list exists, then this is None.\n",
" \"\"\"Divide a `MarkdownFile` for a notation note into two MarkdownFiles, one\n",
" of the main content and the other for the trailing bulleted list of links\n",
" ) -> tuple[MarkdownFile, list[tuple[str, str]]]: # The first MarkdownFile contains the main content. The second MarkdonwFile contains the bulleted list at the end; if no such bulleted list exists, then this is None.\n",
" \"\"\"Return a `MarkdownFile` consisting of just the main content\n",
" of a notation note along with the list of tuples capturing the\n",
" information of the trailing bulleted list of links\n",
" for notations used in the notation note.\n",
"\n",
" Assumes that the bulleted list is formatted correctly\n",
Expand All @@ -215,11 +216,16 @@
" last_part = main_parts.pop() # Should be the same as `part`\n",
" trailing_parts.insert(0, last_part)\n",
" \n",
" if trailing_parts:\n",
" bulleted_list_mf = MarkdownFile(trailing_parts)\n",
" else:\n",
" bulleted_list_mf = None\n",
" return MarkdownFile(main_parts), bulleted_list_mf\n",
" bulleted_list = [\n",
" _notat_str_and_linked_notat_note_name_from_bullet_point_part(part)\n",
" for part in trailing_parts]\n",
" return MarkdownFile(main_parts), bulleted_list\n",
" \n",
" # if trailing_parts:\n",
" # bulleted_list_mf = MarkdownFile(trailing_parts)\n",
" # else:\n",
" # bulleted_list_mf = None\n",
" # return MarkdownFile(main_parts), bulleted_list_mf\n",
"\n",
"def _part_is_unordered_list_and_is_of_markdownstyle_link(\n",
" part: dict[str, Union[str, MarkdownLineEnum]]\n",
Expand All @@ -235,7 +241,22 @@
" if not re.match(MARKDOWNLINK_PATTERN, part['line'][2:]):\n",
" return False\n",
" return True\n",
" "
" \n",
"\n",
"def _notat_str_and_linked_notat_note_name_from_bullet_point_part(\n",
" part: dict[str, Union[str, MarkdownLineEnum]]\n",
" ) -> tuple[str, str]:\n",
" match = re.match(MARKDOWNLINK_CAPTURE_PATTERN, part['line'][2:])\n",
" return match[1], match[2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_eq(['hi'], ['hi'])"
]
},
{
Expand All @@ -249,22 +270,22 @@
"mf = MarkdownFile.from_string(notation_note_str)\n",
"main_content, bulleted_list = _divide_bulleted_list_mf_at_end(mf)\n",
"test_eq(str(main_content), '$\\operatorname{Gal}(L/K)$ [[linky|denotes]] blah blah blah.')\n",
"test_eq(str(bulleted_list), '- [$L$](notation_L_some_field_extension)\\n- [$K$](notation_K_some_base_field)')\n",
"test_eq(bulleted_list, [('$L$', 'notation_L_some_field_extension'), ('$K$', 'notation_K_some_base_field')])\n",
"\n",
"# This time, add more blank lines\n",
"notation_note_str = '$\\operatorname{Gal}(L/K)$ [[linky|denotes]] blah blah blah.\\n\\n- [$L$](notation_L_some_field_extension)\\n\\n- [$K$](notation_K_some_base_field)\\n'\n",
"mf = MarkdownFile.from_string(notation_note_str)\n",
"main_content, bulleted_list = _divide_bulleted_list_mf_at_end(mf)\n",
"test_eq(str(main_content), '$\\operatorname{Gal}(L/K)$ [[linky|denotes]] blah blah blah.')\n",
"test_eq(str(bulleted_list), '- [$L$](notation_L_some_field_extension)\\n- [$K$](notation_K_some_base_field)')\n",
"test_eq(bulleted_list, [('$L$', 'notation_L_some_field_extension'), ('$K$', 'notation_K_some_base_field')])\n",
"\n",
"# No bulleted list means the second output is `None`:\n",
"\n",
"notation_note_str = '$\\operatorname{Gal}(L/K)$ [[linky|denotes]] blah blah blah.\\n\\n'\n",
"mf = MarkdownFile.from_string(notation_note_str)\n",
"main_content, bulleted_list = _divide_bulleted_list_mf_at_end(mf)\n",
"test_eq(str(main_content), '$\\operatorname{Gal}(L/K)$ [[linky|denotes]] blah blah blah.')\n",
"assert bulleted_list is None\n",
"test_eq(bulleted_list, [])\n",
"\n"
]
},
Expand All @@ -277,25 +298,26 @@
"#| export\n",
"def parse_notation_note(\n",
" notation_note: Union[str, VaultNote],\n",
" vault: Optional[PathLike] = None # The vault If `None`, then uses th\n",
" vault: Optional[PathLike] = None # The vault If `None`, then uses `notation_note.vault`\n",
" ) -> tuple[Union[dict, None], Union[str, None], str, MarkdownFile,\n",
" Union[MarkdownFile, None]]:\n",
" list[tuple[str, str]]]:\n",
" \"\"\"Parse information from the notation note.\n",
"\n",
" **Returns**\n",
"\n",
" - tuple[Union[dict, None], str, ObsidianLink, MarkdownFile, MarkdownFile]\n",
" - tuple[Union[dict, None], str, ObsidianLink, MarkdownFile, list[tuple[str, str]]]\n",
" - The first entry is the YAML frontmatter meta, if available.\n",
" - The second entry is the notation string\n",
" - The third entry is the name of the \"main note\" of the notation note. This is usual\n",
" the linked note in the link `[[<linked_note>|denotes]]`. If no such main note\n",
" exists, then this is `None`.\n",
" - The fourth entry is the MarkdownFile consisting of the \"main\" content of the note,\n",
" which excludes the information given by all of the other entries.\n",
" - The fifth entry is the MarkdownFile consisting of the ending bulleted list, listing\n",
" the notations used in the notation notes along with links to the notation notes\n",
" describing these notations. If there is not such bulleted list, then this entry\n",
" is `None`. \n",
" - The fifth entry is a list of tuples of two `str`'s representing a\n",
" bulleted list of notation notes to which `notation_note` links to.\n",
" Each tuple is of the form `(latex_str, notation_note_name)` and\n",
" the corresponding entry in the bulleted list is of the form\n",
" `- [<latex_str>](<notation_note_name)`.\n",
"\n",
" **Raises**\n",
"\n",
Expand All @@ -322,12 +344,12 @@
"\n",
" file_text = str(mf_without_metadata)\n",
"\n",
" main_mf, mf_with_links_to_notations = _divide_bulleted_list_mf_at_end(mf_without_metadata)\n",
" main_mf, linked_notations_list = _divide_bulleted_list_mf_at_end(mf_without_metadata)\n",
" _remove_the_notation_str_and_denotes_in_main_mf(main_mf, notation_note)\n",
"\n",
" return (metadata, _get_notation_string(file_text, notation_note),\n",
" _main_of_notation_from_text(file_text), main_mf,\n",
" mf_with_links_to_notations)\n",
" linked_notations_list)\n",
"\n",
"\n",
"def _get_notation_string(\n",
Expand Down Expand Up @@ -388,13 +410,13 @@
"source": [
"vault = _test_directory() / 'test_vault_7'\n",
"notation_note = VaultNote(vault, name='some_reference_name_notation_Spec_A')\n",
"metadata, notation_str, main_of_notation, main_mf, mf_with_links_to_notations = parse_notation_note(notation_note, vault)\n",
"metadata, notation_str, main_of_notation, main_mf, linked_notat_notes = parse_notation_note(notation_note, vault)\n",
"\n",
"test_eq(metadata, {'detect_regex': [], 'latex_in_original': ['\\\\operatorname{Spec} A']})\n",
"test_eq(notation_str, '$\\\\operatorname{Spec} A$')\n",
"test_eq(main_of_notation, 'spectrum_of_a_ring')\n",
"test_eq(str(main_mf), 'the spectrum of the ring $A$.')\n",
"assert mf_with_links_to_notations is None # There is not a bulleted list at the end, so the last output is `None`."
"test_eq(linked_notat_notes, []) # There is not a bulleted list at the end, so the last output is `None`."
]
},
{
Expand All @@ -405,13 +427,13 @@
"source": [
"vault = _test_directory() / 'test_vault_7'\n",
"notation_note = VaultNote(vault, name='poonen_curves_notation_zeta_X_s_zeta_function_of_variety')\n",
"metadata, notation_str, main_of_notation, main_mf, mf_with_links_to_notations = parse_notation_note(notation_note, vault)\n",
"metadata, notation_str, main_of_notation, main_mf, linked_notat_notes = parse_notation_note(notation_note, vault)\n",
"\n",
"test_eq(metadata, None)\n",
"test_eq(notation_str, r'$\\zeta_{X}(s)$')\n",
"test_eq(main_of_notation, 'poonen_curves_3.4.1 DEFINITION')\n",
"test_eq(str(main_mf), 'the zeta function of the [[poonen_curves_1.0.2 DEFINITION|variety]] $X$ over $\\\\mathbb{F}_q$.\\n\\nIt is defined as\\n\\n$$\\\\zeta_X(s) = Z_X(q^{-s}).$$\\n\\nA priori, it is a formal series, but in fact [[poonen_curves_ 3.6_page_56|it converges]] for $\\\\operatorname{Re} s > \\\\dim X$.')\n",
"test_eq(str(mf_with_links_to_notations), '- [$Z_X$](poonen_curves_notation_Z_X_T)') # There is a bulleted list at the end, so the last output is `None`."
"test_eq(linked_notat_notes, [('$Z_X$', 'poonen_curves_notation_Z_X_T')]) # There is a bulleted list at the end, so the last output is `None`."
]
},
{
Expand All @@ -425,7 +447,7 @@
"\n",
"vault = _test_directory() / 'test_vault_7'\n",
"notation_note = VaultNote(vault, name='some_reference_name_notation_O_X_this_file_has_no_links') \n",
"metadata, notation_str, main_of_notation, main_mf, mf_with_links_to_notations = parse_notation_note(notation_note, vault)\n",
"metadata, notation_str, main_of_notation, main_mf, linked_notat_notes = parse_notation_note(notation_note, vault)\n",
"assert main_note is None\n"
]
},
Expand All @@ -440,7 +462,7 @@
"\n",
"vault = _test_directory() / 'test_vault_7'\n",
"notation_note = VaultNote(vault, name='some_reference_name_notation_B_R') \n",
"metadata, notation_str, main_of_notation, main_mf, mf_with_links_to_notations = parse_notation_note(notation_note, vault)\n",
"metadata, notation_str, main_of_notation, main_mf, linked_notat_notes = parse_notation_note(notation_note, vault)\n",
"assert main_note is None\n"
]
},
Expand All @@ -454,7 +476,7 @@
"# Test the case where the notation string contains a [[]]\n",
"vault = _test_directory() / 'test_vault_7'\n",
"notation_note = VaultNote(vault, name='some_reference_name_notation_k_t_formal_power_series_ring') \n",
"metadata, notation_str, main_of_notation, main_mf, mf_with_links_to_notations = parse_notation_note(notation_note, vault)\n",
"metadata, notation_str, main_of_notation, main_mf, linked_notat_notes = parse_notation_note(notation_note, vault)\n",
"test_eq(notation_str, '$k[[t]]$')\n",
"test_eq(main_of_notation, 'some_note')\n",
"# assert main_note is None"
Expand Down Expand Up @@ -2321,7 +2343,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\hyunj\\AppData\\Local\\Temp\\ipykernel_20072\\2025152828.py:57: UserWarning: The following note has the following excess notations: note_with_some_excessive_notation_notes, \\mathcal{B}(\\mathbb{R}), \\operatorname{Jac}(C)\n",
"C:\\Users\\hyunj\\AppData\\Local\\Temp\\ipykernel_19052\\2025152828.py:57: UserWarning: The following note has the following excess notations: note_with_some_excessive_notation_notes, \\mathcal{B}(\\mathbb{R}), \\operatorname{Jac}(C)\n",
" warnings.warn(\n"
]
}
Expand Down Expand Up @@ -2759,9 +2781,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "trouver_py310_venv",
"display_name": "python3",
"language": "python",
"name": "trouver_py310_venv"
"name": "python3"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 5db0911

Please sign in to comment.