diff --git a/llms_txt/_modidx.py b/llms_txt/_modidx.py
index ee270cf..d5bd669 100644
--- a/llms_txt/_modidx.py
+++ b/llms_txt/_modidx.py
@@ -10,6 +10,8 @@
'llms_txt.core._parse_llms': ('core.html#_parse_llms', 'llms_txt/core.py'),
'llms_txt.core._section': ('core.html#_section', 'llms_txt/core.py'),
'llms_txt.core.create_ctx': ('core.html#create_ctx', 'llms_txt/core.py'),
+ 'llms_txt.core.find_root_dir': ('core.html#find_root_dir', 'llms_txt/core.py'),
+ 'llms_txt.core.get_doc_content': ('core.html#get_doc_content', 'llms_txt/core.py'),
'llms_txt.core.get_sizes': ('core.html#get_sizes', 'llms_txt/core.py'),
'llms_txt.core.llms_txt2ctx': ('core.html#llms_txt2ctx', 'llms_txt/core.py'),
'llms_txt.core.mk_ctx': ('core.html#mk_ctx', 'llms_txt/core.py'),
diff --git a/llms_txt/core.py b/llms_txt/core.py
index 845b4a1..fd37dac 100644
--- a/llms_txt/core.py
+++ b/llms_txt/core.py
@@ -3,7 +3,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb.
# %% auto 0
-__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'mk_ctx', 'get_sizes', 'create_ctx', 'llms_txt2ctx']
+__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx',
+ 'get_sizes', 'create_ctx', 'llms_txt2ctx']
# %% ../nbs/01_core.ipynb
import re
@@ -13,6 +14,7 @@
from fastcore.xml import *
from fastcore.script import *
import httpx
+from urllib.parse import urlparse
# %% ../nbs/01_core.ipynb
def opt_re(s):
@@ -65,14 +67,40 @@ def parse_llms_file(txt):
# %% ../nbs/01_core.ipynb
from fastcore.xml import Sections,Project,Doc
+# %% ../nbs/01_core.ipynb
+def find_root_dir():
+    "Return the nearest directory, from the cwd upwards, that contains `settings.ini`, or `None` if there is none."
+    cwd = Path.cwd()
+    # `parents` ends at the filesystem root, so a project living at the root is found too
+    for path in (cwd, *cwd.parents):
+        if (path / 'settings.ini').exists(): return path
+    return None
+
+# %% ../nbs/01_core.ipynb
+def get_doc_content(url):
+    "Return the text for `url`, read from the project's local `_docs` copy when one exists, else fetched over HTTP."
+    root_dir = find_root_dir()
+    if root_dir:
+        config = Config(root_dir, 'settings.ini')
+        doc_host = config.get('doc_host')
+        if doc_host and url.startswith(doc_host):
+            # Map the URL path onto the docs tree under `_docs` in the project root
+            relative_path = urlparse(url).path.lstrip('/')
+            local_path = root_dir / '_docs' / relative_path
+            # `is_file`, not `exists`: a URL that maps to a directory must fall through to HTTP rather than fail on read
+            if local_path.is_file(): return local_path.read_text(encoding='utf-8')
+    # Not a local doc (or no local copy): fetch from the URL
+    return httpx.get(url).text
+
# %% ../nbs/01_core.ipynb
def _doc(kw):
"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs."
url = kw.pop('url')
+ txt = get_doc_content(url)
re_comment = re.compile('^$', flags=re.MULTILINE)
re_base64_img = re.compile(r']*src="data:image/[^"]*"[^>]*>')
- txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o) and not re_base64_img.search(o)]
- return Doc('\n'.join(txt), **kw)
+ txt = '\n'.join([o for o in txt.splitlines() if not re_comment.search(o) and not re_base64_img.search(o)])
+ return Doc(txt, **kw)
# %% ../nbs/01_core.ipynb
def _section(nm, items, n_workers=None):
diff --git a/nbs/01_core.ipynb b/nbs/01_core.ipynb
index be56b40..c0aefc1 100644
--- a/nbs/01_core.ipynb
+++ b/nbs/01_core.ipynb
@@ -49,7 +49,8 @@
"from fastcore.utils import *\n",
"from fastcore.xml import *\n",
"from fastcore.script import *\n",
- "import httpx"
+ "import httpx\n",
+ "from urllib.parse import urlparse"
]
},
{
@@ -673,6 +674,45 @@
"from fastcore.xml import Sections,Project,Doc"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "def find_root_dir():\n",
+ "    \"Return the nearest directory, from the cwd upwards, that contains `settings.ini`, or `None` if there is none.\"\n",
+ "    cwd = Path.cwd()\n",
+ "    # `parents` ends at the filesystem root, so a project living at the root is found too\n",
+ "    for path in (cwd, *cwd.parents):\n",
+ "        if (path / 'settings.ini').exists(): return path\n",
+ "    return None"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#|export\n",
+ "def get_doc_content(url):\n",
+ "    \"Return the text for `url`, read from the project's local `_docs` copy when one exists, else fetched over HTTP.\"\n",
+ "    root_dir = find_root_dir()\n",
+ "    if root_dir:\n",
+ "        config = Config(root_dir, 'settings.ini')\n",
+ "        doc_host = config.get('doc_host')\n",
+ "        if doc_host and url.startswith(doc_host):\n",
+ "            # Map the URL path onto the docs tree under `_docs` in the project root\n",
+ "            relative_path = urlparse(url).path.lstrip('/')\n",
+ "            local_path = root_dir / '_docs' / relative_path\n",
+ "            # `is_file`, not `exists`: a URL that maps to a directory must fall through to HTTP rather than fail on read\n",
+ "            if local_path.is_file(): return local_path.read_text(encoding='utf-8')\n",
+ "    # Not a local doc (or no local copy): fetch from the URL\n",
+ "    return httpx.get(url).text"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -683,10 +723,11 @@
"def _doc(kw):\n",
" \"Create a `Doc` FT object with the text retrieved from `url` as the child, and `kw` as attrs.\"\n",
" url = kw.pop('url')\n",
+ " txt = get_doc_content(url)\n",
" re_comment = re.compile('^$', flags=re.MULTILINE)\n",
" re_base64_img = re.compile(r']*src=\"data:image/[^\"]*\"[^>]*>')\n",
- " txt = [o for o in httpx.get(url).text.splitlines() if not re_comment.search(o) and not re_base64_img.search(o)]\n",
- " return Doc('\\n'.join(txt), **kw)"
+ " txt = '\\n'.join([o for o in txt.splitlines() if not re_comment.search(o) and not re_base64_img.search(o)])\n",
+ " return Doc(txt, **kw)"
]
},
{