From 612712c8b0a0b9932642e49b0f4dbbec9453355b Mon Sep 17 00:00:00 2001 From: Mufeed VH Date: Wed, 15 Jan 2025 17:53:37 +0530 Subject: [PATCH] Introduce `python-sdk` for code2prompt (#47) --- .gitignore | 172 +++++++++++++++++++++++ Cargo.toml | 5 + README.md | 32 ++++- pyproject.toml | 37 +++++ python-sdk/.gitignore | 171 +++++++++++++++++++++++ python-sdk/README.md | 126 +++++++++++++++++ python-sdk/__init__.py | 14 ++ python-sdk/code2prompt/__init__.py | 13 ++ python-sdk/code2prompt/code2prompt.py | 30 ++++ python-sdk/examples/basic_usage.py | 54 ++++++++ python-sdk/setup.py | 20 +++ src/lib.rs | 1 + src/python.rs | 191 ++++++++++++++++++++++++++ 13 files changed, 864 insertions(+), 2 deletions(-) create mode 100644 pyproject.toml create mode 100644 python-sdk/.gitignore create mode 100644 python-sdk/README.md create mode 100644 python-sdk/__init__.py create mode 100644 python-sdk/code2prompt/__init__.py create mode 100644 python-sdk/code2prompt/code2prompt.py create mode 100644 python-sdk/examples/basic_usage.py create mode 100644 python-sdk/setup.py create mode 100644 src/python.rs diff --git a/.gitignore b/.gitignore index a17e17b..c9d07a6 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,175 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# PyPI configuration file +.pypirc \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index b1e9531..b638347 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,10 @@ test = false bench = false path = "src/main.rs" +[lib] +name = "code2prompt" +crate-type = ["cdylib", "rlib"] + [dependencies] clap = { version = "4.0", features = ["derive"] } handlebars = "4.3" @@ -38,6 +42,7 @@ once_cell = "1.19.0" log = "0.4" env_logger = "0.11.3" arboard = "3.4.0" +pyo3 = { version = "0.23", features = ["extension-module", "abi3-py312", "generate-import-lib"] } [profile.release] lto = "thin" diff --git a/README.md b/README.md index 0282612..3f47659 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ - [Templates](#templates) - [User Defined Variables](#user-defined-variables) - [Tokenizers](#tokenizers) +- [Python SDK](#python-sdk) - [Contribution](#contribution) - [License](#license) - [Support The Author](#support-the-author) @@ -27,8 +28,9 @@ You can run this tool on the entire directory and it would generate a well-forma - Quickly generate LLM prompts from codebases of any size. - Customize prompt generation with Handlebars templates. (See the [default template](src/default_template.hbs)) -- Respects `.gitignore`. +- Respects `.gitignore` (can be disabled with `--no-ignore`). - Filter and exclude files using glob patterns. +- Control hidden file inclusion with `--hidden` flag. - Display the token count of the generated prompt. (See [Tokenizers](#tokenizers) for more details) - Optionally include Git diff output (staged files) in the generated prompt. - Automatically copy the generated prompt to the clipboard. 
@@ -138,7 +140,6 @@ Save the generated prompt to an output file: ```sh code2prompt path/to/codebase --output=output.txt ``` - Print output as JSON: ```sh @@ -181,6 +182,18 @@ Disable wrapping code inside markdown code blocks: code2prompt path/to/codebase --no-codeblock ``` +Include hidden files and directories: + +```sh +code2prompt path/to/codebase --hidden +``` + +Skip .gitignore rules: + +```sh +code2prompt path/to/codebase --no-ignore +``` + - Rewrite the code to another language. - Find bugs/security vulnerabilities. - Document the code. @@ -254,6 +267,21 @@ Tokenization is implemented using [`tiktoken-rs`](https://github.com/zurawiki/ti For more context on the different tokenizers, see the [OpenAI Cookbook](https://github.com/openai/openai-cookbook/blob/66b988407d8d13cad5060a881dc8c892141f2d5c/examples/How_to_count_tokens_with_tiktoken.ipynb) +## Python SDK + +code2prompt also provides Python bindings for seamless integration into Python applications. The Python SDK offers all the functionality of the CLI tool through an intuitive object-oriented interface. + +See [python-sdk/README.md](python-sdk/README.md) for detailed documentation and usage examples. + +Example usage: +```python +from code2prompt import CodePrompt + +prompt = CodePrompt("./my_project", include_patterns=["*.py"]) +result = prompt.generate(encoding="cl100k") +print(result["prompt"]) +``` + ## How is it useful? `code2prompt` makes it easy to generate prompts for LLMs from your codebase. It traverses the directory, builds a tree structure, and collects information about each file. You can customize the prompt generation using Handlebars templates. The generated prompt is automatically copied to your clipboard and can also be saved to an output file. `code2prompt` helps streamline the process of creating LLM prompts for code analysis, generation, and other tasks. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f446979 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["maturin>=1.4,<2.0"] +build-backend = "maturin" + +[project] +name = "code2prompt" +version = "2.0.0" +description = "Python bindings for code2prompt - A tool to generate LLM prompts from codebases" +authors = [ + {name = "Mufeed VH", email = "mufeed@lyminal.space"}, +] +readme = "README.md" +requires-python = ">=3.12" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Rust", + "Topic :: Software Development :: Libraries :: Python Modules", +] + +[project.urls] +Homepage = "https://github.com/mufeedvh/code2prompt" +Documentation = "https://github.com/mufeedvh/code2prompt" +Repository = "https://github.com/mufeedvh/code2prompt" + +[tool.maturin] +python-source = "python-sdk" +features = ["pyo3/extension-module"] +module-name = "code2prompt.code2prompt" \ No newline at end of file diff --git a/python-sdk/.gitignore b/python-sdk/.gitignore new file mode 100644 index 0000000..7dbdfee --- /dev/null +++ b/python-sdk/.gitignore @@ -0,0 +1,171 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to 
inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# PyPI configuration file +.pypirc \ No newline at end of file diff --git a/python-sdk/README.md b/python-sdk/README.md new file mode 100644 index 0000000..b035bcd --- /dev/null +++ b/python-sdk/README.md @@ -0,0 +1,126 @@ +# code2prompt Python SDK + +Python bindings for [code2prompt](https://github.com/mufeedvh/code2prompt) - A tool to generate LLM prompts from codebases. + +## Installation + +### Local Development Installation + +1. Clone the repository: +```bash +git clone https://github.com/mufeedvh/code2prompt.git +cd code2prompt +``` + +2. Install development dependencies: +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install maturin pytest +``` + +3. 
Build and install the package locally: +```bash +cd code2prompt/ # root repo directory +maturin develop -r +``` + +### Running Examples + +Try out the example script: +```bash +python examples/basic_usage.py +``` + +## Usage + +```python +from code2prompt import CodePrompt + +# Create a new CodePrompt instance +prompt = CodePrompt( + path="./my_project", + include_patterns=["*.py", "*.rs"], # Optional: Only include Python and Rust files + exclude_patterns=["**/tests/*"], # Optional: Exclude test files + line_numbers=True, # Optional: Add line numbers to code +) + +# Generate a prompt +result = prompt.generate( + template=None, # Optional: Custom Handlebars template + encoding="cl100k" # Optional: Token encoding (for token counting) +) + +# Access the generated prompt and metadata +print(f"Generated prompt: {result['prompt']}") +print(f"Token count: {result['token_count']}") +print(f"Model info: {result['model_info']}") + +# Git operations +git_diff = prompt.get_git_diff() +branch_diff = prompt.get_git_diff_between_branches("main", "feature") +git_log = prompt.get_git_log("main", "feature") +``` + +## API Reference + +### `CodePrompt` + +Main class for generating prompts from code. 
+ +#### Constructor + +```python +CodePrompt( + path: str, + include_patterns: List[str] = [], + exclude_patterns: List[str] = [], + include_priority: bool = False, + line_numbers: bool = False, + relative_paths: bool = False, + exclude_from_tree: bool = False, + no_codeblock: bool = False, + follow_symlinks: bool = False +) +``` + +- `path`: Path to the codebase directory +- `include_patterns`: List of glob patterns for files to include +- `exclude_patterns`: List of glob patterns for files to exclude +- `include_priority`: Give priority to include patterns in case of conflicts +- `line_numbers`: Add line numbers to code blocks +- `relative_paths`: Use relative paths instead of absolute +- `exclude_from_tree`: Exclude files from source tree based on patterns +- `no_codeblock`: Don't wrap code in markdown code blocks +- `follow_symlinks`: Follow symbolic links when traversing directories + +#### Methods + +##### `generate(template: Optional[str] = None, encoding: Optional[str] = None) -> Dict` + +Generate a prompt from the codebase. + +- `template`: Optional custom Handlebars template +- `encoding`: Optional token encoding (cl100k, p50k, p50k_edit, r50k, gpt2) + +Returns a dictionary containing: +- `prompt`: The generated prompt +- `directory`: The processed directory path +- `token_count`: Number of tokens (`0` if no encoding was specified) +- `model_info`: Information about the model (present only if encoding was specified) + +##### `get_git_diff() -> str` + +Get git diff for the repository. + +##### `get_git_diff_between_branches(branch1: str, branch2: str) -> str` + +Get git diff between two branches. + +##### `get_git_log(branch1: str, branch2: str) -> str` + +Get git log between two branches. + +## License + +MIT License - see LICENSE file for details. 
\ No newline at end of file diff --git a/python-sdk/__init__.py b/python-sdk/__init__.py new file mode 100644 index 0000000..dc6246a --- /dev/null +++ b/python-sdk/__init__.py @@ -0,0 +1,14 @@ +""" +code2prompt is a Python library for generating LLM prompts from codebases. + +It provides a simple interface to the Rust-based code2prompt library, allowing you to: +- Generate prompts from code directories +- Filter files using glob patterns +- Get git diffs and logs +- Count tokens for different models +""" + +from .code2prompt import CodePrompt + +__version__ = "2.0.0" +__all__ = ["CodePrompt"] \ No newline at end of file diff --git a/python-sdk/code2prompt/__init__.py b/python-sdk/code2prompt/__init__.py new file mode 100644 index 0000000..c163b0a --- /dev/null +++ b/python-sdk/code2prompt/__init__.py @@ -0,0 +1,13 @@ +""" +code2prompt is a Python library for generating LLM prompts from codebases. + +It provides a simple interface to the Rust-based code2prompt library, allowing you to: +- Generate prompts from code directories +- Filter files using glob patterns +- Get git diffs and logs +- Count tokens for different models +""" + +from .code2prompt import CodePrompt + +__all__ = ['CodePrompt'] \ No newline at end of file diff --git a/python-sdk/code2prompt/code2prompt.py b/python-sdk/code2prompt/code2prompt.py new file mode 100644 index 0000000..f172821 --- /dev/null +++ b/python-sdk/code2prompt/code2prompt.py @@ -0,0 +1,30 @@ +# Import the Rust module +from code2prompt import CodePrompt as RustCodePrompt + +class CodePrompt: + def __init__(self, path, include_patterns=None, exclude_patterns=None, + include_priority=False, line_numbers=False, relative_paths=False, + exclude_from_tree=False, no_codeblock=False, follow_symlinks=False): + self._inner = RustCodePrompt( + path, + include_patterns or [], + exclude_patterns or [], + include_priority, + line_numbers, + relative_paths, + exclude_from_tree, + no_codeblock, + follow_symlinks + ) + + def generate(self, 
template=None, encoding=None): + return self._inner.generate(template, encoding) + + def get_git_diff(self): + return self._inner.get_git_diff() + + def get_git_diff_between_branches(self, branch1, branch2): + return self._inner.get_git_diff_between_branches(branch1, branch2) + + def get_git_log(self, branch1, branch2): + return self._inner.get_git_log(branch1, branch2) \ No newline at end of file diff --git a/python-sdk/examples/basic_usage.py b/python-sdk/examples/basic_usage.py new file mode 100644 index 0000000..aaf36ed --- /dev/null +++ b/python-sdk/examples/basic_usage.py @@ -0,0 +1,54 @@ +"""Example usage of the code2prompt Python SDK.""" + +from code2prompt import CodePrompt + +def main(): + # Create a CodePrompt instance for the current directory + prompt = CodePrompt( + path=".", + include_patterns=["*.py", "*.rs"], # Only include Python and Rust files + exclude_patterns=["**/tests/*"], # Exclude test files + line_numbers=True # Add line numbers to code + ) + + # Generate a prompt with token counting + result = prompt.generate(encoding="cl100k") + + # Print the results + print(f"Generated prompt for directory: {result['directory']}") + print(f"Token count: {result['token_count']}") + print(f"Model info: {result['model_info']}") + + # Print the first 1000 characters of the prompt, or less if shorter + print("\nPrompt preview:") + prompt_text = result['prompt'] + if prompt_text: + preview_length = min(1000, len(prompt_text)) + print(f"{prompt_text[:preview_length]}...") + else: + print("No prompt generated") + + # Git operations example + print("\nGit operations:") + + try: + # Get current changes + diff = prompt.get_git_diff() + print("\nCurrent git diff:") + print(diff[:200] + "..." if diff else "No changes") + + # Get diff between branches + branch_diff = prompt.get_git_diff_between_branches("main", "develop") + print("\nDiff between main and develop:") + print(branch_diff[:200] + "..." 
if branch_diff else "No differences") + + # Get git log + git_log = prompt.get_git_log("main", "develop") + print("\nGit log between main and develop:") + print(git_log[:200] + "..." if git_log else "No log entries") + + except Exception as e: + print(f"Git operations failed: {e}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/python-sdk/setup.py b/python-sdk/setup.py new file mode 100644 index 0000000..46a0595 --- /dev/null +++ b/python-sdk/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup, find_packages + +setup( + name="code2prompt", + version="2.0.0", + packages=find_packages(), + install_requires=[], + author="Mufeed VH", + author_email="contact@mufeedvh.com", + description="Python bindings for code2prompt", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + url="https://github.com/mufeedvh/code2prompt", + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires=">=3.7", +) \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index fc6a06e..629626d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod filter; pub mod git; pub mod path; +pub mod python; pub mod template; pub mod token; diff --git a/src/python.rs b/src/python.rs new file mode 100644 index 0000000..71c7087 --- /dev/null +++ b/src/python.rs @@ -0,0 +1,191 @@ +use pyo3::prelude::*; +use pyo3::types::PyDict; +use std::path::PathBuf; + +use crate::{ + git::{get_git_diff, get_git_diff_between_branches, get_git_log}, + path::traverse_directory, + template::{handlebars_setup, render_template}, + token::{get_model_info, get_tokenizer}, +}; + +/// Python module for code2prompt +#[pymodule] +fn code2prompt(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} + +/// Main class for generating prompts from code +#[pyclass] +struct CodePrompt { + path: PathBuf, + 
include_patterns: Vec, + exclude_patterns: Vec, + include_priority: bool, + line_numbers: bool, + relative_paths: bool, + exclude_from_tree: bool, + no_codeblock: bool, + follow_symlinks: bool, + hidden: bool, + no_ignore: bool, +} + +#[pymethods] +impl CodePrompt { + /// Create a new CodePrompt instance + /// + /// Args: + /// path (str): Path to the codebase directory + /// include_patterns (List[str], optional): Patterns to include. Defaults to []. + /// exclude_patterns (List[str], optional): Patterns to exclude. Defaults to []. + /// include_priority (bool, optional): Give priority to include patterns. Defaults to False. + /// line_numbers (bool, optional): Add line numbers to code. Defaults to False. + /// relative_paths (bool, optional): Use relative paths. Defaults to False. + /// exclude_from_tree (bool, optional): Exclude files from tree based on patterns. Defaults to False. + /// no_codeblock (bool, optional): Don't wrap code in markdown blocks. Defaults to False. + /// follow_symlinks (bool, optional): Follow symbolic links. Defaults to False. + /// hidden (bool, optional): Include hidden directories and files. Defaults to False. + /// no_ignore (bool, optional): Skip .gitignore rules. Defaults to False. 
+ #[new] + #[pyo3(signature = ( + path, + include_patterns = vec![], + exclude_patterns = vec![], + include_priority = false, + line_numbers = false, + relative_paths = false, + exclude_from_tree = false, + no_codeblock = false, + follow_symlinks = false, + hidden = false, + no_ignore = false + ))] + fn new( + path: String, + include_patterns: Vec, + exclude_patterns: Vec, + include_priority: bool, + line_numbers: bool, + relative_paths: bool, + exclude_from_tree: bool, + no_codeblock: bool, + follow_symlinks: bool, + hidden: bool, + no_ignore: bool, + ) -> Self { + Self { + path: PathBuf::from(path), + include_patterns, + exclude_patterns, + include_priority, + line_numbers, + relative_paths, + exclude_from_tree, + no_codeblock, + follow_symlinks, + hidden, + no_ignore, + } + } + + /// Generate a prompt from the codebase + /// + /// Args: + /// template (str, optional): Custom Handlebars template. Defaults to None. + /// encoding (str, optional): Token encoding used for token counting. Defaults to None, in which case no tokens are counted. 
+ /// + /// Returns: + /// dict: Dictionary with the keys "prompt" (rendered text), "directory" (the configured path), "token_count" (0 when no encoding is given) and, only when an encoding is given, "model_info" + #[pyo3(signature = (template=None, encoding=None))] + fn generate(&self, template: Option, encoding: Option) -> PyResult { + Python::with_gil(|py| { + // Traverse the directory using the options stored on this instance + let (tree, files) = traverse_directory( + &self.path, + &self.include_patterns, + &self.exclude_patterns, + self.include_priority, + self.line_numbers, + self.relative_paths, + self.exclude_from_tree, + self.no_codeblock, + self.follow_symlinks, + self.hidden, + self.no_ignore, + ) + .map_err(|e| PyErr::new::(e.to_string()))?; + + // Use the caller's template, or fall back to the bundled default_template.hbs + let template_content = template.unwrap_or_else(|| include_str!("default_template.hbs").to_string()); + let handlebars = handlebars_setup(&template_content, "template") + .map_err(|e| PyErr::new::(e.to_string()))?; + + // Data exposed to the template + let data = serde_json::json!({ + "absolute_code_path": self.path.display().to_string(), + "source_tree": tree, + "files": files, + }); + + // Render template + let rendered = render_template(&handlebars, "template", &data) + .map_err(|e| PyErr::new::(e.to_string()))?; + + // Count tokens only when an encoding was requested; otherwise report 0 + let token_count = if let Some(enc) = &encoding { + let bpe = get_tokenizer(&Some(enc.to_string())); + bpe.encode_with_special_tokens(&rendered).len() + } else { + 0 + }; + + // Build the result dict; "model_info" is set only when an encoding was requested + let result = PyDict::new(py); + result.set_item("prompt", rendered)?; + result.set_item("directory", self.path.display().to_string())?; + result.set_item("token_count", token_count)?; + if let Some(enc) = &encoding { + result.set_item("model_info", get_model_info(&Some(enc.to_string())))?; + } + + Ok(result.into()) + }) + } + + /// Get git diff for the repository + /// + /// Returns: + /// str: Git diff output + fn get_git_diff(&self) -> PyResult { + get_git_diff(&self.path) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Get git diff between two branches + /// + /// Args: + /// branch1 (str): First branch name 
+ /// branch2 (str): Second branch name + /// + /// Returns: + /// str: Git diff output + fn get_git_diff_between_branches(&self, branch1: &str, branch2: &str) -> PyResult { + get_git_diff_between_branches(&self.path, branch1, branch2) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Get git log between two branches + /// + /// Args: + /// branch1 (str): First branch name + /// branch2 (str): Second branch name + /// + /// Returns: + /// str: Git log output + fn get_git_log(&self, branch1: &str, branch2: &str) -> PyResult { + get_git_log(&self.path, branch1, branch2) + .map_err(|e| PyErr::new::(e.to_string())) + } +}