diff --git a/libs/agentc_core/agentc_core/catalog/directory.py b/libs/agentc_core/agentc_core/catalog/directory.py
index da7c4969..c33cc5aa 100644
--- a/libs/agentc_core/agentc_core/catalog/directory.py
+++ b/libs/agentc_core/agentc_core/catalog/directory.py
@@ -1,5 +1,6 @@
 import fnmatch
 import logging
+import os
 import pathlib
 import typing
 
@@ -13,7 +14,7 @@ class ScanDirectoryOpts(typing.TypedDict):
 
 
 def scan_directory(
-    root_dir: str, wanted_patterns: typing.Iterable[str], opts: ScanDirectoryOpts = None
+    root_dir: str, target_dir: str, wanted_patterns: typing.Iterable[str], opts: ScanDirectoryOpts = None
 ) -> typing.Iterable[pathlib.Path]:
     """
     Find file paths in a directory tree which match wanted glob patterns, while also handling any ignore
@@ -21,13 +22,32 @@ def scan_directory(
     """
 
     ignore_file_parsers = []
-    if opts:
-        for ignore_file_name in opts["ignore_file_names"]:
-            ignore_file_path = pathlib.Path(root_dir) / ignore_file_name
-            if ignore_file_path.exists() and opts["ignore_file_parser_factory"]:
-                ignore_file_parsers.append(opts["ignore_file_parser_factory"](ignore_file_path.absolute()))
+    # target_dir is resolved against root_dir; only the resulting directory is searched for files.
+    user_target_dir = os.path.abspath(os.path.join(root_dir, target_dir))
+
+    if opts and opts["ignore_file_parser_factory"]:
+        # Ignore files may live in root_dir, in the target directory, or in any directory between the two.
+        # Build that chain from path components: a substring test on the path strings would also accept
+        # similarly named siblings (e.g. "tools" when the target is "tools_old").
+        root_path = pathlib.Path(os.path.abspath(root_dir))
+        target_path = pathlib.Path(user_target_dir)
+        try:
+            sub_dirs = target_path.relative_to(root_path).parts
+        except ValueError:
+            # The target is not inside root_dir, so only its own ignore files are considered.
+            root_path, sub_dirs = target_path, ()
+
+        dirs_to_check = [root_path]
+        for sub_dir in sub_dirs:
+            dirs_to_check.append(dirs_to_check[-1] / sub_dir)
+
+        for cur_dir in dirs_to_check:
+            for ignore_file_name in opts["ignore_file_names"]:
+                ignore_file_path = cur_dir / ignore_file_name
+                if ignore_file_path.is_file():
+                    ignore_file_parsers.append(opts["ignore_file_parser_factory"](ignore_file_path))
 
-    for path in pathlib.Path(root_dir).rglob("*"):
+    for path in pathlib.Path(user_target_dir).rglob("*"):
         if len(ignore_file_parsers) > 0 and any(ignore_file_parser(path) for ignore_file_parser in ignore_file_parsers):
             logger.debug(f"Ignoring file {path.absolute()}.")
             continue
@@ -42,5 +62,5 @@ def scan_directory(
     import sys
 
     # Ex: python3 agentc_core/catalog/directory.py "*.py" "*.md"
-    for x in scan_directory("", sys.argv[1:]):
+    for x in scan_directory("", "", sys.argv[1:]):
         print(x)
diff --git a/libs/agentc_core/agentc_core/catalog/index.py b/libs/agentc_core/agentc_core/catalog/index.py
index b160a882..a1d0e293 100644
--- a/libs/agentc_core/agentc_core/catalog/index.py
+++ b/libs/agentc_core/agentc_core/catalog/index.py
@@ -1,6 +1,7 @@
 import dataclasses
 import fnmatch
 import logging
+import os
 import tqdm
 import typing
 
@@ -114,7 +115,8 @@ def index_catalog_start(
     else:
         raise ValueError(f"Unknown kind: {kind}")
     for source_dir in source_dirs:
-        source_files += scan_directory(source_dir, source_globs, opts=scan_directory_opts)
+        source_files += scan_directory(os.getcwd(), source_dir, source_globs, opts=scan_directory_opts)
+
     all_errs = []
     all_descriptors = []
     source_iterable = tqdm.tqdm(source_files) if print_progress else source_files
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/.agentcignore b/libs/agentc_core/tests/catalog/resources/scan_files/.agentcignore
new file mode 100644
index 00000000..56fc557e
--- /dev/null
+++ b/libs/agentc_core/tests/catalog/resources/scan_files/.agentcignore
@@ -0,0 +1 @@
+prompts/prompt2.prompt
\ No newline at end of file
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/prompts/.agentcignore b/libs/agentc_core/tests/catalog/resources/scan_files/prompts/.agentcignore
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/prompts/prompt1.jinja b/libs/agentc_core/tests/catalog/resources/scan_files/prompts/prompt1.jinja
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/prompts/prompt2.prompt b/libs/agentc_core/tests/catalog/resources/scan_files/prompts/prompt2.prompt
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/tool1.py b/libs/agentc_core/tests/catalog/resources/scan_files/tool1.py
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/tools/.agentcignore b/libs/agentc_core/tests/catalog/resources/scan_files/tools/.agentcignore
new file mode 100644
index 00000000..e1e7e743
--- /dev/null
+++ b/libs/agentc_core/tests/catalog/resources/scan_files/tools/.agentcignore
@@ -0,0 +1,2 @@
+prompt1.jinja
+tool2.yaml
\ No newline at end of file
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/tools/tool2.yaml b/libs/agentc_core/tests/catalog/resources/scan_files/tools/tool2.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/tools/tool3.sqlpp b/libs/agentc_core/tests/catalog/resources/scan_files/tools/tool3.sqlpp
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/resources/scan_files/tools/tool4.py b/libs/agentc_core/tests/catalog/resources/scan_files/tools/tool4.py
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/agentc_core/tests/catalog/test_scan_dir.py b/libs/agentc_core/tests/catalog/test_scan_dir.py
new file mode 100644
index 00000000..e4e9f59e
--- /dev/null
+++ b/libs/agentc_core/tests/catalog/test_scan_dir.py
@@ -0,0 +1,33 @@
+import os
+import pathlib
+import pytest
+
+from agentc_core.catalog.directory import scan_directory
+from agentc_core.defaults import DEFAULT_SCAN_DIRECTORY_OPTS
+from agentc_core.indexer.indexer import AllIndexers
+
+
+@pytest.mark.smoke
+def test_scan_dir_tools():
+    root_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "scan_files")
+    source_globs = [i.glob_pattern for i in AllIndexers if all(k.is_tool() for k in i.kind)]
+    output = list(scan_directory(root_dir, "tools", source_globs, opts=DEFAULT_SCAN_DIRECTORY_OPTS))
+
+    # One assert per expectation, so that a failure names the offending file.
+    assert pathlib.Path(root_dir, "tools", "tool3.sqlpp") in output
+    assert pathlib.Path(root_dir, "tools", "tool4.py") in output
+    # tool2.yaml is listed in tools/.agentcignore.
+    assert pathlib.Path(root_dir, "tools", "tool2.yaml") not in output
+    # tool1.py lives outside of the scanned "tools" directory.
+    assert pathlib.Path(root_dir, "tool1.py") not in output
+
+
+@pytest.mark.smoke
+def test_scan_dir_prompts():
+    root_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "scan_files")
+    source_globs = [i.glob_pattern for i in AllIndexers if all(k.is_prompt() for k in i.kind)]
+    output = list(scan_directory(root_dir, "prompts", source_globs, opts=DEFAULT_SCAN_DIRECTORY_OPTS))
+
+    assert pathlib.Path(root_dir, "prompts", "prompt1.jinja") in output
+    # prompts/prompt2.prompt is listed in the .agentcignore at the root of scan_files.
+    assert pathlib.Path(root_dir, "prompts", "prompt2.prompt") not in output