From c60c49b06fa990bc50fc8df6d3b7c99430806faa Mon Sep 17 00:00:00 2001 From: Mamerto Fabian Jr Date: Thu, 19 Dec 2024 15:41:10 +0800 Subject: [PATCH] Dev (#7) * Cross platform (#6) * Update README.md (#5) * Update README.md * WIP: Implement cross-platform search interface and refactor server functionality - Introduced a new search interface in `search_interface.py` that supports platform-specific search implementations for macOS, Linux, and Windows. - Refactored `server.py` to utilize the new search interface, allowing for a unified search experience across different operating systems. - Updated documentation to reflect changes in search capabilities and platform-specific features. * Add platform-specific search implementation and enhance server functionality - Introduced `platform_search.py` to define platform-specific search parameters for macOS, Linux, and Windows, utilizing Pydantic models for validation. - Updated `server.py` to integrate the new unified search query model, allowing for seamless handling of search commands across different operating systems. - Enhanced tool documentation to provide platform-specific search syntax and capabilities, improving user guidance. - Refactored search command building logic to accommodate platform-specific parameters, ensuring accurate command execution for each OS. * fix issues in Linux, update README * clean up README * version bump * Update version to 0.2.0 and add .aider* to .gitignore * Enhance Windows search syntax documentation and improve parameter handling in server.py - Updated the Windows search syntax guide to include detailed features, operators, functions, and examples for better user understanding. - Refactored parameter handling in the server to improve validation and error handling for 'base' and 'windows_params', ensuring they can be passed as either strings or dictionaries. - Enhanced overall code readability and maintainability by restructuring input parsing logic. 
* Update version to 0.2.1 and change logging level to WARNING in server.py - Bump version in pyproject.toml to 0.2.1 for the next release. - Adjust logging level in server.py from INFO to WARNING to reduce verbosity and focus on important messages. --- .gitignore | 3 + README.md | 187 +++++------ SEARCH_SYNTAX.md | 262 +++++++++++++++ pyproject.toml | 2 +- .../platform_search.py | 173 ++++++++++ .../search_interface.py | 227 +++++++++++++ src/mcp_server_everything_search/server.py | 310 ++++++++++-------- uv.lock | 2 +- 8 files changed, 925 insertions(+), 241 deletions(-) create mode 100644 SEARCH_SYNTAX.md create mode 100644 src/mcp_server_everything_search/platform_search.py create mode 100644 src/mcp_server_everything_search/search_interface.py diff --git a/.gitignore b/.gitignore index 9041878..bf33ddb 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ env/ htmlcov/ .tox/ .mypy_cache/ + +.history +.aider* diff --git a/README.md b/README.md index 6b75fe2..023354a 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,27 @@ # Everything Search MCP Server -An MCP server that provides fast file searching capabilities using the [Everything](https://www.voidtools.com/) SDK. +An MCP server that provides fast file searching capabilities across Windows, macOS, and Linux. On Windows, it uses the [Everything](https://www.voidtools.com/) SDK. On macOS, it uses the built-in `mdfind` command. On Linux, it uses the `locate`/`plocate` command. ## Tools ### search -Search for files and folders using Everything SDK. +Search for files and folders across your system. The search capabilities and syntax support vary by platform: + +- Windows: Full Everything SDK features (see syntax guide below) +- macOS: Basic filename and content search using Spotlight database +- Linux: Basic filename search using locate database Parameters: -- `query` (required): Search query string. Supports wildcards (* and ?) and more. See the search syntax guide for more details. 
+ +- `query` (required): Search query string. See platform-specific notes below. - `max_results` (optional): Maximum number of results to return (default: 100, max: 1000) - `match_path` (optional): Match against full path instead of filename only (default: false) - `match_case` (optional): Enable case-sensitive search (default: false) - `match_whole_word` (optional): Match whole words only (default: false) - `match_regex` (optional): Enable regex search (default: false) - `sort_by` (optional): Sort order for results (default: 1). Available options: + ``` - 1: Sort by filename (A to Z) - 2: Sort by filename (Z to A) @@ -32,6 +38,7 @@ Parameters: ``` Examples: + ```json { "query": "*.py", @@ -48,119 +55,45 @@ Examples: ``` Response includes: + - File/folder path - File size in bytes - Last modified date ### Search Syntax Guide -
-Advanced Search Queries - -### Basic Operators -- `space`: AND operator -- `|`: OR operator -- `!`: NOT operator -- `< >`: Grouping -- `" "`: Search for an exact phrase - -### Wildcards -- `*`: Matches zero or more characters -- `?`: Matches exactly one character - -Note: Wildcards match the whole filename by default. Disable Match whole filename to match wildcards anywhere. - -### Functions - -#### Size and Count -- `size:[kb|mb|gb]`: Search by file size -- `count:`: Limit number of results -- `childcount:`: Folders with specific number of children -- `childfilecount:`: Folders with specific number of files -- `childfoldercount:`: Folders with specific number of subfolders -- `len:`: Match filename length - -#### Dates -- `datemodified:, dm:`: Modified date -- `dateaccessed:, da:`: Access date -- `datecreated:, dc:`: Creation date -- `daterun:, dr:`: Last run date -- `recentchange:, rc:`: Recently changed date - -Date formats: YYYY[-MM[-DD[Thh[:mm[:ss[.sss]]]]]] or today, yesterday, lastweek, etc. - -#### File Attributes and Types -- `attrib:, attributes:`: Search by file attributes (A:Archive, H:Hidden, S:System, etc.) 
-- `type:`: Search by file type -- `ext:`: Search by semicolon-separated extensions - -#### Path and Name -- `path:`: Search in specific path -- `parent:, infolder:, nosubfolders:`: Search in path excluding subfolders -- `startwith:`: Files starting with text -- `endwith:`: Files ending with text -- `child:`: Folders containing specific child -- `depth:, parents:`: Files at specific folder depth -- `root`: Files with no parent folder -- `shell:`: Search in known shell folders - -#### Duplicates and Lists -- `dupe, namepartdupe, attribdupe, dadupe, dcdupe, dmdupe, sizedupe`: Find duplicates -- `filelist:`: Search pipe-separated (|) file list -- `filelistfilename:`: Search files from list file -- `frn:`: Search by File Reference Numbers -- `fsi:`: Search by file system index -- `empty`: Find empty folders - -### Function Syntax - -- `function:value`: Equal to value -- `function:<=value`: Less than or equal -- `function:value`: Greater than -- `function:>=value`: Greater than or equal -- `function:start..end`: Range of values -- `function:start-end`: Range of values - -### Modifiers - -- `case:, nocase:: Enable/disable case sensitivity -- `file:, folder:: Match only files or folders -- `path:, nopath:: Match full path or filename only -- `regex:, noregex:: Enable/disable regex -- `wfn:, nowfn:: Match whole filename or anywhere -- `wholeword:, ww:: Match whole words only -- `wildcards:, nowildcards:: Enable/disable wildcards - -### Examples - -1. Find Python files modified today: - `ext:py datemodified:today` - -2. Find large video files: - `ext:mp4|mkv|avi size:>1gb` - -3. Find files in specific folder: - `path:C:\Projects *.js` -
+For detailed information about the search syntax supported on each platform (Windows, macOS, and Linux), please see [SEARCH_SYNTAX.md](SEARCH_SYNTAX.md). ## Prerequisites -1. Windows operating system (required - this server only works on Windows) -2. [Everything](https://www.voidtools.com/) search utility: +### Windows + +1. [Everything](https://www.voidtools.com/) search utility: - Download and install from https://www.voidtools.com/ - **Make sure the Everything service is running** -3. Everything SDK: +2. Everything SDK: - Download from https://www.voidtools.com/support/everything/sdk/ - Extract the SDK files to a location on your system +### Linux + +1. Install and initialize the `locate` or `plocate` command: + - Ubuntu/Debian: `sudo apt-get install plocate` or `sudo apt-get install mlocate` + - Fedora: `sudo dnf install mlocate` +2. After installation, update the database: + - For plocate: `sudo updatedb` + - For mlocate: `sudo /etc/cron.daily/mlocate` + +### macOS + +No additional setup required. The server uses the built-in `mdfind` command. + ## Installation ### Using uv (recommended) When using [`uv`](https://docs.astral.sh/uv/) no specific installation is needed. We will -use [`uvx`](https://docs.astral.sh/uv/guides/tools/) to directly run *mcp-server-everything-search*. +use [`uvx`](https://docs.astral.sh/uv/guides/tools/) to directly run _mcp-server-everything-search_. ### Using PIP @@ -178,19 +111,26 @@ python -m mcp_server_everything_search ## Configuration +### Windows + The server requires the Everything SDK DLL to be available: Environment variable: - ``` - EVERYTHING_SDK_PATH=path\to\Everything-SDK\dll\Everything64.dll - ``` + +``` +EVERYTHING_SDK_PATH=path\to\Everything-SDK\dll\Everything64.dll +``` + +### Linux and macOS + +No additional configuration required. ### Usage with Claude Desktop -Add this to your `claude_desktop_config.json`: +Add one of these configurations to your `claude_desktop_config.json` based on your platform:
-Using uvx +Windows (using uvx) ```json "mcpServers": { @@ -203,10 +143,11 @@ Add this to your `claude_desktop_config.json`: } } ``` +
-Using pip installation +Windows (using pip installation) ```json "mcpServers": { @@ -219,6 +160,32 @@ Add this to your `claude_desktop_config.json`: } } ``` + +
+ +
+Linux and macOS + +```json +"mcpServers": { + "everything-search": { + "command": "uvx", + "args": ["mcp-server-everything-search"] + } +} +``` + +Or if using pip installation: + +```json +"mcpServers": { + "everything-search": { + "command": "python", + "args": ["-m", "mcp_server_everything_search"] + } +} +``` +
## Debugging @@ -237,7 +204,19 @@ cd mcp-everything-search/src/mcp_server_everything_search npx @modelcontextprotocol/inspector uv run mcp-server-everything-search ``` -Using PowerShell, running `Get-Content -Path "$env:APPDATA\Claude\logs\mcp*.log" -Tail 20 -Wait` will show the logs from the server and may help you debug any issues. +To view server logs: + +Linux/macOS: + +```bash +tail -f ~/.config/Claude/logs/mcp*.log +``` + +Windows (PowerShell): + +```powershell +Get-Content -Path "$env:APPDATA\Claude\logs\mcp*.log" -Tail 20 -Wait +``` ## Development diff --git a/SEARCH_SYNTAX.md b/SEARCH_SYNTAX.md new file mode 100644 index 0000000..4161808 --- /dev/null +++ b/SEARCH_SYNTAX.md @@ -0,0 +1,262 @@ +# Search Syntax Guide + +## Windows Search (Everything SDK) + +The following advanced search features are only available on Windows when using the Everything SDK: + +### Basic Operators + +- `space`: AND operator +- `|`: OR operator +- `!`: NOT operator +- `< >`: Grouping +- `" "`: Search for an exact phrase + +### Wildcards + +- `*`: Matches zero or more characters +- `?`: Matches exactly one character + +Note: Wildcards match the whole filename by default. Disable Match whole filename to match wildcards anywhere. + +### Functions + +#### Size and Count + +- `size:[kb|mb|gb]`: Search by file size +- `count:`: Limit number of results +- `childcount:`: Folders with specific number of children +- `childfilecount:`: Folders with specific number of files +- `childfoldercount:`: Folders with specific number of subfolders +- `len:`: Match filename length + +#### Dates + +- `datemodified:, dm:`: Modified date +- `dateaccessed:, da:`: Access date +- `datecreated:, dc:`: Creation date +- `daterun:, dr:`: Last run date +- `recentchange:, rc:`: Recently changed date + +Date formats: YYYY[-MM[-DD[Thh[:mm[:ss[.sss]]]]]] or today, yesterday, lastweek, etc. + +#### File Attributes and Types + +- `attrib:, attributes:`: Search by file attributes (A:Archive, H:Hidden, S:System, etc.) 
+- `type:`: Search by file type +- `ext:`: Search by semicolon-separated extensions + +#### Path and Name + +- `path:`: Search in specific path +- `parent:, infolder:, nosubfolders:`: Search in path excluding subfolders +- `startwith:`: Files starting with text +- `endwith:`: Files ending with text +- `child:`: Folders containing specific child +- `depth:, parents:`: Files at specific folder depth +- `root`: Files with no parent folder +- `shell:`: Search in known shell folders + +#### Duplicates and Lists + +- `dupe, namepartdupe, attribdupe, dadupe, dcdupe, dmdupe, sizedupe`: Find duplicates +- `filelist:`: Search pipe-separated (|) file list +- `filelistfilename:`: Search files from list file +- `frn:`: Search by File Reference Numbers +- `fsi:`: Search by file system index +- `empty`: Find empty folders + +### Function Syntax + +- `function:value`: Equal to value +- `function:<=value`: Less than or equal +- `function:<value`: Less than +- `function:=value`: Equal to +- `function:>value`: Greater than +- `function:>=value`: Greater than or equal +- `function:start..end`: Range of values +- `function:start-end`: Range of values + +### Modifiers + +- `case:, nocase:`: Enable/disable case sensitivity +- `file:, folder:`: Match only files or folders +- `path:, nopath:`: Match full path or filename only +- `regex:, noregex:`: Enable/disable regex +- `wfn:, nowfn:`: Match whole filename or anywhere +- `wholeword:, ww:`: Match whole words only +- `wildcards:, nowildcards:`: Enable/disable wildcards + +### Examples + +1. Find Python files modified today: + `ext:py datemodified:today` + +2. Find large video files: + `ext:mp4|mkv|avi size:>1gb` + +3. Find files in specific folder: + `path:C:\Projects *.js` + +## macOS Search (mdfind) + +macOS uses Spotlight's metadata search capabilities through the `mdfind` command. 
The following features are supported: + +### Command Options + +- `-live`: Provides live updates to search results as files change +- `-count`: Show only the number of matches +- `-onlyin directory`: Limit search to specific directory +- `-literal`: Treat query as literal text without interpretation +- `-interpret`: Interpret query as if typed in Spotlight menu + +### Basic Search + +- Simple text search looks for matches in any metadata attribute +- Wildcards (`*`) are supported in search strings +- Multiple words are treated as AND conditions +- Whitespace is significant in queries +- Use parentheses () to group expressions + +### Search Operators + +- `|` (OR): Match either word, e.g., `"image|photo"` +- `-` (NOT): Exclude matches, e.g., `-screenshot` +- `=`, `==` (equal) +- `!=` (not equal) +- `<`, `>` (less/greater than) +- `<=`, `>=` (less/greater than or equal) + +### Value Comparison Modifiers + +Use brackets with these modifiers: + +- `[c]`: Case-insensitive comparison +- `[d]`: Diacritical marks insensitive +- Can be combined, e.g., `[cd]` for both + +### Content Types (kind:) + +- `application`, `app`: Applications +- `audio`, `music`: Audio/Music files +- `bookmark`: Bookmarks +- `contact`: Contacts +- `email`, `mail message`: Email messages +- `event`: Calendar events +- `folder`: Folders +- `font`: Fonts +- `image`: Images +- `movie`: Movies +- `pdf`: PDF documents +- `preferences`: System preferences +- `presentation`: Presentations +- `todo`: Calendar to-dos + +### Date Filters (date:) + +Time-based search using these keywords: + +- `today`, `yesterday`, `tomorrow` +- `this week`, `next week` +- `this month`, `next month` +- `this year`, `next year` + +Or use time functions: + +- `$time.today()` +- `$time.yesterday()` +- `$time.this_week()` +- `$time.this_month()` +- `$time.this_year()` +- `$time.tomorrow()` +- `$time.next_week()` +- `$time.next_month()` +- `$time.next_year()` + +### Common Metadata Attributes + +Search specific metadata using these 
attributes: + +- `kMDItemAuthors`: Document authors +- `kMDItemContentType`: File type +- `kMDItemContentTypeTree`: File type hierarchy +- `kMDItemCreator`: Creating application +- `kMDItemDescription`: File description +- `kMDItemDisplayName`: Display name +- `kMDItemFSContentChangeDate`: File modification date +- `kMDItemFSCreationDate`: File creation date +- `kMDItemFSName`: Filename +- `kMDItemKeywords`: Keywords/tags +- `kMDItemLastUsedDate`: Last used date +- `kMDItemNumberOfPages`: Page count +- `kMDItemTitle`: Document title +- `kMDItemUserTags`: User-assigned tags + +### Examples + +1. Find images modified yesterday: + `kind:image date:yesterday` + +2. Find documents by author (case-insensitive): + `kMDItemAuthors ==[c] "John Doe"` + +3. Find files in specific directory: + `mdfind -onlyin ~/Documents "query"` + +4. Find files by tag: + `kMDItemUserTags = "Important"` + +5. Find files created by application: + `kMDItemCreator = "Pixelmator*"` + +6. Find PDFs with specific text: + `kind:pdf "search term"` + +7. Find recent presentations: + `kind:presentation date:this week` + +8. Count matching files: + `mdfind -count "kind:image date:today"` + +9. Monitor for new matches: + `mdfind -live "kind:pdf"` + +10. Complex metadata search: + `kMDItemContentTypeTree = "public.image" && kMDItemUserTags = "vacation" && kMDItemFSContentChangeDate >= $time.this_month()` + +Note: Use `mdls filename` to see all available metadata attributes for a specific file. + +## Linux Search (locate/plocate) + +Linux uses the locate/plocate command for fast filename searching. 
The following features are supported: + +### Basic Search + +- Simple text search matches against filenames +- Multiple words are treated as AND conditions +- Wildcards (`*` and `?`) are supported +- Case-insensitive by default + +### Search Options + +- `-i`: Case-insensitive search (default) +- `-c`: Count matches instead of showing them +- `-r` or `--regex`: Use regular expressions +- `-b`: Match only the basename +- `-w`: Match whole words only + +### Examples + +1. Find all Python files: + `*.py` + +2. Find files in home directory: + `/home/username/*` + +3. Case-sensitive search for specific file: + `--regex "^/etc/[A-Z].*\.conf$"` + +4. Count matching files: + Use with `-c` parameter + +Note: The locate database must be up to date for accurate results. Run `sudo updatedb` to update the database manually. diff --git a/pyproject.toml b/pyproject.toml index 6dcdea8..cbdd452 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-server-everything-search" -version = "0.1.2" +version = "0.2.1" description = "A Model Context Protocol server providing fast file searching using Everything SDK" readme = "README.md" requires-python = ">=3.10" diff --git a/src/mcp_server_everything_search/platform_search.py b/src/mcp_server_everything_search/platform_search.py new file mode 100644 index 0000000..dab59a0 --- /dev/null +++ b/src/mcp_server_everything_search/platform_search.py @@ -0,0 +1,173 @@ +"""Platform-specific search implementations with dedicated parameter models.""" + +from typing import Optional, List, Dict, Any +from pydantic import BaseModel, Field +from enum import Enum +import platform + +class BaseSearchQuery(BaseModel): + """Base search parameters common to all platforms.""" + query: str = Field( + description="Search query string. See platform-specific documentation for syntax details." 
+ ) + max_results: int = Field( + default=100, + ge=1, + le=1000, + description="Maximum number of results to return (1-1000)" + ) + +class MacSpecificParams(BaseModel): + """macOS-specific search parameters for mdfind.""" + live_updates: bool = Field( + default=False, + description="Provide live updates to search results" + ) + search_directory: Optional[str] = Field( + default=None, + description="Limit search to specific directory (-onlyin parameter)" + ) + literal_query: bool = Field( + default=False, + description="Treat query as literal string without interpretation" + ) + interpret_query: bool = Field( + default=False, + description="Interpret query as if typed in Spotlight menu" + ) + +class LinuxSpecificParams(BaseModel): + """Linux-specific search parameters for locate.""" + ignore_case: bool = Field( + default=True, + description="Ignore case distinctions (-i parameter)" + ) + regex_search: bool = Field( + default=False, + description="Use regular expressions in patterns (-r parameter)" + ) + existing_files: bool = Field( + default=True, + description="Only output existing files (-e parameter)" + ) + count_only: bool = Field( + default=False, + description="Only display count of matches (-c parameter)" + ) + +class WindowsSortOption(int, Enum): + """Sort options for Windows Everything search.""" + NAME_ASC = 1 + NAME_DESC = 2 + PATH_ASC = 3 + PATH_DESC = 4 + SIZE_ASC = 5 + SIZE_DESC = 6 + EXT_ASC = 7 + EXT_DESC = 8 + CREATED_ASC = 11 + CREATED_DESC = 12 + MODIFIED_ASC = 13 + MODIFIED_DESC = 14 + +class WindowsSpecificParams(BaseModel): + """Windows-specific search parameters for Everything SDK.""" + match_path: bool = Field( + default=False, + description="Match against full path instead of filename only" + ) + match_case: bool = Field( + default=False, + description="Enable case-sensitive search" + ) + match_whole_word: bool = Field( + default=False, + description="Match whole words only" + ) + match_regex: bool = Field( + default=False, + 
description="Enable regex search" + ) + sort_by: WindowsSortOption = Field( + default=WindowsSortOption.NAME_ASC, + description="Sort order for results" + ) + +class UnifiedSearchQuery(BaseSearchQuery): + """Combined search parameters model.""" + mac_params: Optional[MacSpecificParams] = None + linux_params: Optional[LinuxSpecificParams] = None + windows_params: Optional[WindowsSpecificParams] = None + + @classmethod + def get_schema_for_platform(cls) -> Dict[str, Any]: + """Get the appropriate schema based on the current platform.""" + system = platform.system().lower() + + schema = { + "type": "object", + "properties": { + "base": BaseSearchQuery.model_json_schema() + }, + "required": ["base"] + } + + # Add platform-specific parameters + if system == "darwin": + schema["properties"]["mac_params"] = MacSpecificParams.model_json_schema() + elif system == "linux": + schema["properties"]["linux_params"] = LinuxSpecificParams.model_json_schema() + elif system == "windows": + schema["properties"]["windows_params"] = WindowsSpecificParams.model_json_schema() + + return schema + + def get_platform_params(self) -> Optional[BaseModel]: + """Get the parameters specific to the current platform.""" + system = platform.system().lower() + if system == "darwin": + return self.mac_params + elif system == "linux": + return self.linux_params + elif system == "windows": + return self.windows_params + return None + +def build_search_command(query: UnifiedSearchQuery) -> List[str]: + """Build the appropriate search command based on platform and parameters.""" + system = platform.system().lower() + platform_params = query.get_platform_params() + + if system == "darwin": + cmd = ["mdfind"] + if platform_params: + if platform_params.live_updates: + cmd.append("-live") + if platform_params.search_directory: + cmd.extend(["-onlyin", platform_params.search_directory]) + if platform_params.literal_query: + cmd.append("-literal") + if platform_params.interpret_query: + 
cmd.append("-interpret") + cmd.append(query.query) # Use query directly from UnifiedSearchQuery + return cmd + + elif system == "linux": + cmd = ["locate"] + if platform_params: + if platform_params.ignore_case: + cmd.append("-i") + if platform_params.regex_search: + cmd.append("-r") + if platform_params.existing_files: + cmd.append("-e") + if platform_params.count_only: + cmd.append("-c") + cmd.append(query.query) # Use query directly from UnifiedSearchQuery + return cmd + + elif system == "windows": + # For Windows, return an empty command list as we'll use the Everything SDK directly + return [] + + raise NotImplementedError(f"Unsupported platform: {system}") diff --git a/src/mcp_server_everything_search/search_interface.py b/src/mcp_server_everything_search/search_interface.py new file mode 100644 index 0000000..398a26c --- /dev/null +++ b/src/mcp_server_everything_search/search_interface.py @@ -0,0 +1,227 @@ +"""Platform-agnostic search interface for MCP.""" + +import abc +import platform +import subprocess +import os +from datetime import datetime +from typing import Optional, List +from dataclasses import dataclass +from pathlib import Path + +@dataclass +class SearchResult: + """Universal search result structure.""" + path: str + filename: str + extension: Optional[str] = None + size: Optional[int] = None + created: Optional[datetime] = None + modified: Optional[datetime] = None + accessed: Optional[datetime] = None + attributes: Optional[str] = None + +class SearchProvider(abc.ABC): + """Abstract base class for platform-specific search implementations.""" + + @abc.abstractmethod + def search_files( + self, + query: str, + max_results: int = 100, + match_path: bool = False, + match_case: bool = False, + match_whole_word: bool = False, + match_regex: bool = False, + sort_by: Optional[int] = None + ) -> List[SearchResult]: + """Execute a file search using platform-specific methods.""" + pass + + @classmethod + def get_provider(cls) -> 'SearchProvider': + """Factory method to 
get the appropriate search provider for the current platform."""
+        system = platform.system().lower()
+        if system == 'darwin':
+            return MacSearchProvider()
+        elif system == 'linux':
+            return LinuxSearchProvider()
+        elif system == 'windows':
+            return WindowsSearchProvider()
+        else:
+            raise NotImplementedError(f"No search provider available for {system}")
+
+    def _convert_path_to_result(self, path: str) -> SearchResult:
+        """Convert a path to a SearchResult with file information.
+
+        Falls back to a result carrying only ``path`` and ``filename`` when
+        the file cannot be stat'ed or one of its timestamps cannot be
+        converted to a ``datetime``.
+        """
+        try:
+            path_obj = Path(path)
+            stat = path_obj.stat()
+            # st_ctime is the creation time only on Windows; on Unix it is the
+            # inode-change time, so prefer st_birthtime where it is exposed.
+            created_ts = getattr(stat, 'st_birthtime', stat.st_ctime)
+            return SearchResult(
+                path=str(path_obj),
+                filename=path_obj.name,
+                extension=path_obj.suffix[1:] if path_obj.suffix else None,
+                size=stat.st_size,
+                created=datetime.fromtimestamp(created_ts),
+                modified=datetime.fromtimestamp(stat.st_mtime),
+                accessed=datetime.fromtimestamp(stat.st_atime)
+            )
+        except (OSError, ValueError, OverflowError):
+            # If we can't access the file, return basic info
+            return SearchResult(
+                path=str(path),
+                filename=os.path.basename(path)
+            )
+
+
+class MacSearchProvider(SearchProvider):
+    """macOS search implementation using mdfind."""
+
+    def search_files(
+        self,
+        query: str,
+        max_results: int = 100,
+        match_path: bool = False,
+        match_case: bool = False,
+        match_whole_word: bool = False,
+        match_regex: bool = False,
+        sort_by: Optional[int] = None
+    ) -> List[SearchResult]:
+        """Run ``mdfind`` and return at most ``max_results`` results.
+
+        ``match_path`` passes the query to mdfind unchanged instead of using
+        ``-name``. ``match_case``, ``match_whole_word``, ``match_regex`` and
+        ``sort_by`` are accepted for interface compatibility but are not
+        used by this provider.
+
+        Raises:
+            RuntimeError: if mdfind is missing or exits with a non-zero status.
+        """
+        # Build mdfind command
+        cmd = ['mdfind']
+        if match_path:
+            # When matching path, don't use -name
+            cmd.append(query)
+        else:
+            cmd.extend(['-name', query])
+
+        # Execute search. subprocess.run() without check=True never raises
+        # CalledProcessError, so the exit status is inspected explicitly.
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True)
+        except FileNotFoundError as exc:
+            raise RuntimeError("Search failed: the mdfind command was not found") from exc
+        if result.returncode != 0:
+            raise RuntimeError(f"mdfind failed: {result.stderr}")
+
+        # Process results
+        paths = result.stdout.splitlines()[:max_results]
+        return [self._convert_path_to_result(path) for path in paths]
+
+
+class LinuxSearchProvider(SearchProvider):
+    """Linux search implementation using locate/plocate."""
+
+    def __init__(self):
+        """Select plocate if it is on PATH, otherwise locate.
+
+        Raises:
+            RuntimeError: if neither command is installed.
+        """
+        # Local import: shutil.which avoids depending on an external `which`
+        # binary, which is not present on every minimal system.
+        import shutil
+
+        self.locate_cmd = None
+        self.locate_type = None
+
+        # Check for plocate first (newer version)
+        if shutil.which('plocate'):
+            self.locate_cmd = 'plocate'
+            self.locate_type = 'plocate'
+        elif shutil.which('locate'):
+            self.locate_cmd = 'locate'
+            self.locate_type = 'mlocate'
+        else:
+            raise RuntimeError(
+                "Neither 'locate' nor 'plocate' is installed. Please install one:\n"
+                "Ubuntu/Debian: sudo apt-get install plocate\n"
+                " or\n"
+                " sudo apt-get install mlocate\n"
+                "Fedora: sudo dnf install mlocate\n"
+                "After installation, the database will be updated automatically, or run:\n"
+                "For plocate: sudo updatedb\n"
+                "For mlocate: sudo /etc/cron.daily/mlocate"
+            )
+
+    def _update_database(self):
+        """Update the locate database (runs the updater through sudo)."""
+        if self.locate_type == 'plocate':
+            subprocess.run(['sudo', 'updatedb'], check=True)
+        else:  # mlocate
+            subprocess.run(['sudo', '/etc/cron.daily/mlocate'], check=True)
+
+    def search_files(
+        self,
+        query: str,
+        max_results: int = 100,
+        match_path: bool = False,
+        match_case: bool = False,
+        match_whole_word: bool = False,
+        match_regex: bool = False,
+        sort_by: Optional[int] = None
+    ) -> List[SearchResult]:
+        """Run locate/plocate and return at most ``max_results`` results.
+
+        ``match_case`` and ``match_regex`` map to command-line flags;
+        ``match_path``, ``match_whole_word`` and ``sort_by`` are accepted for
+        interface compatibility but are not used by this provider.
+
+        Returns:
+            The matching entries, or an empty list when nothing matched.
+
+        Raises:
+            RuntimeError: if the command is missing, the database is not
+                available, or the command reports any other error.
+        """
+        # Build locate command
+        cmd = [self.locate_cmd]
+        if not match_case:
+            cmd.append('-i')
+        if match_regex:
+            cmd.append('--regex' if self.locate_type == 'mlocate' else '-r')
+        if max_results > 0:
+            # Let locate stop early instead of printing every match
+            cmd.extend(['-l', str(max_results)])
+        # '--' keeps a query that starts with '-' from being parsed as an option
+        cmd.extend(['--', query])
+
+        # Execute search. subprocess.run() without check=True never raises
+        # CalledProcessError, so the exit status is inspected explicitly.
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True)
+        except FileNotFoundError as exc:
+            raise RuntimeError(
+                f"The {self.locate_cmd} command disappeared. Please reinstall:\n"
+                "Ubuntu/Debian: sudo apt-get install plocate\n"
+                " or\n"
+                " sudo apt-get install mlocate\n"
+                "Fedora: sudo dnf install mlocate"
+            ) from exc
+
+        if result.returncode != 0:
+            if result.returncode == 1 and not result.stderr.strip():
+                # locate and plocate exit with status 1 when nothing matched;
+                # that is an empty result, not a failure.
+                return []
+            error_msg = result.stderr.lower()
+            if "no such file or directory" in error_msg or "database" in error_msg:
+                raise RuntimeError(
+                    f"The {self.locate_type} database needs to be created. "
+                    "Please run: sudo updatedb"
+                )
+            raise RuntimeError(f"{self.locate_cmd} failed: {result.stderr}")
+
+        # Process results
+        paths = result.stdout.splitlines()[:max_results]
+        return [self._convert_path_to_result(path) for path in paths]
+
+
+class WindowsSearchProvider(SearchProvider):
+    """Windows search implementation using Everything SDK."""
+
+    def __init__(self):
+        """Load the Everything SDK DLL.
+
+        The DLL path is read from the EVERYTHING_SDK_PATH environment
+        variable, with a hard-coded fallback path.
+        """
+        import os
+        from .everything_sdk import EverythingSDK
+        dll_path = os.getenv('EVERYTHING_SDK_PATH', 'D:\\dev\\tools\\Everything-SDK\\dll\\Everything64.dll')
+        self.everything_sdk = EverythingSDK(dll_path)
+
+    def search_files(
+        self,
+        query: str,
+        max_results: int = 100,
+        match_path: bool = False,
+        match_case: bool = False,
+        match_whole_word: bool = False,
+        match_regex: bool = False,
+        sort_by: Optional[int] = None
+    ) -> List[SearchResult]:
+        """Normalize path separators in ``query`` and delegate to the SDK."""
+        # NOTE(review): both replacements apply to the whole query, so they
+        # also rewrite separators inside regex queries - confirm intended.
+        # Replace double backslashes with single backslashes
+        query = query.replace("\\\\", "\\")
+        # If the query contains forward slashes, replace them with backslashes
+        query = query.replace("/", "\\")
+
+        return self.everything_sdk.search_files(
+            query=query,
+            max_results=max_results,
+            match_path=match_path,
+            match_case=match_case,
+            match_whole_word=match_whole_word,
+            match_regex=match_regex,
+            sort_by=sort_by
+        )
diff --git a/src/mcp_server_everything_search/server.py b/src/mcp_server_everything_search/server.py
index 73b0233..408bcfd 100644
--- a/src/mcp_server_everything_search/server.py
+++ b/src/mcp_server_everything_search/server.py
@@ -1,66 +1,21 @@
-"""MCP server implementation for Everything Search."""
+"""MCP server
implementation for cross-platform file search.""" -import ctypes -import os +import json +import platform import sys -from enum import IntEnum -from typing import Literal - +from typing import List from mcp.server import Server from mcp.server.stdio import stdio_server from mcp.types import TextContent, Tool, Resource, ResourceTemplate, Prompt from pydantic import BaseModel, Field -from .everything_sdk import ( - EverythingSDK, - EVERYTHING_SORT_NAME_ASCENDING, - EVERYTHING_SORT_NAME_DESCENDING, - EVERYTHING_SORT_PATH_ASCENDING, - EVERYTHING_SORT_PATH_DESCENDING, - EVERYTHING_SORT_SIZE_ASCENDING, - EVERYTHING_SORT_SIZE_DESCENDING, - EVERYTHING_SORT_EXTENSION_ASCENDING, - EVERYTHING_SORT_EXTENSION_DESCENDING, - EVERYTHING_SORT_DATE_CREATED_ASCENDING, - EVERYTHING_SORT_DATE_CREATED_DESCENDING, - EVERYTHING_SORT_DATE_MODIFIED_ASCENDING, - EVERYTHING_SORT_DATE_MODIFIED_DESCENDING, -) - -class SortOption(IntEnum): - """Sort options for search results. - - Available options: - - NAME_ASC (1): Sort by filename in ascending order - - NAME_DESC (2): Sort by filename in descending order - - PATH_ASC (3): Sort by full path in ascending order - - PATH_DESC (4): Sort by full path in descending order - - SIZE_ASC (5): Sort by file size in ascending order (smallest first) - - SIZE_DESC (6): Sort by file size in descending order (largest first) - - EXT_ASC (7): Sort by file extension in ascending order - - EXT_DESC (8): Sort by file extension in descending order - - CREATED_ASC (11): Sort by creation date in ascending order (oldest first) - - CREATED_DESC (12): Sort by creation date in descending order (newest first) - - MODIFIED_ASC (13): Sort by modification date in ascending order (oldest first) - - MODIFIED_DESC (14): Sort by modification date in descending order (newest first) - """ - NAME_ASC = EVERYTHING_SORT_NAME_ASCENDING - NAME_DESC = EVERYTHING_SORT_NAME_DESCENDING - PATH_ASC = EVERYTHING_SORT_PATH_ASCENDING - PATH_DESC = EVERYTHING_SORT_PATH_DESCENDING - SIZE_ASC = 
EVERYTHING_SORT_SIZE_ASCENDING - SIZE_DESC = EVERYTHING_SORT_SIZE_DESCENDING - EXT_ASC = EVERYTHING_SORT_EXTENSION_ASCENDING - EXT_DESC = EVERYTHING_SORT_EXTENSION_DESCENDING - CREATED_ASC = EVERYTHING_SORT_DATE_CREATED_ASCENDING - CREATED_DESC = EVERYTHING_SORT_DATE_CREATED_DESCENDING - MODIFIED_ASC = EVERYTHING_SORT_DATE_MODIFIED_ASCENDING - MODIFIED_DESC = EVERYTHING_SORT_DATE_MODIFIED_DESCENDING +from .platform_search import UnifiedSearchQuery, WindowsSpecificParams, build_search_command +from .search_interface import SearchProvider class SearchQuery(BaseModel): """Model for search query parameters.""" query: str = Field( - description="Search query string. Supports wildcards (* and ?) and more. See the search syntax guide for more details." + description="Search query string. See the search syntax guide for details." ) max_results: int = Field( default=100, @@ -84,41 +39,83 @@ class SearchQuery(BaseModel): default=False, description="Enable regex search" ) - sort_by: SortOption = Field( - default=SortOption.NAME_ASC, - description="""Sort order for results. 
Available options: - - 1 (NAME_ASC): Sort by filename (A to Z) - - 2 (NAME_DESC): Sort by filename (Z to A) - - 3 (PATH_ASC): Sort by path (A to Z) - - 4 (PATH_DESC): Sort by path (Z to A) - - 5 (SIZE_ASC): Sort by size (smallest first) - - 6 (SIZE_DESC): Sort by size (largest first) - - 7 (EXT_ASC): Sort by extension (A to Z) - - 8 (EXT_DESC): Sort by extension (Z to A) - - 11 (CREATED_ASC): Sort by creation date (oldest first) - - 12 (CREATED_DESC): Sort by creation date (newest first) - - 13 (MODIFIED_ASC): Sort by modification date (oldest first) - - 14 (MODIFIED_DESC): Sort by modification date (newest first)""" + sort_by: int = Field( + default=1, + description="Sort order for results (Note: Not all sort options available on all platforms)" ) - class Config: - """Pydantic model configuration.""" - use_enum_values = True # Use enum values in schema - async def serve() -> None: """Run the server.""" - # Load Everything SDK DLL - dll_path = os.getenv('EVERYTHING_SDK_PATH', 'D:\\dev\\tools\\Everything-SDK\\dll\\Everything64.dll') - everything_sdk = EverythingSDK(dll_path) + current_platform = platform.system().lower() + search_provider = SearchProvider.get_provider() + + server = Server("universal-search") + + @server.list_resources() + async def list_resources() -> list[Resource]: + """Return an empty list since this server doesn't provide any resources.""" + return [] - server = Server("everything-search") + @server.list_resource_templates() + async def list_resource_templates() -> list[ResourceTemplate]: + """Return an empty list since this server doesn't provide any resource templates.""" + return [] + + @server.list_prompts() + async def list_prompts() -> list[Prompt]: + """Return an empty list since this server doesn't provide any prompts.""" + return [] @server.list_tools() - async def list_tools() -> list[Tool]: - return [ - Tool( - name="search", - description="""Search for files and folders using Everything SDK. 
+ async def list_tools() -> List[Tool]: + """Return the search tool with platform-specific documentation and schema.""" + platform_info = { + 'windows': "Using Everything SDK with full search capabilities", + 'darwin': "Using mdfind (Spotlight) with native macOS search capabilities", + 'linux': "Using locate with Unix-style search capabilities" + } + + syntax_docs = { + 'darwin': """macOS Spotlight (mdfind) Search Syntax: + +Basic Usage: +- Simple text search: Just type the words you're looking for +- Phrase search: Use quotes ("exact phrase") +- Filename search: -name "filename" +- Directory scope: -onlyin /path/to/dir + +Special Parameters: +- Live updates: -live +- Literal search: -literal +- Interpreted search: -interpret + +Metadata Attributes: +- kMDItemDisplayName +- kMDItemTextContent +- kMDItemKind +- kMDItemFSSize +- And many more OS X metadata attributes""", + + 'linux': """Linux Locate Search Syntax: + +Basic Usage: +- Simple pattern: locate filename +- Case-insensitive: -i pattern +- Regular expressions: -r pattern +- Existing files only: -e pattern +- Count matches: -c pattern + +Pattern Wildcards: +- * matches any characters +- ? matches single character +- [] matches character classes + +Examples: +- locate -i "*.pdf" +- locate -r "/home/.*\.txt$" +- locate -c "*.doc" +""", + 'windows': """Search for files and folders using Everything SDK. Features: - Fast file and folder search across all indexed drives @@ -127,21 +124,17 @@ async def list_tools() -> list[Tool]: - Case-sensitive and whole word matching - Regular expression support - Path matching - Search Syntax Guide: - 1. Basic Operators: - space: AND operator - |: OR operator - !: NOT operator - < >: Grouping - " ": Search for an exact phrase - 2. Wildcards: - *: Matches zero or more characters - ?: Matches exactly one character Note: Wildcards match the whole filename by default. Disable Match whole filename to match wildcards anywhere. - 3. 
Functions: Size and Count: - size:[kb|mb|gb]: Search by file size @@ -150,7 +143,6 @@ async def list_tools() -> list[Tool]: - childfilecount:: Folders with specific number of files - childfoldercount:: Folders with specific number of subfolders - len:: Match filename length - Dates: - datemodified:, dm:: Modified date - dateaccessed:, da:: Access date @@ -174,7 +166,6 @@ async def list_tools() -> list[Tool]: - depth:, parents:: Files at specific folder depth - root: Files with no parent folder - shell:: Search in known shell folders - Duplicates and Lists: - dupe, namepartdupe, attribdupe, dadupe, dcdupe, dmdupe, sizedupe: Find duplicates - filelist:: Search pipe-separated (|) file list @@ -182,7 +173,6 @@ async def list_tools() -> list[Tool]: - frn:: Search by File Reference Numbers - fsi:: Search by file system index - empty: Find empty folders - 4. Function Syntax: - function:value: Equal to value - function:<=value: Less than or equal @@ -192,7 +182,6 @@ async def list_tools() -> list[Tool]: - function:>=value: Greater than or equal - function:start..end: Range of values - function:start-end: Range of values - 5. Modifiers: - case:, nocase:: Enable/disable case sensitivity - file:, folder:: Match only files or folders @@ -201,61 +190,104 @@ async def list_tools() -> list[Tool]: - wfn:, nowfn:: Match whole filename or anywhere - wholeword:, ww:: Match whole words only - wildcards:, nowildcards:: Enable/disable wildcards - Examples: 1. Find Python files modified today: ext:py datemodified:today - 2. Find large video files: ext:mp4|mkv|avi size:>1gb - 3. 
Find files in specific folder: path:C:\Projects *.js -""", - inputSchema=SearchQuery.model_json_schema(), - ), - ] +""" + } - @server.list_resources() - async def list_resources() -> list[Resource]: - """Return an empty list since this server doesn't provide any resources.""" - return [] + description = f"""Universal file search tool for {platform.system()} - @server.list_resource_templates() - async def list_resource_templates() -> list[ResourceTemplate]: - """Return an empty list since this server doesn't provide any resource templates.""" - return [] +Current Implementation: +{platform_info.get(current_platform, "Unknown platform")} - @server.list_prompts() - async def list_prompts() -> list[Prompt]: - """Return an empty list since this server doesn't provide any prompts.""" - return [] +Search Syntax Guide: +{syntax_docs.get(current_platform, "Platform-specific syntax guide not available")} +""" + + return [ + Tool( + name="search", + description=description, + inputSchema=UnifiedSearchQuery.get_schema_for_platform() + ) + ] @server.call_tool() - async def call_tool(name: str, arguments: dict) -> list[TextContent]: + async def call_tool(name: str, arguments: dict) -> List[TextContent]: if name != "search": raise ValueError(f"Unknown tool: {name}") try: - query = SearchQuery(**arguments) - # Replace double backslashes with single backslashes - query.query = query.query.replace("\\\\", "\\") - # If the query.query contains forward slashes, replace them with backslashes - query.query = query.query.replace("/", "\\") - - # Add debug logging - print(f"Debug: Executing search with query: {query.query}", file=sys.stderr) - print(f"Debug: Sort by: {query.sort_by}", file=sys.stderr) + # Parse and validate inputs + base_params = {} + windows_params = {} - results = everything_sdk.search_files( - query=query.query, - max_results=query.max_results, - match_path=query.match_path, - match_case=query.match_case, - match_whole_word=query.match_whole_word, - 
match_regex=query.match_regex, - sort_by=query.sort_by - ) + # Handle base parameters + if 'base' in arguments: + if isinstance(arguments['base'], str): + try: + base_params = json.loads(arguments['base']) + except json.JSONDecodeError: + # If not valid JSON string, treat as simple query string + base_params = {'query': arguments['base']} + elif isinstance(arguments['base'], dict): + # If already a dict, use directly + base_params = arguments['base'] + else: + raise ValueError("'base' parameter must be a string or dictionary") + + # Handle Windows-specific parameters + if 'windows_params' in arguments: + if isinstance(arguments['windows_params'], str): + try: + windows_params = json.loads(arguments['windows_params']) + except json.JSONDecodeError: + raise ValueError("Invalid JSON in windows_params") + elif isinstance(arguments['windows_params'], dict): + # If already a dict, use directly + windows_params = arguments['windows_params'] + else: + raise ValueError("'windows_params' must be a string or dictionary") + + # Combine parameters + query_params = { + **base_params, + 'windows_params': windows_params + } + + # Create unified query + query = UnifiedSearchQuery(**query_params) + + if current_platform == "windows": + # Use Everything SDK directly + platform_params = query.windows_params or WindowsSpecificParams() + results = search_provider.search_files( + query=query.query, + max_results=query.max_results, + match_path=platform_params.match_path, + match_case=platform_params.match_case, + match_whole_word=platform_params.match_whole_word, + match_regex=platform_params.match_regex, + sort_by=platform_params.sort_by + ) + else: + # Use command-line tools (mdfind/locate) + platform_params = None + if current_platform == 'darwin': + platform_params = query.mac_params or {} + elif current_platform == 'linux': + platform_params = query.linux_params or {} + + results = search_provider.search_files( + query=query.query, + max_results=query.max_results, + 
**platform_params.dict() if platform_params else {} + ) return [TextContent( type="text", @@ -267,15 +299,10 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: f"Created: {r.created if r.created else 'N/A'}\n" f"Modified: {r.modified if r.modified else 'N/A'}\n" f"Accessed: {r.accessed if r.accessed else 'N/A'}\n" - f"Run Count: {r.run_count if r.run_count else 'N/A'}\n" - f"Attributes: {r.attributes if r.attributes else 'N/A'}\n" for r in results ]) )] except Exception as e: - # Add more detailed error logging - import traceback - print(f"Debug: Error details:\n{traceback.format_exc()}", file=sys.stderr) return [TextContent( type="text", text=f"Search failed: {str(e)}" @@ -287,6 +314,8 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: def configure_windows_console(): """Configure Windows console for UTF-8 output.""" + import ctypes + if sys.platform == "win32": # Enable virtual terminal processing kernel32 = ctypes.windll.kernel32 @@ -303,11 +332,22 @@ def configure_windows_console(): sys.stdout.reconfigure(encoding='utf-8') sys.stderr.reconfigure(encoding='utf-8') -def main(): +def main() -> None: """Main entry point.""" import asyncio - - # Configure console before running the server + import logging + logging.basicConfig( + level=logging.WARNING, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + configure_windows_console() - asyncio.run(serve()) + try: + asyncio.run(serve()) + except KeyboardInterrupt: + logging.info("Server stopped by user") + sys.exit(0) + except Exception as e: + logging.error(f"Server error: {e}", exc_info=True) + sys.exit(1) diff --git a/uv.lock b/uv.lock index 1b107ee..ab60e6b 100644 --- a/uv.lock +++ b/uv.lock @@ -147,7 +147,7 @@ wheels = [ [[package]] name = "mcp-server-everything-search" -version = "0.1.2" +version = "0.2.0" source = { editable = "." } dependencies = [ { name = "mcp" },