Skip to content

Commit

Permalink
Merge pull request #15 from loftwah/dl/split-output-for-llms
Browse files Browse the repository at this point in the history
Dl/split output for llms
  • Loading branch information
loftwah authored Sep 10, 2024
2 parents 82f81c5 + 59f4595 commit 9294e08
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 4 deletions.
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Grabit.sh is a powerful command-line tool designed to quickly gather and summari
- File type summary
- Recently modified files list
- Project type detection
- LLM-friendly output chunks for easy integration with AI models

## Installation

Expand Down Expand Up @@ -111,6 +112,7 @@ Replace `<output_method>` with one of the following options:
- `stdout`: Display the output in the terminal (default)
- `clipboard`: Copy the output to your clipboard
- `file`: Save the output to a file (use the `-f` flag to specify the file path)
- `llm-chunks`: Generate LLM-friendly chunks of the output (new feature)

### Examples

Expand All @@ -132,6 +134,33 @@ Replace `<output_method>` with one of the following options:
grabitsh --output file -f output.txt
```

4. Generate LLM-friendly chunks:

```bash
grabitsh --output llm-chunks
```

This will create multiple text files, each containing a portion of the output with a preamble suitable for use with Large Language Models.

5. Customize chunk size for LLM output:

```bash
grabitsh --output llm-chunks --chunk-size 50000
```

This sets the chunk size to 50,000 tokens (tokens are approximated as whitespace-separated words). The default is 100,000 tokens.

### LLM-Chunks Feature

The LLM-chunks output method is designed to create AI-friendly chunks of the Grabit.sh output. Each chunk includes a preamble that provides context about the tool, its purpose, and instructions for the AI model. This feature is particularly useful when you want to analyze the output using a Large Language Model or other AI tools.

Key points about LLM-chunks:

- Each chunk is saved as a separate text file (`grabitsh_chunk_1.txt`, `grabitsh_chunk_2.txt`, etc.).
- The default chunk size is 100,000 tokens, which can be customized using the `--chunk-size` flag.
- The preamble in each chunk helps the AI understand the context and purpose of the information.
- This feature makes it easy to feed the Grabit.sh output into AI models for further analysis or to generate insights about the repository.

## Web Server

Grabit.sh also includes a web server feature. To start the web server, use the following command:
Expand Down Expand Up @@ -205,4 +234,4 @@ This project is licensed under the terms of the license included in the [LICENSE

## Contact

For any queries or suggestions, please open an issue on the GitHub repository.
For any queries or suggestions, please open an issue on the GitHub repository.
67 changes: 64 additions & 3 deletions cmd/grabitsh/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,21 @@ import (
var (
outputMethod string // selected output method: stdout, clipboard, file, or llm-chunks
outputFile string // destination path when outputMethod is "file"
chunkSize int // approximate token budget per LLM chunk (whitespace-delimited words)
rootCmd *cobra.Command // root cobra command; configured in init()
)

// init wires up the root cobra command and its flags.
//
// NOTE: the diff rendering of this hunk showed both the pre- and post-change
// lines (a duplicated Long field and a duplicated --output registration);
// a duplicate struct field does not compile and registering the same flag
// twice panics in pflag, so only the committed (new) lines are kept here.
func init() {
	rootCmd = &cobra.Command{
		Use:   "grabitsh",
		Short: "Grabit.sh gathers useful information from a Git repository",
		Long:  `Grabit.sh simplifies working with Git repositories by gathering useful information and outputting it to stdout, a file, the clipboard, or LLM-friendly chunks.`,
		Run:   runGrabit,
	}

	// Flag registration: output method, optional file path, and LLM chunk size.
	rootCmd.Flags().StringVarP(&outputMethod, "output", "o", "stdout", "Output method: stdout, clipboard, file, or llm-chunks")
	rootCmd.Flags().StringVarP(&outputFile, "file", "f", "", "Output file path (required if output method is file)")
	rootCmd.Flags().IntVarP(&chunkSize, "chunk-size", "c", 100000, "Token size for LLM chunks (default 100000)")

	rootCmd.AddCommand(serveCmd)
}
Expand Down Expand Up @@ -194,7 +196,66 @@ func finalizeOutput(content string) {
} else {
color.Green("Output written to file: %s", outputFile)
}
case "llm-chunks":
if err := writeChunks(content); err != nil {
color.Red("Failed to write LLM chunks: %v", err)
} else {
color.Green("LLM chunks written successfully.")
}
default:
color.Red("Invalid output method. Choose stdout, clipboard, or file.")
color.Red("Invalid output method. Choose stdout, clipboard, file, or llm-chunks.")
}
}

// writeChunks splits content into token-limited chunks and writes each chunk
// to its own numbered text file (grabitsh_chunk_1.txt, grabitsh_chunk_2.txt, …),
// prefixing every file with an LLM-oriented preamble that explains the chunk's
// position in the sequence and how the model should use it.
//
// Returns a non-nil error if any chunk file cannot be written.
func writeChunks(content string) error {
	// The preamble template takes four verb arguments:
	// part number, total parts, part number, total parts.
	const preambleTemplate = `This is part %d of %d of the output from Grabit.sh, a tool that analyzes Git repositories.
Purpose: This output provides a comprehensive analysis of a Git repository, including its structure, configuration, dependencies, and potential issues. Use this information to understand the project, identify areas for improvement, and make informed decisions about the codebase.
Instructions:
1. Read through the information provided in this chunk.
2. If this is not the final chunk, wait for the next one before drawing conclusions.
3. Use the information to answer questions about the repository, suggest improvements, or identify potential issues.
4. Pay attention to sections like security analysis, performance metrics, and detected project types.
Content of Chunk %d/%d:
`

	chunks := splitIntoChunks(content, chunkSize)
	total := len(chunks)

	for idx, body := range chunks {
		part := idx + 1
		name := fmt.Sprintf("grabitsh_chunk_%d.txt", part)
		payload := fmt.Sprintf(preambleTemplate, part, total, part, total) + body
		if err := os.WriteFile(name, []byte(payload), 0644); err != nil {
			return fmt.Errorf("failed to write chunk %d: %v", part, err)
		}
		color.Green("Chunk %d/%d written to %s", part, total, name)
	}
	return nil
}

// splitIntoChunks splits content into chunks of at most chunkSize "tokens",
// where a token is approximated as a whitespace-separated word. Part of the
// budget is reserved for the preamble that writeChunks prepends to each chunk.
//
// Fixes over the previous version:
//   - strings.Builder replaces O(n²) string concatenation in the loop;
//   - each word from strings.Fields counts as exactly 1 token, so the
//     redundant re-tokenization of every word is gone;
//   - a chunkSize at or below the preamble reserve no longer produces
//     empty chunks (the effective budget is clamped to at least one word).
//
// Returns nil for empty/whitespace-only content.
func splitIntoChunks(content string, chunkSize int) []string {
	const preambleSize = 250 // approximate size of the preamble in tokens

	// Clamp the per-chunk word budget so degenerate chunk sizes still make progress.
	maxWords := chunkSize - preambleSize
	if maxWords < 1 {
		maxWords = 1
	}

	var chunks []string
	var b strings.Builder
	wordCount := 0

	for _, word := range strings.Fields(content) {
		// Flush the current chunk before this word would exceed the budget.
		if wordCount+1 > maxWords && b.Len() > 0 {
			chunks = append(chunks, b.String())
			b.Reset()
			wordCount = 0
		}
		if b.Len() > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(word)
		wordCount++
	}

	if b.Len() > 0 {
		chunks = append(chunks, b.String())
	}

	return chunks
}

0 comments on commit 9294e08

Please sign in to comment.