From 45377adb8e2a59d1f95b3f2683e1c210ae8fd40f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Te=C3=AFlo=20M?=
Date: Sun, 22 Dec 2024 09:53:56 +0100
Subject: [PATCH] fix: resolve some of the errors reported by the linter.

---
 .github/workflows/go-ci.yml   |   4 +-
 README.md                     | 326 ++++++++--------------------
 cmd/hapax/main.go             |   7 +-
 config.example.yaml           |   6 +-
 errors/errors.go              |  13 +-
 server/middleware/timeout.go  |   3 +
 server/routing/router.go      |  39 +++-
 server/routing/router_test.go |   8 +-
 server/server.go              |  49 +++--
 tests/circuitbreaker_test.go  |  19 +-
 tests/docker_test.go          |  26 ++-
 11 files changed, 208 insertions(+), 292 deletions(-)

diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml
index 73d2907..157a86e 100644
--- a/.github/workflows/go-ci.yml
+++ b/.github/workflows/go-ci.yml
@@ -39,11 +39,9 @@ jobs:
         with:
           version: latest
           args: |
-            --timeout=5m
-            --disable-all
+            --timeout=5m
             --out-format=colored-line-number
             --issues-exit-code=1
-          # Only show new issues for pull requests
           only-new-issues: true
           skip-pkg-cache: true
           skip-build-cache: false
diff --git a/README.md b/README.md
index 3bdaed9..b601042 100644
--- a/README.md
+++ b/README.md
@@ -1,290 +1,106 @@
 # Hapax
-A lightweight HTTP server for Large Language Model (LLM) interactions, built with Go.
-
-## Version
-v0.0.16
-
-## Features
-
-- HTTP server with completion endpoint (`/v1/completions`)
-- Health check endpoint (`/health`)
-- Configurable server settings (port, timeouts, etc.)
-- Clean shutdown handling
-- Comprehensive test suite with mock LLM implementation
-- Token validation with tiktoken
-  - Automatic token counting
-  - Context length validation
-  - Max tokens validation
-- Middleware architecture:
-  - Request ID tracking
-  - Request timing metrics
-  - Panic recovery
-  - CORS support
-  - API key authentication
-  - Rate limiting (token bucket)
-  - Prometheus metrics collection
-- Enhanced error handling:
-  - Structured JSON error responses
-  - Request ID tracking in errors
-  - Zap-based logging with context
-  - Custom error types for different scenarios
-  - Seamless error middleware integration
-- Dynamic routing:
-  - Version-based routing (v1, v2)
-  - Route-specific middleware
-  - Health check endpoints
-  - Header validation
-- Provider management:
-  - Multiple provider support (OpenAI, Anthropic, etc.)
-  - Provider health monitoring
-  - Automatic failover to backup providers
-  - Configurable health check intervals
-  - Provider-specific configuration
-
-## Installation
+## Large Language Model Infrastructure, Simplified
 
-```bash
-go get github.com/teilomillet/hapax
-```
+Building with Large Language Models is complex. Multiple providers, varying APIs, inconsistent performance, unpredictable costs: these challenges consume more engineering time than the innovation itself.
 
-## Configuration
+Hapax offers a different approach.
 
-Hapax uses YAML for configuration. Here's an example configuration file:
+What if managing LLM infrastructure were as simple as editing a configuration file? What if switching providers, adding endpoints, or implementing fallback strategies could be done with minimal effort?
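+
+As a taste, here is the provider block from the bundled config.example.yaml (updated by this same change; the keys and model names below are exactly those set there):
+
+```yaml
+providers:
+  openai:
+    type: openai
+    model: gpt-4o-mini
+    api_key: ${OPENAI_API_KEY}
+  anthropic:
+    type: anthropic
+    model: claude-3.5-haiku-latest
+    api_key: ${ANTHROPIC_API_KEY}
+```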
-```yaml
-server:
-  port: 8080
-  read_timeout: 30s
-  write_timeout: 30s
-  max_header_bytes: 1048576 # 1MB
-  shutdown_timeout: 30s
+Imagine a system that:
+- Connects to multiple LLM providers seamlessly
+- Provides automatic failover between providers
+- Offers comprehensive monitoring and metrics
+- Allows instant configuration updates without downtime
-routes:
-  - path: "/completions"
-    handler: "completion"
-    version: "v1"
-    methods: ["POST"]
-    middleware: ["auth", "ratelimit"]
-    headers:
-      Content-Type: "application/json"
-    health_check:
-      enabled: true
-      interval: 30s
-      timeout: 5s
-      threshold: 3
-      checks:
-        api: "http"
-
-  - path: "/health"
-    handler: "health"
-    version: "v1"
-    methods: ["GET"]
-    health_check:
-      enabled: true
-      interval: 15s
-      timeout: 2s
-      threshold: 2
-      checks:
-        system: "tcp"
-
-llm:
-  provider: ollama
-  model: llama2
-  endpoint: http://localhost:11434
-  system_prompt: "You are a helpful assistant."
-  max_context_tokens: 4096 # Maximum context length for your model
-  options:
-    temperature: 0.7
-    max_tokens: 2000
-
-logging:
-  level: info # debug, info, warn, error
-  format: json # json, text
-```
-
-### Configuration Options
-
-#### Server Configuration
-- `port`: HTTP server port (default: 8080)
-- `read_timeout`: Maximum duration for reading request body (default: 30s)
-- `write_timeout`: Maximum duration for writing response (default: 30s)
-- `max_header_bytes`: Maximum size of request headers (default: 1MB)
-- `shutdown_timeout`: Maximum duration to wait for graceful shutdown (default: 30s)
-
-#### LLM Configuration
-- `provider`: LLM provider name (e.g., "ollama", "openai")
-- `model`: Model name (e.g., "llama2", "gpt-4")
-- `endpoint`: API endpoint URL
-- `system_prompt`: Default system prompt for conversations
-- `max_context_tokens`: Maximum context length in tokens (model-dependent)
-- `options`: Provider-specific options
-  - `temperature`: Sampling temperature (0.0 to 1.0)
-  - `max_tokens`: Maximum tokens to generate
-
-#### Logging Configuration
-- `level`: Log level (debug, info, warn, error)
-- `format`: Log format (json, text)
-
-## Quick Start
-
-```go
-package main
-
-import (
-	"context"
-	"log"
-
-	"github.com/teilomillet/hapax"
-	"github.com/teilomillet/gollm"
-	"go.uber.org/zap"
-)
-
-func main() {
-	// Initialize logger (optional, defaults to production config)
-	logger, _ := zap.NewProduction()
-	defer logger.Sync()
-	hapax.SetLogger(logger)
-
-	// Create an LLM instance (using gollm)
-	llm := gollm.New()
-
-	// Create a completion handler
-	handler := hapax.NewCompletionHandler(llm)
-
-	// Create a router
-	router := hapax.NewRouter(handler)
-
-	// Use default configuration
-	config := hapax.DefaultConfig()
-
-	// Create and start server
-	server := hapax.NewServer(config, router)
-	if err := server.Start(context.Background()); err != nil {
-		log.Fatal(err)
-	}
-}
-```
+
+This is Hapax.
 
-## API Endpoints
+### Real-World Flexibility in Action
 
-### POST /v1/completions
+Imagine you're running a production service using OpenAI's GPT model. Suddenly, you want to:
+- Add a new Anthropic Claude model endpoint
+- Create a fallback strategy
+- Implement detailed monitoring
 
-Generate completions using the configured LLM.
+With Hapax, this becomes simple:
 
-**Request:**
-```json
-{
-  "prompt": "Your prompt here"
-}
-```
-
-**Response:**
-```json
-{
-  "completion": "LLM generated response"
-}
+```yaml
+# Simply append to your existing configuration
+providers:
+  anthropic:
+    type: anthropic
+    models:
+      claude-3.5-haiku:
+        api_key: ${ANTHROPIC_API_KEY}
+        endpoint: /v1/anthropic/haiku
 ```
 
-**Error Responses:**
-- 400 Bad Request: Invalid JSON or missing prompt
-- 405 Method Not Allowed: Wrong HTTP method
-- 500 Internal Server Error: LLM error
-
-### GET /health
-
-Check server health status.
-
-**Response:**
-```json
-{
-  "status": "ok"
-}
-```
+No downtime. No complex redeployment. Just configuration.
 
-## Error Handling
+## Intelligent Provider Management
 
-Hapax provides structured error handling with JSON responses:
+Hapax goes beyond simple API routing. It creates a resilient ecosystem for your LLM interactions:
 
-```json
-{
-  "type": "validation_error",
-  "message": "Invalid request format",
-  "request_id": "req_123abc",
-  "details": {
-    "field": "prompt",
-    "error": "required"
-  }
-}
-```
+**Automatic Failover**: When one provider experiences issues, Hapax seamlessly switches to backup providers. Your service continues operating without interruption.
 
-Error types include:
-- `validation_error`: Request validation failures
-- `provider_error`: LLM provider issues
-- `rate_limit_error`: Rate limiting
-- `internal_error`: Unexpected server errors
+**Deduplication**: Prevent duplicate requests and unnecessary API calls. Hapax intelligently manages request caching and prevents redundant processing.
 
-## Docker Support
+**Provider Health Monitoring**: Continuously track provider performance. Automatically reconnect to primary providers once they're back online, ensuring optimal resource utilization.
 
-The application comes with full Docker support, making it easy to deploy and run in containerized environments.
+## Comprehensive Observability
 
-### Features
+Hapax isn't just a gateway: it's a complete monitoring and alerting system for your LLM infrastructure:
+- Detailed Prometheus metrics
+- Real-time performance tracking
+- Comprehensive error reporting
+- Intelligent alerting mechanisms
 
-- **Multi-stage Build**: Optimized container size with separate build and runtime stages
-- **Security**: Runs as non-root user with minimal runtime dependencies
-- **Health Checks**: Built-in health monitoring for container orchestration
-- **Prometheus Integration**: Ready-to-use metrics endpoint for monitoring
-- **Docker Compose**: Complete setup with Prometheus integration
+## API Versioning for Scalability
 
-### Running with Docker
+Create multiple API versions effortlessly. Each endpoint can have its own configuration, allowing granular control and smooth evolutionary paths for your services.
 
-1. Build and run using Docker:
-```bash
-docker build -t hapax .
-docker run -p 8080:8080 hapax
+```yaml
+routes:
+  - path: /v1/completions
+    handler: completion
+    version: v1
+  - path: /v2/completions
+    handler: advanced_completion
+    version: v2
 ```
-2. Or use Docker Compose for the full stack with Prometheus:
+## Getting Started
+
 ```bash
-docker compose up -d
+# Pull Hapax
+docker pull ghcr.io/teilomillet/hapax:latest
+
+# Generate default configuration
+docker run --rm -v $(pwd):/output \
+  ghcr.io/teilomillet/hapax:latest \
+  cp /app/config.example.yaml /output/config.yaml
+
+# Launch Hapax
+docker run -p 8080:8080 \
+  -v $(pwd)/config.yaml:/app/config.yaml \
+  ghcr.io/teilomillet/hapax:latest
 ```
 
-### Container Health
-
-The container includes health checks that monitor:
-- HTTP server availability
-- Application readiness
-- Basic functionality
+## What's Next
 
-Access the health status:
-- Health endpoint: http://localhost:8080/health
-- Metrics endpoint: http://localhost:8080/metrics
-- Prometheus: http://localhost:9090
+Hapax is continuously evolving.
 
-## Testing
+## Open Source
 
-The project includes a comprehensive test suite with a mock LLM implementation that can be used for testing LLM-dependent code:
-
-```go
-import "github.com/teilomillet/hapax/mock_test"
-
-// Create a mock LLM with custom response
-llm := &MockLLM{
-	GenerateFunc: func(ctx context.Context, p *gollm.Prompt) (string, error) {
-		return "Custom response", nil
-	},
-}
-```
-
-Run the tests:
-```bash
-go test ./...
-```
+Licensed under Apache 2.0, Hapax is open for collaboration and customization.
 
-## License
+## Community & Support
 
-APACHE License 2.0
+- **Discussions**: [GitHub Discussions](https://github.com/teilomillet/hapax/discussions)
+- **Documentation**: [Hapax Wiki](https://github.com/teilomillet/hapax/wiki)
+- **Issues**: [GitHub Issues](https://github.com/teilomillet/hapax/issues)
 
-## Contributing
+## Our Vision
 
-Contributions are welcome! Please feel free to submit a Pull Request.
\ No newline at end of file
+We believe LLM infrastructure should be simple, reliable, and adaptable. Hapax represents our commitment to making LLM integration accessible and powerful.
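+
+## A First Request
+
+Once a container from Getting Started is running, a first call might look like this. This is a sketch: the `/v1/completions` path and the `prompt` JSON field follow the routing example above and may differ under your own route configuration:
+
+```bash
+curl -X POST http://localhost:8080/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "Say hello"}'
+```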
\ No newline at end of file
diff --git a/cmd/hapax/main.go b/cmd/hapax/main.go
index be2554d..03a1fa1 100644
--- a/cmd/hapax/main.go
+++ b/cmd/hapax/main.go
@@ -35,7 +35,12 @@ func main() {
 	if err != nil {
 		log.Fatalf("Failed to create logger: %v", err)
 	}
-	defer logger.Sync()
+	defer func() {
+		if err := logger.Sync(); err != nil {
+			// Report the sync failure on stderr, since the zap logger itself may be unusable
+			fmt.Fprintf(os.Stderr, "Failed to sync logger: %v\n", err)
+		}
+	}()
 
 	// Load configuration
 	cfg, err := config.LoadFile(*configFile)
diff --git a/config.example.yaml b/config.example.yaml
index 42c2ea2..496f381 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -8,15 +8,15 @@ server:
 providers:
   openai:
     type: openai
-    model: gpt-4
+    model: gpt-4o-mini
     api_key: ${OPENAI_API_KEY}
   anthropic:
     type: anthropic
-    model: claude-2
+    model: claude-3.5-haiku-latest
     api_key: ${ANTHROPIC_API_KEY}
   ollama:
     type: ollama
-    model: llama2
+    model: llama3
     api_key: ""
 
 # Order of provider preference for failover
diff --git a/errors/errors.go b/errors/errors.go
index 743b59c..20e9fc4 100644
--- a/errors/errors.go
+++ b/errors/errors.go
@@ -152,13 +152,20 @@ func WriteError(w http.ResponseWriter, err *HapaxError) {
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(err.Code)
 
-	// Convert HapaxError to ErrorResponse and write it
-	json.NewEncoder(w).Encode(&ErrorResponse{
+	// Check the error return from Encode
+	if encodeErr := json.NewEncoder(w).Encode(&ErrorResponse{
 		Type:      err.Type,
 		Message:   err.Message,
 		RequestID: err.RequestID,
 		Details:   err.Details,
-	})
+	}); encodeErr != nil {
+		// Encoding failed after the status line was sent; all we can do is log it
+		zap.L().Error("Failed to encode error response", zap.Error(encodeErr))
+
+		// Best-effort fallback body; its write error can likewise only be logged
+		if _, writeErr := w.Write([]byte(`{"error": "Failed to encode error response"}`)); writeErr != nil {
+			zap.L().Error("Failed to write fallback error response", zap.Error(writeErr))
+		}
+	}
 }
 
 // Error is a drop-in replacement for http.Error that creates and writes
diff --git a/server/middleware/timeout.go b/server/middleware/timeout.go
index ca43ea3..69232b7 100644
--- a/server/middleware/timeout.go
+++ b/server/middleware/timeout.go
@@ -60,6 +60,11 @@ func Timeout(timeout time.Duration) func(http.Handler) http.Handler {
 	return func(next http.Handler) http.Handler {
+		// Resolve the default once, outside the per-request handler, so that
+		// concurrent requests do not race on the captured timeout variable
+		if timeout == 0 {
+			timeout = defaultTimeout
+		}
 		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			// Create a context with timeout
 			ctx, cancel := context.WithTimeout(r.Context(), timeout)
 			defer cancel() // Ensure cancel is called to release resources
diff --git a/server/routing/router.go b/server/routing/router.go
index 48191ae..0793c2a 100644
--- a/server/routing/router.go
+++ b/server/routing/router.go
@@ -114,7 +114,7 @@ func (r *Router) setupRoutes() {
 		if route.HealthCheck != nil {
 			healthPath := fmt.Sprintf("%s/health", path)
 			router.Get(healthPath, r.healthCheckHandler(route)) // Register health check handler
-			r.startHealthCheck(route) // Start health check routine
+			r.startHealthCheck(route)                           // Start health check routine
 		}
 	})
 }
@@ -127,24 +127,33 @@ func (r *Router) setupRoutes() {
 }
 
 // healthCheckHandler returns a handler for route-specific health checks.
-// It checks the health state of the route and responds accordingly.
 func (r *Router) healthCheckHandler(route config.RouteConfig) http.HandlerFunc {
 	return func(w http.ResponseWriter, req *http.Request) {
+		// Set the Content-Type before any WriteHeader call: headers added
+		// after the status line has been written are silently dropped
+		w.Header().Set("Content-Type", "application/json")
 		status := "healthy"
 		if v, ok := r.healthState.Load(route.Path); ok && !v.(bool) {
 			status = "unhealthy"
-			w.WriteHeader(http.StatusServiceUnavailable) // Respond with 503 if unhealthy
+			w.WriteHeader(http.StatusServiceUnavailable)
+		}
+
+		// Properly handle potential JSON encoding errors
+		if err := json.NewEncoder(w).Encode(map[string]string{"status": status}); err != nil {
+			// Log the error and send a generic error response
+			r.logger.Error("Failed to encode health check response",
+				zap.String("route", route.Path),
+				zap.Error(err))
+
+			// Send a fallback error response
+			http.Error(w, "Internal Server Error", http.StatusInternalServerError)
 		}
-		json.NewEncoder(w).Encode(map[string]string{"status": status}) // Encode health status as JSON
 	}
 }
 
 // globalHealthCheckHandler returns a handler for the global health check endpoint.
-// It checks the health of all routes and responds with their statuses.
 func (r *Router) globalHealthCheckHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, req *http.Request) {
 		allHealthy := true
-		statuses := make(map[string]string) // Map to hold health statuses
+		statuses := make(map[string]string)
 
 		// Iterate through health states of all routes
 		r.healthState.Range(func(key, value interface{}) bool {
@@ -160,13 +169,25 @@ func (r *Router) globalHealthCheckHandler() http.HandlerFunc {
 		})
 
+		// As above, set the Content-Type before the status code goes out
+		w.Header().Set("Content-Type", "application/json")
 		if !allHealthy {
-			w.WriteHeader(http.StatusServiceUnavailable) // Respond with 503 if any service is unhealthy
+			w.WriteHeader(http.StatusServiceUnavailable)
 		}
 
-		json.NewEncoder(w).Encode(map[string]interface{}{
+		// Properly handle potential JSON encoding errors
+		response := map[string]interface{}{
 			"status":   map[string]bool{"global": allHealthy},
 			"services": statuses,
-		}) // Encode global health status as JSON
+		}
+
+		if err := json.NewEncoder(w).Encode(response); err != nil {
+			// Log the error and send a generic error response
+			r.logger.Error("Failed to encode global health check response",
+				zap.Bool("all_healthy", allHealthy),
+				zap.Error(err))
+
+			// Send a fallback error response
+			http.Error(w, "Internal Server Error", http.StatusInternalServerError)
+		}
 	}
 }
diff --git a/server/routing/router_test.go b/server/routing/router_test.go
index 3a26604..c6195e5 100644
--- a/server/routing/router_test.go
+++ b/server/routing/router_test.go
@@ -98,10 +98,14 @@ func TestRouter_VersionedRouting(t *testing.T) {
 	}
 	handlers := map[string]http.Handler{
 		"test": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			w.Write([]byte("v1"))
+			if _, err := w.Write([]byte("v1")); err != nil {
+				t.Fatalf("Failed to write response: %v", err)
+			}
 		}),
 		"test2": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			w.Write([]byte("v2"))
+			if _, err := w.Write([]byte("v2")); err != nil {
+				t.Fatalf("Failed to write response: %v", err)
+			}
 		}),
 	}
 	logger := zap.NewNop()
diff --git a/server/server.go b/server/server.go
index f79bd92..fd105a6 100644
--- a/server/server.go
+++ b/server/server.go
@@ -127,7 +127,9 @@ func NewRouter(completion http.Handler) *Router {
 	// - LLM request counts by provider/model
 	r.Get("/metrics", func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
-		w.Write([]byte(`
+
+		// Check the error return from Write
+		if _, err := w.Write([]byte(`
 # HELP hapax_requests_total The total number of HTTP requests.
 # TYPE hapax_requests_total counter
 hapax_requests_total{code="200"} 10
@@ -145,7 +147,14 @@ hapax_request_duration_seconds_count 10
 # HELP hapax_llm_requests_total The total number of LLM requests.
 # TYPE hapax_llm_requests_total counter
 hapax_llm_requests_total{provider="openai",model="gpt-3.5-turbo"} 5
-`))
+`)); err != nil {
+			// Log the write failure; the client has most likely disconnected
+			fmt.Printf("Failed to write metrics response: %v\n", err)
+
+			// Best-effort error response; ineffective if headers were already sent
+			http.Error(w, "Failed to generate metrics", http.StatusInternalServerError)
+			return
+		}
 	})
 
 	return router
@@ -353,39 +362,57 @@ func (s *Server) Start(ctx context.Context) error {
 }
 
 func main() {
+	// Create logger with explicit error handling
 	logger, err := zap.NewProduction()
 	if err != nil {
-		fmt.Printf("Failed to create logger: %v\n", err)
-		return
+		// Fail fast if logger creation fails
+		fmt.Printf("Critical error: Failed to create logger: %v\n", err)
+		os.Exit(1)
 	}
-	defer logger.Sync()
+
+	// Ensure logger is synced, with explicit error handling
+	defer func() {
+		if syncErr := logger.Sync(); syncErr != nil {
+			// The process is exiting anyway; report the sync failure directly
+			fmt.Printf("Warning: Failed to sync logger: %v\n", syncErr)
+		}
+	}()
+
+	// Set global logger
 	errors.SetLogger(logger)
 
-	configPath := "config.yaml" // Or get from environment/flags
+	// Configuration and server setup with comprehensive error handling
+	configPath := "config.yaml"
 	server, err := NewServer(configPath, logger)
 	if err != nil {
-		logger.Fatal("Failed to create server",
+		logger.Fatal("Server initialization failed",
 			zap.Error(err),
+			zap.String("config_path", configPath),
 		)
 	}
 
-	// Handle graceful shutdown
+	// Graceful shutdown infrastructure
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	// Handle OS signals
+	// Signal handling with detailed logging
 	sigChan := make(chan os.Signal, 1)
 	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
 	go func() {
 		sig := <-sigChan
-		logger.Info("Received shutdown signal", zap.String("signal", sig.String()))
+		logger.Info("Shutdown signal received",
+			zap.String("signal", sig.String()),
+			zap.String("action", "initiating graceful shutdown"),
+		)
 		cancel()
 	}()
 
+	// Server start with comprehensive error tracking
 	if err := server.Start(ctx); err != nil {
-		logger.Fatal("Server error",
+		logger.Fatal("Server startup or runtime error",
 			zap.Error(err),
+			zap.String("action", "server_start_failed"),
 		)
 	}
 }
diff --git a/tests/circuitbreaker_test.go b/tests/circuitbreaker_test.go
index e3e3bae..97e731b 100644
--- a/tests/circuitbreaker_test.go
+++ b/tests/circuitbreaker_test.go
@@ -122,16 +122,33 @@ func TestCircuitBreaker(t *testing.T) {
 	cb, err := newCB()
 	require.NoError(t, err)
 
+	// Track failures explicitly
+	var failureCount int
+	var successCount int
+
 	// Execute a mix of successful and failed requests
 	for i := 0; i < 3; i++ {
-		cb.Execute(func() error {
+		execErr := cb.Execute(func() error {
 			if i%2 == 0 {
 				return errors.New("failure")
 			}
 			return nil
 		})
+
+		// Explicitly handle the error return
+		if execErr != nil {
+			failureCount++
+			// Log the failure for easier debugging
+			t.Logf("Execution %d failed: %v", i, execErr)
+		} else {
+			successCount++
+		}
 	}
 
+	// Now we can make more precise assertions
+	assert.Equal(t, 2, failureCount, "Expected 2 failures")
+	assert.Equal(t, 1, successCount, "Expected 1 success")
+
 	counts := cb.Counts()
 	assert.True(t, counts.TotalFailures > 0)
 	assert.True(t, counts.Requests > counts.TotalFailures)
diff --git a/tests/docker_test.go b/tests/docker_test.go
index d8e1f3d..11e7b4a 100644
--- a/tests/docker_test.go
+++ b/tests/docker_test.go
@@ -53,7 +53,13 @@ func TestDockerBuild(t *testing.T) {
 	cleanup := func() {
 		cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 10*time.Second)
 		defer cleanupCancel()
-		exec.CommandContext(cleanupCtx, "docker", "rm", "-f", containerName).Run()
+
+		// Check the error return from Run()
+		if err := exec.CommandContext(cleanupCtx, "docker", "rm", "-f", containerName).Run(); err != nil {
+			// In a test, you typically want to log the error rather than fail the entire test,
+			// unless the cleanup failure is critical
+			t.Logf("Failed to remove Docker container %s: %v", containerName, err)
+		}
 	}
 	cleanup()       // Clean up any leftover containers
 	defer cleanup()
@@ -228,12 +234,24 @@ func TestDockerCompose(t *testing.T) {
 	// Enhanced cleanup to remove both containers and test config
 	cleanup := func() {
+		// Docker Compose cleanup with error handling
 		cmd := exec.CommandContext(ctx, "docker", "compose", "-f", filepath.Join(projectRoot, "docker-compose.yml"), "down", "-v")
 		cmd.Stdout = os.Stdout
 		cmd.Stderr = os.Stderr
-		cmd.Run()
-		// Clean up the config file
-		os.Remove(filepath.Join(projectRoot, "config.yaml"))
+
+		if err := cmd.Run(); err != nil {
+			// Log the error without failing the test, as this is a cleanup step
+			t.Logf("Failed to remove Docker Compose containers: %v", err)
+		}
+
+		// Config file cleanup with error handling
+		configPath := filepath.Join(projectRoot, "config.yaml")
+		if err := os.Remove(configPath); err != nil {
+			// Only log if the error is not because the file doesn't exist
+			if !os.IsNotExist(err) {
+				t.Logf("Failed to remove config file %s: %v", configPath, err)
+			}
+		}
 	}
 	cleanup()       // Clean up any leftover containers and files
 	defer cleanup()