diff --git a/ROADMAP.md b/ROADMAP.md
index 4de7b12..630801b 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,141 +1,165 @@
 # Hapax Development Roadmap
 
 ## Vision
-Build a production-grade LLM gateway that makes deploying and managing LLM infrastructure as simple as running NGINX, while maintaining enterprise-grade reliability, security, and observability.
+Hapax is the reliability layer between your code and LLM providers. We're building an open-source infrastructure layer that makes LLM operations robust and predictable. Our goal is to provide the tools and visibility you need to run AI applications with confidence, whether you're a solo developer or running large-scale deployments.
 
-## Phase 2: Production Readiness
-Focus: Enhance reliability, scalability, and deployability for production environments.
+### Core Principles
+- **Reliability**: Smart provider management for uninterrupted operations
+- **Visibility**: Clear insights into your LLM infrastructure
+- **Flexibility**: Adaptable to your security and scaling needs
+- **Simplicity**: Complex infrastructure made approachable
 
-### Performance & Operations
-- [x] Request queueing
-- [ ] QUIC Implementation
-  - Integration with quic-go library
-  - HTTP/3 support for improved latency
-  - Connection migration handling
+## v0.1.0: Foundation (Current)
+Focus: Core functionality and initial production readiness.
+
+### Core Features
+- [x] Request queueing and deduplication
+- [x] HTTP/3 (QUIC) implementation
+  - High-performance transport layer
   - 0-RTT connection establishment
+  - Connection migration
   - Multiplexing optimization
-  - Congestion control tuning
-  - UDP transport configuration
   - TLS 1.3 integration
 
-## Phase 3: Advanced Features
-Focus: Enhance security, scalability, and management capabilities.
-
-### Security & Access Control
-- [ ] Role-based access control
-  - Fine-grained permission system
-  - Role hierarchy management
-  - Resource-level permissions
-  - Token-based authentication
-  - Permission auditing
-  - Integration with identity providers
-  - Custom authorization rules
-
-### Observability & Monitoring
+### Documentation
+- [ ] Installation and Configuration
+  - Deployment guide
+  - Configuration reference
+  - Security setup
+  - Performance tuning
+- [ ] API Documentation
+  - Endpoint specifications
+  - Request/response formats
+  - Error handling
+  - Authentication
+- [ ] Operations Guide
+  - Monitoring setup
+  - Metrics reference
+  - Logging guide
+  - Troubleshooting
+
+## v0.2.0: Enterprise Observability
+Focus: Deep visibility and operational intelligence.
+
+### Advanced Monitoring
+- [ ] Enhanced metrics collection
+  - Detailed latency tracking
+  - Resource utilization metrics
+  - Provider-specific metrics
+  - Custom metric pipelines
 - [ ] Advanced audit logging
   - Structured audit events
   - Compliance-ready logging
   - Log aggregation support
   - Log retention policies
-  - Sensitive data handling
-  - Log search and analysis
-  - Real-time log streaming
+- [ ] Operational dashboards
+  - Real-time system visibility
+  - Performance analytics
+  - Health monitoring
+  - Alert management
 
-### Scalability & Distribution
+### Security Enhancements
+- [ ] Role-based access control
+  - Fine-grained permissions
+  - Resource-level access
+  - Audit trails
+  - Identity provider integration
+- [ ] Enhanced security features
+  - Request validation
+  - Rate limiting
+  - Token management
+  - Security event monitoring
+
+## v0.3.0: Enterprise Scale
+Focus: Horizontal scaling and high availability.
+
+### Distributed Architecture
 - [ ] Cluster mode
   - Leader election
   - State synchronization
-  - Cluster health monitoring
   - Node auto-discovery
-  - Load distribution
-  - Failure recovery
-  - Cross-node request routing
-
-### Request Management
-- [ ] Advanced rate limiting
-  - Dynamic rate adjustment
-  - Custom rate limit rules
-  - Rate limit sharing across cluster
-  - Quota management
-  - Usage analytics
-  - Client notification system
-
-### Performance Features
-- [ ] Response caching
-  - Cache strategy configuration
-  - Cache invalidation rules
-  - Cache warming
+  - Cross-node routing
+- [ ] Advanced request management
+  - Dynamic rate limiting
+  - Request quotas
+  - Load balancing
+  - Circuit breaking
+- [ ] Distributed caching
+  - Cache strategies
+  - Invalidation rules
   - Memory management
-  - Cache statistics
-  - Distributed caching support
+  - Cache analytics
 
-### Request Routing
-- [ ] Custom routing rules
+### Enterprise Integration
+- [ ] Advanced routing
   - Content-based routing
-  - A/B testing support
   - Traffic splitting
   - Request transformation
-  - Response modification
-  - Custom middleware chains
+  - Custom middleware
+- [ ] Provider management
+  - Multi-provider failover
+  - Provider health tracking
+  - Cost optimization
+  - Usage analytics
 
-## Phase 4: Production Scale
-Focus: Large-scale deployment features and optimizations.
+## v1.0.0: Production Scale
+Focus: Mission-critical deployment capabilities.
 
-### Performance
-- [ ] Performance optimization
+### Performance & Reliability
+- [ ] Advanced performance features
   - Connection pooling
   - Request batching
-  - Response streaming optimization
-  - Memory usage optimization
-  - CPU utilization improvements
-  - Network efficiency enhancements
-
-### Management
-- [ ] Admin dashboard
-  - Real-time monitoring
-  - Configuration management
-  - User management
-  - Usage analytics
-  - System health overview
-  - Alert management
-
-### Operations
+  - Memory optimization
+  - CPU optimization
+- [ ] Reliability enhancements
+  - Automated failover
+  - Self-healing
+  - Predictive scaling
+  - Performance prediction
+
+### Enterprise Operations
 - [ ] Cost management
-  - Usage tracking per client
-  - Cost allocation
+  - Usage tracking
   - Budget controls
-  - Cost optimization suggestions
-  - Billing integration
+  - Cost allocation
   - Usage forecasting
-
-- [ ] SLA monitoring
-  - SLA definition and tracking
-  - Availability metrics
-  - Performance metrics
-  - Custom SLA rules
-  - SLA violation alerts
-  - Historical SLA reporting
+- [ ] SLA management
+  - SLA definition
+  - Performance tracking
+  - Availability monitoring
+  - Compliance reporting
+
+### Advanced Features
+- [ ] Multi-region support
+  - Geographic routing
+  - Regional failover
+  - Data sovereignty
+  - Cross-region analytics
+- [ ] Advanced security
+  - Zero-trust architecture
+  - Advanced threat detection
+  - Security analytics
+  - Compliance automation
 
 ## Success Metrics
-- Installation time < 5 minutes
-- Configuration requires no code changes
-- 99.9% uptime
-- < 100ms added latency
+- Sub-minute deployment time
+- Zero-touch configuration
+- 99.99% availability
+- < 50ms added latency
 - Zero security vulnerabilities
 - Automatic failure recovery
-- QUIC/HTTP3 latency improvements
+- Complete operational visibility
 
 ## Future Considerations
-- Multi-region support
+- Edge computing integration
 - Custom model hosting
 - Model performance analytics
-- Fine-tuning integration
-- Hybrid deployment support
-- Edge computing integration
+- Fine-tuning infrastructure
+- Hybrid deployment models
 - Advanced protocol support
 
 ## Notes
-- Security and reliability improvements will be ongoing
-- Each feature includes appropriate testing and documentation
-- Regular security audits throughout development
-- Features may be reprioritized based on user feedback
+- Security and reliability are continuous priorities
+- Each feature includes comprehensive testing and documentation
+- Regular security audits are mandatory
+- Features may be reprioritized based on enterprise requirements
diff --git a/cmd/hapax/main.go b/cmd/hapax/main.go
index 5ad753f..f2635d4 100644
--- a/cmd/hapax/main.go
+++ b/cmd/hapax/main.go
@@ -20,7 +20,7 @@ var (
 	version = flag.Bool("version", false, "Print version and exit")
 )
 
-const Version = "v0.0.24"
+const Version = "v0.0.25"
 
 func main() {
 	flag.Parse()
diff --git a/server/server.go b/server/server.go
index 7b1fcec..7519662 100644
--- a/server/server.go
+++ b/server/server.go
@@ -196,21 +196,18 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	// Create new handler and router
-	handler := NewRouter(s.llm, cfg, s.logger)
-
-	// Create new HTTP server with updated config
+	// Create new HTTP server instance
 	newServer := &http.Server{
 		Addr:           fmt.Sprintf(":%d", cfg.Server.Port),
-		Handler:        handler,
+		Handler:        NewRouter(s.llm, cfg, s.logger),
 		ReadTimeout:    cfg.Server.ReadTimeout,
 		WriteTimeout:   cfg.Server.WriteTimeout,
 		MaxHeaderBytes: cfg.Server.MaxHeaderBytes,
 	}
 
-	// Create HTTP/3 server if enabled
+	// Create new HTTP/3 server if enabled
 	var newHTTP3Server *http3.Server
-	if cfg.Server.HTTP3 != nil && cfg.Server.HTTP3.Enabled {
+	if cfg.Server.HTTP3.Enabled {
 		quicConfig := &quic.Config{
 			MaxStreamReceiveWindow:     cfg.Server.HTTP3.MaxStreamReceiveWindow,
 			MaxConnectionReceiveWindow: cfg.Server.HTTP3.MaxConnectionReceiveWindow,
@@ -221,10 +218,10 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 		}
 
 		// If 0-RTT is enabled but replay is not allowed, wrap the handler
-		var http3Handler http.Handler = handler
+		var http3Handler http.Handler = NewRouter(s.llm, cfg, s.logger)
 		if cfg.Server.HTTP3.Enable0RTT && !cfg.Server.HTTP3.Allow0RTTReplay {
 			http3Handler = &replayProtectionHandler{
-				handler: handler,
+				handler: NewRouter(s.llm, cfg, s.logger),
 				logger:  s.logger,
 				seen:    sync.Map{},
 				maxSize: cfg.Server.HTTP3.Max0RTTSize,
@@ -247,8 +244,8 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 		}
 	}
 
-	// If server is running, we need to stop it and start the new one
-	if s.running {
+	wasRunning := s.running
+	if wasRunning {
 		// Gracefully shutdown existing server
 		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
@@ -263,6 +260,7 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 				s.logger.Error("Failed to close HTTP/3 server", zap.Error(err))
 			}
 		}
+		s.running = false
 	}
 
 	// Update server instances
@@ -270,7 +268,8 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 	s.http3Server = newHTTP3Server
 
 	// If we were running before, start the new server
-	if s.running {
+	if wasRunning {
+		s.running = true
 		go func() {
 			if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed {
 				s.logger.Error("HTTP server error", zap.Error(err))
@@ -337,28 +336,12 @@ func (s *Server) handleConfigUpdates(configChan <-chan *config.Config) {
 				s.llm = newLLM
 			}
 
-			// Create temporary server with new config
-			tempServer := &http.Server{}
+			// Update server configuration
 			if err := s.updateServerConfig(newConfig); err != nil {
 				s.logger.Error("Failed to update server config", zap.Error(err))
 				continue
 			}
 
-			// Gracefully shutdown existing connections
-			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-			if err := s.httpServer.Shutdown(ctx); err != nil {
-				s.logger.Error("Error during server shutdown", zap.Error(err))
-			}
-			cancel()
-
-			// Start server with new configuration
-			s.httpServer = tempServer
-			go func() {
-				if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed {
-					s.logger.Error("Server error", zap.Error(err))
-				}
-			}()
-
 			s.logger.Info("Server restarted with new configuration")
 		}
 	}
@@ -367,6 +350,10 @@ func (s *Server) handleConfigUpdates(configChan <-chan *config.Config) {
 // It handles graceful shutdown when the context is cancelled, ensuring that all connections are properly closed before exiting.
 func (s *Server) Start(ctx context.Context) error {
 	s.mu.Lock()
+	if s.running {
+		s.mu.Unlock()
+		return fmt.Errorf("server is already running")
+	}
 	s.running = true
 	s.mu.Unlock()
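Note on the 0-RTT branch in `updateServerConfig` above: `replayProtectionHandler` is referenced but not defined in this diff. Purely as a minimal sketch of the wrap-the-router pattern, assuming requests are deduplicated by a client-supplied request ID and oversized early-data bodies are rejected (the field names come from the struct literal above; the header name, status codes, and dedup policy are illustrative assumptions, not the project's actual implementation):

```go
package server

import (
	"net/http"
	"sync"

	"go.uber.org/zap"
)

// replaySketchHandler is a hypothetical stand-in for replayProtectionHandler,
// shown only to illustrate how an http.Handler wrapper can guard 0-RTT requests.
type replaySketchHandler struct {
	handler http.Handler
	logger  *zap.Logger
	seen    sync.Map // request ID -> struct{}, used to detect replayed early-data requests
	maxSize uint64   // assumed cap on 0-RTT request bodies (cf. Max0RTTSize)
}

func (h *replaySketchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// Assumption: clients send an idempotency key; a repeated key is treated as a replay.
	if id := r.Header.Get("X-Request-ID"); id != "" {
		if _, replayed := h.seen.LoadOrStore(id, struct{}{}); replayed {
			h.logger.Warn("rejecting possible 0-RTT replay", zap.String("request_id", id))
			http.Error(w, "request already seen", http.StatusTooEarly)
			return
		}
	}
	// Assumption: early-data requests larger than maxSize are refused outright.
	if h.maxSize > 0 && r.ContentLength > int64(h.maxSize) {
		http.Error(w, "0-RTT request body too large", http.StatusRequestEntityTooLarge)
		return
	}
	h.handler.ServeHTTP(w, r)
}
```

The change itself is narrower: instead of sharing one `handler` variable, `updateServerConfig` now calls `NewRouter` separately for the HTTP server, the HTTP/3 server, and the replay wrapper, so each transport gets its own router built from the reloaded configuration.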
s.logger.Error("Failed to update server config", zap.Error(err)) continue } - // Gracefully shutdown existing connections - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - if err := s.httpServer.Shutdown(ctx); err != nil { - s.logger.Error("Error during server shutdown", zap.Error(err)) - } - cancel() - - // Start server with new configuration - s.httpServer = tempServer - go func() { - if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed { - s.logger.Error("Server error", zap.Error(err)) - } - }() - s.logger.Info("Server restarted with new configuration") } } @@ -367,6 +350,10 @@ func (s *Server) handleConfigUpdates(configChan <-chan *config.Config) { // It handles graceful shutdown when the context is cancelled, ensuring that all connections are properly closed before exiting. func (s *Server) Start(ctx context.Context) error { s.mu.Lock() + if s.running { + s.mu.Unlock() + return fmt.Errorf("server is already running") + } s.running = true s.mu.Unlock() diff --git a/server/server_test.go b/server/server_test.go index 7e490ed..1e96aee 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -313,6 +313,19 @@ func TestServer(t *testing.T) { WriteTimeout: 10 * time.Second, MaxHeaderBytes: 1 << 20, ShutdownTimeout: 30 * time.Second, + HTTP3: &config.HTTP3Config{ + Enabled: false, + Port: 8443, + MaxStreamReceiveWindow: 10 * 1024 * 1024, + MaxConnectionReceiveWindow: 15 * 1024 * 1024, + MaxBiStreamsConcurrent: 100, + MaxUniStreamsConcurrent: 100, + Enable0RTT: false, + Allow0RTTReplay: false, + Max0RTTSize: 1024 * 1024, + UDPReceiveBufferSize: 1024 * 1024, + IdleTimeout: 30 * time.Second, + }, }, LLM: config.LLMConfig{ Provider: "mock", @@ -335,15 +348,9 @@ func TestServer(t *testing.T) { mockWatcher := mocks.NewMockConfigWatcher(cfg) // Create server with mocked dependencies - server := &Server{ - logger: logger, - config: mockWatcher, - llm: mockLLM, // Store the mock LLM in the server - } - - // Initialize server with current config - if err := server.updateServerConfig(cfg); err != nil { - t.Fatalf("Failed to update server config: %v", err) + server, err := NewServerWithConfig(mockWatcher, mockLLM, logger) + if err != nil { + t.Fatalf("Failed to create server: %v", err) } // Create context with cancel for server lifecycle @@ -381,6 +388,19 @@ func TestServer(t *testing.T) { WriteTimeout: 10 * time.Second, MaxHeaderBytes: 1 << 20, ShutdownTimeout: 30 * time.Second, + HTTP3: &config.HTTP3Config{ + Enabled: false, + Port: 8444, + MaxStreamReceiveWindow: 10 * 1024 * 1024, + MaxConnectionReceiveWindow: 15 * 1024 * 1024, + MaxBiStreamsConcurrent: 100, + MaxUniStreamsConcurrent: 100, + Enable0RTT: false, + Allow0RTTReplay: false, + Max0RTTSize: 1024 * 1024, + UDPReceiveBufferSize: 1024 * 1024, + IdleTimeout: 30 * time.Second, + }, }, LLM: config.LLMConfig{ Provider: "mock",