feat: enhance HTTP/3 configuration handling and server lifecycle

fix: resolve race conditions in server state management and graceful shutdown

- Add proper HTTP/3 configuration in tests
- Consolidate server shutdown logic
- Fix server state management with proper mutex locking
- Update ROADMAP.md with clearer project vision
- Bump version to v0.0.25
teilomillet · Jan 3, 2025 · 5ec7c64 · 5ec7c64
1 parent 984d13f
commit 5ec7c64
Show file tree

Hide file tree

Showing 4 changed files with 169 additions and 138 deletions.
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -1,141 +1,165 @@
 # Hapax Development Roadmap
 
 ## Vision
-Build a production-grade LLM gateway that makes deploying and managing LLM infrastructure as simple as running NGINX, while maintaining enterprise-grade reliability, security, and observability.
+Hapax is the reliability layer between your code and LLM providers. We're building an open-source infrastructure layer that makes LLM operations robust and predictable. Our goal is to provide the tools and visibility you need to run AI applications with confidence, whether you're a solo developer or running large-scale deployments.
 
-## Phase 2: Production Readiness
-Focus: Enhance reliability, scalability, and deployability for production environments.
+### Core Principles
+- **Reliability**: Smart provider management for uninterrupted operations
+- **Visibility**: Clear insights into your LLM infrastructure
+- **Flexibility**: Adaptable to your security and scaling needs
+- **Simplicity**: Complex infrastructure made approachable
 
-### Performance & Operations
-- [x] Request queueing
-- [ ] QUIC Implementation
-  - Integration with quic-go library
-  - HTTP/3 support for improved latency
-  - Connection migration handling
+## v0.1.0: Foundation (Current)
+Focus: Core functionality and initial production readiness.
+
+### Core Features
+- [x] Request queueing and deduplication
+- [x] HTTP/3 (QUIC) implementation
+  - High-performance transport layer
   - 0-RTT connection establishment
+  - Connection migration
   - Multiplexing optimization
-  - Congestion control tuning
-  - UDP transport configuration
   - TLS 1.3 integration
 
-## Phase 3: Advanced Features
-Focus: Enhance security, scalability, and management capabilities.
-
-### Security & Access Control
-- [ ] Role-based access control
-  - Fine-grained permission system
-  - Role hierarchy management
-  - Resource-level permissions
-  - Token-based authentication
-  - Permission auditing
-  - Integration with identity providers
-  - Custom authorization rules
-
-### Observability & Monitoring
+### Documentation
+- [ ] Installation and Configuration
+  - Deployment guide
+  - Configuration reference
+  - Security setup
+  - Performance tuning
+- [ ] API Documentation
+  - Endpoint specifications
+  - Request/response formats
+  - Error handling
+  - Authentication
+- [ ] Operations Guide
+  - Monitoring setup
+  - Metrics reference
+  - Logging guide
+  - Troubleshooting
+
+## v0.2.0: Enterprise Observability
+Focus: Deep visibility and operational intelligence.
+
+### Advanced Monitoring
+- [ ] Enhanced metrics collection
+  - Detailed latency tracking
+  - Resource utilization metrics
+  - Provider-specific metrics
+  - Custom metric pipelines
 - [ ] Advanced audit logging
   - Structured audit events
   - Compliance-ready logging
   - Log aggregation support
   - Log retention policies
-  - Sensitive data handling
-  - Log search and analysis
-  - Real-time log streaming
+- [ ] Operational dashboards
+  - Real-time system visibility
+  - Performance analytics
+  - Health monitoring
+  - Alert management
 
-### Scalability & Distribution
+### Security Enhancements
+- [ ] Role-based access control
+  - Fine-grained permissions
+  - Resource-level access
+  - Audit trails
+  - Identity provider integration
+- [ ] Enhanced security features
+  - Request validation
+  - Rate limiting
+  - Token management
+  - Security event monitoring
+
+## v0.3.0: Enterprise Scale
+Focus: Horizontal scaling and high availability.
+
+### Distributed Architecture
 - [ ] Cluster mode
   - Leader election
   - State synchronization
-  - Cluster health monitoring
   - Node auto-discovery
-  - Load distribution
-  - Failure recovery
-  - Cross-node request routing
-
-### Request Management
-- [ ] Advanced rate limiting
-  - Dynamic rate adjustment
-  - Custom rate limit rules
-  - Rate limit sharing across cluster
-  - Quota management
-  - Usage analytics
-  - Client notification system
-
-### Performance Features
-- [ ] Response caching
-  - Cache strategy configuration
-  - Cache invalidation rules
-  - Cache warming
+  - Cross-node routing
+- [ ] Advanced request management
+  - Dynamic rate limiting
+  - Request quotas
+  - Load balancing
+  - Circuit breaking
+- [ ] Distributed caching
+  - Cache strategies
+  - Invalidation rules
   - Memory management
-  - Cache statistics
-  - Distributed caching support
+  - Cache analytics
 
-### Request Routing
-- [ ] Custom routing rules
+### Enterprise Integration
+- [ ] Advanced routing
   - Content-based routing
-  - A/B testing support
   - Traffic splitting
   - Request transformation
-  - Response modification
-  - Custom middleware chains
+  - Custom middleware
+- [ ] Provider management
+  - Multi-provider failover
+  - Provider health tracking
+  - Cost optimization
+  - Usage analytics
 
-## Phase 4: Production Scale
-Focus: Large-scale deployment features and optimizations.
+## v1.0.0: Production Scale
+Focus: Mission-critical deployment capabilities.
 
-### Performance
-- [ ] Performance optimization
+### Performance & Reliability
+- [ ] Advanced performance features
   - Connection pooling
   - Request batching
-  - Response streaming optimization
-  - Memory usage optimization
-  - CPU utilization improvements
-  - Network efficiency enhancements
-
-### Management
-- [ ] Admin dashboard
-  - Real-time monitoring
-  - Configuration management
-  - User management
-  - Usage analytics
-  - System health overview
-  - Alert management
-
-### Operations
+  - Memory optimization
+  - CPU optimization
+- [ ] Reliability enhancements
+  - Automated failover
+  - Self-healing
+  - Predictive scaling
+  - Performance prediction
+
+### Enterprise Operations
 - [ ] Cost management
-  - Usage tracking per client
-  - Cost allocation
+  - Usage tracking
   - Budget controls
-  - Cost optimization suggestions
-  - Billing integration
+  - Cost allocation
   - Usage forecasting
-
-- [ ] SLA monitoring
-  - SLA definition and tracking
-  - Availability metrics
-  - Performance metrics
-  - Custom SLA rules
-  - SLA violation alerts
-  - Historical SLA reporting
+- [ ] SLA management
+  - SLA definition
+  - Performance tracking
+  - Availability monitoring
+  - Compliance reporting
+
+### Advanced Features
+- [ ] Multi-region support
+  - Geographic routing
+  - Regional failover
+  - Data sovereignty
+  - Cross-region analytics
+- [ ] Advanced security
+  - Zero-trust architecture
+  - Advanced threat detection
+  - Security analytics
+  - Compliance automation
 
 ## Success Metrics
-- Installation time < 5 minutes
-- Configuration requires no code changes
-- 99.9% uptime
-- < 100ms added latency
+- Sub-minute deployment time
+- Zero-touch configuration
+- 99.99% availability
+- < 50ms added latency
 - Zero security vulnerabilities
 - Automatic failure recovery
-- QUIC/HTTP3 latency improvements
+- Complete operational visibility
 
 ## Future Considerations
-- Multi-region support
+- Edge computing integration
 - Custom model hosting
 - Model performance analytics
-- Fine-tuning integration
-- Hybrid deployment support
-- Edge computing integration
+- Fine-tuning infrastructure
+- Hybrid deployment models
 - Advanced protocol support
 
 ## Notes
-- Security and reliability improvements will be ongoing
-- Each feature includes appropriate testing and documentation
-- Regular security audits throughout development
-- Features may be reprioritized based on user feedback
+- Security and reliability are continuous priorities
+- Each feature includes comprehensive testing and documentation
+- Regular security audits are mandatory
+- Features may be reprioritized based on enterprise requirements
diff --git a/cmd/hapax/main.go b/cmd/hapax/main.go
@@ -20,7 +20,7 @@ var (
 	version    = flag.Bool("version", false, "Print version and exit")
 )
 
-const Version = "v0.0.24"
+const Version = "v0.0.25"
 
 func main() {
 	flag.Parse()

diff --git a/server/server.go b/server/server.go
@@ -196,21 +196,18 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	// Create new handler and router
-	handler := NewRouter(s.llm, cfg, s.logger)
-
-	// Create new HTTP server with updated config
+	// Create new HTTP server instance
 	newServer := &http.Server{
 		Addr:           fmt.Sprintf(":%d", cfg.Server.Port),
-		Handler:        handler,
+		Handler:        NewRouter(s.llm, cfg, s.logger),
 		ReadTimeout:    cfg.Server.ReadTimeout,
 		WriteTimeout:   cfg.Server.WriteTimeout,
 		MaxHeaderBytes: cfg.Server.MaxHeaderBytes,
 	}
 
-	// Create HTTP/3 server if enabled
+	// Create new HTTP/3 server if enabled
 	var newHTTP3Server *http3.Server
-	if cfg.Server.HTTP3 != nil && cfg.Server.HTTP3.Enabled {
+	if cfg.Server.HTTP3.Enabled {
 		quicConfig := &quic.Config{
 			MaxStreamReceiveWindow:     cfg.Server.HTTP3.MaxStreamReceiveWindow,
 			MaxConnectionReceiveWindow: cfg.Server.HTTP3.MaxConnectionReceiveWindow,
@@ -221,10 +218,10 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 		}
 
 		// If 0-RTT is enabled but replay is not allowed, wrap the handler
-		var http3Handler http.Handler = handler
+		var http3Handler http.Handler = NewRouter(s.llm, cfg, s.logger)
 		if cfg.Server.HTTP3.Enable0RTT && !cfg.Server.HTTP3.Allow0RTTReplay {
 			http3Handler = &replayProtectionHandler{
-				handler: handler,
+				handler: NewRouter(s.llm, cfg, s.logger),
 				logger:  s.logger,
 				seen:    sync.Map{},
 				maxSize: cfg.Server.HTTP3.Max0RTTSize,
@@ -247,8 +244,8 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 		}
 	}
 
-	// If server is running, we need to stop it and start the new one
-	if s.running {
+	wasRunning := s.running
+	if wasRunning {
 		// Gracefully shutdown existing server
 		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()
@@ -263,14 +260,16 @@ func (s *Server) updateServerConfig(cfg *config.Config) error {
 				s.logger.Error("Failed to close HTTP/3 server", zap.Error(err))
 			}
 		}
+		s.running = false
 	}
 
 	// Update server instances
 	s.httpServer = newServer
 	s.http3Server = newHTTP3Server
 
 	// If we were running before, start the new server
-	if s.running {
+	if wasRunning {
+		s.running = true
 		go func() {
 			if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed {
 				s.logger.Error("HTTP server error", zap.Error(err))
@@ -337,28 +336,12 @@ func (s *Server) handleConfigUpdates(configChan <-chan *config.Config) {
 			s.llm = newLLM
 		}
 
-		// Create temporary server with new config
-		tempServer := &http.Server{}
+		// Update server configuration
 		if err := s.updateServerConfig(newConfig); err != nil {
 			s.logger.Error("Failed to update server config", zap.Error(err))
 			continue
 		}
 
-		// Gracefully shutdown existing connections
-		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-		if err := s.httpServer.Shutdown(ctx); err != nil {
-			s.logger.Error("Error during server shutdown", zap.Error(err))
-		}
-		cancel()
-
-		// Start server with new configuration
-		s.httpServer = tempServer
-		go func() {
-			if err := s.httpServer.ListenAndServe(); err != http.ErrServerClosed {
-				s.logger.Error("Server error", zap.Error(err))
-			}
-		}()
-
 		s.logger.Info("Server restarted with new configuration")
 	}
 }
@@ -367,6 +350,10 @@ func (s *Server) handleConfigUpdates(configChan <-chan *config.Config) {
 // It handles graceful shutdown when the context is cancelled, ensuring that all connections are properly closed before exiting.
 func (s *Server) Start(ctx context.Context) error {
 	s.mu.Lock()
+	if s.running {
+		s.mu.Unlock()
+		return fmt.Errorf("server is already running")
+	}
 	s.running = true
 	s.mu.Unlock()