Skip to content

Commit

Permalink
fix: circuit breaker recovery and error handling
Browse files Browse the repository at this point in the history
- Fixed circuit breaker reset timeout configuration
- Added circuit breaker config to Config struct
- Updated version to v0.0.11
  • Loading branch information
teilomillet committed Dec 20, 2024
1 parent 1eb7fa3 commit e30bc10
Show file tree
Hide file tree
Showing 10 changed files with 782 additions and 191 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ WORKDIR /app
COPY --from=builder /app/hapax .

# Copy default config file
COPY config.yaml ./config.yaml
COPY config.example.yaml ./config.yaml

# Use non-root user
USER hapax
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
A lightweight HTTP server for Large Language Model (LLM) interactions, built with Go.

## Version
v0.0.9
v0.0.11

## Features

Expand Down
2 changes: 1 addition & 1 deletion ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Focus: Enhance reliability, scalability, and deployability for production enviro
- Latency tracking
- Error monitoring
- Resource utilization
- [ ] Docker support
- [x] Docker support
- Multi-stage build optimization
- Production-ready Dockerfile
- Docker Compose configuration
Expand Down
31 changes: 25 additions & 6 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,34 @@ server:
port: 8080
read_timeout: 30s
write_timeout: 30s
max_header_bytes: 1048576
shutdown_timeout: 5s

llm:
provider: "ollama" # Change to your preferred provider (ollama, openai, etc.)
model: "llama2" # Change to your preferred model
endpoint: "" # Set your provider's endpoint if needed
providers:
openai:
type: openai
model: gpt-4
api_key: ${OPENAI_API_KEY}
anthropic:
type: anthropic
model: claude-2
api_key: ${ANTHROPIC_API_KEY}
ollama:
type: ollama
model: llama2
api_key: ""

# Order of provider preference for failover
provider_preference:
- openai
- anthropic
- ollama

logging:
level: "info"
format: "json"
level: info
format: json

metrics:
enabled: true
prometheus:
enabled: true
15 changes: 15 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ type Config struct {
LLM LLMConfig `yaml:"llm"`
Logging LoggingConfig `yaml:"logging"`
Routes []RouteConfig `yaml:"routes"`
Providers map[string]ProviderConfig `yaml:"providers"`
ProviderPreference []string `yaml:"provider_preference"` // Order of provider preference
CircuitBreaker CircuitBreakerConfig `yaml:"circuit_breaker"`
}

// ServerConfig holds server-specific configuration for the HTTP server.
Expand Down Expand Up @@ -166,6 +169,13 @@ type RetryConfig struct {
RetryableErrors []string `yaml:"retryable_errors"`
}

// ProviderConfig holds configuration for a single LLM provider.
// Entries of this type are the values of Config.Providers (YAML key
// `providers`), keyed by provider name.
type ProviderConfig struct {
Type string `yaml:"type"` // Provider type (e.g., openai, anthropic)
Model string `yaml:"model"` // Model name
// NOTE(review): the example config supplies api_key as a placeholder such as
// ${OPENAI_API_KEY}; confirm that the loader expands environment variables.
APIKey string `yaml:"api_key"` // API key for authentication
}

// LoggingConfig holds logging-specific configuration.
type LoggingConfig struct {
// Level sets logging verbosity: debug, info, warn, error
Expand Down Expand Up @@ -217,6 +227,11 @@ type HealthCheck struct {
Checks map[string]string `yaml:"checks"`
}

// CircuitBreakerConfig holds circuit breaker settings.
// It is read from the `circuit_breaker` YAML section of Config.
type CircuitBreakerConfig struct {
// ResetTimeout is a duration read from `reset_timeout`.
// NOTE(review): presumably forwarded to the circuit breaker's ResetTimeout
// (the wait before an open breaker admits a trial request) — confirm at the
// call site that builds the breaker.
ResetTimeout time.Duration `yaml:"reset_timeout"`
}

// DefaultConfig returns a configuration with sensible defaults
func DefaultConfig() *Config {
return &Config{
Expand Down
173 changes: 173 additions & 0 deletions server/circuitbreaker/circuitbreaker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package circuitbreaker

import (
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
)

// State represents the current state of the circuit breaker.
//
// The numeric value of each state is published as-is through the
// hapax_circuit_breaker_state gauge (0=closed, 1=open, 2=half-open), so the
// order of the constants below must not change.
type State int

// Circuit breaker states, numbered from zero in declaration order.
const (
StateClosed State = iota // Circuit is closed (allowing requests)
StateOpen // Circuit is open (blocking requests)
StateHalfOpen // Circuit is half-open (testing if service is healthy)
)

// Config holds configuration for the circuit breaker.
type Config struct {
// The breaker trips once the recorded failure count is >= this value, so a
// value of zero or less trips on the first recorded failure.
FailureThreshold int // Number of failures before opening circuit
// An open breaker admits a trial request once more than this much time has
// elapsed since the last recorded failure.
ResetTimeout time.Duration // Time to wait before attempting reset
// Upper bound for the half-open request counter checked by AllowRequest.
HalfOpenRequests int // Number of requests to allow in half-open state
}

// CircuitBreaker implements the circuit breaker pattern.
//
// Values must be created with NewCircuitBreaker, which sets up the metrics.
// Because the struct embeds a mutex it must not be copied; all methods use
// pointer receivers.
type CircuitBreaker struct {
name string // Label value attached to the metrics and the trip log entry
config Config // Thresholds and timeouts, fixed at construction
state State // Current state; change it only through setState
failures int // Failures recorded since the counter was last reset by a success
lastFailure time.Time // Time of the most recently recorded failure
halfOpen int // Requests admitted while in the half-open state
mu sync.RWMutex // Guards state, failures, lastFailure and halfOpen
logger *zap.Logger // Receives the warning emitted when the breaker trips

// Metrics
stateGauge prometheus.Gauge // Mirrors state as a number
failuresCount prometheus.Counter // Incremented for every recorded failure
tripsTotal prometheus.Counter // Incremented each time the breaker trips
}

// NewCircuitBreaker creates a circuit breaker that starts in the closed state
// and exposes three Prometheus metrics labelled with name:
// hapax_circuit_breaker_state, hapax_circuit_breaker_failures_total and
// hapax_circuit_breaker_trips_total.
//
// A nil logger is replaced by a no-op logger so that tripping the breaker
// cannot dereference a nil pointer. A nil registry is tolerated: the metrics
// are still created and updated, they are simply not registered anywhere.
//
// With a non-nil registry the metrics are registered via MustRegister, which
// panics if collectors with the same metric names and the same name label are
// already registered. Each breaker name can therefore be used only once per
// registry.
func NewCircuitBreaker(name string, config Config, logger *zap.Logger, registry *prometheus.Registry) *CircuitBreaker {
	if logger == nil {
		logger = zap.NewNop()
	}

	cb := &CircuitBreaker{
		name:   name,
		config: config,
		state:  StateClosed,
		logger: logger,
	}

	// Every metric carries the breaker name as a constant label so several
	// breakers can share one registry.
	labels := prometheus.Labels{"name": name}

	cb.stateGauge = prometheus.NewGauge(prometheus.GaugeOpts{
		Name:        "hapax_circuit_breaker_state",
		Help:        "Current state of the circuit breaker (0=closed, 1=open, 2=half-open)",
		ConstLabels: labels,
	})

	cb.failuresCount = prometheus.NewCounter(prometheus.CounterOpts{
		Name:        "hapax_circuit_breaker_failures_total",
		Help:        "Total number of failures recorded by the circuit breaker",
		ConstLabels: labels,
	})

	cb.tripsTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name:        "hapax_circuit_breaker_trips_total",
		Help:        "Total number of times the circuit breaker has tripped",
		ConstLabels: labels,
	})

	// Register in the same order as before; MustRegister stops (panics) at the
	// first collector that cannot be registered.
	if registry != nil {
		registry.MustRegister(cb.stateGauge, cb.failuresCount, cb.tripsTotal)
	}

	return cb
}

// Execute runs f if the circuit breaker currently admits requests and records
// the outcome.
//
// It returns ErrCircuitOpen without calling f when the request is rejected.
// Otherwise it returns whatever f returns; a non-nil error is recorded as a
// failure and nil as a success.
//
// If f panics, the call is recorded as a failure before the panic continues to
// propagate. Without this, a panicking half-open trial request would use up
// its slot without ever reporting a result, and the breaker could keep
// rejecting requests indefinitely.
func (cb *CircuitBreaker) Execute(f func() error) error {
	if !cb.AllowRequest() {
		return ErrCircuitOpen
	}

	returned := false
	defer func() {
		// Reached with returned == false only when f did not return normally.
		if !returned {
			cb.RecordResult(executePanicError{})
		}
	}()

	err := f()
	returned = true
	cb.RecordResult(err)
	return err
}

// executePanicError is the failure recorded by Execute when the guarded
// function panics instead of returning.
type executePanicError struct{}

// Error implements the error interface.
func (executePanicError) Error() string {
	return "circuit breaker: guarded function panicked"
}

// AllowRequest reports whether a request may proceed, advancing the breaker
// from open to half-open when the reset timeout has elapsed.
//
//   - Closed: every request is allowed.
//   - Open: requests are rejected until more than config.ResetTimeout has
//     passed since the last recorded failure. The first request after that
//     moves the breaker to half-open and is admitted as the first trial
//     request.
//   - Half-open: requests are admitted until config.HalfOpenRequests trial
//     requests (including the one that caused the transition) have been let
//     through; further requests are rejected until a result is recorded.
//
// Callers that use AllowRequest directly must report the outcome of every
// admitted request through RecordResult, otherwise a half-open breaker never
// leaves that state.
func (cb *CircuitBreaker) AllowRequest() bool {
	cb.mu.Lock()
	defer cb.mu.Unlock()

	switch cb.state {
	case StateClosed:
		return true

	case StateOpen:
		if time.Since(cb.lastFailure) <= cb.config.ResetTimeout {
			return false
		}
		cb.setState(StateHalfOpen)
		// The request that triggers the transition is itself a trial request,
		// so it counts against the half-open budget. Starting from zero would
		// admit HalfOpenRequests+1 trial requests.
		cb.halfOpen = 1
		return true

	case StateHalfOpen:
		if cb.halfOpen >= cb.config.HalfOpenRequests {
			return false
		}
		cb.halfOpen++
		return true

	default:
		// Unknown state: fail safe by rejecting.
		return false
	}
}

// RecordResult records the outcome of a request that was admitted by
// AllowRequest. A nil err is a success; any non-nil err is a failure.
//
// Success: in the half-open state the breaker closes and its counters are
// reset; in the closed state the failure count is reset; in the open state
// (a late result from a request admitted before the trip) nothing changes.
//
// Failure: the failure count, the failures metric and the last-failure time
// are always updated. The breaker then
//   - re-opens immediately when half-open, because the trial request failed;
//   - trips when closed and the failure count has reached
//     config.FailureThreshold;
//   - stays as it is when already open, so late failures neither inflate the
//     trips metric nor log a second "tripped" warning. They still move the
//     last-failure time, which restarts the reset timeout.
func (cb *CircuitBreaker) RecordResult(err error) {
	cb.mu.Lock()
	defer cb.mu.Unlock()

	if err == nil {
		switch cb.state {
		case StateHalfOpen:
			cb.setState(StateClosed)
			cb.failures = 0
			cb.halfOpen = 0
		case StateClosed:
			cb.failures = 0
		}
		return
	}

	cb.failures++
	cb.failuresCount.Inc()
	cb.lastFailure = time.Now()

	switch cb.state {
	case StateOpen:
		// Already tripped; a late failure must not count as another trip.
	case StateHalfOpen:
		cb.tripBreaker()
	default:
		if cb.failures >= cb.config.FailureThreshold {
			cb.tripBreaker()
		}
	}
}

// tripBreaker opens the circuit, bumps the trips counter and emits a warning
// with the breaker name, the current failure count and the time of the last
// failure. It takes no lock itself; callers hold cb.mu.
func (cb *CircuitBreaker) tripBreaker() {
	cb.setState(StateOpen)
	cb.tripsTotal.Inc()

	details := []zap.Field{
		zap.String("name", cb.name),
		zap.Int("failures", cb.failures),
		zap.Time("last_failure", cb.lastFailure),
	}
	cb.logger.Warn("Circuit breaker tripped", details...)
}

// setState stores the new state and publishes its numeric value through the
// state gauge so the metric always mirrors the in-memory state. It takes no
// lock itself; callers hold cb.mu.
func (cb *CircuitBreaker) setState(state State) {
	cb.state = state

	gaugeValue := float64(state)
	cb.stateGauge.Set(gaugeValue)
}

// GetState returns a snapshot of the breaker's state, taken under the read
// lock. The state may change as soon as the call returns.
func (cb *CircuitBreaker) GetState() State {
	cb.mu.RLock()
	current := cb.state
	cb.mu.RUnlock()

	return current
}
8 changes: 8 additions & 0 deletions server/circuitbreaker/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package circuitbreaker

import "errors"

// ErrCircuitOpen is the sentinel error returned when the circuit breaker
// rejects a request instead of running it. Compare with errors.Is.
var ErrCircuitOpen = errors.New("circuit breaker is open")
Loading

0 comments on commit e30bc10

Please sign in to comment.