Skip to content

Commit

Permalink
multi: manage shutdown requests with status codes
Browse files Browse the repository at this point in the history
In this commit, we manage shutdown requests with status
codes. Exits with code 1 for critical errors
(e.g, from exhausted attempts connecting to chain backend),
and code 0 for normal shutdowns (e.g., from Stop Daemon RPC call).

Co-authored-by: Elle Mouton <[email protected]>
  • Loading branch information
mohamedawnallah and ellemouton committed Jan 2, 2025
1 parent a388c1f commit 1714d8c
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 27 deletions.
20 changes: 10 additions & 10 deletions config_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func (d *DefaultWalletImpl) ValidateMacaroon(ctx context.Context,
// Because the default implementation does not return any permissions,
// we shouldn't be registered as an external validator at all and this
// should never be invoked.
return fmt.Errorf("default implementation does not support external " +
return errors.New("default implementation does not support external " +
"macaroon validation")
}

Expand Down Expand Up @@ -365,9 +365,9 @@ func (d *DefaultWalletImpl) BuildWalletConfig(ctx context.Context,
if d.cfg.WalletUnlockPasswordFile != "" && !walletExists &&
!d.cfg.WalletUnlockAllowCreate {

return nil, nil, nil, fmt.Errorf("wallet unlock password file " +
"was specified but wallet does not exist; initialize " +
"the wallet before using auto unlocking")
return nil, nil, nil, errors.New("wallet unlock password " +
"file was specified but wallet does not exist; " +
"initialize the wallet before using auto unlocking")
}

// What wallet mode are we running in? We've already made sure the no
Expand Down Expand Up @@ -1099,7 +1099,7 @@ func (d *DefaultDatabaseBuilder) BuildDatabase(

if len(invoiceSlice.Invoices) > 0 {
cleanUp()
err := fmt.Errorf("found invoices in the KV invoice " +
err := errors.New("found invoices in the KV invoice " +
"DB, migration to native SQL is not yet " +
"supported")
d.logger.Error(err)
Expand Down Expand Up @@ -1161,7 +1161,7 @@ func (d *DefaultDatabaseBuilder) BuildDatabase(
// this RPC server.
func waitForWalletPassword(cfg *Config,
pwService *walletunlocker.UnlockerService,
loaderOpts []btcwallet.LoaderOption, shutdownChan <-chan struct{}) (
loaderOpts []btcwallet.LoaderOption, shutdownChan <-chan bool) (
*walletunlocker.WalletUnlockParams, error) {

// Wait for user to provide the password.
Expand Down Expand Up @@ -1234,7 +1234,7 @@ func waitForWalletPassword(cfg *Config,
// this case we need to import each of the xpubs individually.
case watchOnlyAccounts != nil:
if !cfg.RemoteSigner.Enable {
return nil, fmt.Errorf("cannot initialize " +
return nil, errors.New("cannot initialize " +
"watch only wallet with remote " +
"signer config disabled")
}
Expand All @@ -1254,7 +1254,7 @@ func waitForWalletPassword(cfg *Config,
// or the extended key is set so, we shouldn't get here.
// The default case is just here for readability and
// completeness.
err = fmt.Errorf("cannot create wallet, neither seed " +
err = errors.New("cannot create wallet, neither seed " +
"nor extended key was given")
}
if err != nil {
Expand Down Expand Up @@ -1311,7 +1311,7 @@ func waitForWalletPassword(cfg *Config,

// If we got a shutdown signal we just return with an error immediately
case <-shutdownChan:
return nil, fmt.Errorf("shutting down")
return nil, errors.New("shutting down")
}
}

Expand Down Expand Up @@ -1378,7 +1378,7 @@ func initNeutrinoBackend(ctx context.Context, cfg *Config, chainDir string,
// every other case, the neutrino.validatechannels overwrites the
// routing.assumechanvalid value.
if cfg.NeutrinoMode.ValidateChannels && cfg.Routing.AssumeChannelValid {
return nil, nil, fmt.Errorf("can't set both " +
return nil, nil, errors.New("can't set both " +
"neutrino.validatechannels and routing." +
"assumechanvalid to true at the same time")
}
Expand Down
4 changes: 4 additions & 0 deletions docs/release-notes/release-notes-0.19.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@
[here](https://github.com/lightningnetwork/lnd/blob/master/chainio/README.md)
to learn more.

* [Shutdown Exit Handling](https://github.com/lightningnetwork/lnd/pull/9392)
is added to manage shutdowns with status codes. Exits with code 1 for critical
errors, and code 0 for normal shutdowns (e.g., from StopDaemon RPC call).

## RPC Updates

* Some RPCs that previously just returned an empty response message now at least
Expand Down
19 changes: 16 additions & 3 deletions itest/lnd_etcd_failover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ package itest

import (
"context"
"errors"
"fmt"
"io"
"net"
"os/exec"
"sync"
"testing"
"time"
Expand Down Expand Up @@ -323,9 +325,20 @@ func testLeaderHealthCheck(ht *lntest.HarnessTest) {
// to shut down.
proxy.Stop(ht.T)

// Wait for Carol-1 to stop. If the health check wouldn't properly work
// this call would timeout and trigger a test failure.
require.NoError(ht.T, carol.WaitForProcessExit())
// Wait for Carol-1 to stop. If the health check isn't functioning
// properly this call will time out and trigger a test failure. The
// process is expected to exit with status code 1. Any other exit code
// or unexpected error will cause the test to fail.
err = carol.WaitForProcessExit()
require.Error(ht.T, err)

// Check that the exit code is specifically 1.
if exitErr := new(exec.ExitError); errors.As(err, &exitErr) {
require.Equal(ht.T, 1, exitErr.ExitCode())
} else {
require.Fail(ht.T, fmt.Sprintf("Unexpected error type: %T, "+
"message: %v", err, err))
}

// Now that Carol-1 is shut down we should fail over to Carol-2.
failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
Expand Down
10 changes: 7 additions & 3 deletions lnd.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,13 +412,13 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg,
// Start leader election if we're running on etcd. Continuation will be
// blocked until this instance is elected as the current leader or
// shutting down.
elected := false
elected, leaderNormalShutdown := false, false
var leaderElector cluster.LeaderElector
if cfg.Cluster.EnableLeaderElection {
electionCtx, cancelElection := context.WithCancel(ctx)

go func() {
<-interceptor.ShutdownChannel()
leaderNormalShutdown = <-interceptor.ShutdownChannel()
cancelElection()
}()

Expand Down Expand Up @@ -799,7 +799,11 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg,

// Wait for shutdown signal from either a graceful server stop or from
// the interrupt handler.
<-interceptor.ShutdownChannel()
normalShutdown := <-interceptor.ShutdownChannel()
if !(normalShutdown || leaderNormalShutdown) {
return errors.New("LND shut down with an error")
}

return nil
}

Expand Down
2 changes: 1 addition & 1 deletion log.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func genSubLogger(root *build.SubLoggerManager,
return
}

interceptor.RequestShutdown()
interceptor.RequestShutdown(false)
}

// Return a function which will create a sublogger from our root
Expand Down
2 changes: 1 addition & 1 deletion rpcserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -7062,7 +7062,7 @@ func (r *rpcServer) StopDaemon(_ context.Context,
"shut down, please wait until rescan finishes")
}

r.interceptor.RequestShutdown()
r.interceptor.RequestShutdown(true)

return &lnrpc.StopResponse{
Status: "shutdown initiated, check logs for progress",
Expand Down
21 changes: 12 additions & 9 deletions signal/signal.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ type Interceptor struct {
interruptChannel chan os.Signal

// shutdownChannel is closed once the main interrupt handler exits.
shutdownChannel chan struct{}
shutdownChannel chan bool

// shutdownRequestChannel is used to request the daemon to shutdown
// gracefully, similar to when receiving SIGINT.
shutdownRequestChannel chan struct{}
shutdownRequestChannel chan bool

// quit is closed when instructing the main interrupt handler to exit.
// Note that to avoid losing notifications, only shutdown func may
Expand All @@ -124,8 +124,8 @@ func Intercept() (Interceptor, error) {

channels := Interceptor{
interruptChannel: make(chan os.Signal, 1),
shutdownChannel: make(chan struct{}),
shutdownRequestChannel: make(chan struct{}),
shutdownChannel: make(chan bool, 1),
shutdownRequestChannel: make(chan bool),
quit: make(chan struct{}),
}

Expand Down Expand Up @@ -171,18 +171,21 @@ func (c *Interceptor) mainInterruptHandler() {
close(c.quit)
}

var normalShutdown bool
for {
select {
case signal := <-c.interruptChannel:
log.Infof("Received %v", signal)
shutdown()

case <-c.shutdownRequestChannel:
log.Infof("Received shutdown request.")
case normalShutdown = <-c.shutdownRequestChannel:
log.Infof("Received shutdown request "+
"(normalShutdown=%t).", normalShutdown)
shutdown()

case <-c.quit:
log.Infof("Gracefully shutting down.")
c.shutdownChannel <- normalShutdown
close(c.shutdownChannel)
signal.Stop(c.interruptChannel)
return
Expand Down Expand Up @@ -215,15 +218,15 @@ func (c *Interceptor) Alive() bool {
}

// RequestShutdown initiates a graceful shutdown from the application.
func (c *Interceptor) RequestShutdown() {
func (c *Interceptor) RequestShutdown(normalShutdown bool) {
select {
case c.shutdownRequestChannel <- struct{}{}:
case c.shutdownRequestChannel <- normalShutdown:
case <-c.quit:
}
}

// ShutdownChannel returns the channel that will be closed once the main
// interrupt handler has exited.
func (c *Interceptor) ShutdownChannel() <-chan struct{} {
func (c *Interceptor) ShutdownChannel() <-chan bool {
return c.shutdownChannel
}

0 comments on commit 1714d8c

Please sign in to comment.