Fail HTLC backwards before upstream claims on-chain #3556

Merged
merged 5 commits on Jan 27, 2025
Changes from 4 commits
74 changes: 74 additions & 0 deletions lightning/src/chain/channelmonitor.rs
@@ -1023,6 +1023,12 @@ pub(crate) struct ChannelMonitorImpl<Signer: EcdsaChannelSigner> {

/// The first block height at which we had no remaining claimable balances.
balances_empty_height: Option<u32>,

/// In-memory only HTLC ids used to track upstream HTLCs that have been failed backwards due to
/// a downstream channel force-close remaining unconfirmed by the time the upstream timeout
/// expires. This is used to tell us we already generated an event to fail this HTLC back
/// during a previous block scan.
failed_back_htlc_ids: HashSet<SentHTLCId>,
}

/// Transaction outputs to watch for on-chain spends.
@@ -1445,6 +1451,8 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
counterparty_node_id: Some(counterparty_node_id),
initial_counterparty_commitment_info: None,
balances_empty_height: None,

failed_back_htlc_ids: new_hash_set(),
})
}

@@ -4221,6 +4229,71 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
}
}

if self.lockdown_from_offchain || self.funding_spend_seen || self.holder_tx_signed {
// Fail back HTLCs on backwards channels if they expire within
// `LATENCY_GRACE_PERIOD_BLOCKS` blocks and the channel is closed (i.e. we're at a
// point where no further off-chain updates will be accepted). If we haven't seen the
// preimage for an HTLC by the time the previous hop's timeout expires, we've lost that
// HTLC, so we might as well fail it back instead of having our counterparty force-close
// the inbound channel.
let current_holder_htlcs = self.current_holder_commitment_tx.htlc_outputs.iter()
.map(|&(ref a, _, ref b)| (a, b.as_ref()));

let current_counterparty_htlcs = if let Some(txid) = self.current_counterparty_commitment_txid {
if let Some(htlc_outputs) = self.counterparty_claimable_outpoints.get(&txid) {
Some(htlc_outputs.iter().map(|&(ref a, ref b)| (a, b.as_ref().map(|boxed| &**boxed))))
} else { None }
} else { None }.into_iter().flatten();

let prev_counterparty_htlcs = if let Some(txid) = self.prev_counterparty_commitment_txid {
if let Some(htlc_outputs) = self.counterparty_claimable_outpoints.get(&txid) {
Some(htlc_outputs.iter().map(|&(ref a, ref b)| (a, b.as_ref().map(|boxed| &**boxed))))
} else { None }
} else { None }.into_iter().flatten();

let htlcs = current_holder_htlcs
.chain(current_counterparty_htlcs)
.chain(prev_counterparty_htlcs);

let height = self.best_block.height;
for (htlc, source_opt) in htlcs {
// Only check forwarded HTLCs' previous hops
let source = match source_opt {
Some(source) => source,
None => continue,
};
let inbound_htlc_expiry = match source.inbound_htlc_expiry() {
Some(cltv_expiry) => cltv_expiry,
None => continue,
};
let max_expiry_height = height.saturating_add(LATENCY_GRACE_PERIOD_BLOCKS);
if inbound_htlc_expiry > max_expiry_height {
continue;
}
let duplicate_event = self.pending_monitor_events.iter().any(
|update| if let &MonitorEvent::HTLCEvent(ref upd) = update {
upd.source == *source
} else { false });
if duplicate_event {
continue;
}
if !self.failed_back_htlc_ids.insert(SentHTLCId::from_source(source)) {
continue;
}
if !duplicate_event {
log_error!(logger, "Failing back HTLC {} upstream to preserve the \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: is this really considered an error? Info/warn seems better suited.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its an error in that a violation of our assumptions around tx confirmation time happened, and its probably an important enough case that users should see it and think hard about what is happening.

channel as the forward HTLC hasn't resolved and our backward HTLC \
expires soon at {}", log_bytes!(htlc.payment_hash.0), inbound_htlc_expiry);
self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
source: source.clone(),
payment_preimage: None,
payment_hash: htlc.payment_hash,
htlc_value_satoshis: Some(htlc.amount_msat / 1000),
}));
}
}
}

let conf_target = self.closure_conf_target();
self.onchain_tx_handler.update_claims_view_from_requests(claimable_outpoints, conf_height, self.best_block.height, broadcaster, conf_target, fee_estimator, logger);
self.onchain_tx_handler.update_claims_view_from_matched_txn(&txn_matched, conf_height, conf_hash, self.best_block.height, broadcaster, conf_target, fee_estimator, logger);
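As a rough illustration of the fail-back logic added to `ChannelMonitorImpl` above, here is a minimal standalone sketch of the same decision. `should_fail_back_early` and its parameters are hypothetical simplifications, not LDK API: the real code keys the set by `SentHTLCId`, also deduplicates against `pending_monitor_events`, and runs over the holder and counterparty commitment HTLCs shown in the diff.

```rust
use std::collections::HashSet;

/// Hypothetical, simplified stand-in for the new check: once the channel can no longer
/// accept off-chain updates, fail an upstream (inbound) HTLC back as soon as its CLTV
/// expiry is within the grace window of the current tip, at most once per HTLC across
/// repeated block scans.
fn should_fail_back_early(
    channel_closed: bool,       // lockdown_from_offchain || funding_spend_seen || holder_tx_signed
    inbound_htlc_expiry: u32,   // CLTV expiry of the upstream HTLC of a forwarded payment
    best_block_height: u32,
    grace_period_blocks: u32,   // plays the role of LATENCY_GRACE_PERIOD_BLOCKS
    already_failed: &mut HashSet<u64>, // stand-in for failed_back_htlc_ids, keyed by an HTLC id
    htlc_id: u64,
) -> bool {
    if !channel_closed {
        return false;
    }
    // Too early: the upstream HTLC does not yet expire within the grace window.
    if inbound_htlc_expiry > best_block_height.saturating_add(grace_period_blocks) {
        return false;
    }
    // `insert` returns false if we already generated a failure for this HTLC.
    already_failed.insert(htlc_id)
}

fn main() {
    let mut failed = HashSet::new();
    // Upstream expiry 812_006, tip 812_000, grace window 6 blocks: fail back now.
    assert!(should_fail_back_early(true, 812_006, 812_000, 6, &mut failed, 42));
    // A later block scan sees the same HTLC again: no duplicate failure event.
    assert!(!should_fail_back_early(true, 812_006, 812_000, 6, &mut failed, 42));
}
```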
@@ -5066,6 +5139,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP
counterparty_node_id,
initial_counterparty_commitment_info,
balances_empty_height,
failed_back_htlc_ids: new_hash_set(),
})))
}
}
86 changes: 9 additions & 77 deletions lightning/src/ln/channel.rs
@@ -1693,15 +1693,6 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
/// [`msgs::RevokeAndACK`] message from the counterparty.
sent_message_awaiting_response: Option<usize>,

#[cfg(any(test, fuzzing))]
// When we receive an HTLC fulfill on an outbound path, we may immediately fulfill the
// corresponding HTLC on the inbound path. If, then, the outbound path channel is
// disconnected and reconnected (before we've exchange commitment_signed and revoke_and_ack
// messages), they may re-broadcast their update_fulfill_htlc, causing a duplicate claim. This
// is fine, but as a sanity check in our failure to generate the second claim, we check here
// that the original was a claim, and that we aren't now trying to fulfill a failed HTLC.
historical_inbound_htlc_fulfills: HashSet<u64>,

/// This channel's type, as negotiated during channel open
channel_type: ChannelTypeFeatures,

@@ -2403,9 +2394,6 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
funding_tx_broadcast_safe_event_emitted: false,
channel_ready_event_emitted: false,

#[cfg(any(test, fuzzing))]
historical_inbound_htlc_fulfills: new_hash_set(),

channel_type,
channel_keys_id,

@@ -2636,9 +2624,6 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
funding_tx_broadcast_safe_event_emitted: false,
channel_ready_event_emitted: false,

#[cfg(any(test, fuzzing))]
historical_inbound_htlc_fulfills: new_hash_set(),

channel_type,
channel_keys_id,

@@ -4665,10 +4650,6 @@ impl<SP: Deref> FundedChannel<SP> where
}
}
if pending_idx == core::usize::MAX {
#[cfg(any(test, fuzzing))]
// If we failed to find an HTLC to fulfill, make sure it was previously fulfilled and
// this is simply a duplicate claim, not previously failed and we lost funds.
debug_assert!(self.context.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
return UpdateFulfillFetch::DuplicateClaim {};
}

@@ -4698,8 +4679,6 @@ impl<SP: Deref> FundedChannel<SP> where
if htlc_id_arg == htlc_id {
// Make sure we don't leave latest_monitor_update_id incremented here:
self.context.latest_monitor_update_id -= 1;
#[cfg(any(test, fuzzing))]
debug_assert!(self.context.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
return UpdateFulfillFetch::DuplicateClaim {};
}
},
@@ -4721,12 +4700,8 @@ impl<SP: Deref> FundedChannel<SP> where
self.context.holding_cell_htlc_updates.push(HTLCUpdateAwaitingACK::ClaimHTLC {
payment_preimage: payment_preimage_arg, htlc_id: htlc_id_arg,
});
#[cfg(any(test, fuzzing))]
self.context.historical_inbound_htlc_fulfills.insert(htlc_id_arg);
return UpdateFulfillFetch::NewClaim { monitor_update, htlc_value_msat, msg: None };
}
#[cfg(any(test, fuzzing))]
self.context.historical_inbound_htlc_fulfills.insert(htlc_id_arg);

{
let htlc = &mut self.context.pending_inbound_htlcs[pending_idx];
@@ -4791,14 +4766,8 @@ impl<SP: Deref> FundedChannel<SP> where
}
}

/// We can only have one resolution per HTLC. In some cases around reconnect, we may fulfill
/// an HTLC more than once or fulfill once and then attempt to fail after reconnect. We cannot,
/// however, fail more than once as we wait for an upstream failure to be irrevocably committed
/// before we fail backwards.
///
/// If we do fail twice, we `debug_assert!(false)` and return `Ok(None)`. Thus, this will always
/// return `Ok(_)` if preconditions are met. In any case, `Err`s will only be
/// [`ChannelError::Ignore`].
/// Returns `Err` (always with [`ChannelError::Ignore`]) if the HTLC could not be failed (e.g.
/// if it was already resolved). Otherwise returns `Ok`.
pub fn queue_fail_htlc<L: Deref>(&mut self, htlc_id_arg: u64, err_packet: msgs::OnionErrorPacket, logger: &L)
-> Result<(), ChannelError> where L::Target: Logger {
self.fail_htlc(htlc_id_arg, err_packet, true, logger)
@@ -4816,14 +4785,8 @@ impl<SP: Deref> FundedChannel<SP> where
.map(|msg_opt| assert!(msg_opt.is_none(), "We forced holding cell?"))
}

/// We can only have one resolution per HTLC. In some cases around reconnect, we may fulfill
/// an HTLC more than once or fulfill once and then attempt to fail after reconnect. We cannot,
/// however, fail more than once as we wait for an upstream failure to be irrevocably committed
/// before we fail backwards.
///
/// If we do fail twice, we `debug_assert!(false)` and return `Ok(None)`. Thus, this will always
/// return `Ok(_)` if preconditions are met. In any case, `Err`s will only be
/// [`ChannelError::Ignore`].
/// Returns `Err` (always with [`ChannelError::Ignore`]) if the HTLC could not be failed (e.g.
/// if it was already resolved). Otherwise returns `Ok`.
fn fail_htlc<L: Deref, E: FailHTLCContents + Clone>(
&mut self, htlc_id_arg: u64, err_contents: E, mut force_holding_cell: bool,
logger: &L
@@ -4841,12 +4804,8 @@ impl<SP: Deref> FundedChannel<SP> where
if htlc.htlc_id == htlc_id_arg {
match htlc.state {
InboundHTLCState::Committed => {},
InboundHTLCState::LocalRemoved(ref reason) => {
if let &InboundHTLCRemovalReason::Fulfill(_) = reason {
} else {
debug_assert!(false, "Tried to fail an HTLC that was already failed");
}
return Ok(None);
InboundHTLCState::LocalRemoved(_) => {
return Err(ChannelError::Ignore(format!("HTLC {} was already resolved", htlc.htlc_id)));
},
_ => {
debug_assert!(false, "Have an inbound HTLC we tried to claim before it was fully committed to");
@@ -4857,11 +4816,7 @@ impl<SP: Deref> FundedChannel<SP> where
}
}
if pending_idx == core::usize::MAX {
#[cfg(any(test, fuzzing))]
// If we failed to find an HTLC to fail, make sure it was previously fulfilled and this
// is simply a duplicate fail, not previously failed and we failed-back too early.
debug_assert!(self.context.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
return Ok(None);
return Err(ChannelError::Ignore(format!("Unable to find a pending HTLC which matched the given HTLC ID ({})", htlc_id_arg)));
}

if !self.context.channel_state.can_generate_new_commitment() {
@@ -4875,17 +4830,14 @@ impl<SP: Deref> FundedChannel<SP> where
match pending_update {
&HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => {
if htlc_id_arg == htlc_id {
#[cfg(any(test, fuzzing))]
debug_assert!(self.context.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
return Ok(None);
return Err(ChannelError::Ignore(format!("HTLC {} was already claimed!", htlc_id)));
}
},
&HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. } |
&HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, .. } =>
{
if htlc_id_arg == htlc_id {
debug_assert!(false, "Tried to fail an HTLC that was already failed");
return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
return Err(ChannelError::Ignore(format!("HTLC {} was already pending failure", htlc_id)));
}
},
_ => {}
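The doc-comment and return-value changes above replace the old `debug_assert!` plus `Ok(None)` convention with `Err(ChannelError::Ignore(..))` when an HTLC is already resolved or unknown. A rough sketch of how a caller might treat that result follows; the `ChannelError` enum here is a pared-down hypothetical stand-in for LDK's type, and `handle_fail_result` is illustrative only, not actual `ChannelManager` code.

```rust
// Pared-down, hypothetical stand-in for LDK's ChannelError; the real enum has more
// variants and different payloads.
#[allow(dead_code)]
enum ChannelError {
    Ignore(String),
    Close(String),
}

// Hypothetical caller: with this PR, a duplicate or unknown HTLC failure surfaces as
// Err(ChannelError::Ignore(..)), so the caller can log and continue instead of relying
// on a debug_assert!-guarded Ok(None).
fn handle_fail_result(res: Result<(), ChannelError>) {
    match res {
        Ok(()) => {
            // The failure was queued and will go out with the next commitment update.
        }
        Err(ChannelError::Ignore(msg)) => {
            // e.g. "HTLC 42 was already resolved" -- nothing further to do.
            println!("ignoring HTLC failure: {msg}");
        }
        Err(ChannelError::Close(msg)) => {
            // Not produced by queue_fail_htlc per the doc comment above; a real caller
            // would force-close the channel here.
            eprintln!("channel error requires closure: {msg}");
        }
    }
}

fn main() {
    handle_fail_result(Err(ChannelError::Ignore("HTLC 42 was already resolved".to_owned())));
}
```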
@@ -9718,13 +9670,6 @@ impl<SP: Deref> Writeable for FundedChannel<SP> where SP::Target: SignerProvider

self.context.channel_update_status.write(writer)?;

#[cfg(any(test, fuzzing))]
(self.context.historical_inbound_htlc_fulfills.len() as u64).write(writer)?;
#[cfg(any(test, fuzzing))]
for htlc in self.context.historical_inbound_htlc_fulfills.iter() {
htlc.write(writer)?;
}

// If the channel type is something other than only-static-remote-key, then we need to have
// older clients fail to deserialize this channel at all. If the type is
// only-static-remote-key, we simply consider it "default" and don't write the channel type
@@ -10058,16 +10003,6 @@ impl<'a, 'b, 'c, ES: Deref, SP: Deref> ReadableArgs<(&'a ES, &'b SP, u32, &'c Ch

let channel_update_status = Readable::read(reader)?;

#[cfg(any(test, fuzzing))]
let mut historical_inbound_htlc_fulfills = new_hash_set();
#[cfg(any(test, fuzzing))]
{
let htlc_fulfills_len: u64 = Readable::read(reader)?;
for _ in 0..htlc_fulfills_len {
assert!(historical_inbound_htlc_fulfills.insert(Readable::read(reader)?));
}
}

let pending_update_fee = if let Some(feerate) = pending_update_fee_value {
Some((feerate, if channel_parameters.is_outbound_from_holder {
FeeUpdateState::Outbound
@@ -10408,9 +10343,6 @@ impl<'a, 'b, 'c, ES: Deref, SP: Deref> ReadableArgs<(&'a ES, &'b SP, u32, &'c Ch
channel_pending_event_emitted: channel_pending_event_emitted.unwrap_or(true),
channel_ready_event_emitted: channel_ready_event_emitted.unwrap_or(true),

#[cfg(any(test, fuzzing))]
historical_inbound_htlc_fulfills,

channel_type: channel_type.unwrap(),
channel_keys_id,
