diff --git a/pyproject.toml b/pyproject.toml
index 0e563f0f3..0dee18135 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
     "scikit-learn",
     "scipy<1.13",
     "tensorboard",
-    "torch>=1.13.0, <2.6.0",
+    "torch>=1.13.0",
     "tqdm",
     "pymc>=5.0.0",
     "zuko>=1.2.0",
diff --git a/sbi/inference/abc/smcabc.py b/sbi/inference/abc/smcabc.py
index 25b20400c..d2b62d153 100644
--- a/sbi/inference/abc/smcabc.py
+++ b/sbi/inference/abc/smcabc.py
@@ -679,8 +679,8 @@ def get_new_kernel(self, thetas: Tensor) -> Distribution:
             )
 
         elif self.kernel == "uniform":
-            low = thetas - self.kernel_variance
-            high = thetas + self.kernel_variance
+            low = thetas - self.kernel_variance  # type: ignore
+            high = thetas + self.kernel_variance  # type: ignore
             # Move batch shape to event shape to get Uniform that is multivariate in
             # parameter dimension.
             return BoxUniform(low=low, high=high)
diff --git a/sbi/inference/trainers/npe/npe_a.py b/sbi/inference/trainers/npe/npe_a.py
index f48cd325a..4bd29f4c0 100644
--- a/sbi/inference/trainers/npe/npe_a.py
+++ b/sbi/inference/trainers/npe/npe_a.py
@@ -425,7 +425,7 @@ def __init__(
                 logits_pp,
                 m_pp,
                 prec_pp,
-            ) = proposal.posterior_estimator._posthoc_correction(default_x)
+            ) = proposal.posterior_estimator._posthoc_correction(default_x)  # type: ignore
             self._logits_pp, self._m_pp, self._prec_pp = (
                 logits_pp.detach(),
                 m_pp.detach(),
@@ -536,7 +536,7 @@ def _sample_approx_posterior_mog(
             num_samples, logits_p, m_p, prec_factors_p
         )
 
-        embedded_context = self._neural_net.net._embedding_net(x)
+        embedded_context = self._neural_net.net._embedding_net(x)  # type: ignore
         if embedded_context is not None:
             # Merge the context dimension with sample dimension in order to
             # apply the transform.
@@ -546,8 +546,9 @@ def _sample_approx_posterior_mog(
             )
 
         theta, _ = self._neural_net.net._transform.inverse(
-            theta, context=embedded_context
-        )
+            theta,  # type: ignore
+            context=embedded_context,
+        )  # type: ignore
 
         if embedded_context is not None:
             # Split the context dimension from sample dimension.
@@ -574,9 +575,9 @@ def _posthoc_correction(self, x: Tensor):
             x = x.squeeze(dim=0)
 
         # Evaluate the density estimator.
-        embedded_x = self._neural_net.net._embedding_net(x)
+        embedded_x = self._neural_net.net._embedding_net(x)  # type: ignore
         dist = self._neural_net.net._distribution  # defined to avoid black formatting.
-        logits_d, m_d, prec_d, _, _ = dist.get_mixture_components(embedded_x)
+        logits_d, m_d, prec_d, _, _ = dist.get_mixture_components(embedded_x)  # type: ignore
         norm_logits_d = logits_d - torch.logsumexp(logits_d, dim=-1, keepdim=True)
         norm_logits_d = atleast_2d(norm_logits_d)
 
@@ -704,8 +705,8 @@ def _set_maybe_z_scored_prior(self) -> None:
         prior will not be exactly have mean=0 and std=1.
         """
         if self.z_score_theta:
-            scale = self._neural_net.net._transform._transforms[0]._scale
-            shift = self._neural_net.net._transform._transforms[0]._shift
+            scale = self._neural_net.net._transform._transforms[0]._scale  # type: ignore
+            shift = self._neural_net.net._transform._transforms[0]._shift  # type: ignore
 
             # Following the definition of the linear transform in
             # `standardizing_transform` in `sbiutils.py`:
@@ -739,7 +740,7 @@ def _maybe_z_score_theta(self, theta: Tensor) -> Tensor:
         """Return potentially standardized theta if z-scoring was requested."""
 
         if self.z_score_theta:
-            theta, _ = self._neural_net.net._transform(theta)
+            theta, _ = self._neural_net.net._transform(theta)  # type: ignore
 
         return theta
 
@@ -784,7 +785,7 @@ def _precisions_posterior(self, precisions_pp: Tensor, precisions_d: Tensor):
         precisions_p = precisions_d_rep - precisions_pp_rep
 
         if isinstance(self._maybe_z_scored_prior, MultivariateNormal):
-            precisions_p += self._maybe_z_scored_prior.precision_matrix
+            precisions_p += self._maybe_z_scored_prior.precision_matrix  # type: ignore
 
         # Check if precision matrix is positive definite.
         for _, batches in enumerate(precisions_p):
diff --git a/sbi/inference/trainers/npe/npe_c.py b/sbi/inference/trainers/npe/npe_c.py
index 808c6b8af..ab1577d37 100644
--- a/sbi/inference/trainers/npe/npe_c.py
+++ b/sbi/inference/trainers/npe/npe_c.py
@@ -423,11 +423,11 @@ def _log_prob_proposal_posterior_mog(
         # Evaluate the proposal. MDNs do not have functionality to run the embedding_net
         # and then get the mixture_components (**without** calling log_prob()). Hence,
         # we call them separately here.
-        encoded_x = proposal.posterior_estimator.net._embedding_net(proposal.default_x)
+        encoded_x = proposal.posterior_estimator.net._embedding_net(proposal.default_x)  # type: ignore
         dist = (
             proposal.posterior_estimator.net._distribution
         )  # defined to avoid ugly black formatting.
-        logits_p, m_p, prec_p, _, _ = dist.get_mixture_components(encoded_x)
+        logits_p, m_p, prec_p, _, _ = dist.get_mixture_components(encoded_x)  # type: ignore
         norm_logits_p = logits_p - torch.logsumexp(logits_p, dim=-1, keepdim=True)
 
         # Evaluate the density estimator.
@@ -545,7 +545,7 @@ def _precisions_proposal_posterior(
         precisions_pp = precisions_p_rep + precisions_d_rep
 
         if isinstance(self._maybe_z_scored_prior, MultivariateNormal):
-            precisions_pp -= self._maybe_z_scored_prior.precision_matrix
+            precisions_pp -= self._maybe_z_scored_prior.precision_matrix  # type: ignore
 
         covariances_pp = torch.inverse(precisions_pp)
 
diff --git a/sbi/neural_nets/estimators/flowmatching_estimator.py b/sbi/neural_nets/estimators/flowmatching_estimator.py
index 8b6494054..9ae1c7cc4 100644
--- a/sbi/neural_nets/estimators/flowmatching_estimator.py
+++ b/sbi/neural_nets/estimators/flowmatching_estimator.py
@@ -66,7 +66,7 @@ def embedding_net(self):
 
     def forward(self, input: Tensor, condition: Tensor, t: Tensor) -> Tensor:
         # positional encoding of time steps
-        t = self.freqs * t[..., None]
+        t = self.freqs * t[..., None]  # type: ignore
         t = torch.cat((t.cos(), t.sin()), dim=-1)
 
         # embed the input and condition
@@ -162,5 +162,5 @@ def flow(self, condition: Tensor) -> NormalizingFlow:
 
         return NormalizingFlow(
             transform=transform,
-            base=DiagNormal(self.zeros, self.ones).expand(condition.shape[:-1]),
+            base=DiagNormal(self.zeros, self.ones).expand(condition.shape[:-1]),  # type: ignore
         )
diff --git a/sbi/neural_nets/estimators/nflows_flow.py b/sbi/neural_nets/estimators/nflows_flow.py
index 04ba24196..825321122 100644
--- a/sbi/neural_nets/estimators/nflows_flow.py
+++ b/sbi/neural_nets/estimators/nflows_flow.py
@@ -70,7 +70,7 @@ def inverse_transform(self, input: Tensor, condition: Tensor) -> Tensor:
         input = input.reshape(-1, input.shape[-1])
         condition = condition.reshape(-1, *self.condition_shape)
 
-        noise, _ = self.net._transorm(input, context=condition)
+        noise, _ = self.net._transorm(input, context=condition)  # type: ignore
         noise = noise.reshape(batch_shape)
         return noise
 
diff --git a/sbi/neural_nets/estimators/score_estimator.py b/sbi/neural_nets/estimators/score_estimator.py
index 132d44d1d..8ac3ec8bd 100644
--- a/sbi/neural_nets/estimators/score_estimator.py
+++ b/sbi/neural_nets/estimators/score_estimator.py
@@ -228,7 +228,7 @@ def approx_marginal_mean(self, times: Tensor) -> Tensor:
         Returns:
             Approximate marginal mean at a given time.
         """
-        return self.mean_t_fn(times) * self.mean_0
+        return self.mean_t_fn(times) * self.mean_0  # type: ignore
 
     def approx_marginal_std(self, times: Tensor) -> Tensor:
         r"""Approximate the marginal standard deviation of the target distribution at a
@@ -240,8 +240,8 @@ def approx_marginal_std(self, times: Tensor) -> Tensor:
         Returns:
             Approximate marginal standard deviation at a given time.
         """
-        vars = self.mean_t_fn(times) ** 2 * self.std_0**2 + self.std_fn(times) ** 2
-        return torch.sqrt(vars)
+        variances = self.mean_t_fn(times) ** 2 * self.std_0**2 + self.std_fn(times) ** 2  # type: ignore
+        return torch.sqrt(variances)
 
     def mean_t_fn(self, times: Tensor) -> Tensor:
         r"""Conditional mean function, E[xt|x0], specifying the "mean factor" at a given
diff --git a/sbi/neural_nets/net_builders/score_nets.py b/sbi/neural_nets/net_builders/score_nets.py
index 274c0b2c7..3e9cc540c 100644
--- a/sbi/neural_nets/net_builders/score_nets.py
+++ b/sbi/neural_nets/net_builders/score_nets.py
@@ -298,8 +298,8 @@ def __init__(
         )
 
         # Initialize the last layer to zero
-        self.ada_ln[-1].weight.data.zero_()
-        self.ada_ln[-1].bias.data.zero_()
+        self.ada_ln[-1].weight.data.zero_()  # type: ignore
+        self.ada_ln[-1].bias.data.zero_()  # type: ignore
 
         # MLP block
         # NOTE: This can be made more flexible to support layer types.