diff --git a/pySDC/helpers/NCCL_communicator.py b/pySDC/helpers/NCCL_communicator.py
index 155c47622b..4001498507 100644
--- a/pySDC/helpers/NCCL_communicator.py
+++ b/pySDC/helpers/NCCL_communicator.py
@@ -29,6 +29,7 @@ def __getattr__(self, name):
         """
         if name not in ['size', 'rank', 'Get_rank', 'Get_size', 'Split']:
             cp.cuda.get_current_stream().synchronize()
+
         return getattr(self.commMPI, name)
 
     @staticmethod
@@ -71,6 +72,9 @@ def get_op(self, MPI_op):
             raise NotImplementedError('Don\'t know what NCCL operation to use to replace this MPI operation!')
 
     def Reduce(self, sendbuf, recvbuf, op=MPI.SUM, root=0):
+        if not hasattr(sendbuf.data, 'ptr'):
+            return self.commMPI.Reduce(sendbuf=sendbuf, recvbuf=recvbuf, op=op, root=root)
+
         dtype = self.get_dtype(sendbuf)
         count = self.get_count(sendbuf)
         op = self.get_op(op)
@@ -88,6 +92,9 @@ def Reduce(self, sendbuf, recvbuf, op=MPI.SUM, root=0):
         )
 
     def Allreduce(self, sendbuf, recvbuf, op=MPI.SUM):
+        if not hasattr(sendbuf.data, 'ptr'):
+            return self.commMPI.Allreduce(sendbuf=sendbuf, recvbuf=recvbuf, op=op)
+
         dtype = self.get_dtype(sendbuf)
         count = self.get_count(sendbuf)
         op = self.get_op(op)
@@ -96,3 +103,13 @@ def Allreduce(self, sendbuf, recvbuf, op=MPI.SUM):
         self.commNCCL.allReduce(
             sendbuf=sendbuf.data.ptr, recvbuf=recvbuf.data.ptr, count=count, datatype=dtype, op=op, stream=stream.ptr
         )
+
+    def Bcast(self, buf, root=0):  # broadcast `buf` from rank `root`: NCCL if `buf.data` has `ptr`, else MPI
+        if not hasattr(buf.data, 'ptr'):  # no raw pointer exposed (presumably a host-side buffer; confirm)
+            return self.commMPI.Bcast(buf=buf, root=root)
+        # From here on `buf.data.ptr` exists and the broadcast is handed to the NCCL communicator.
+        dtype = self.get_dtype(buf)
+        count = self.get_count(buf)
+        stream = cp.cuda.get_current_stream()
+        # Issued on the current CuPy stream; this method does not synchronize and returns nothing on this path.
+        self.commNCCL.bcast(buff=buf.data.ptr, count=count, datatype=dtype, root=root, stream=stream.ptr)
diff --git a/pySDC/helpers/plot_helper.py b/pySDC/helpers/plot_helper.py
index 340f1e5d98..51e8783b96 100644
--- a/pySDC/helpers/plot_helper.py
+++ b/pySDC/helpers/plot_helper.py
@@ -42,10 +42,12 @@ def figsize_by_journal(journal, scale, ratio):  # pragma: no cover
     textwidths = {
         'JSC_beamer': 426.79135,
         'Springer_Numerical_Algorithms': 338.58778,
+        'JSC_thesis': 434.26027,
     }
     # store text height in points here, get this from LaTeX using \the\textheight
     textheights = {
         'JSC_beamer': 214.43411,
+        'JSC_thesis': 635.5,
     }
     assert (
         journal in textwidths.keys()
diff --git a/pySDC/implementations/problem_classes/AllenCahn_MPIFFT.py b/pySDC/implementations/problem_classes/AllenCahn_MPIFFT.py
index 0a4c233572..f929f320f5 100644
--- a/pySDC/implementations/problem_classes/AllenCahn_MPIFFT.py
+++ b/pySDC/implementations/problem_classes/AllenCahn_MPIFFT.py
@@ -7,7 +7,7 @@
 
 class allencahn_imex(IMEX_Laplacian_MPIFFT):
     r"""
-    Example implementing the :math:`N`-dimensional Allen-Cahn equation with periodic boundary conditions :math:`u \in [0, 1]^2`
+    Example implementing the :math:`2`-dimensional Allen-Cahn equation with periodic boundary conditions :math:`u \in [0, 1]^2`
 
     .. math::
         \frac{\partial u}{\partial t} = \Delta u - \frac{2}{\varepsilon^2} u (1 - u) (1 - 2u)
@@ -153,7 +153,9 @@ def u_exact(self, t, **kwargs):
                         # build radius
                         r2 = (self.X[0] + i - L + 0.5) ** 2 + (self.X[1] + j - L + 0.5) ** 2
                         # add this blob, shifted by 1 to avoid issues with adding up negative contributions
-                        tmp += self.xp.tanh((rand_radii[i, j] - np.sqrt(r2)) / (np.sqrt(2) * self.eps)) + 1
+                        tmp += self.xp.tanh((rand_radii[i, j] - self.xp.sqrt(r2)) / (np.sqrt(2) * self.eps)) + 1
+            else:
+                raise NotImplementedError
             # normalize to [0,1]
             tmp *= 0.5
             assert self.xp.all(tmp <= 1.0)
@@ -183,8 +185,8 @@ class allencahn_imex_timeforcing(allencahn_imex):
         u({\bf x}, 0) = \tanh\left(\frac{r - \sqrt{(x_i-0.5)^2 + (y_j-0.5)^2}}{\sqrt{2}\varepsilon}\right),
 
     for :math:`i, j=0,..,N-1`, where :math:`N` is the number of spatial grid points. For time-stepping, the problem is treated
-    *semi-implicitly*, i.e., the linear part is solved with Fast-Fourier Transform (FFT) and the nonlinear part in the right-hand
-    side will be treated explicitly using ``mpi4py-fft`` [1]_ to solve them.
+    *semi-implicitly*, i.e., the linear part is solved with the fast Fourier transform (FFT) using ``mpi4py-fft`` [1]_, while the
+    nonlinear part of the right-hand side is treated explicitly.
     """
 
     def eval_f(self, u, t):
diff --git a/pySDC/implementations/problem_classes/NonlinearSchroedinger_MPIFFT.py b/pySDC/implementations/problem_classes/NonlinearSchroedinger_MPIFFT.py
index fa930bb114..892990d9e9 100644
--- a/pySDC/implementations/problem_classes/NonlinearSchroedinger_MPIFFT.py
+++ b/pySDC/implementations/problem_classes/NonlinearSchroedinger_MPIFFT.py
@@ -118,6 +118,8 @@ class nonlinearschroedinger_fully_implicit(nonlinearschroedinger_imex):
     dtype_f = mesh
 
     def __init__(self, lintol=1e-9, liniter=99, **kwargs):
+        assert kwargs.get('useGPU', False) is False
+
         super().__init__(**kwargs)
         self._makeAttributeAndRegister('liniter', 'lintol', localVars=locals(), readOnly=False)