diff --git a/doc/whats_new/v0.4.rst b/doc/whats_new/v0.4.rst
index 1a4de76c7..303c46b11 100644
--- a/doc/whats_new/v0.4.rst
+++ b/doc/whats_new/v0.4.rst
@@ -14,6 +14,7 @@ Changelog
 ---------
 
 - |API| ``FeatureImportanceForest*`` now has a hyperparameter to control the number of permutations is done per forest ``permute_per_forest_fraction``, by `Adam Li`_ (:pr:`145`)
+- |Enhancement| Add dataset generators for regression and classification and hypothesis testing, by `Adam Li`_ (:pr:`160`)
 
 Code and Documentation Contributors
 -----------------------------------
diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork
index 1adb20907..3d377ad78 160000
--- a/sktree/_lib/sklearn_fork
+++ b/sktree/_lib/sklearn_fork
@@ -1 +1 @@
-Subproject commit 1adb209077f12adac8f760196ae5260abab0cbdd
+Subproject commit 3d377ad78db936b17ee24e59d29222f259d92412
diff --git a/sktree/datasets/__init__.py b/sktree/datasets/__init__.py
new file mode 100644
index 000000000..74c770d6a
--- /dev/null
+++ b/sktree/datasets/__init__.py
@@ -0,0 +1,2 @@
+from .hyppo import make_quadratic_classification
+from .multiview import make_gaussian_mixture, make_joint_factor_model
diff --git a/sktree/datasets/hyppo.py b/sktree/datasets/hyppo.py
new file mode 100644
index 000000000..2c1cbcbdb
--- /dev/null
+++ b/sktree/datasets/hyppo.py
@@ -0,0 +1,45 @@
+import numpy as np
+
+
+def make_quadratic_classification(n_samples: int, n_features: int, noise=False, seed=None):
+    """Simulate classification data from a quadratic model.
+
+    This is a form of the simulation used in :footcite:`panda2018learning`.
+
+    Parameters
+    ----------
+    n_samples : int
+        The number of samples to generate.
+    n_features : int
+        The number of dimensions in the dataset.
+    noise : bool, optional
+        Whether or not to add noise, by default False.
+    seed : int, optional
+        Random seed, by default None.
+
+    Returns
+    -------
+    x : array-like, shape (n_samples, n_features)
+        Data array.
+    v : array-like, shape (n_samples,)
+        Target array of 1's and 0's.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+    rng = np.random.default_rng(seed)
+
+    x = rng.standard_normal(size=(n_samples, n_features))
+    coeffs = np.array([np.exp(-0.0325 * (i + 24)) for i in range(n_features)])
+    eps = rng.standard_normal(size=(n_samples, n_features))
+
+    x_coeffs = x * coeffs
+    y = x_coeffs**2 + noise * eps
+
+    # generate the classification labels
+    n1 = x.shape[0]
+    n2 = y.shape[0]
+    v = np.vstack([np.zeros((n1, 1)), np.ones((n2, 1))])
+    x = np.vstack((x, y))
+    return x, v
diff --git a/sktree/datasets/meson.build b/sktree/datasets/meson.build
new file mode 100644
index 000000000..85a6f9b3f
--- /dev/null
+++ b/sktree/datasets/meson.build
@@ -0,0 +1,11 @@
+python_sources = [
+  '__init__.py',
+  'multiview.py',
+  'hyppo.py',
+]
+
+py3.install_sources(
+  python_sources,
+  pure: false,
+  subdir: 'sktree/datasets'
+)
diff --git a/sktree/datasets/multiview.py b/sktree/datasets/multiview.py
new file mode 100644
index 000000000..11df3993f
--- /dev/null
+++ b/sktree/datasets/multiview.py
@@ -0,0 +1,358 @@
+# Original source: https://github.com/mvlearn/mvlearn
+# License: MIT
+
+import numpy as np
+from scipy.stats import ortho_group
+from sklearn.utils import check_random_state
+
+
+def make_gaussian_mixture(
+    centers,
+    covariances,
+    n_samples=100,
+    transform="linear",
+    noise=None,
+    noise_dims=None,
+    class_probs=None,
+    random_state=None,
+    shuffle=False,
+    return_latents=False,
+    add_latent_noise=False,
+):
+    r"""Two-view Gaussian mixture model dataset generator.
+
+    This creates a two-view dataset from a Gaussian mixture model and
+    a (possibly nonlinear) transformation.
+
+    Parameters
+    ----------
+    centers : 1D array-like or list of 1D array-likes
+        The mean(s) of the Gaussian(s) from which the latent
+        points are sampled. If is a list of 1D array-likes, each is the
+        mean of a distinct Gaussian, sampled from with
+        probability given by `class_probs`. Otherwise is the mean of a
+        single Gaussian from which all are sampled.
+    covariances : 2D array-like or list of 2D array-likes
+        The covariance matrix(s) of the Gaussian(s), matched
+        to the specified centers.
+    n_samples : int
+        The number of points in each view, divided across Gaussians per
+        `class_probs`.
+    transform : 'linear' | 'sin' | poly' | callable, (default 'linear')
+        Transformation to perform on the latent variable. If a function,
+        applies it to the latent. Otherwise uses an implemented function.
+    noise : float or None (default=None)
+        Variance of mean zero Gaussian noise added to the first view.
+    noise_dims : int or None (default=None)
+        Number of additional dimensions of standard normal noise to add.
+    class_probs : array-like, default=None
+        A list of probabilities specifying the probability of a latent
+        point being sampled from each of the Gaussians. Must sum to 1. If
+        None, then is taken to be uniform over the Gaussians.
+    random_state : int, default=None
+        If set, can be used to reproduce the data generated.
+    shuffle : bool, default=False
+        If ``True``, data is shuffled so the labels are not ordered.
+    return_latents : bool (default False)
+        If true, returns the non-noisy latent variables.
+    add_latent_noise : bool (default False)
+        If true, adds noise to the latent variables before applying the
+        transformation.
+
+    Returns
+    -------
+    Xs : list of np.ndarray, of shape (n_samples, n_features)
+        The latent data and its noisy transformation.
+
+    y : np.ndarray, shape (n_samples,)
+        The integer labels for each sample's Gaussian membership.
+
+    latents : np.ndarray, shape (n_samples, n_features)
+        The non-noisy latent variables. Only returned if
+        ``return_latents=True``.
+
+    Notes
+    -----
+    For each class :math:`i` with prior probability :math:`p_i`,
+    center and covariance matrix :math:`\mu_i` and :math:`\Sigma_i`, and
+    :math:`n` total samples, the latent data is sampled such that:
+
+    .. math::
+        (X_1, y_1), \dots, (X_{np_i}, Y_{np_i}) \overset{i.i.d.}{\sim}
+            \mathcal{N}(\mu_i, \Sigma_i)
+
+    Two views of data are returned, the first being the latent samples and
+    the second being a specified transformation of the latent samples.
+    Additional noise may be added to the first view or added as noise
+    dimensions to both views.
+
+    Examples
+    --------
+    >>> from sktree.datasets.multiview import make_gaussian_mixture
+    >>> import numpy as np
+    >>> n_samples = 10
+    >>> centers = [[0,1], [0,-1]]
+    >>> covariances = [np.eye(2), np.eye(2)]
+    >>> Xs, y = make_gaussian_mixture(n_samples, centers, covariances,
+    ...                               shuffle=True, shuffle_random_state=42)
+    >>> print(y)
+    [1. 0. 1. 0. 1. 0. 1. 0. 0. 1.]
+    """
+    centers = np.asarray(centers)
+    covariances = np.asarray(covariances)
+
+    if centers.ndim == 1:
+        centers = centers[np.newaxis, :]
+    if covariances.ndim == 2:
+        covariances = covariances[np.newaxis, :]
+    if not centers.ndim == 2:
+        msg = "centers is of the incorrect shape"
+        raise ValueError(msg)
+    if not covariances.ndim == 3:
+        msg = "covariance matrix is of the incorrect shape"
+        raise ValueError(msg)
+    if centers.shape[0] != covariances.shape[0]:
+        msg = "The first dimensions of 2D centers and 3D covariances" + " must be equal"
+        raise ValueError(msg)
+    if class_probs is None:
+        class_probs = np.ones(centers.shape[0])
+        class_probs /= centers.shape[0]
+    elif sum(class_probs) != 1.0:
+        msg = "elements of `class_probs` must sum to 1"
+        raise ValueError(msg)
+    if len(centers) != len(class_probs) or len(covariances) != len(class_probs):
+        msg = "centers, covariances, and class_probs must be of equal length"
+        raise ValueError(msg)
+
+    rng = np.random.default_rng(seed=random_state)
+    latent = np.concatenate(
+        [
+            rng.multivariate_normal(
+                centers[i],
+                covariances[i],
+                size=int(class_probs[i] * n_samples),
+            )
+            for i in range(len(class_probs))
+        ]
+    )
+    y = np.concatenate(
+        [i * np.ones(int(class_probs[i] * n_samples)) for i in range(len(class_probs))]
+    )
+
+    if add_latent_noise:
+        latent += rng.standard_normal(size=latent.shape) * 0.1
+
+    # shuffle latent samples and labels
+    if shuffle:
+        indices = np.arange(latent.shape[0]).squeeze()
+        rng.shuffle(indices)
+        latent = latent[indices, :]
+        y = y[indices]
+
+    if callable(transform):
+        X = np.asarray([transform(x) for x in latent])
+    elif not isinstance(transform, str):
+        raise TypeError(
+            "'transform' must be of type string or a callable function," + f"not {type(transform)}"
+        )
+    elif transform == "linear":
+        X = _linear2view(latent, rng)
+    elif transform == "poly":
+        X = _poly2view(latent)
+    elif transform == "sin":
+        X = _sin2view(latent)
+    else:
+        raise ValueError(
+            "Transform type must be one of {'linear', 'poly', 'sin'}"
+            + f" or a callable function. Not {transform}"
+        )
+
+    if noise is not None:
+        Xs = [latent + np.sqrt(noise) * rng.standard_normal(size=latent.shape), X]
+    else:
+        Xs = [latent, X]
+
+    # if random_state is not None, make sure both views are independent
+    # but reproducible
+    if noise_dims is not None:
+        Xs = [_add_noise(X, noise_dims, rng) for X in Xs]
+
+    if return_latents:
+        return Xs, y, latent
+    else:
+        return Xs, y
+
+
+def _add_noise(X, n_noise, rng):
+    """Appends dimensions of standard normal noise to X"""
+    noise_vars = rng.standard_normal(size=(X.shape[0], n_noise))
+    return np.hstack((X, noise_vars))
+
+
+def _linear2view(X, rng):
+    """Rotates the data, a linear transformation"""
+    if X.shape[1] == 1:
+        X = -X
+    else:
+        X = X @ ortho_group.rvs(X.shape[1], random_state=rng)
+    return X
+
+
+def _poly2view(X):
+    """Applies a degree 2 polynomial transform to the data"""
+    X = np.asarray([np.power(x, 2) for x in X])
+    return X
+
+
+def _sin2view(X):
+    """Applies a sinusoidal transformation to the data"""
+    X = np.asarray([np.sin(x) for x in X])
+    return X
+
+
+def _rand_orthog(n, K, random_state=None):
+    """
+    Samples a random orthonormal matrix.
+
+    Parameters
+    ----------
+    n : int, positive
+        Number of rows in the matrix
+
+    K : int, positive
+        Number of columns in the matrix
+
+    random_state : None | int | instance of RandomState, optional
+        Seed to set randomization for reproducible results
+
+    Returns
+    -------
+    A: array-like, (n, K)
+        A random, column orthonormal matrix.
+
+    Notes
+    -----
+    See Section A.1.1 of :footcite:`perry2009crossvalidation`.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+    rng = check_random_state(random_state)
+
+    Z = rng.normal(size=(n, K))
+    Q, R = np.linalg.qr(Z)
+
+    s = np.ones(K)
+    neg_mask = rng.uniform(size=K) > 0.5
+    s[neg_mask] = -1
+
+    return Q * s
+
+
+def make_joint_factor_model(
+    n_views,
+    n_features,
+    n_samples=100,
+    joint_rank=1,
+    noise_std=1,
+    m=1.5,
+    random_state=None,
+    return_decomp=False,
+):
+    """Joint factor model data generator.
+
+    Samples from a low rank, joint factor model where there is one set of
+    shared scores.
+
+    Parameters
+    -----------
+    n_views : int
+        Number of views to sample. This corresponds to ``B`` in the notes.
+
+    n_features : int, or list of int
+        Number of features in each view. A list specifies a different number
+        of features for each view.
+
+    n_samples : int
+        Number of samples in each view
+
+    joint_rank : int (default 1)
+        Rank of the common signal across views.
+
+    noise_std : float (default 1)
+        Scale of noise distribution.
+
+    m : float (default 1.5)
+        Signal strength.
+
+    random_state : int or RandomState instance, optional (default=None)
+        Controls random orthonormal matrix sampling and random noise
+        generation. Set for reproducible results.
+
+    return_decomp : bool, default=False
+        If ``True``, returns the ``view_loadings`` as well.
+
+    Returns
+    -------
+    Xs : list of array-likes or numpy.ndarray
+        List of samples data matrices:
+
+        - Xs length: n_views
+        - Xs[i] shape: (n_samples, n_features_i)
+
+        If ``return_decomp`` is False, returns just an array.
+
+    U: (n_samples, joint_rank)
+        The true orthonormal joint scores matrix. Returned if
+        ``return_decomp`` is True.
+
+    view_loadings: list of numpy.ndarray
+        The true view loadings matrices. Returned if
+        ``return_decomp`` is True.
+
+    Notes
+    -----
+    The data is generated as follows, where:
+
+    - :math:`b` are the different views
+    - :math:`U` is is a (n_samples, joint_rank) matrix of rotation matrices.
+    - ``svals`` are the singular values sampled.
+    - :math:`W_b` are (n_features_b, joint_rank) view loadings matrices, which are
+        orthonormal matrices to linearly transform the data, while preserving inner
+        products (i.e. a unitary transformation).
+
+    For b = 1, .., B
+        X_b = U @ diag(svals) @ W_b^T + noise_std * E_b
+
+    where U and each W_b are orthonormal matrices. The singular values are
+    linearly increasing following :footcite:`choi2017selectingpca` section 2.2.3.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+    rng = check_random_state(random_state)
+    if isinstance(n_features, int):
+        n_features = [n_features] * n_views
+
+    # generate W_b orthonormal matrices
+    view_loadings = [_rand_orthog(d, joint_rank, random_state=rng) for d in n_features]
+
+    # sample monotonically increasing singular values
+    # the signal increases linearly and ``m`` determines the strength of the signal
+    svals = np.arange(1, 1 + joint_rank).astype(float)
+    svals *= m * noise_std * (n_samples * max(n_features)) ** (1 / 4)
+
+    # rotation operators that are generated via standard random normal
+    U = rng.standard_normal(size=(n_samples, joint_rank))
+    U = np.linalg.qr(U)[0]
+
+    # random noise for each view
+    Es = [noise_std * rng.standard_normal(size=(n_samples, d)) for d in n_features]
+    Xs = [(U * svals) @ view_loadings[b].T + Es[b] for b in range(n_views)]
+
+    if return_decomp:
+        return Xs, U, view_loadings
+    else:
+        return Xs
diff --git a/sktree/meson.build b/sktree/meson.build
index 8608052b8..aa46fe60f 100644
--- a/sktree/meson.build
+++ b/sktree/meson.build
@@ -86,3 +86,4 @@ subdir('experimental')
 subdir('stats')
 subdir('tests')
 subdir('tree')
+subdir('datasets')