diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index b5a6b675..976d739f 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -3,11 +3,11 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 import ctypes
-import warnings
 import weakref
 from contextlib import contextmanager
 from dataclasses import dataclass
 from typing import List, Optional
+from warnings import warn
 
 from cuda.core.experimental._device import Device
 from cuda.core.experimental._module import ObjectCode
@@ -23,11 +23,11 @@
 
 
 # Note: this function is reused in the tests
-def _decide_nvjitlink_or_driver():
+def _decide_nvjitlink_or_driver() -> bool:
     """Returns True if falling back to the cuLink* driver APIs."""
     global _driver_ver, _driver, _nvjitlink
     if _driver or _nvjitlink:
-        return
+        return _driver is not None
 
     _driver_ver = handle_return(driver.cuDriverGetVersion())
     _driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10)
@@ -43,7 +43,7 @@ def _decide_nvjitlink_or_driver():
             _nvjitlink = None
 
     if _nvjitlink is None:
-        warnings.warn(
+        warn(
             "nvJitLink is not installed or too old (<12.3). Therefore it is not usable "
             "and the culink APIs will be used instead.",
             stacklevel=3,
@@ -98,78 +98,59 @@ class LinkerOptions:
         will be used.
     max_register_count : int, optional
         Maximum register count.
-        Maps to: ``-maxrregcount=<N>``.
     time : bool, optional
         Print timing information to the info log.
-        Maps to ``-time``.
         Default: False.
     verbose : bool, optional
         Print verbose messages to the info log.
-        Maps to ``-verbose``.
         Default: False.
     link_time_optimization : bool, optional
         Perform link time optimization.
-        Maps to: ``-lto``.
         Default: False.
     ptx : bool, optional
-        Emit PTX after linking instead of CUBIN; only supported with ``-lto``.
-        Maps to ``-ptx``.
+        Emit PTX after linking instead of CUBIN; only supported with ``link_time_optimization=True``.
         Default: False.
     optimization_level : int, optional
         Set optimization level. Only 0 and 3 are accepted.
-        Maps to ``-O<N>``.
     debug : bool, optional
         Generate debug information.
-        Maps to ``-g``
         Default: False.
     lineinfo : bool, optional
         Generate line information.
-        Maps to ``-lineinfo``.
         Default: False.
     ftz : bool, optional
         Flush denormal values to zero.
-        Maps to ``-ftz=<n>``.
         Default: False.
     prec_div : bool, optional
         Use precise division.
-        Maps to ``-prec-div=<n>``.
         Default: True.
     prec_sqrt : bool, optional
         Use precise square root.
-        Maps to ``-prec-sqrt=<n>``.
         Default: True.
     fma : bool, optional
         Use fast multiply-add.
-        Maps to ``-fma=<n>``.
         Default: True.
     kernels_used : List[str], optional
         Pass list of kernels that are used; any not in the list can be removed. This option can be specified multiple
         times.
-        Maps to ``-kernels-used=<name>``.
     variables_used : List[str], optional
         Pass a list of variables that are used; any not in the list can be removed.
-        Maps to ``-variables-used=<name>``
     optimize_unused_variables : bool, optional
         Assume that if a variable is not referenced in device code, it can be removed.
-        Maps to: ``-optimize-unused-variables``
         Default: False.
-    xptxas : List[str], optional
+    ptxas_options : List[str], optional
         Pass options to PTXAS.
-        Maps to: ``-Xptxas=<opt>``.
     split_compile : int, optional
         Split compilation maximum thread count. Use 0 to use all available processors. Value of 1 disables split
         compilation (default).
-        Maps to ``-split-compile=<N>``.
         Default: 1.
     split_compile_extended : int, optional
         A more aggressive form of split compilation available in LTO mode only. Accepts a maximum thread count value.
         Use 0 to use all available processors. Value of 1 disables extended split compilation (default). Note: This
         option can potentially impact performance of the compiled binary.
-        Maps to ``-split-compile-extended=<N>``.
         Default: 1.
     no_cache : bool, optional
         Do not cache the intermediate steps of nvJitLink.
-        Maps to ``-no-cache``.
         Default: False.
     """
 
@@ -189,7 +170,7 @@ class LinkerOptions:
     kernels_used: Optional[List[str]] = None
     variables_used: Optional[List[str]] = None
     optimize_unused_variables: Optional[bool] = None
-    xptxas: Optional[List[str]] = None
+    ptxas_options: Optional[List[str]] = None
     split_compile: Optional[int] = None
     split_compile_extended: Optional[int] = None
     no_cache: Optional[bool] = None
@@ -239,8 +220,8 @@ def _init_nvjitlink(self):
                 self.formatted_options.append(f"-variables-used={variable}")
         if self.optimize_unused_variables is not None:
             self.formatted_options.append("-optimize-unused-variables")
-        if self.xptxas is not None:
-            for opt in self.xptxas:
+        if self.ptxas_options is not None:
+            for opt in self.ptxas_options:
                 self.formatted_options.append(f"-Xptxas={opt}")
         if self.split_compile is not None:
             self.formatted_options.append(f"-split-compile={self.split_compile}")
@@ -290,21 +271,21 @@ def _init_driver(self):
             self.formatted_options.append(1)
             self.option_keys.append(_driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO)
         if self.ftz is not None:
-            raise ValueError("ftz option is deprecated in the driver API")
+            warn("ftz option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.prec_div is not None:
-            raise ValueError("prec_div option is deprecated in the driver API")
+            warn("prec_div option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.prec_sqrt is not None:
-            raise ValueError("prec_sqrt option is deprecated in the driver API")
+            warn("prec_sqrt option is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.fma is not None:
-            raise ValueError("fma options is deprecated in the driver API")
+            warn("fma options is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.kernels_used is not None:
-            raise ValueError("kernels_used is deprecated in the driver API")
+            warn("kernels_used is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.variables_used is not None:
-            raise ValueError("variables_used is deprecated in the driver API")
+            warn("variables_used is deprecated in the driver API", DeprecationWarning, stacklevel=3)
         if self.optimize_unused_variables is not None:
-            raise ValueError("optimize_unused_variables is deprecated in the driver API")
-        if self.xptxas is not None:
-            raise ValueError("xptxas option is not supported by the driver API")
+            warn("optimize_unused_variables is deprecated in the driver API", DeprecationWarning, stacklevel=3)
+        if self.ptxas_options is not None:
+            raise ValueError("ptxas_options option is not supported by the driver API")
         if self.split_compile is not None:
             raise ValueError("split_compile option is not supported by the driver API")
         if self.split_compile_extended is not None:
diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py
index c10fd077..f938895e 100644
--- a/cuda_core/cuda/core/experimental/_program.py
+++ b/cuda_core/cuda/core/experimental/_program.py
@@ -7,6 +7,7 @@
 from typing import List, Optional, Tuple, Union
 
 from cuda.core.experimental._device import Device
+from cuda.core.experimental._linker import Linker, LinkerOptions
 from cuda.core.experimental._module import ObjectCode
 from cuda.core.experimental._utils import (
     _handle_boolean_option,
@@ -31,157 +32,120 @@ class ProgramOptions:
     relocatable_device_code : bool, optional
         Enable (disable) the generation of relocatable device code.
         Default: False
-        Maps to: ``--relocatable-device-code={true|false}`` (``-rdc``)
     extensible_whole_program : bool, optional
         Do extensible whole program compilation of device code.
         Default: False
-        Maps to: ``--extensible-whole-program`` (``-ewp``)
     debug : bool, optional
         Generate debug information. If --dopt is not specified, then turns off all optimizations.
         Default: False
-        Maps to: ``--device-debug`` (``-G``)
     lineinfo: bool, optional
         Generate line-number information.
         Default: False
-        Maps to: ``--generate-line-info`` (``-lineinfo``)
     device_code_optimize : bool, optional
         Enable device code optimization. When specified along with ‘-G’, enables limited debug information generation
         for optimized device code.
         Default: None
-        Maps to: ``--dopt on`` (``-dopt``)
     ptxas_options : Union[str, List[str]], optional
         Specify one or more options directly to ptxas, the PTX optimizing assembler. Options should be strings.
         For example ["-v", "-O2"].
         Default: None
-        Maps to: ``--ptxas-options <options>`` (``-Xptxas``)
     max_register_count : int, optional
         Specify the maximum amount of registers that GPU functions can use.
         Default: None
-        Maps to: ``--maxrregcount=<N>`` (``-maxrregcount``)
     ftz : bool, optional
         When performing single-precision floating-point operations, flush denormal values to zero or preserve denormal
         values.
         Default: False
-        Maps to: ``--ftz={true|false}`` (``-ftz``)
     prec_sqrt : bool, optional
         For single-precision floating-point square root, use IEEE round-to-nearest mode or use a faster approximation.
         Default: True
-        Maps to: ``--prec-sqrt={true|false}`` (``-prec-sqrt``)
     prec_div : bool, optional
         For single-precision floating-point division and reciprocals, use IEEE round-to-nearest mode or use a faster
         approximation.
         Default: True
-        Maps to: ``--prec-div={true|false}`` (``-prec-div``)
     fma : bool, optional
         Enables (disables) the contraction of floating-point multiplies and adds/subtracts into floating-point
         multiply-add operations.
         Default: True
-        Maps to: ``--fmad={true|false}`` (``-fmad``)
     use_fast_math : bool, optional
         Make use of fast math operations.
         Default: False
-        Maps to: ``--use_fast_math`` (``-use_fast_math``)
     extra_device_vectorization : bool, optional
         Enables more aggressive device code vectorization in the NVVM optimizer.
         Default: False
-        Maps to: ``--extra-device-vectorization`` (``-extra-device-vectorization``)
     link_time_optimization : bool, optional
         Generate intermediate code for later link-time optimization.
         Default: False
-        Maps to: ``--dlink-time-opt`` (``-dlto``)
     gen_opt_lto : bool, optional
         Run the optimizer passes before generating the LTO IR.
         Default: False
-        Maps to: ``--gen-opt-lto`` (``-gen-opt-lto``)
     define_macro : Union[str, Tuple[str, str], List[Union[str, Tuple[str, str]]]], optional
         Predefine a macro. Can be either a string, in which case that macro will be set to 1, a 2 element tuple of
         strings, in which case the first element is defined as the second, or a list of strings or tuples.
         Default: None
-        Maps to: ``--define-macro=<def>`` (``-D``)
     undefine_macro : Union[str, List[str]], optional
         Cancel any previous definition of a macro, or list of macros.
         Default: None
-        Maps to: ``--undefine-macro=<def>`` (``-U``)
     include_path : Union[str, List[str]], optional
         Add the directory or directories to the list of directories to be searched for headers.
         Default: None
-        Maps to: ``--include-path=<dir>`` (``-I``)
     pre_include : Union[str, List[str]], optional
         Preinclude one or more headers during preprocessing. Can be either a string or a list of strings.
         Default: None
-        Maps to: ``--pre-include=<header>`` (``-include``)
     no_source_include : bool, optional
         Disable the default behavior of adding the directory of each input source to the include path.
         Default: False
-        Maps to: ``--no-source-include`` (``-no-source-include``)
     std : str, optional
         Set language dialect to C++03, C++11, C++14, C++17 or C++20.
         Default: c++17
-        Maps to: ``--std={c++03|c++11|c++14|c++17|c++20}`` (``-std``)
     builtin_move_forward : bool, optional
         Provide builtin definitions of std::move and std::forward.
         Default: True
-        Maps to: ``--builtin-move-forward={true|false}`` (``-builtin-move-forward``)
     builtin_initializer_list : bool, optional
         Provide builtin definitions of std::initializer_list class and member functions.
         Default: True
-        Maps to: ``--builtin-initializer-list={true|false}`` (``-builtin-initializer-list``)
     disable_warnings : bool, optional
         Inhibit all warning messages.
         Default: False
-        Maps to: ``--disable-warnings`` (``-w``)
     restrict : bool, optional
         Programmer assertion that all kernel pointer parameters are restrict pointers.
         Default: False
-        Maps to: ``--restrict`` (``-restrict``)
     device_as_default_execution_space : bool, optional
         Treat entities with no execution space annotation as __device__ entities.
         Default: False
-        Maps to: ``--device-as-default-execution-space`` (``-default-device``)
     device_int128 : bool, optional
         Allow the __int128 type in device code.
         Default: False
-        Maps to: ``--device-int128`` (``-device-int128``)
     optimization_info : str, optional
         Provide optimization reports for the specified kind of optimization.
         Default: None
-        Maps to: ``--optimization-info=<kind>`` (``-opt-info``)
     no_display_error_number : bool, optional
         Disable the display of a diagnostic number for warning messages.
         Default: False
-        Maps to: ``--no-display-error-number`` (``-no-err-no``)
     diag_error : Union[int, List[int]], optional
         Emit error for a specified diagnostic message number or comma separated list of numbers.
         Default: None
-        Maps to: ``--diag-error=<error-number>, ...`` (``-diag-error``)
     diag_suppress : Union[int, List[int]], optional
         Suppress a specified diagnostic message number or comma separated list of numbers.
         Default: None
-        Maps to: ``--diag-suppress=<error-number>,…`` (``-diag-suppress``)
     diag_warn : Union[int, List[int]], optional
         Emit warning for a specified diagnostic message number or comma separated lis of numbers.
         Default: None
-        Maps to: ``--diag-warn=<error-number>,…`` (``-diag-warn``)
     brief_diagnostics : bool, optional
         Disable or enable showing source line and column info in a diagnostic.
         Default: False
-        Maps to: ``--brief-diagnostics={true|false}`` (``-brief-diag``)
     time : str, optional
         Generate a CSV table with the time taken by each compilation phase.
         Default: None
-        Maps to: ``--time=<file-name>`` (``-time``)
     split_compile : int, optional
         Perform compiler optimizations in parallel.
         Default: 1
-        Maps to: ``--split-compile= <number of threads>`` (``-split-compile``)
     fdevice_syntax_only : bool, optional
         Ends device compilation after front-end syntax checking.
         Default: False
-        Maps to: ``--fdevice-syntax-only`` (``-fdevice-syntax-only``)
     minimal : bool, optional
         Omit certain language features to reduce compile time for small programs.
         Default: False
-        Maps to: ``--minimal`` (``-minimal``)
     """
 
     arch: Optional[str] = None
@@ -378,14 +342,14 @@ class Program:
     code : Any
         String of the CUDA Runtime Compilation program.
     code_type : Any
-        String of the code type. Currently only ``"c++"`` is supported.
+        String of the code type. Currently ``"ptx"`` and ``"c++"`` are supported.
     options : ProgramOptions, optional
         A ProgramOptions object to customize the compilation process.
         See :obj:`ProgramOptions` for more information.
     """
 
     class _MembersNeededForFinalize:
-        __slots__ = ("handle",)
+        __slots__ = "handle"
 
         def __init__(self, program_obj, handle):
             self.handle = handle
@@ -396,30 +360,58 @@ def close(self):
                 handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
                 self.handle = None
 
-    __slots__ = ("__weakref__", "_mnff", "_backend", "_options")
-    _supported_code_type = ("c++",)
+    __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options")
+    _supported_code_type = ("c++", "ptx")
     _supported_target_type = ("ptx", "cubin", "ltoir")
 
     def __init__(self, code, code_type, options: ProgramOptions = None):
         self._mnff = Program._MembersNeededForFinalize(self, None)
 
         self._options = options = check_or_create_options(ProgramOptions, options, "Program options")
+        code_type = code_type.lower()
 
         if code_type not in self._supported_code_type:
             raise NotImplementedError
 
-        if code_type.lower() == "c++":
+        if code_type == "c++":
             if not isinstance(code, str):
-                raise TypeError
+                raise TypeError("c++ Program expects code argument to be a string")
             # TODO: support pre-loaded headers & include names
             # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
             self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
             self._backend = "nvrtc"
+            self._linker = None
+
+        elif code_type == "ptx":
+            if not isinstance(code, str):
+                raise TypeError("ptx Program expects code argument to be a string")
+            self._linker = Linker(
+                ObjectCode(code.encode(), code_type), options=self._translate_program_options(options)
+            )
+            self._backend = "linker"
         else:
             raise NotImplementedError
 
+    def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
+        return LinkerOptions(
+            arch=options.arch,
+            max_register_count=options.max_register_count,
+            time=options.time,
+            debug=options.debug,
+            lineinfo=options.lineinfo,
+            ftz=options.ftz,
+            prec_div=options.prec_div,
+            prec_sqrt=options.prec_sqrt,
+            fma=options.fma,
+            link_time_optimization=options.link_time_optimization,
+            split_compile=options.split_compile,
+            ptxas_options=options.ptxas_options,
+        )
+
     def close(self):
         """Destroy this program."""
+        if self._linker:
+            self._linker.close()
         self._mnff.close()
 
     def compile(self, target_type, name_expressions=(), logs=None):
@@ -450,7 +442,10 @@ def compile(self, target_type, name_expressions=(), logs=None):
         if self._backend == "nvrtc":
             if name_expressions:
                 for n in name_expressions:
-                    handle_return(nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()), handle=self._mnff.handle)
+                    handle_return(
+                        nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()),
+                        handle=self._mnff.handle,
+                    )
             options = self._options._as_bytes()
             handle_return(
                 nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options),
@@ -477,10 +472,11 @@ def compile(self, target_type, name_expressions=(), logs=None):
                     handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle)
                     logs.write(log.decode())
 
-            # TODO: handle jit_options for ptx?
-
             return ObjectCode(data, target_type, symbol_mapping=symbol_mapping)
 
+        if self._backend == "linker":
+            return self._linker.link(target_type)
+
     @property
     def backend(self):
         """Return the backend type string associated with this program."""
diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py
index 288e2ce4..556fe9f7 100644
--- a/cuda_core/tests/test_linker.py
+++ b/cuda_core/tests/test_linker.py
@@ -57,6 +57,11 @@ def compile_ltoir_functions(init_cuda):
 if not culink_backend:
     options += [
         LinkerOptions(arch=ARCH, time=True),
+        LinkerOptions(arch=ARCH, optimize_unused_variables=True),
+        LinkerOptions(arch=ARCH, ptxas_options=["-v"]),
+        LinkerOptions(arch=ARCH, split_compile=0),
+        LinkerOptions(arch=ARCH, split_compile_extended=1),
+        # The following options are supported by nvjitlink and deprecated by culink
         LinkerOptions(arch=ARCH, ftz=True),
         LinkerOptions(arch=ARCH, prec_div=True),
         LinkerOptions(arch=ARCH, prec_sqrt=True),
@@ -65,14 +70,10 @@ def compile_ltoir_functions(init_cuda):
         LinkerOptions(arch=ARCH, kernels_used=["C", "B"]),
         LinkerOptions(arch=ARCH, variables_used=["var1"]),
         LinkerOptions(arch=ARCH, variables_used=["var1", "var2"]),
-        LinkerOptions(arch=ARCH, optimize_unused_variables=True),
-        LinkerOptions(arch=ARCH, xptxas=["-v"]),
-        LinkerOptions(arch=ARCH, split_compile=0),
-        LinkerOptions(arch=ARCH, split_compile_extended=1),
     ]
     version = nvjitlink.version()
     if version >= (12, 5):
-        options += [LinkerOptions(arch=ARCH, no_cache=True)]
+        options.append(LinkerOptions(arch=ARCH, no_cache=True))
 
 
 @pytest.mark.parametrize("options", options)
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 0f9b8e3b..d9c5cbde 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -9,16 +9,22 @@
 import pytest
 from conftest import can_load_generated_ptx
 
+from cuda.core.experimental import _linker
 from cuda.core.experimental._module import Kernel, ObjectCode
 from cuda.core.experimental._program import Program, ProgramOptions
 
 
-# TODO handle and test options whcih are only supported on more modern CUDA versions
-# tracked in #337
-def test_program_with_various_options(init_cuda):
+@pytest.fixture(scope="module")
+def ptx_code_object():
     code = 'extern "C" __global__ void my_kernel() {}'
+    program = Program(code, "c++")
+    ptx_object_code = program.compile("ptx")
+    return ptx_object_code
+
 
-    options_list = [
+@pytest.mark.parametrize(
+    "options",
+    [
         ProgramOptions(device_code_optimize=True, debug=True),
         ProgramOptions(relocatable_device_code=True, max_register_count=32),
         ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False),
@@ -38,14 +44,41 @@ def test_program_with_various_options(init_cuda):
         ProgramOptions(diag_error=1234, diag_suppress=1234),
         ProgramOptions(diag_error=[1234, 1223], diag_suppress=(1234, 1223)),
         ProgramOptions(diag_warn=1000),
+    ],
+)
+def test_cpp_program_with_various_options(init_cuda, options):
+    code = 'extern "C" __global__ void my_kernel() {}'
+    program = Program(code, "c++", options)
+    assert program.backend == "nvrtc"
+    program.compile("ptx")
+    program.close()
+    assert program.handle is None
+
+
+options = [
+    ProgramOptions(max_register_count=32),
+    ProgramOptions(debug=True),
+    ProgramOptions(lineinfo=True),
+    ProgramOptions(ftz=True),
+    ProgramOptions(prec_div=True),
+    ProgramOptions(prec_sqrt=True),
+    ProgramOptions(fma=True),
+]
+if not _linker._decide_nvjitlink_or_driver():
+    print("Using nvjitlink as the backend because decide() returned false")
+    options += [
+        ProgramOptions(time=True),
+        ProgramOptions(split_compile=True),
     ]
 
-    for options in options_list:
-        program = Program(code, "c++", options)
-        assert program.backend == "nvrtc"
-        program.compile("ptx")
-        program.close()
-        assert program.handle is None
+
+@pytest.mark.parametrize("options", options)
+def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options):
+    program = Program(ptx_code_object._module.decode(), "ptx", options=options)
+    assert program.backend == "linker"
+    program.compile("cubin")
+    program.close()
+    assert program.handle is None
 
 
 def test_program_init_valid_code_type():
@@ -70,13 +103,18 @@ def test_program_init_invalid_code_format():
 # TODO: incorporate this check in Program
 # This is tested against the current device's arch
 @pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new")
-def test_program_compile_valid_target_type():
+def test_program_compile_valid_target_type(init_cuda):
     code = 'extern "C" __global__ void my_kernel() {}'
     program = Program(code, "c++")
-    object_code = program.compile("ptx")
-    kernel = object_code.get_kernel("my_kernel")
-    assert isinstance(object_code, ObjectCode)
-    assert isinstance(kernel, Kernel)
+    ptx_object_code = program.compile("ptx")
+    program = Program(ptx_object_code._module.decode(), "ptx")
+    cubin_object_code = program.compile("cubin")
+    ptx_kernel = ptx_object_code.get_kernel("my_kernel")
+    cubin_kernel = cubin_object_code.get_kernel("my_kernel")
+    assert isinstance(ptx_object_code, ObjectCode)
+    assert isinstance(cubin_object_code, ObjectCode)
+    assert isinstance(ptx_kernel, Kernel)
+    assert isinstance(cubin_kernel, Kernel)
 
 
 def test_program_compile_invalid_target_type():