From 034cd45ebb63ccd01da1d1b057f0d6892344908b Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Wed, 27 Mar 2024 17:31:14 -0400
Subject: [PATCH 01/30] Initial commit for Vector, GenericResponse, RewardCost,
 and an initial test file.

---
 pomdp_py/framework/generalization.pyx  | 146 +++++++++++++++++++++++++
 tests/test_framework_generalization.py |  29 +++++
 2 files changed, 175 insertions(+)
 create mode 100644 pomdp_py/framework/generalization.pyx
 create mode 100644 tests/test_framework_generalization.py

diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx
new file mode 100644
index 0000000..04c3e1a
--- /dev/null
+++ b/pomdp_py/framework/generalization.pyx
@@ -0,0 +1,146 @@
+from __future__ import annotations
+from functools import cached_property
+from typing import Iterable, Iterator, Union
+
+
+cdef class Vector:
+    """
+    The Vector class. Provides an implementation of a vector for maintaining multiple values.
+    """
+    cdef list[float] vals
+    
+    def __init__(self, values: float | int | Iterable[float | int] = list([0.])):
+        cdef list _vec = list()
+        if isinstance(values, (float, int)):
+            _vec.append(values)
+        elif isinstance(values, (list, tuple)):
+            _vec += list(values)
+        else:
+            raise TypeError(f"values must be type int, float, list, or tuple, but got {type(values)}.")
+        
+        # Store the values as an array of floats.
+        self.vals = list(float(v) for v in _vec)
+
+    @cached_property
+    def values(self) -> list[float]:
+        return self.vals.copy()
+
+    def __iter__(self) -> Iterator:
+        return iter(self.vals)
+
+    def __len__(self) -> int:
+        return len(self.vals)
+
+    def __eq__(self, other: Vector | list) -> bool:
+        if not isinstance(other, (Vector, list)):
+            raise TypeError(f"other must be type Vector or list, but got {type(other)}.")
+        return len(self) == len(other) and all(v0 == v1 for v0, v1 in zip(self, other))
+
+    def __add__(self, other: Vector | list | float | int) -> Vector:
+        if isinstance(other, (float, int)):
+            vec = [other] * len(self)
+        elif isinstance(other, Vector):
+            vec = other
+        else:
+            raise TypeError(f"other must be type Vector, float, or int, but got {type(other)}.")
+        return Vector([v0 + v1 for v0, v1 in zip(self, vec)])
+
+    def __radd__(self, other):
+        return self.__add__(other)
+
+    def __mul__(self, other):
+        if not isinstance(other, (float, int)):
+            raise TypeError(f"other must be type float or int, but got {type(other)}.")
+        return Vector([v * other for v in self])
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __str__(self) -> str:
+        return str(self.vals)
+
+
+ResponseVariableType = Union[float, Vector]
+
+
+cdef class GenericResponse:
+    """
+    A GenericResponse class maintains variables within a dictionary. However, subclasses of GenericResponse
+    can provide access to the dictionary variables using the dot (.) operator. Currently, this class can
+    handle arithmetic and comparison operations. However, if special operations will need to be performed, 
+    these operations need to be handled in the subclass.
+    """
+    cdef dict __dict__
+
+    def __init__(self, dict response_dict = dict()):
+        self.__dict__.update(response_dict.copy())
+
+    cpdef void _check_reward_compatibility(self, value):
+        if not isinstance(value, GenericResponse):
+            raise TypeError(f"other must be type GenericResponse, float, or int, but got {type(value)}.")
+
+    cdef dict add_response(self, GenericResponse other):
+        self._check_reward_compatibility(other)
+        cdef dict rv = dict()
+        for name, value in self.__dict__.items():
+            rv.update({name: value + other.__dict__[name]})
+        return rv
+
+    def __add__(self, other: GenericResponse) -> GenericResponse:
+        return GenericResponse(self.add_response(other))
+
+    def __radd__(self, other: GenericResponse) -> GenericResponse:
+        return self.__add__(other)
+
+    cpdef dict mul_scalar(self, float other):
+        if not isinstance(other, float):
+            raise TypeError("other must be type float or int.")
+        cdef dict rv = dict()
+        for name, value in self.__dict__.items():
+            rv.update({name: value * other})
+        return rv
+
+    def __mul__(self, other: float | int) -> GenericResponse:
+        return GenericResponse(self.mul_scalar(other))
+
+    def __rmul__(self, other) -> GenericResponse:
+        return self.__mul__(other)
+
+    def __eq__(self, other: GenericResponse) -> bool:
+        self._check_reward_compatibility(other)
+        return all(value == other.__dict__[name] for name, value in self.__dict__.items())
+
+    def __ne__(self, other) -> bool:
+        self._check_reward_compatibility(other)
+        return all(value != other.__dict__[name] for name, value in self.__dict__.items())
+
+    def __lt__(self, other) -> bool:
+        self._check_reward_compatibility(other)
+        return all(value < other.__dict__[name] for name, value in self.__dict__.items())
+
+    def __le__(self, other) -> bool:
+        self._check_reward_compatibility(other)
+        return all(value <= other.__dict__[name] for name, value in self.__dict__.items())
+
+    def __gt__(self, other) -> bool:
+        self._check_reward_compatibility(other)
+        return all(value > other.__dict__[name] for name, value in self.__dict__.items())
+
+    def __ge__(self, other) -> bool:
+        self._check_reward_compatibility(other)
+        return all(value >= other.__dict__[name] for name, value in self.__dict__.items())
+        
+    def __str__(self) -> str:
+        return ", ".join([f"{name}={values}" for name, values in self.__dict__.items()])
+
+
+cdef class RewardCost(GenericResponse):
+
+    def __init__(self, float reward=0.0, Vector cost=Vector()):
+        super().__init__({"reward": reward, "cost": cost})
+
+    def __add__(self, other: RewardCost) -> RewardCost:
+        return RewardCost(**self.add_response(other))
+
+    def __mul__(self, other: float) -> RewardCost:
+        return RewardCost(**self.mul_scalar(other))
diff --git a/tests/test_framework_generalization.py b/tests/test_framework_generalization.py
new file mode 100644
index 0000000..eda6069
--- /dev/null
+++ b/tests/test_framework_generalization.py
@@ -0,0 +1,29 @@
+from pomdp_py.framework.generalization import Vector, RewardCost
+
+description = "testing framework generalization"
+
+
+def test_assign():
+    v = Vector()
+    assert v == [0.]
+
+    v = Vector((2, 4, 8))
+    assert v == [2., 4., 8.]
+
+    v = Vector()
+    assert v != [1.]
+
+
+def test_add():
+    r = RewardCost(0., Vector([0., 10.])) + RewardCost(10., Vector([90., 13.]))
+    assert r.reward == 10.
+    assert r.cost == [90., 23.]
+    
+
+def run():
+    test_assign()
+    test_add()
+
+
+if __name__ == "__main__":
+    run()
\ No newline at end of file

From 5b3321a7db659e22f7a82456bb24f8bebd86044f Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Tue, 16 Apr 2024 16:52:09 -0400
Subject: [PATCH 02/30] Updated ignore file.

---
 .gitignore | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index a287569..5971777 100644
--- a/.gitignore
+++ b/.gitignore
@@ -136,3 +136,8 @@ Thumbs.db
 *.pg
 
 .DS_Store
+
+# Cython debugging files
+########################
+*.c
+*.html

From 9223f9421840e92f4143f6e7ae67b0aafbf81c2b Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Tue, 16 Apr 2024 17:03:23 -0400
Subject: [PATCH 03/30] Added CCPOMCP algorithm and dependencies; added test
 script for Vector; added example problem for CCPOMCP.

---
 pomdp_py/__init__.py                          |   6 +
 pomdp_py/__main__.py                          |   7 +-
 pomdp_py/algorithms/ccpomcp.pxd               |  57 ++
 pomdp_py/algorithms/ccpomcp.pyx               | 770 ++++++++++++++++++
 pomdp_py/algorithms/pomcp.pxd                 |   5 +-
 pomdp_py/framework/generalization.pxd         |  32 +
 pomdp_py/framework/generalization.pyx         | 403 ++++++---
 .../problems/rocksample/rocksample_problem.py |  21 +-
 pomdp_py/utils/cvec.pxd                       |  38 +
 pomdp_py/utils/cvec.pyx                       | 352 ++++++++
 setup.py                                      |   4 +-
 tests/test_util_vector_ops.py                 | 151 ++++
 12 files changed, 1715 insertions(+), 131 deletions(-)
 create mode 100644 pomdp_py/algorithms/ccpomcp.pxd
 create mode 100644 pomdp_py/algorithms/ccpomcp.pyx
 create mode 100644 pomdp_py/framework/generalization.pxd
 create mode 100644 pomdp_py/utils/cvec.pxd
 create mode 100644 pomdp_py/utils/cvec.pyx
 create mode 100644 tests/test_util_vector_ops.py

diff --git a/pomdp_py/__init__.py b/pomdp_py/__init__.py
index 057c552..80471d3 100644
--- a/pomdp_py/__init__.py
+++ b/pomdp_py/__init__.py
@@ -2,6 +2,7 @@
 
 # Framework
 from pomdp_py.framework.basics import *
+from pomdp_py.framework.generalization import *
 from pomdp_py.framework.oopomdp import *
 from pomdp_py.framework.planner import *
 
@@ -22,6 +23,10 @@
 # Algorithms
 from pomdp_py.algorithms.value_iteration import ValueIteration  # Cython compiled
 from pomdp_py.algorithms.value_function import value, qvalue, belief_update
+from pomdp_py.algorithms.ccpomcp import (
+    CostModel,
+    CCPOMCP,
+)
 from pomdp_py.algorithms.pomcp import POMCP
 from pomdp_py.algorithms.po_rollout import PORollout
 from pomdp_py.algorithms.po_uct import (
@@ -38,3 +43,4 @@
 # Templates & Utilities
 from pomdp_py.utils.templates import *
 from pomdp_py.utils.debugging import TreeDebugger
+from pomdp_py.utils.cvec import Vector
diff --git a/pomdp_py/__main__.py b/pomdp_py/__main__.py
index 22c04a7..059d075 100644
--- a/pomdp_py/__main__.py
+++ b/pomdp_py/__main__.py
@@ -1,6 +1,6 @@
 import argparse
 
-available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload"]
+available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload", "ccrocksample"]
 
 
 def parse_args():
@@ -45,6 +45,11 @@ def parse_args():
 
             main()
 
+        elif args.run.lower() == "ccrocksample":
+            from pomdp_py.problems.cc_rocksample.cc_rocksample_problem import main
+
+            main()
+
         else:
             print("Unrecognized pomdp: {}".format(args.run))
 
diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd
new file mode 100644
index 0000000..0c5235c
--- /dev/null
+++ b/pomdp_py/algorithms/ccpomcp.pxd
@@ -0,0 +1,57 @@
+from pomdp_py.algorithms.po_uct cimport QNode
+from pomdp_py.algorithms.pomcp cimport POMCP, VNode
+from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation
+from pomdp_py.framework.generalization cimport Response
+from pomdp_py.utils.cvec cimport Vector
+
+
+cdef class CostModel:
+    pass
+
+
+cdef class CCQNode(QNode):
+    cdef Vector _cost_value
+    cdef Vector _avg_cost_value
+
+
+cdef class _CCPolicyActionData:
+    cdef double _prob
+    cdef Vector _cost_value
+    cdef Vector _avg_cost_value
+
+
+cdef class _CCPolicyModel(PolicyModel):
+    cdef dict[Action, _CCPolicyActionData] _data
+
+    cpdef Vector action_avg_cost(_CCPolicyModel self, Action action)
+    cpdef Vector action_cost_value(_CCPolicyModel self, Action action)
+    cdef public float probability(_CCPolicyModel self, Action action, State state)
+    cdef public Action sample(_CCPolicyModel self, State state)
+
+
+cdef class CCPOMCP(POMCP):
+    cdef double _r_diff
+    cdef double _tau
+    cdef double _alpha_n
+    cdef Vector _lambda
+    cdef Vector _cost_constraint
+    cdef Response _null_response
+    cdef bint _use_random_lambda
+    cdef bint _clip_lambda
+    cdef double _nu
+    cdef list[float] _cost_value_init
+    cdef unsigned int _n_constraints
+
+    cpdef public Action plan(CCPOMCP self, Agent agent)
+    cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *)
+    cpdef _CCPolicyModel _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu)
+    cdef void _init_lambda_fn(CCPOMCP self)
+    cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action)
+    cpdef _search(CCPOMCP self)
+    cpdef Response _simulate(CCPOMCP self, State state, tuple history, VNode root, QNode parent,
+                             Observation observation, int depth)
+    cdef void _update_cost_constraint(CCPOMCP self, _CCPolicyModel policy_dist, Action sampled_action)
+
+
+cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const = *)
+cdef double _get_ccqnode_scalar_cost(VNode node, Action action)
diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
new file mode 100644
index 0000000..a0fde23
--- /dev/null
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -0,0 +1,770 @@
+# cython: profile=True
+
+from __future__ import annotations
+cimport cython
+from libc.math cimport log, sqrt, exp, abs
+import math
+cimport numpy as cnp
+import numpy as np
+from pomdp_py.algorithms.po_uct cimport QNode, ActionPrior
+from pomdp_py.algorithms.pomcp cimport POMCP
+from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation
+from pomdp_py.framework.generalization cimport (
+    Response,
+    ResponseAgent,
+    sample_generative_model_with_response
+)
+from pomdp_py.representations.distribution.histogram cimport Histogram
+from pomdp_py.representations.distribution.particles cimport Particles
+from pomdp_py.utils import typ
+from pomdp_py.utils.cvec cimport Vector
+from typing import Optional
+cnp.import_array()
+
+
+cdef double DBL_MIN = <double> -1e200
+cdef double DBL_MAX = <double> 1e200
+
+
+cdef class CostModel:
+    """Base interface for cost models; every method here raises
+    NotImplementedError (presumably to be overridden by subclasses)."""
+
+    def probability(
+            self,
+            cost: float | Vector,
+            state: State,
+            action: Action,
+            next_state: State
+    ) -> float:
+        r"""
+        probability(self, cost, state, action, next_state)
+        Returns the probability of :math:`\Pr(c|s,a,s')`.
+
+        Args:
+            cost (float or ~pomdp_py.utils.cvec.Vector): the cost :math:`c`
+            state (~pomdp_py.framework.basics.State): the state :math:`s`
+            action (~pomdp_py.framework.basics.Action): the action :math:`a`
+            next_state (State): the next state :math:`s'`
+
+        Returns:
+            float: the probability :math:`\Pr(c|s,a,s')`
+        """
+        raise NotImplementedError
+
+    def sample(
+            self,
+            state: State,
+            action: Action,
+            next_state: State,
+            **kwargs,
+    ) -> float | Vector:
+        """
+        sample(self, state, action, next_state, **kwargs)
+        Returns a cost randomly sampled according to the
+        distribution of this cost model. This is required for cost-aware planners.
+
+        Args:
+            state (~pomdp_py.framework.basics.State): the state :math:`s`
+            action (~pomdp_py.framework.basics.Action): the action :math:`a`
+            next_state (State): the next state :math:`s'`
+
+        Returns:
+            float or ~pomdp_py.utils.cvec.Vector: the cost :math:`c`
+        """
+        raise NotImplementedError
+
+    def argmax(self, state: State, action: Action, next_state: State) -> float | Vector:
+        """
+        argmax(self, state, action, next_state)
+        Returns the most likely cost. This is optional.
+        """
+        raise NotImplementedError
+
+    def get_distribution(self, state: State, action: Action, next_state: State):
+        """
+        get_distribution(self, state, action, next_state)
+        Returns the underlying distribution of the model.
+        """
+        raise NotImplementedError
+
+
+cdef class CCQNode(QNode):
+    """A QNode that also stores a cost value Vector and an average cost Vector."""
+
+    def __init__(
+        self,
+        num_visits: int,
+        value: float,
+        cost_value: list[float],
+    ) -> None:
+        super().__init__(num_visits=num_visits, value=value)
+        if len(cost_value) == 0:
+            raise ValueError("len(cost_value) must be positive.")
+        self._cost_value = Vector(cost_value)
+        self._avg_cost_value = Vector.null(self._cost_value.len())
+
+    @property
+    def avg_cost_value(self) -> Vector:
+        return self._avg_cost_value.copy()
+
+    @avg_cost_value.setter
+    def avg_cost_value(self, avg_cost_value: Vector) -> None:
+        if not isinstance(avg_cost_value, Vector):
+            raise TypeError(
+                f"avg_cost_value must be type Vector, but got {type(avg_cost_value)}."
+            )
+        self._avg_cost_value = avg_cost_value.copy()
+
+    @property
+    def cost_value(self) -> Vector:
+        return self._cost_value.copy()
+
+    @cost_value.setter
+    def cost_value(self, cost_value: Vector) -> None:
+        if not isinstance(cost_value, Vector):
+            raise TypeError(
+                f"cost_value must be type Vector, but got {type(cost_value)}."
+            )
+        self._cost_value = cost_value.copy()
+
+    def __str__(self) -> str:
+        return (
+            typ.red("CCQNode")
+            + f"(n={self.num_visits}, v={self.value:.3f}, c={self.cost_value} "
+            + f"c_bar={self.avg_cost_value} | children=[{', '.join(map(str, self.children))}])"
+        )
+
+cdef class _CCPolicyActionData:
+    def __init__(self, double prob, Vector cost_value, Vector avg_cost_value):
+        self._prob = prob
+        self._cost_value = cost_value.copy()
+        self._avg_cost_value = avg_cost_value.copy()
+
+    @property
+    def prob(self) -> float:
+        return self._prob
+
+    @property
+    def cost_value(self) -> Vector:
+        return self._cost_value
+
+    @property
+    def avg_cost_value(self) -> Vector:
+        return self._avg_cost_value
+
+
+cdef class _CCPolicyModel(PolicyModel):
+    """Policy over a fixed set of actions, each with a probability and cost Vectors."""
+    def __init__(self, dict[Action, _CCPolicyActionData] data_dict) -> None:
+        super().__init__()
+        cdef Action action
+        cdef _CCPolicyActionData datum
+        cdef double prob_sum = 0.0
+        for action, datum in data_dict.items():
+            if not isinstance(action, Action):
+                raise TypeError("action must be type Action.")
+            prob_sum += datum.prob
+        if not math.isclose(prob_sum, 1.0):  # p + (1 - p) may be off by float round-off
+            raise ValueError(f"The probabilities must sum to 1.0, but got {prob_sum}.")
+        self._data = data_dict.copy()
+
+    cpdef Vector action_avg_cost(_CCPolicyModel self, Action action):
+        if action not in self._data:
+            raise KeyError(f"The action {action} does not exist in this policy model.")
+        return self._data[action].avg_cost_value.copy()
+
+    cpdef Vector action_cost_value(_CCPolicyModel self, Action action):
+        if action not in self._data:
+            raise KeyError(f"The action {action} does not exist in this policy model.")
+        return self._data[action].cost_value.copy()
+
+    cdef public float probability(_CCPolicyModel self, Action action, State state):
+        if action not in self._data:
+            raise KeyError(f"The action {action} does not exist in this policy model.")
+        return self._data[action].prob
+
+    cdef public Action sample(_CCPolicyModel self, State state):
+        cdef list probs = [datum.prob for datum in self._data.values()]  # same order as keys()
+        return np.random.choice(np.array(list(self._data.keys()), dtype=object), p=probs)
+
+    def get_all_actions(self, state: Optional[State] = None, history: Optional[tuple] = None):
+        return list(self._data.keys())
+
+
+cdef class CCPOMCP(POMCP):
+    """
+    The cost-constrained POMCP (CCPOMCP) is POMCP + cost constraints.
+    The current implementation assumes the cost constraint is 1D.
+    """
+
+    def __init__(
+        self,
+        r_diff: float,
+        tau: float,
+        alpha_n: float,
+        cost_constraint: list[float] | float,
+        clip_lambda: bool = True,
+        nu: float = 1.0,
+        max_depth: int = 5,
+        planning_time: float = -1.0,
+        num_sims: int = -1,
+        discount_factor: float = 0.9,
+        exploration_const: float = math.sqrt(2.0),
+        num_visits_init: int = 0,
+        value_init: int = 0,
+        cost_value_init: Optional[list[float] | float] = None,
+        use_random_lambda: bool = True,
+        rollout_policy: Optional[PolicyModel] = None,
+        action_prior: Optional[ActionPrior] = None,
+        show_progress: bool = False,
+        pbar_update_interval: int = 5
+    ):
+        """Validate the arguments, then initialize lambda and the cost bookkeeping.
+
+        ``cost_constraint``/``cost_value_init``: both floats or equal-length lists;
+        a ``cost_value_init`` of None means 0.0 for every constraint."""
+        super(CCPOMCP, self).__init__(
+            max_depth=max_depth,
+            planning_time=planning_time,
+            num_sims=num_sims,
+            discount_factor=discount_factor,
+            exploration_const=exploration_const,
+            num_visits_init=num_visits_init,
+            value_init=value_init,
+            rollout_policy=rollout_policy,
+            action_prior=action_prior,
+            show_progress=show_progress,
+            pbar_update_interval=pbar_update_interval
+        )
+        # Sanity checks and set the parameters.
+        if not isinstance(r_diff, float):
+            raise TypeError(f"r_diff must be type float, but got {type(r_diff)}.")
+        if r_diff < 0.0:
+            raise ValueError("r_diff must be a non-negative float.")
+        if not isinstance(tau, float):
+            raise TypeError(f"tau must be type float, but got {type(tau)}.")
+        if not isinstance(alpha_n, float):
+            raise TypeError(f"alpha_n must be type float, but got {type(alpha_n)}.")
+        if alpha_n < 0.0 or 1.0 < alpha_n:
+            raise ValueError("alpha_n must be in range [0.0, 1.0].")
+        if not isinstance(cost_constraint, (list, float)):
+            raise TypeError(
+                f"cost_constraint must be a list or float but got type {type(cost_constraint)}."
+            )
+        if not isinstance(clip_lambda, bool):
+            raise TypeError(f"clip_lambda must be a Boolean, but got type {type(clip_lambda)}.")
+        if not isinstance(nu, float):
+            raise TypeError(f"nu must be type float, but got {type(nu)}.")
+        if not isinstance(use_random_lambda, bool):
+            raise TypeError(
+                f"use_random_lambda must be type bool, but got {type(use_random_lambda)}."
+            )
+
+        if cost_value_init is not None:
+            if not isinstance(cost_value_init, (list, float)):
+                raise TypeError(
+                    "cost_value_init must be type list or float, "
+                    f"but got {type(cost_value_init)}."
+                )
+            if type(cost_value_init) != type(cost_constraint):
+                raise TypeError("cost_value_init and cost_constraint must be the same type.")
+
+        # Initialize lambda, cost constraint, and cost value init.
+        if isinstance(cost_constraint, list):
+            self._n_constraints = len(cost_constraint)
+            if cost_value_init is None:
+                cost_value_init = [0.0] * len(cost_constraint)  # default: zero costs
+            elif len(cost_value_init) != len(cost_constraint):
+                raise ValueError(
+                    "The cost constraint and cost value init must have the same length."
+                )
+        else:
+            self._n_constraints = 1
+            cost_constraint = [cost_constraint]
+            cost_value_init = [cost_value_init] if cost_value_init is not None else [0.0]
+
+        self._lambda = Vector.null(self._n_constraints)
+        self._cost_value_init = list(cost_value_init)
+        self._cost_constraint = Vector(cost_constraint)
+        self._r_diff = <double> r_diff
+        self._tau = <double> tau
+        self._alpha_n = <double> alpha_n
+        self._clip_lambda = <bint> clip_lambda
+        self._nu = <double> nu
+        self._use_random_lambda = <bint> use_random_lambda
+
+    cpdef public Action plan(CCPOMCP self, Agent agent):
+        cdef Action action
+        cdef _CCPolicyModel policy_dist
+        cdef double time_taken
+        cdef int sims_count
+
+        if not isinstance(agent.belief, Particles):
+            raise TypeError(
+                "Agent's belief is not represented in particles. "
+                "CCPOMCP not usable. Please convert it to particles."
+            )
+
+        if self._rollout_policy is None:
+            raise ValueError(
+                "rollout_policy unset. Please call set_rollout_policy, "
+                "or pass in a rollout_policy upon initialization."
+            )
+
+        if not isinstance(agent, ResponseAgent):
+            raise TypeError(
+                f"agent must be type ResponseAgent, but got type {type(agent)}."
+            )
+
+        # Set the current agent being used for planning.
+        self._agent = agent
+        self._null_response = self._agent.response_model.null_response()
+        if not hasattr(self._agent, "tree"):
+            self._agent.add_attr("tree", None)
+
+        # Then get the policy distribution, sample from it,
+        # and update the cost constraint.
+        policy_dist, time_taken, sims_count = self._search()
+        action = policy_dist.sample(state=None)
+        self._update_cost_constraint(policy_dist, action)
+
+        # Update stats.
+        self._last_num_sims = sims_count
+        self._last_planning_time = time_taken
+
+        return action
+
+    cpdef _expand_vnode(
+        CCPOMCP self,
+        VNode vnode,
+        tuple history,
+        State state = None,
+    ):
+        """Create a CCQNode for every valid action whose entry in ``vnode`` is None,
+        then create nodes for the action prior's preferred actions with its values."""
+        cdef Action action
+
+        for action in self._agent.valid_actions(state=state, history=history):
+            if vnode[action] is None:
+                vnode[action] = CCQNode(
+                    self._num_visits_init, self._value_init, self._cost_value_init
+                )
+
+        if self._action_prior is not None:
+            # Using action prior; special values are set;
+            for preference in self._action_prior.get_preferred_actions(state, history):
+                action, num_visits_init, value_init = preference
+                vnode[action] = CCQNode(num_visits_init, value_init, self._cost_value_init)
+
+    @cython.boundscheck(False)
+    cpdef _CCPolicyModel _greedy_policy(
+        CCPOMCP self,
+        VNode vnode,
+        double explore_const,
+        double nu,
+    ):
+        """Build the policy over the near-best actions of ``vnode``.
+
+        Actions are scored by ``value - lambda . cost_value`` (plus a visit-count term);
+        those within ``nu * (UCB_i + UCB_best)`` of the best score are candidates.
+        """
+        cdef list[Action] action_list = list(vnode.children.keys())
+        cdef int n_actions = len(action_list)
+
+        if n_actions == 0:
+            raise RuntimeError("The vnode has no visited actions?!")
+
+        # Compute Q_lambda.
+        cdef double n_ccqnode_visits
+        cdef double best_q_lambda = DBL_MIN
+        cdef int best_q_index = 0
+        cdef double logN = log(<double> vnode.num_visits + 1)
+        cdef double q_value
+        cdef Vector Q_lambda, Action_UCB
+        cdef CCQNode ccqnode
+        cdef Action action
+        cdef int i = 0
+
+        Q_lambda = Vector.null(n_actions)
+        Action_UCB = Vector.null(n_actions)
+
+        for i in range(n_actions):
+            ccqnode = vnode[action_list[i]]
+            q_value = ccqnode.value - self._lambda.dot(ccqnode.cost_value)
+
+            if ccqnode.num_visits > 0:
+                n_ccqnode_visits = <double> ccqnode.num_visits + 1.0
+                q_value += _compute_visits_ratio(
+                    logN,
+                    n_ccqnode_visits,
+                    explore_const
+                )
+                Action_UCB[i] = _compute_visits_ratio(
+                    log(n_ccqnode_visits),
+                    n_ccqnode_visits
+                )
+
+            if q_value > best_q_lambda:
+                best_q_lambda = q_value
+                best_q_index = i
+
+            Q_lambda[i] = q_value
+
+        # Compute a*, the best action(s).
+        cdef list[Action] best_action_list = list()
+        cdef double best_ucb_add = Action_UCB[best_q_index]
+        cdef double ucb_add, q_value_diff
+        cdef bint add_to_best_action_list = False
+
+        q_value = 0.0
+        best_q_lambda = Q_lambda[best_q_index]
+
+        for i in range(n_actions):
+            action = action_list[i]
+            q_value = Q_lambda[i]
+
+            # Re-evaluate the flag for every action; a stale True from an earlier
+            # iteration must not admit actions outside the near-best band.
+            add_to_best_action_list = q_value == best_q_lambda
+            if not add_to_best_action_list:
+                q_value_diff = abs(q_value - best_q_lambda)
+                ucb_add = nu * (Action_UCB[i] + best_ucb_add)
+                add_to_best_action_list = (
+                    q_value_diff <= ucb_add and action not in best_action_list
+                )
+
+            if add_to_best_action_list:
+                best_action_list.append(action)
+
+        # Find the policy.
+        cdef int n_best_actions = len(best_action_list)
+        cdef Action action_min, action_max
+        cdef double cost_constraint_scalar = self._cost_constraint[0]
+        cdef double max_cost_value, min_cost_value, min_prob, cost_value
+        cdef dict[Action, _CCPolicyActionData] data
+
+        if n_best_actions == 0:
+            raise RuntimeError("No best actions were found?!")
+
+        elif n_best_actions == 1:
+            action = best_action_list[0]
+            data = {
+                action: _CCPolicyActionData(
+                    1.0,
+                    vnode[action].cost_value,
+                    vnode[action].avg_cost_value
+                )
+            }
+
+        else:
+            # TODO: Implement linear programming to handle multiple constraints.
+            #       The code below can only handle ONE constraint.
+            if self._cost_constraint.len() > 1:
+                raise NotImplementedError(
+                    "This algorithm can only handle one constraint for now."
+                )
+            # NOTE: a check that the scalar lambda is positive (Appendix G of the
+            # supplementary material of Lee et al. 2018, "Monte-Carlo Tree Search
+            # for Constrained POMDPs") is currently disabled.
+
+            # Find a_max and a_min, the actions with the max and min scalar costs
+            # from the list of best actions.
+            max_cost_value = DBL_MIN
+            min_cost_value = DBL_MAX
+
+            for i in range(n_best_actions):
+                action = best_action_list[i]
+                cost_value = _get_ccqnode_scalar_cost(vnode, action)
+
+                if cost_value < min_cost_value:
+                    action_min = action
+                    min_cost_value = cost_value
+
+                if cost_value > max_cost_value:
+                    action_max = action
+                    max_cost_value = cost_value
+
+            # Sanity checks.
+            if max_cost_value == DBL_MIN:
+                raise RuntimeError(
+                    f"Max cost value ({max_cost_value}) must be more than {DBL_MIN}. "
+                    f"Note: there are {n_best_actions} best actions. An error exists!"
+                )
+            if min_cost_value == DBL_MAX:
+                raise RuntimeError(
+                    f"Min cost value ({min_cost_value}) must be less than {DBL_MAX}. "
+                    f"Note: there are {n_best_actions} best actions. An error exists!"
+                )
+
+            if max_cost_value <= cost_constraint_scalar or action_min == action_max:
+                data = {
+                    action_max: _CCPolicyActionData(
+                        1.0,
+                        vnode[action_max].cost_value,
+                        vnode[action_max].avg_cost_value
+                    )
+                }
+
+            elif min_cost_value >= cost_constraint_scalar:  # even a_min is at/over the bound
+                data = {
+                    action_min: _CCPolicyActionData(
+                        1.0,
+                        vnode[action_min].cost_value,
+                        vnode[action_min].avg_cost_value
+                    )
+                }
+
+            else:  # min < constraint < max, so min_prob lies strictly in (0, 1)
+                min_prob = (
+                    (max_cost_value - cost_constraint_scalar)
+                    / (max_cost_value - min_cost_value)
+                )
+
+                data = {
+                    action_min: _CCPolicyActionData(
+                        min_prob,
+                        vnode[action_min].cost_value,
+                        vnode[action_min].avg_cost_value
+                    ),
+                    action_max: _CCPolicyActionData(
+                        1.0 - min_prob,
+                        vnode[action_max].cost_value,
+                        vnode[action_max].avg_cost_value
+                    ),
+                }
+
+        return _CCPolicyModel(data)
+
+    cdef void _init_lambda_fn(CCPOMCP self):
+        """Initialize ``self._lambda``.
+
+        If ``_use_random_lambda`` is set, each entry is drawn uniformly from
+        [0.00001, 1.0), one entry per element of the cost constraint;
+        otherwise lambda is ``Vector.null(self._n_constraints)``.
+        """
+        if not self._use_random_lambda:
+            self._lambda = Vector.null(self._n_constraints)
+            return
+        n_entries = self._cost_constraint.len()
+        self._lambda = Vector(np.random.uniform(0.00001, 1.0, size=n_entries).tolist())
+
+    cpdef _perform_simulation(self, state):
+        """Run one simulation from ``state``, then take one step on lambda.
+
+        After the parent class's simulation, an action is sampled from the
+        greedy policy at the tree root with both the exploration and nu
+        arguments set to 0.0 (the first greedy call in search(h_0)). Lambda is
+        then moved by ``_alpha_n`` times the gap between that action's cost
+        value and the cost constraint and, if ``_clip_lambda`` is set, clipped.
+        """
+        super(CCPOMCP, self)._perform_simulation(state=state)
+
+        tree_root = self._agent.tree
+        greedy_dist = self._greedy_policy(tree_root, 0.0, 0.0)
+        sampled = greedy_dist.sample(state=state)
+
+        # lambda <- lambda + alpha_n * (cost value of sampled action - constraint)
+        cost_gap = tree_root[sampled].cost_value - self._cost_constraint
+        self._lambda = self._lambda + self._alpha_n * cost_gap
+        if self._clip_lambda:
+            upper = self._r_diff / (self._tau * (1.0 - self._discount_factor))
+            self._lambda = Vector.clip(self._lambda, 0.0, upper)
+
+    cpdef _rollout(self, State state, tuple history, VNode root, int depth):
+        """Follow the rollout policy until ``depth`` reaches ``self._max_depth``.
+
+        Returns the running total of the sampled responses, each multiplied by
+        the discount accumulated before its step (``_null_response`` if no step
+        is taken)."""
+        cdef Action action
+        cdef State next_state
+        cdef Observation observation
+        cdef Response response
+        cdef Response total_discounted_response = self._null_response
+        cdef float discount = 1.0
+        cdef int nsteps
+
+        while depth < self._max_depth:
+            action = self._rollout_policy.rollout(state, history)
+            step = sample_generative_model_with_response(
+                self._agent.transition_model,
+                self._agent.observation_model,
+                self._agent.response_model,
+                state, action, self._null_response,
+            )
+            next_state, observation, response, nsteps = step
+            history = history + ((action, observation),)
+            depth += nsteps
+            total_discounted_response = total_discounted_response + (response * discount)
+            discount *= (self._discount_factor ** nsteps)
+            state = next_state
+        return total_discounted_response
+
+    cpdef _search(CCPOMCP self):
+        """Search from the agent's tree and return a policy instead of an action.
+
+        Returns ``(policy_dist, time_taken, sims_count)``: the greedy policy at
+        the root (exploration 0.0, nu ``self._nu``; the last greedy call in
+        search(h_0)) and the last two values returned by the parent's search.
+        """
+        cdef Action action
+        cdef double time_taken
+        cdef int sims_count
+        cdef PolicyModel policy_dist
+
+        self._init_lambda_fn()
+
+        # The action chosen by the parent class is unpacked but not returned.
+        (action, time_taken, sims_count) = super(CCPOMCP, self)._search()
+
+        tree_root = self._agent.tree
+        policy_dist = self._greedy_policy(tree_root, 0.0, self._nu)
+        return policy_dist, time_taken, sims_count
+
+    cpdef Response _simulate(
+        CCPOMCP self,
+        State state,
+        tuple history,
+        VNode root,
+        QNode parent,
+        Observation observation,
+        int depth
+    ):
+        """Run one recursive simulation step from ``state`` at ``root``.
+
+        When ``root`` is None a node is created (it becomes the agent's tree if
+        the agent has none), stored under ``parent[observation]`` when a parent
+        is given, expanded, and evaluated by a rollout. Otherwise an action is
+        drawn from the greedy policy (the call in simulate(s, h, d) in the
+        paper), a transition is sampled, the simulation recurses into the
+        child node, and the statistics of the chosen action are updated.
+
+        Returns:
+            Response: ``self._null_response`` past the maximum depth; the rollout
+            result for a new node; the sampled response alone when zero steps
+            were taken; else that response plus the discounted recursive one.
+
+        Raises:
+            ValueError: if a newly created tree root's history differs from the
+                agent's history.
+        """
+        cdef Response response, total_response
+        cdef int nsteps = 1
+        cdef Action action
+        cdef State next_state
+        cdef _CCPolicyModel policy_dist
+
+        if depth > self._max_depth:
+            return self._null_response
+
+        if root is None:
+            if self._agent.tree is not None:
+                root = self._VNode()
+            else:
+                root = self._VNode(root=True)
+                self._agent.tree = root
+                if self._agent.tree.history != self._agent.history:
+                    raise ValueError("Unable to plan for the given history.")
+
+            if parent is not None:
+                parent[observation] = root
+
+            self._expand_vnode(root, history, state=state)
+            return self._rollout(state, history, root, depth)
+
+        policy_dist = self._greedy_policy(root, self._exploration_const, self._nu)
+        action = policy_dist.sample(state=state)
+        step = sample_generative_model_with_response(
+            self._agent.transition_model,
+            self._agent.observation_model,
+            self._agent.response_model,
+            state,
+            action,
+            self._null_response,
+        )
+        next_state, observation, response, nsteps = step
+        if nsteps == 0:
+            return response
+
+        qnode = root[action]
+        future_response = self._simulate(
+            next_state,
+            history + ((action, observation),),
+            qnode[observation],
+            qnode,
+            observation,
+            depth + nsteps,
+        )
+        total_response = response + (self._discount_factor ** nsteps) * future_response
+
+        # Incremental running means over the chosen action's visit count.
+        root.num_visits += 1
+        qnode.num_visits += 1
+        visits = qnode.num_visits
+        qnode.value = qnode.value + (total_response.reward - qnode.value) / visits
+        qnode.cost_value = (
+            qnode.cost_value + (total_response.cost - qnode.cost_value) / visits
+        )
+        qnode.avg_cost_value = (
+            qnode.avg_cost_value + (response.cost - qnode.avg_cost_value) / visits
+        )
+        if depth == 1 and root is not None:
+            root.belief.add(state)
+
+        return total_response
+
+    cdef void _update_cost_constraint(
+        CCPOMCP self,
+        _CCPolicyModel policy_dist,
+        Action sampled_action
+    ):
+        """Update the cost constraint after ``sampled_action`` was drawn.
+
+        With p = policy_dist.probability(sampled_action), the constraint c becomes
+            (c - p * avg_cost(a) - sum_{a' != a} p(a') * cost_value(a'))
+            / (discount_factor * p),
+        where a' ranges over the other actions in ``policy_dist``.
+        """
+        cdef double action_prob, prob_prime
+        cdef Vector action_avg_cost, cost_value, cost_sum
+        cdef Action action_prime
+
+        action_prob = policy_dist.probability(action=sampled_action, state=None)
+        action_avg_cost = policy_dist.action_avg_cost(sampled_action)
+        self._cost_constraint -= (action_prob * action_avg_cost)
+
+        if action_prob < 1.0:
+            cost_sum = Vector.null(self._n_constraints)
+            for action_prime in policy_dist.get_all_actions():
+                if action_prime == sampled_action:
+                    continue
+                prob_prime = policy_dist.probability(
+                    action=action_prime, state=self._agent.history
+                )
+                # Use the alternative action's own cost value, and add whole
+                # vectors: the loop runs over actions, not constraint indices.
+                cost_value = policy_dist.action_cost_value(action_prime)
+                cost_sum = cost_sum + (prob_prime * cost_value)
+            self._cost_constraint -= cost_sum
+        self._cost_constraint /= (self._discount_factor * action_prob)
+
+
+cdef double _compute_visits_ratio(
+        double visits_num, double visits_denom, double explore_const = 1.0,
+):
+    """Return ``explore_const * sqrt(visits_num / visits_denom)``; DBL_MIN if the denominator is 0."""
+    # NOTE(review): if this is the <float.h> DBL_MIN it is the smallest positive
+    # double, not a large or negative sentinel -- confirm that is the intent.
+    if visits_denom != 0.0:
+        return explore_const * sqrt(visits_num / visits_denom)
+    return DBL_MIN
+
+
+cdef double _get_ccqnode_scalar_cost(VNode node, Action action):
+    """Return element 0 of ``node[action].cost_value``.
+
+    Raises KeyError when ``action`` is not in ``node``."""
+    if action in node:
+        return node[action].cost_value[0]
+    raise KeyError(f"Action {action} does not exist in node.")
diff --git a/pomdp_py/algorithms/pomcp.pxd b/pomdp_py/algorithms/pomcp.pxd
index b3d48fe..e0d1ff8 100644
--- a/pomdp_py/algorithms/pomcp.pxd
+++ b/pomdp_py/algorithms/pomcp.pxd
@@ -1,7 +1,10 @@
-from pomdp_py.algorithms.po_uct cimport VNode, RootVNode
+from pomdp_py.algorithms.po_uct cimport VNode, RootVNode, POUCT
 from pomdp_py.representations.distribution.particles cimport Particles
 
 cdef class VNodeParticles(VNode):
     cdef public Particles belief
 cdef class RootVNodeParticles(RootVNode):
     cdef public Particles belief
+
+cdef class POMCP(POUCT):
+    pass
diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd
new file mode 100644
index 0000000..d81bef4
--- /dev/null
+++ b/pomdp_py/framework/generalization.pxd
@@ -0,0 +1,32 @@
+from __future__ import annotations
+from pomdp_py.framework.basics cimport (
+    Agent,
+    Environment,
+    Observation,
+    State,
+    Action,
+    TransitionModel,
+    ObservationModel
+)
+
+
+cdef class Response:
+    pass
+
+
+cdef class ResponseModel:
+    cdef Response _null_response
+
+
+cdef class ResponseAgent(Agent):
+    cdef ResponseModel _response_model
+
+
+cdef class ResponseEnvironment(Environment):
+    cdef ResponseModel _response_model
+
+
+cpdef tuple[State, Observation, Response, int] sample_generative_model_with_response(
+    TransitionModel T, ObservationModel O, ResponseModel R, State state, Action action,
+    Response null_response, float discount_factor = *
+)
diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx
index 04c3e1a..705bff8 100644
--- a/pomdp_py/framework/generalization.pyx
+++ b/pomdp_py/framework/generalization.pyx
@@ -1,146 +1,307 @@
-from __future__ import annotations
-from functools import cached_property
-from typing import Iterable, Iterator, Union
-
-
-cdef class Vector:
-    """
-    The Vector class. Provides an implementation of a vector for maintaining multiple values.
-    """
-    cdef list[float] vals
-    
-    def __init__(self, values: float | int | Iterable[float | int] = list([0.])):
-        cdef list _vec = list()
-        if isinstance(values, (float, int)):
-            _vec.append(values)
-        elif isinstance(values, (list, tuple)):
-            _vec += list(values)
-        else:
-            raise TypeError(f"values must be type int, float, list, or tuple, but got {type(values)}.")
-        
-        # Store the values as an array of floats.
-        self.vals = list(float(v) for v in _vec)
-
-    @cached_property
-    def values(self) -> list[float]:
-        return self.vals.copy()
-
-    def __iter__(self) -> Iterator:
-        return iter(self.vals)
-
-    def __len__(self) -> int:
-        return len(self.vals)
-
-    def __eq__(self, other: Vector | list) -> bool:
-        if not isinstance(other, (Vector, list)):
-            raise TypeError(f"other must be type Vector or list, but got {type(other)}.")
-        return len(self) == len(other) and all(v0 == v1 for v0, v1 in zip(self, other))
-
-    def __add__(self, other: Vector | list | float | int) -> Vector:
-        if isinstance(other, (float, int)):
-            vec = [other] * len(self)
-        elif isinstance(other, Vector):
-            vec = other
-        else:
-            raise TypeError(f"other must be type Vector, float, or int, but got {type(other)}.")
-        return Vector([v0 + v1 for v0, v1 in zip(self, vec)])
-
-    def __radd__(self, other):
-        return self.__add__(other)
-
-    def __mul__(self, other):
-        if not isinstance(other, (float, int)):
-            raise TypeError(f"other must be type float or int, but got {type(other)}.")
-        return Vector([v * other for v in self])
-
-    def __rmul__(self, other):
-        return self.__mul__(other)
-
-    def __str__(self) -> str:
-        return str(self.vals)
-
-
-ResponseVariableType = Union[float, Vector]
+# cython: profile=True
 
-
-cdef class GenericResponse:
+from __future__ import annotations
+from pomdp_py.framework.basics cimport (
+    Agent,
+    GenerativeDistribution,
+    PolicyModel,
+    TransitionModel,
+    ObservationModel,
+    BlackboxModel,
+    Action,
+    Observation,
+    State,
+    Environment,
+    Option
+)
+from typing import Optional
+
+
+cdef class Response:
     """
-    A GenericResponse class maintains variables within a dictionary. However, subclasses of GenericResponse
-    can provide access to the dictionary variables using the dot (.) operator. Currently, this class can
-    handle arithmetic and comparison operations. However, if special operations will need to be performed, 
+    Abstract base class for the response an agent receives after
+    acting. It declares the interface only (copy, null, addition,
+    scalar multiplication, comparisons, and str): each base method
+    raises NotImplementedError or delegates to one that does. The
+    variables a response holds are defined by each subclass, and
     these operations need to be handled in the subclass.
     """
-    cdef dict __dict__
 
-    def __init__(self, dict response_dict = dict()):
-        self.__dict__.update(response_dict.copy())
+    def copy(self) -> Response:
+        raise NotImplementedError
 
-    cpdef void _check_reward_compatibility(self, value):
-        if not isinstance(value, GenericResponse):
-            raise TypeError(f"other must be type GenericResponse, float, or int, but got {type(value)}.")
+    @staticmethod
+    def null() -> Response:
+        raise NotImplementedError
 
-    cdef dict add_response(self, GenericResponse other):
-        self._check_reward_compatibility(other)
-        cdef dict rv = dict()
-        for name, value in self.__dict__.items():
-            rv.update({name: value + other.__dict__[name]})
-        return rv
+    def __add__(self, other: Response) -> Response:
+        raise NotImplementedError
 
-    def __add__(self, other: GenericResponse) -> GenericResponse:
-        return GenericResponse(self.add_response(other))
-
-    def __radd__(self, other: GenericResponse) -> GenericResponse:
+    def __radd__(self, other: Response) -> Response:
         return self.__add__(other)
 
-    cpdef dict mul_scalar(self, float other):
-        if not isinstance(other, float):
-            raise TypeError("other must be type float or int.")
-        cdef dict rv = dict()
-        for name, value in self.__dict__.items():
-            rv.update({name: value * other})
-        return rv
-
-    def __mul__(self, other: float | int) -> GenericResponse:
-        return GenericResponse(self.mul_scalar(other))
+    def __mul__(self, other: float | int) -> Response:
+        raise NotImplementedError
 
-    def __rmul__(self, other) -> GenericResponse:
+    def __rmul__(self, other: float | int) -> Response:
         return self.__mul__(other)
 
-    def __eq__(self, other: GenericResponse) -> bool:
-        self._check_reward_compatibility(other)
-        return all(value == other.__dict__[name] for name, value in self.__dict__.items())
+    def __eq__(self, other: Response) -> bool:
+        raise NotImplementedError
 
-    def __ne__(self, other) -> bool:
-        self._check_reward_compatibility(other)
-        return all(value != other.__dict__[name] for name, value in self.__dict__.items())
+    def __ne__(self, other: Response) -> bool:
+        raise NotImplementedError
 
-    def __lt__(self, other) -> bool:
-        self._check_reward_compatibility(other)
-        return all(value < other.__dict__[name] for name, value in self.__dict__.items())
+    def __lt__(self, other: Response) -> bool:
+        raise NotImplementedError
 
-    def __le__(self, other) -> bool:
-        self._check_reward_compatibility(other)
-        return all(value <= other.__dict__[name] for name, value in self.__dict__.items())
+    def __le__(self, other: Response) -> bool:
+        raise NotImplementedError
 
-    def __gt__(self, other) -> bool:
-        self._check_reward_compatibility(other)
-        return all(value > other.__dict__[name] for name, value in self.__dict__.items())
+    def __gt__(self, other: Response) -> bool:
+        raise NotImplementedError
 
-    def __ge__(self, other) -> bool:
-        self._check_reward_compatibility(other)
-        return all(value >= other.__dict__[name] for name, value in self.__dict__.items())
+    def __ge__(self, other: Response) -> bool:
+        raise NotImplementedError
         
     def __str__(self) -> str:
-        return ", ".join([f"{name}={values}" for name, values in self.__dict__.items()])
+        raise NotImplementedError
 
 
-cdef class RewardCost(GenericResponse):
+cdef class ResponseModel:
+    """
+    Produces the (real or simulated) Response that follows an interaction with the
+    environment. Implementations typically bundle more specific models, such as
+    reward and cost models. A private copy of the null response is stored.
+    """
+
+    def __init__(self, null_response: Response) -> None:
+        if isinstance(null_response, Response):
+            self._null_response = null_response.copy()
+            return
+        raise TypeError(
+            f"null_response must be type Response, but got {type(null_response)}."
+        )
+
+    def null_response(self) -> Response:
+        return self._null_response.copy()  # callers get their own copy
+
+    def sample(self, state: State, action: Action, next_state: State) -> Response:
+        raise NotImplementedError  # to be provided by subclasses
 
-    def __init__(self, float reward=0.0, Vector cost=Vector()):
-        super().__init__({"reward": reward, "cost": cost})
 
-    def __add__(self, other: RewardCost) -> RewardCost:
-        return RewardCost(**self.add_response(other))
+cdef class ResponseAgent(Agent):
+    """
+    A `ResponseAgent` behaves the same as an `Agent` with one difference: a
+    `ResponseAgent` adds a `ResponseModel`. The `ResponseAgent` also provides direct
+    access to the models maintained in the `ResponseModel` to reduce the wordiness of
+    the code.
+    """
 
-    def __mul__(self, other: float) -> RewardCost:
-        return RewardCost(**self.mul_scalar(other))
+    def __init__(
+        self,
+        init_belief: GenerativeDistribution,
+        policy_model: Optional[PolicyModel] = None,
+        transition_model: Optional[TransitionModel] = None,
+        observation_model: Optional[ObservationModel] = None,
+        response_model: Optional[ResponseModel] = None,
+        blackbox_model: Optional[BlackboxModel] = None,
+        name: Optional[str] = None
+    ):
+        """Create the agent; TypeError unless response_model is None or a ResponseModel."""
+        # NOTE(review): ``name`` is accepted but never used or forwarded here.
+        super().__init__(
+            init_belief=init_belief,
+            policy_model=policy_model,
+            transition_model=transition_model,
+            observation_model=observation_model,
+            reward_model=None,
+            blackbox_model=blackbox_model,
+        )
+
+        is_unset = response_model is None
+        if not (is_unset or isinstance(response_model, ResponseModel)):
+            raise TypeError(
+                "response_model must be type ResponseModel, "
+                f"but got type {type(response_model)}."
+            )
+        self._response_model = None
+        if not is_unset:
+            self.set_response_model(response_model)
+
+    @property
+    def reward_model(self):
+        """Always raises AttributeError; this agent exposes response_model instead."""
+        msg = "Use the response_model property to access the reward model."
+        raise AttributeError(msg)
+
+    @property
+    def response_model(self) -> ResponseModel:
+        """Return the response model; raise ValueError if none has been set."""
+        model = self._response_model
+        if model is not None:
+            return model
+        raise ValueError("response_model is None. Call set_response_model to set a model.")
+
+    def set_response_model(self, response_model: ResponseModel) -> None:
+        """Install ``response_model``; raise TypeError if it is not a ResponseModel."""
+        if not isinstance(response_model, ResponseModel):
+            kind = type(response_model)
+            raise TypeError(f"model must be type ResponseModel, but got type {kind}.")
+        self._response_model = response_model
+
+
+cdef class ResponseEnvironment(Environment):
+    """An Environment that is given a ResponseModel in place of a reward model."""
+
+    def __init__(
+        self,
+        init_state: State,
+        transition_model: Optional[TransitionModel] = None,
+        response_model: Optional[ResponseModel] = None,
+        blackbox_model: Optional[BlackboxModel] = None
+    ) -> None:
+        # Reject the invalid combination before the base class is initialized.
+        if response_model is not None and blackbox_model is not None:
+            raise ValueError(
+                "Cannot specify a response and blackbox model at the same time."
+            )
+        super().__init__(
+            init_state=init_state,
+            transition_model=transition_model,
+            reward_model=None,
+            blackbox_model=blackbox_model,
+        )
+        self._response_model = response_model
+
+    @property
+    def reward_model(self):
+        """Always raises AttributeError; use ``response_model`` instead."""
+        raise AttributeError(
+            "Use the response_model property to access the reward model."
+        )
+
+    @property
+    def response_model(self) -> ResponseModel:
+        """Return the ResponseModel (None if none was given)."""
+        return self._response_model
+
+    def set_models(
+        self,
+        transition_model: Optional[TransitionModel] = None,
+        response_model: Optional[ResponseModel] = None,
+        blackbox_model: Optional[BlackboxModel] = None,
+    ) -> None:
+        """
+        Reassigns the models to be the ones given.
+
+        Args:
+            transition_model (TransitionModel): The transition model.
+            response_model (ResponseModel): The response model.
+            blackbox_model (BlackboxModel): Provided when the transition model and
+                response model are not available.
+
+        Raises:
+            ValueError: If both a response model and a blackbox model are given.
+        """
+        # Validate first so a rejected call leaves the current models untouched.
+        if response_model is not None and blackbox_model is not None:
+            raise ValueError(
+                "Cannot specify a response and blackbox model at the same time."
+            )
+        super().set_models(
+            transition_model=transition_model,
+            reward_model=None,
+            blackbox_model=blackbox_model,
+        )
+        self._response_model = response_model
+
+    def state_transition(
+        self,
+        action: Action,
+        execute: bool = True,
+        discount_factor: float = 1.0
+    ) -> Response | tuple[State, Response]:
+        """
+        Simulates a state transition given `action`. If `execute` is set to True,
+        then the resulting state will be the new current state of the environment.
+
+        Args:
+            action (Action): action that triggers the state transition.
+            execute (bool): If True, the resulting state of the transition will become
+                            the current state.
+            discount_factor (float): Only necessary if action is an Option. It is the
+                discount factor when executing actions following an option's policy
+                until reaching terminal condition.
+
+        Returns:
+            Response or tuple[State, Response]: the response to `action` if `execute`
+            is True; otherwise the tuple (next_state, response).
+        """
+        next_state, response, _ = sample_generative_model_with_response(
+            T=self.transition_model,
+            O=None,
+            R=self.response_model,
+            state=self.state,
+            action=action,
+            null_response=self.response_model.null_response(),
+            discount_factor=discount_factor
+        )
+
+        if not execute:
+            return next_state, response
+        self.apply_transition(next_state)
+        return response
+
+
+cpdef tuple[State, Observation, Response, int] sample_generative_model_with_response(
+    TransitionModel T,
+    ObservationModel O,
+    ResponseModel R,
+    State state,
+    Action action,
+    Response null_response,
+    float discount_factor = 1.0
+):
+    """Sample a transition, its response and (if ``O`` is given) an observation.
+
+    Returns (next_state, observation, response, nsteps), or (next_state, response,
+    nsteps) when ``O`` is None. For an Option, step k's response is weighted by
+    ``discount_factor ** k``, intermediate observations are not sampled, and
+    nsteps counts the steps taken (0 if the option cannot start in ``state``).
+    """
+    cdef State next_state = state  # stays valid if an option terminates at once
+    cdef Observation observation
+    cdef Response response = null_response.copy()
+    cdef Option option
+    cdef int nsteps = 0
+
+    if isinstance(action, Option):
+        option = action
+        if not option.initiation(state):
+            # The option cannot start here (it is still in the tree because other
+            # states with this history allow it): no state change, no observation,
+            # and the null response rather than the int 0, which is not a Response.
+            if O is not None:
+                return state, None, response, 0
+            else:
+                return state, response, 0
+
+        step_discount_factor = 1.0
+        while not option.termination(state):
+            action = option.sample(state)
+            next_state = T.sample(state, action)
+            response += step_discount_factor * R.sample(state, action, next_state)
+            step_discount_factor *= discount_factor
+            state = next_state
+            nsteps += 1
+        # ``action`` now holds the last action sampled from the option (if any).
+    else:
+        next_state = T.sample(state, action)
+        response = R.sample(state, action, next_state)
+        nsteps += 1
+    if O is not None:
+        observation = O.sample(next_state, action)
+        return next_state, observation, response, nsteps
+    else:
+        return next_state, response, nsteps
diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py
index 2980af5..f1b7671 100644
--- a/pomdp_py/problems/rocksample/rocksample_problem.py
+++ b/pomdp_py/problems/rocksample/rocksample_problem.py
@@ -330,6 +330,7 @@ class RSPolicyModel(pomdp_py.RolloutPolicy):
 
     def __init__(self, n, k):
         check_actions = set({CheckAction(rock_id) for rock_id in range(k)})
+        print(check_actions)
         self._move_actions = {MoveEast, MoveWest, MoveNorth, MoveSouth}
         self._other_actions = {SampleAction()} | check_actions
         self._all_actions = self._move_actions | self._other_actions
@@ -430,22 +431,28 @@ def print_state(self):
             string += "\n"
         print(string)
 
-    def __init__(
-        self, n, k, init_state, rock_locs, init_belief, half_efficiency_dist=20
-    ):
-        self._n, self._k = n, k
-        agent = pomdp_py.Agent(
+    def build_agent(self, n, k, rock_locs, init_belief, half_efficiency_dist):
+        return pomdp_py.Agent(
             init_belief,
             RSPolicyModel(n, k),
             RSTransitionModel(n, rock_locs, self.in_exit_area),
             RSObservationModel(rock_locs, half_efficiency_dist=half_efficiency_dist),
             RSRewardModel(rock_locs, self.in_exit_area),
         )
-        env = pomdp_py.Environment(
+
+    def build_env(self, n, init_state, rock_locs):
+        return pomdp_py.Environment(
             init_state,
             RSTransitionModel(n, rock_locs, self.in_exit_area),
             RSRewardModel(rock_locs, self.in_exit_area),
         )
+
+    def __init__(
+        self, n, k, init_state, rock_locs, init_belief, half_efficiency_dist=20
+    ):
+        self._n, self._k = n, k
+        agent = self.build_agent(n, k, rock_locs, init_belief, half_efficiency_dist)
+        env = self.build_env(n, init_state, rock_locs)
         self._rock_locs = rock_locs
         super().__init__(agent, env, name="RockSampleProblem")
 
@@ -537,7 +544,7 @@ def create_instance(n, k, **kwargs):
 
 
 def main():
-    rocksample = debug_instance()  # create_instance(7, 8)
+    rocksample = create_instance(7, 8)
     rocksample.print_state()
 
     print("*** Testing POMCP ***")
diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd
new file mode 100644
index 0000000..e85eb8e
--- /dev/null
+++ b/pomdp_py/utils/cvec.pxd
@@ -0,0 +1,38 @@
+# cython: language_level=3
+
+from __future__ import annotations
+from libcpp.vector cimport vector
+
+ctypedef vector[double] vectord_t
+
+
+cdef vectord_t null_vector(unsigned int n_zeros) except *
+cpdef vectord_t list_to_vectord(list[float] values)
+cpdef list[float] vectord_to_list(vectord_t values)
+
+cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *
+cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *
+cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *
+cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *
+cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *
+cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *
+cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *
+cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *
+
+cdef double vector_max(const vectord_t& v) except *
+cdef double vector_min(const vectord_t& v) except *
+cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *
+cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *
+
+
+cdef class Vector:
+    cdef vectord_t _vals
+    cdef vectord_t _res_buff
+    cdef int _length
+
+    cdef bint _is_in_range(Vector self, int index)
+    cpdef Vector copy(Vector self)
+    cpdef double dot(Vector self, Vector other)
+    cpdef int len(Vector self)
+    cdef double max(Vector self)
+    cdef double min(Vector self)
diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
new file mode 100644
index 0000000..5464232
--- /dev/null
+++ b/pomdp_py/utils/cvec.pyx
@@ -0,0 +1,352 @@
+# cython: profile=True
+
+from __future__ import annotations
+cimport cython
+from libc.math cimport fmin, fmax
+from typing import Iterator
+
+
+cdef vectord_t null_vector(unsigned int n_zeros) except *:
+    cdef vectord_t vec
+    vec.assign(n_zeros, 0.0)
+    return vec
+
+
+@cython.boundscheck(False)
+cpdef vectord_t list_to_vectord(list[float] values):
+    cdef int length = len(values)
+    cdef unsigned int i = 0
+    cdef vectord_t rv = vectord_t(length)
+
+    if length > 0:
+        for i in range(length):
+            rv[i] = <double> values[i]
+    return rv
+
+
+@cython.boundscheck(False)
+cpdef list[float] vectord_to_list(vectord_t values):
+    cdef int length = len(values)
+    cdef unsigned int i = 0
+    cdef list[float] rv = list()
+
+    if length > 0:
+        for i in range(length):
+            rv.append(float(values[i]))
+    return rv
+
+
+@cython.boundscheck(False)
+cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *:
+    if v0.size() != v1.size():
+        raise ValueError("Both vectors must have the same size.")
+    if v0.size() == 0:
+        raise ValueError("Vectors should contain at least one value.")
+
+    cdef unsigned int i = 0
+    cdef double res = 0.0
+    for i in range(v0.size()):
+        res += (v0[i] * v1[i])
+    return res
+
+
+@cython.boundscheck(False)
+cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *:
+    if v0.size() != v1.size():
+        raise ValueError("Both vectors must have the same size.")
+    if v0.size() == 0:
+        raise ValueError("Vectors should contain at least one value.")
+
+    res = vectord_t(v0.size())
+    cdef unsigned int i = 0
+    for i in range(v0.size()):
+        res[i] = v0[i] + v1[i]
+
+
+@cython.boundscheck(False)
+cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *:
+    if v.size() == 0:
+        raise ValueError("Vector should contain at least one value.")
+
+    res = vectord_t(v.size())
+    cdef unsigned int i = 0
+    for i in range(v.size()):
+        res[i] = v[i] + scalar
+
+
+@cython.boundscheck(False)
+cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+
+    res = vectord_t(n_values)
+    cdef unsigned int i = 0
+    for i in range(n_values):
+        res[i] = v[i] * scalar
+
+
+@cython.boundscheck(False)
+cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *:
+    if v0.size() != v1.size():
+        raise ValueError("Both vectors must have the same size.")
+    if v0.size() == 0:
+        raise ValueError("Vectors should contain at least one value.")
+
+    res = vectord_t(v0.size())
+    cdef unsigned int i = 0
+    for i in range(v0.size()):
+        res[i] = v0[i] - v1[i]
+
+
+@cython.boundscheck(False)
+cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+
+    res = vectord_t(n_values)
+    cdef unsigned int i = 0
+    for i in range(n_values):
+        res[i] = v[i] - scalar
+
+
+@cython.boundscheck(False)
+cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+
+    res = vectord_t(n_values)
+    cdef unsigned int i = 0
+    for i in range(n_values):
+        res[i] = scalar - v[i]
+
+
+@cython.boundscheck(False)
+cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+    if scalar == 0.0:
+        raise ZeroDivisionError("Scalar division by zero!")
+
+    res = vectord_t(n_values)
+    cdef unsigned int i = 0
+    for i in range(n_values):
+        res[i] = v[i] / scalar
+
+
+@cython.boundscheck(False)
+cdef double vector_max(const vectord_t& v) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+    if n_values == 1:
+        return v[0]
+
+    cdef double max_value = v[0]
+    cdef int i = 0
+    for i in range(1, n_values):
+        if v[i] > max_value:
+            max_value = v[i]
+    return max_value
+
+
+@cython.boundscheck(False)
+cdef double vector_min(const vectord_t& v) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+    if n_values == 1:
+        return v[0]
+
+    cdef double min_value = v[0]
+    cdef int i = 0
+    for i in range(1, n_values):
+        if v[i] < min_value:
+            min_value = v[i]
+    return min_value
+
+
+@cython.boundscheck(False)
+cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *:
+    cdef int n_values = v.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+    if min_value >= max_value:
+        raise ValueError(
+            f"Min value ({min_value}) must be less than max value ({max_value})."
+        )
+    cdef int i = 0
+    for i in range(n_values):
+        v[i] = fmax(min_value, fmin(max_value, v[i]))
+
+
+@cython.boundscheck(False)
+cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *:
+    cdef int n_values = src.size()
+    if n_values == 0:
+        raise ValueError("Vector should contain at least one value.")
+    dst = vectord_t(n_values)
+    cdef int i = 0
+    for i in range(n_values):
+        dst[i] = src[i]
+
+
+cdef class Vector:
+    """
+    The Vector class. Provides an implementation of a vector for
+    maintaining multiple values.
+    """
+
+    def __init__(self, values: list | tuple):
+        if not isinstance(values, (list, tuple)):
+            raise TypeError(f"Unhandled type: {type(values)}.")
+        if len(values) == 0:
+            raise ValueError("The length of values must have at least one value.")
+        if not all(isinstance(v, (float, int)) for v in values):
+            raise ValueError("All values must be type float or int.")
+
+        self._vals = list_to_vectord(values)
+        self._length = self._vals.size()
+
+    cdef bint _is_in_range(Vector self, int index):
+        return 0 <= index < self._length
+
+    def as_list(self) -> list[float]:
+        """
+        Returns a list of the internal values.
+        """
+        return vectord_to_list(self._vals)
+
+    def as_vector(self) -> vectord_t:
+        cdef vectord_t copy
+        vector_copy(self._vals, copy)
+        return copy
+
+    @staticmethod
+    def clip(vec: Vector, min_value: float, max_value: float) -> Vector:
+        """
+        Clips the values within the value using the given min and max values.
+        """
+        if not isinstance(vec, Vector):
+            raise TypeError("vec must be a Vector.")
+        cdef vectord_t rv = vec.as_vector()
+        vector_clip(rv, min_value, max_value)
+        return Vector(vectord_to_list(rv))
+
+    cpdef Vector copy(Vector self):
+        """
+        Returns a copy of this vector.
+        """
+        return Vector(self.as_list())
+
+    cpdef double dot(Vector self, Vector other):
+        """
+        Performs the dot product between two Vectors.
+        """
+        if not isinstance(other, Vector):
+            raise TypeError("other must be type Vector.")
+        return vector_dot_prod(self._vals, other._vals)
+
+    @staticmethod
+    def fill(value: float, n_values: int) -> Vector:
+        return Vector([value] * n_values)
+
+    cpdef int len(Vector self):
+        return self._length
+
+    cdef double max(Vector self):
+        return vector_max(self._vals)
+
+    cdef double min(Vector self):
+        return vector_min(self._vals)
+
+    @staticmethod
+    def null(n_zeros: int) -> Vector:
+        return Vector.fill(0.0, n_zeros)
+
+    def __getitem__(self, index: int) -> float:
+        index = int(index)
+        if not self._is_in_range(index):
+            raise IndexError(f"index is out-of-range.")
+        return self._vals[index]
+
+    def __setitem__(self, index: int, value: float) -> None:
+        index = int(index)
+        if not self._is_in_range(index):
+            raise IndexError(f"index is out-of-range.")
+        if not isinstance(value, float):
+            raise TypeError(f"value must be type float, but got type {type(value)}.")
+        self._vals[index] = <double> value
+
+    def __iter__(self) -> Iterator:
+        return iter(self._vals)
+
+    def __len__(self) -> int:
+        return self._length
+
+    def __eq__(self, other: Vector | list | tuple) -> bool:
+        if not isinstance(other, (Vector, list, tuple)):
+            raise TypeError(
+                f"other must be type Vector, list, or tuple, but got {type(other)}."
+            )
+        if self._length != len(other):
+            return False
+        return all(v0 == v1 for v0, v1 in zip(self, other))
+
+    def __add__(self, other: Vector | float | int) -> Vector:
+        if isinstance(other, (float, int)):
+            vector_adds(self._vals, other, self._res_buff)
+        elif isinstance(other, Vector):
+            vector_add(self._vals, other.as_vector(), self._res_buff)
+        else:
+            raise TypeError(
+                "other must be type Vector with the same length, "
+                f"float, or int, but got {type(other)}."
+            )
+        return Vector(self._res_buff)
+
+    def __radd__(self, other: Vector | float | int) -> Vector:
+        return self.__add__(other)
+
+    def __mul__(self, other: float | int) -> Vector:
+        if not isinstance(other, (float, int)):
+            raise TypeError(f"other must be type float or int, but got {type(other)}.")
+        vector_muls(self._vals, other, self._res_buff)
+        return Vector(self._res_buff)
+
+    def __rmul__(self, other: float | int) -> Vector:
+        return self.__mul__(other)
+
+    def __sub__(self, other: Vector | float | int) -> Vector:
+        if isinstance(other, (float, int)):
+            vector_subvs(self._vals, other, self._res_buff)
+        elif isinstance(other, Vector):
+            vector_sub(self._vals, other.as_vector(), self._res_buff)
+        else:
+            raise TypeError(
+                "other must be type Vector with the same length, "
+                f"float, or int, but got {type(other)}."
+            )
+        return Vector(self._res_buff)
+
+    def __rsub__(self, other: Vector | float | int) -> Vector:
+        if isinstance(other, (float, int)):
+            vector_subsv(other, self._vals, self._res_buff)
+        elif isinstance(other, Vector):
+            vector_sub(other.as_vector(), self._vals, self._res_buff)
+        else:
+            raise TypeError(
+                "other must be type Vector with the same length, "
+                f"float, or int, but got {type(other)}."
+            )
+        return Vector(self._res_buff)
+
+    def __truediv__(self, other: float | int) -> Vector:
+        vector_scalar_div(self._vals, other, self._res_buff)
+        return Vector(self._res_buff)
+
+    def __str__(self) -> str:
+        return str(vectord_to_list(self._vals))
diff --git a/setup.py b/setup.py
index c519ea9..44f4d13 100644
--- a/setup.py
+++ b/setup.py
@@ -2,6 +2,7 @@
 
 from setuptools import setup, Extension, find_packages
 from Cython.Build import cythonize
+import numpy
 import os.path
 
 
@@ -19,7 +20,7 @@ def build_extensions(pkg_name, major_submodules):
                 filename = os.path.splitext(f)[0]
                 ext_name = f"{pkg_name}.{subm}.{filename}"
                 ext_path = os.path.join(pkg_name, subm.replace(".", "/"), f)
-                extensions.append(Extension(ext_name, [ext_path]))
+                extensions.append(Extension(ext_name, [ext_path], language="c++"))
 
     return extensions
 
@@ -45,5 +46,6 @@ def build_extensions(pkg_name, major_submodules):
     package_data={
         "pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"],
     },
+    include_dirs=[numpy.get_include()],
     zip_safe=False,
 )
diff --git a/tests/test_util_vector_ops.py b/tests/test_util_vector_ops.py
new file mode 100644
index 0000000..8cbdac5
--- /dev/null
+++ b/tests/test_util_vector_ops.py
@@ -0,0 +1,151 @@
+from pomdp_py.utils.cvec import Vector
+
+
+description = "testing utils cvec"
+
+
+def test_assign():
+    v = Vector([0])
+    assert v == [0.]
+
+    v = Vector([2, 4, 8])
+    assert v == [2., 4., 8.]
+
+    v = Vector([0])
+    assert v != [1.]
+
+
+def test_as_list():
+    v = Vector([10., 3., 3.])
+    assert v.as_list() == [10., 3., 3.]
+
+    v = Vector([1., 5., 9., 11., 6.])
+    assert v.as_list() == [1., 5., 9., 11., 6.]
+
+
+def test_as_vector():
+    v = Vector([1., 2., 3.])
+    assert v.as_vector() == [1., 2., 3.]
+
+
+def test_clip():
+    v = Vector([2, 5, 7])
+    assert Vector.clip(v, 0, 10) == [2., 5., 7.]
+
+    v = Vector([2, 5, 7])
+    assert Vector.clip(v, 0, 4) == [2., 4., 4.]
+
+    v = Vector([2, 5, 7])
+    assert Vector.clip(v, 4, 10) == [4., 5., 7.]
+
+    v = Vector([2, 5, 7])
+    assert Vector.clip(v, 3, 4) == [3., 4., 4.]
+
+
+def test_copy():
+    v = Vector([1., 2., 3.])
+    assert v.copy() == [1., 2., 3.]
+
+
+def test_dot():
+    v0 = Vector([1., 3., 5., 7.])
+    v1 = Vector([0., 13., 0., 10.])
+    assert v0.dot(v1) == 109.
+
+
+def test_fill():
+    v0 = Vector.fill(10., 5)
+    assert v0 == [10., 10., 10., 10., 10.]
+
+    v1 = Vector.fill(3., 2)
+    assert v1 == [3., 3.]
+
+
+def test_len():
+    v = Vector([1., 2.])
+    assert v.len() == 2
+
+    v = Vector([5., 7., 2.])
+    assert v.len() == 3
+
+
+def test_null():
+    v = Vector.null(4)
+    assert v == [0., 0., 0., 0.]
+
+
+def test_get_and_set_item():
+    v = Vector.null(3)
+    v[0] = 1.
+    v[2] = 1999.
+
+    assert v == [1., 0., 1999.]
+    assert v[0] == 1.
+    assert v[1] == 0.
+    assert v[2] == 1999.
+
+
+def test_iter():
+    v = Vector([1., 2., 4., 8.])
+    for value0, value1 in zip(v, [1., 2., 4., 8.]):
+        assert value0 == value1
+
+
+def test_add():
+    v0 = Vector([1, 2, 3])
+    v1 = Vector([10, 22, 55])
+
+    assert v0 + 4. == [5., 6., 7.]
+    assert 4. + v0 == [5., 6., 7.]
+    assert v0 + v1 == [11., 24., 58.]
+    assert v1 + v0 == [11., 24., 58.]
+
+
+def test_mul():
+    v = Vector([9., 8.])
+    assert v * 5. == [45., 40.]
+    assert v * 10. == [90., 80.]
+
+
+def test_sub():
+    v0 = Vector([1, 2, 3])
+    v1 = Vector([10, 22, 55])
+
+    assert v0 - v1 == [-9., -20., -52.]
+    assert v1 - v0 == [9., 20., 52.]
+    assert v1 - 10. == [0., 12., 45.]
+    assert v0 - 0. == [1., 2., 3.]
+
+
+def test_truediv():
+    v = Vector([10., 20., 50.])
+    assert v / 2. == [5., 10., 25.]
+    assert v / 20. == [0.5, 1.0, 2.5]
+
+
+def test_str():
+    v = Vector([2., 4.])
+    assert str(v) == str([2., 4.])
+
+
+def run():
+    test_assign()
+    test_as_list()
+    test_as_vector()
+    test_clip()
+    test_copy()
+    test_dot()
+    test_fill()
+    test_len()
+    test_null()
+    test_get_and_set_item()
+
+    test_add()
+    test_mul()
+    test_sub()
+    test_truediv()
+    test_str()
+
+
+if __name__ == "__main__":
+    run()
\ No newline at end of file

From 0571618cba3b49b149513fa3cd9b323791a6da99 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Tue, 16 Apr 2024 17:05:41 -0400
Subject: [PATCH 04/30] Fixed cdef Vector declaration missing variable names.

---
 pomdp_py/algorithms/ccpomcp.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
index a0fde23..5fa3b66 100644
--- a/pomdp_py/algorithms/ccpomcp.pyx
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -376,7 +376,7 @@ cdef class CCPOMCP(POMCP):
         cdef int best_q_index = 0
         cdef double logN = log(<double> vnode.num_visits + 1)
         cdef double q_value
-        cdef Vector
+        cdef Vector Q_lambda, Action_UCB
         cdef CCQNode ccqnode
         cdef Action action
         cdef int i = 0

From 954c404af64af9058dd7b6c9ccb0fcd80f6a7db5 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Wed, 17 Apr 2024 00:22:36 -0400
Subject: [PATCH 05/30] Improved speed by reusing Vector buffers and removing copies.

---
 pomdp_py/algorithms/ccpomcp.pxd |  2 ++
 pomdp_py/algorithms/ccpomcp.pyx | 54 ++++++++++++++++++++-------------
 pomdp_py/utils/cvec.pxd         |  2 ++
 pomdp_py/utils/cvec.pyx         | 46 +++++++++++++++++++++++++---
 4 files changed, 79 insertions(+), 25 deletions(-)

diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd
index 0c5235c..e945cb4 100644
--- a/pomdp_py/algorithms/ccpomcp.pxd
+++ b/pomdp_py/algorithms/ccpomcp.pxd
@@ -41,6 +41,8 @@ cdef class CCPOMCP(POMCP):
     cdef double _nu
     cdef list[float] _cost_value_init
     cdef unsigned int _n_constraints
+    # Buffers
+    cdef Vector _Q_lambda, _Action_UCB
 
     cpdef public Action plan(CCPOMCP self, Agent agent)
     cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *)
diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
index a0fde23..b193968 100644
--- a/pomdp_py/algorithms/ccpomcp.pyx
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -104,7 +104,7 @@ cdef class CCQNode(QNode):
 
     @property
     def avg_cost_value(self) -> Vector:
-        return self._avg_cost_value.copy()
+        return self._avg_cost_value
 
     @avg_cost_value.setter
     def avg_cost_value(self, avg_cost_value: Vector) -> None:
@@ -117,7 +117,7 @@ cdef class CCQNode(QNode):
 
     @property
     def cost_value(self) -> Vector:
-        return self._cost_value.copy()
+        return self._cost_value
 
     @cost_value.setter
     def cost_value(self, cost_value: Vector) -> None:
@@ -138,8 +138,8 @@ cdef class CCQNode(QNode):
 cdef class _CCPolicyActionData:
     def __init__(self, double prob, Vector cost_value, Vector avg_cost_value):
         self._prob = prob
-        self._cost_value = cost_value.copy()
-        self._avg_cost_value = avg_cost_value.copy()
+        self._cost_value = cost_value
+        self._avg_cost_value = avg_cost_value
 
     @property
     def prob(self) -> float:
@@ -173,12 +173,12 @@ cdef class _CCPolicyModel(PolicyModel):
     cpdef Vector action_avg_cost(_CCPolicyModel self, Action action):
         if action not in self._data:
             raise KeyError(f"The action {action} is not exist in this policy model.")
-        return self._data[action].cost_value.copy()
+        return self._data[action].cost_value
 
     cpdef Vector action_cost_value(_CCPolicyModel self, Action action):
         if action not in self._data:
             raise KeyError(f"The action {action} is not exist in this policy model.")
-        return self._data[action].avg_cost_value.copy()
+        return self._data[action].avg_cost_value
 
     cdef public float probability(_CCPolicyModel self, Action action, State state):
         if action not in self._data:
@@ -294,6 +294,10 @@ cdef class CCPOMCP(POMCP):
         self._nu = <double> nu
         self._use_random_lambda = <bint> use_random_lambda
 
+        # Initialize buffers.
+        self._Q_lambda = Vector()
+        self._Action_UCB = Vector()
+
     cpdef public Action plan(CCPOMCP self, Agent agent):
         cdef Action action
         cdef _CCPolicyModel policy_dist
@@ -358,6 +362,7 @@ cdef class CCPOMCP(POMCP):
                 )
 
     @cython.boundscheck(False)
+    @cython.wraparound(False)
     cpdef _CCPolicyModel _greedy_policy(
         CCPOMCP self,
         VNode vnode,
@@ -376,13 +381,18 @@ cdef class CCPOMCP(POMCP):
         cdef int best_q_index = 0
         cdef double logN = log(<double> vnode.num_visits + 1)
         cdef double q_value
-        cdef Vector
         cdef CCQNode ccqnode
         cdef Action action
         cdef int i = 0
 
-        Q_lambda = Vector.null(n_actions)
-        Action_UCB = Vector.null(n_actions)
+        if n_actions == 0:
+            raise RuntimeError("The number of actions is 0?")
+
+        self._Q_lambda.resize(n_actions)
+        self._Q_lambda.zeros()
+
+        self._Action_UCB.resize(n_actions)
+        self._Action_UCB.zeros()
 
         for i in range(n_actions):
             ccqnode = vnode[action_list[i]]
@@ -395,7 +405,7 @@ cdef class CCPOMCP(POMCP):
                     n_ccqnode_visits,
                     explore_const
                 )
-                Action_UCB[i] = _compute_visits_ratio(
+                self._Action_UCB[i] = _compute_visits_ratio(
                     log(n_ccqnode_visits),
                     n_ccqnode_visits
                 )
@@ -404,28 +414,28 @@ cdef class CCPOMCP(POMCP):
                 best_q_lambda = q_value
                 best_q_index = i
 
-            Q_lambda[i] = q_value
+            self._Q_lambda[i] = q_value
 
         # Compute a*, the best action(s).
         cdef list[Action] best_action_list = list()
-        cdef double best_ucb_add = Action_UCB[best_q_index]
+        cdef double best_ucb_add = self._Action_UCB[best_q_index]
         cdef double ucb_add, q_value_diff
         cdef bint add_to_best_action_list = False
 
         q_value = 0.0
-        best_q_lambda = Q_lambda[best_q_index]
+        best_q_lambda = self._Q_lambda[best_q_index]
 
         for i in range(n_actions):
             action = action_list[i]
             ccqnode = vnode[action]
-            q_value = Q_lambda[i]
+            q_value = self._Q_lambda[i]
 
             if q_value == best_q_lambda:
                 add_to_best_action_list = True
 
             else:
                 q_value_diff = abs(q_value - best_q_lambda)
-                ucb_add = nu * (Action_UCB[i] + best_ucb_add)
+                ucb_add = nu * (self._Action_UCB[i] + best_ucb_add)
                 if q_value_diff <= ucb_add and action not in best_action_list:
                     add_to_best_action_list = True
 
@@ -547,7 +557,7 @@ cdef class CCPOMCP(POMCP):
             )
 
         else:
-            self._lambda = Vector.null(self._n_constraints)
+            self._lambda.zeros()
 
     cpdef _perform_simulation(self, state):
         super(CCPOMCP, self)._perform_simulation(state=state)
@@ -714,16 +724,19 @@ cdef class CCPOMCP(POMCP):
 
         return total_response
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
     cdef void _update_cost_constraint(
         CCPOMCP self,
         _CCPolicyModel policy_dist,
         Action sampled_action
     ):
         cdef double action_prob, prob_prime
-        cdef Vector chat_minus_avg_cost, action_avg_cost, cost_value, cost_sum
+        cdef Vector action_avg_cost, cost_value
         cdef Action action_prime
         cdef list[Action] action_prime_list
         cdef int i = 0
+        cdef int n_actions
 
         action_prob = policy_dist.probability(
             action=sampled_action,
@@ -733,9 +746,9 @@ cdef class CCPOMCP(POMCP):
         self._cost_constraint -= (action_prob * action_avg_cost)
 
         if action_prob < 1.0:
-            cost_sum = Vector.null(self._n_constraints)
             action_prime_list = policy_dist.get_all_actions()
-            for i in range(len(action_prime_list)):
+            n_actions = len(action_prime_list)
+            for i in range(n_actions):
                 action_prime = action_prime_list[i]
                 if action_prime == sampled_action:
                     continue
@@ -745,8 +758,7 @@ cdef class CCPOMCP(POMCP):
                     state=self._agent.history
                 )
                 cost_value = policy_dist.action_cost_value(sampled_action)
-                cost_sum[i] += (prob_prime * cost_value)
-            self._cost_constraint -= cost_sum
+                self._cost_constraint -= (prob_prime * cost_value)
         self._cost_constraint /= (self._discount_factor * action_prob)
 
 
diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd
index e85eb8e..9fececc 100644
--- a/pomdp_py/utils/cvec.pxd
+++ b/pomdp_py/utils/cvec.pxd
@@ -36,3 +36,5 @@ cdef class Vector:
     cpdef int len(Vector self)
     cdef double max(Vector self)
     cdef double min(Vector self)
+    cdef void resize(Vector self, unsigned int new_size)
+    cdef void zeros(Vector self)
diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
index 5464232..4f09a3a 100644
--- a/pomdp_py/utils/cvec.pyx
+++ b/pomdp_py/utils/cvec.pyx
@@ -1,11 +1,10 @@
-# cython: profile=True
+# cython: language_level=3 profile=True
 
 from __future__ import annotations
 cimport cython
 from libc.math cimport fmin, fmax
 from typing import Iterator
 
-
 cdef vectord_t null_vector(unsigned int n_zeros) except *:
     cdef vectord_t vec
     vec.assign(n_zeros, 0.0)
@@ -13,6 +12,7 @@ cdef vectord_t null_vector(unsigned int n_zeros) except *:
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cpdef vectord_t list_to_vectord(list[float] values):
     cdef int length = len(values)
     cdef unsigned int i = 0
@@ -25,6 +25,7 @@ cpdef vectord_t list_to_vectord(list[float] values):
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cpdef list[float] vectord_to_list(vectord_t values):
     cdef int length = len(values)
     cdef unsigned int i = 0
@@ -37,6 +38,7 @@ cpdef list[float] vectord_to_list(vectord_t values):
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *:
     if v0.size() != v1.size():
         raise ValueError("Both vectors must have the same size.")
@@ -51,6 +53,7 @@ cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *:
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *:
     if v0.size() != v1.size():
         raise ValueError("Both vectors must have the same size.")
@@ -64,6 +67,7 @@ cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) e
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *:
     if v.size() == 0:
         raise ValueError("Vector should contain at least one value.")
@@ -75,6 +79,7 @@ cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res)
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -87,6 +92,7 @@ cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res)
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *:
     if v0.size() != v1.size():
         raise ValueError("Both vectors must have the same size.")
@@ -100,6 +106,7 @@ cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) e
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -112,6 +119,7 @@ cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res)
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -124,6 +132,7 @@ cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res)
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -138,6 +147,7 @@ cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t&
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef double vector_max(const vectord_t& v) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -154,6 +164,7 @@ cdef double vector_max(const vectord_t& v) except *:
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef double vector_min(const vectord_t& v) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -170,6 +181,7 @@ cdef double vector_min(const vectord_t& v) except *:
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *:
     cdef int n_values = v.size()
     if n_values == 0:
@@ -184,6 +196,7 @@ cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_v
 
 
 @cython.boundscheck(False)
+@cython.wraparound(False)
 cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *:
     cdef int n_values = src.size()
     if n_values == 0:
@@ -194,13 +207,19 @@ cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *:
         dst[i] = src[i]
 
 
+cdef void vector_resize(vectord_t& v, unsigned int new_size):
+    if new_size <= 0:
+        raise ValueError("New vector size must be a positive integer.")
+    v.resize(new_size)
+
+
 cdef class Vector:
     """
     The Vector class. Provides an implementation of a vector for
     maintaining multiple values.
     """
 
-    def __init__(self, values: list | tuple):
+    def __init__(self, values: list | tuple = (0.0,)):
         if not isinstance(values, (list, tuple)):
             raise TypeError(f"Unhandled type: {type(values)}.")
         if len(values) == 0:
@@ -208,7 +227,14 @@ cdef class Vector:
         if not all(isinstance(v, (float, int)) for v in values):
             raise ValueError("All values must be type float or int.")
 
-        self._vals = list_to_vectord(values)
+        cdef int i
+        cdef int n_values = len(values)
+        self._vals = vectord_t(n_values)
+        if n_values == 1:
+            self._vals[0] = values[0]
+        else:
+            for i in range(n_values):
+                self._vals[i] = values[i]
         self._length = self._vals.size()
 
     cdef bint _is_in_range(Vector self, int index):
@@ -267,6 +293,18 @@ cdef class Vector:
     def null(n_zeros: int) -> Vector:
         return Vector.fill(0.0, n_zeros)
 
+    cdef void resize(Vector self, unsigned int new_size):
+        vector_resize(self._vals, new_size)
+        self._length = self._vals.size()
+
+    cdef void zeros(Vector self):
+        cdef int i
+        if self._length == 1:
+            self._vals[0] = 0.
+        else:
+            for i in range(self._length):
+                self._vals[i] = 0.
+
     def __getitem__(self, index: int) -> float:
         index = int(index)
         if not self._is_in_range(index):

From a6610eb63cc9a8a01b39480c22d78eaf5efccb57 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 13:19:12 -0400
Subject: [PATCH 06/30] Removed complex way of handling null responses.

---
 pomdp_py/framework/generalization.pxd |  2 ++
 pomdp_py/framework/generalization.pyx | 18 ++++++------------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd
index d81bef4..cfba56c 100644
--- a/pomdp_py/framework/generalization.pxd
+++ b/pomdp_py/framework/generalization.pxd
@@ -1,3 +1,5 @@
+# cython: language_level=3
+
 from __future__ import annotations
 from pomdp_py.framework.basics cimport (
     Agent,
diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx
index 705bff8..223482b 100644
--- a/pomdp_py/framework/generalization.pyx
+++ b/pomdp_py/framework/generalization.pyx
@@ -1,4 +1,4 @@
-# cython: profile=True
+# cython: language_level=3
 
 from __future__ import annotations
 from pomdp_py.framework.basics cimport (
@@ -74,18 +74,12 @@ cdef class ResponseModel:
     the real or a simulated environment. The implementation of this model contains a 
     collection of more specific models such as reward and cost models.
     """
-
-    def __init__(self, null_response: Response) -> None:
-        if not isinstance(null_response, Response):
-            raise TypeError(
-                "null_response must be type Response, "
-                f"but got {type(null_response)}."
-            )
-        self._null_response = null_response.copy()
+    def __init__(self):
+        pass
 
     def null_response(self) -> Response:
-        return self._null_response.copy()
-        
+        raise NotImplementedError
+
     def sample(self, state: State, action: Action, next_state: State) -> Response:
         raise NotImplementedError
 
@@ -265,7 +259,7 @@ cpdef tuple[State, Observation, Response, int] sample_generative_model_with_resp
 ):
     cdef State next_state
     cdef Observation observation
-    cdef Response response = null_response.copy()
+    cdef Response response = null_response
     cdef Option option
     cdef int nsteps = 0
 

From 6dbcbf73f9918a94a004df717790fe0053b4ff78 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 13:22:33 -0400
Subject: [PATCH 07/30] Implemented NumPy vectors and reduced Python
 references.

---
 pomdp_py/algorithms/ccpomcp.pxd |  13 +-
 pomdp_py/algorithms/ccpomcp.pyx | 242 +++++++++++++-------------
 pomdp_py/utils/cvec.pxd         |  56 ++++---
 pomdp_py/utils/cvec.pyx         | 289 +++++++++++++-------------------
 4 files changed, 282 insertions(+), 318 deletions(-)

diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd
index e945cb4..c2f657c 100644
--- a/pomdp_py/algorithms/ccpomcp.pxd
+++ b/pomdp_py/algorithms/ccpomcp.pxd
@@ -1,3 +1,5 @@
+# cython: language_level=3
+
 from pomdp_py.algorithms.po_uct cimport QNode
 from pomdp_py.algorithms.pomcp cimport POMCP, VNode
 from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation
@@ -22,7 +24,11 @@ cdef class _CCPolicyActionData:
 
 cdef class _CCPolicyModel(PolicyModel):
     cdef dict[Action, _CCPolicyActionData] _data
+    cdef double _prob_sum
 
+    cdef bint _total_prob_is_not_one(_CCPolicyModel self)
+    cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node)
+    cpdef void clear(_CCPolicyModel self)
     cpdef Vector action_avg_cost(_CCPolicyModel self, Action action)
     cpdef Vector action_cost_value(_CCPolicyModel self, Action action)
     cdef public float probability(_CCPolicyModel self, Action action, State state)
@@ -43,17 +49,18 @@ cdef class CCPOMCP(POMCP):
     cdef unsigned int _n_constraints
     # Buffers
     cdef Vector _Q_lambda, _Action_UCB
+    cdef _CCPolicyModel _greedy_policy_model
 
     cpdef public Action plan(CCPOMCP self, Agent agent)
     cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *)
-    cpdef _CCPolicyModel _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu)
+    cpdef void _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu)
     cdef void _init_lambda_fn(CCPOMCP self)
     cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action)
     cpdef _search(CCPOMCP self)
     cpdef Response _simulate(CCPOMCP self, State state, tuple history, VNode root, QNode parent,
                              Observation observation, int depth)
-    cdef void _update_cost_constraint(CCPOMCP self, _CCPolicyModel policy_dist, Action sampled_action)
+    cdef void _update_cost_constraint(CCPOMCP self, Action sampled_action)
 
 
-cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const = *)
+cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const)
 cdef double _get_ccqnode_scalar_cost(VNode node, Action action)
diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
index b193968..189a443 100644
--- a/pomdp_py/algorithms/ccpomcp.pyx
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -1,8 +1,9 @@
-# cython: profile=True
+# cython: language_level=3
 
 from __future__ import annotations
 cimport cython
-from libc.math cimport log, sqrt, exp, abs
+from cython.parallel cimport prange
+from libc.math cimport log, sqrt, abs
 import math
 cimport numpy as cnp
 import numpy as np
@@ -14,7 +15,6 @@ from pomdp_py.framework.generalization cimport (
     ResponseAgent,
     sample_generative_model_with_response
 )
-from pomdp_py.representations.distribution.histogram cimport Histogram
 from pomdp_py.representations.distribution.particles cimport Particles
 from pomdp_py.utils import typ
 from pomdp_py.utils.cvec cimport Vector
@@ -155,37 +155,63 @@ cdef class _CCPolicyActionData:
 
 
 cdef class _CCPolicyModel(PolicyModel):
-    def __init__(self, dict[Action, _CCPolicyActionData] data_dict) -> None:
+    def __init__(self) -> None:
         super().__init__()
-        cdef Action action
-        cdef _CCPolicyActionData datum
-        cdef double prob_sum = 0.0
+        self._data = dict()
+        self.clear()
+
+    cdef bint _total_prob_is_not_one(_CCPolicyModel self):
+        return self._prob_sum != 1.0
 
-        for action, datum in data_dict.items():
-            if not isinstance(action, Action):
-                raise TypeError("action must be type Action.")
-            prob_sum += datum.prob
+    cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node):
+        self._data[action] = _CCPolicyActionData(
+            prob=prob,
+            cost_value=node.cost_value,
+            avg_cost_value=node.avg_cost_value
+        )
+        self._prob_sum += prob
+        if self._prob_sum > 1.0:
+            raise RuntimeError(
+                "Too much actions were added. The probability sum is greater than one!"
+            )
 
-        if prob_sum != 1.0:
-            raise ValueError(f"The probabilities must sum to 1.0, but got {prob_sum}.")
-        self._data = data_dict.copy()
+    cpdef void clear(_CCPolicyModel self):
+        self._data.clear()
+        self._prob_sum = 0.0
 
     cpdef Vector action_avg_cost(_CCPolicyModel self, Action action):
+        if self._total_prob_is_not_one():
+            raise RuntimeError(
+                "Tried to get action avg cost when total probability != 1.0."
+            )
         if action not in self._data:
             raise KeyError(f"The action {action} is not exist in this policy model.")
         return self._data[action].cost_value
 
     cpdef Vector action_cost_value(_CCPolicyModel self, Action action):
+        if self._total_prob_is_not_one():
+            raise RuntimeError(
+                "Tried to get action cost value when total probability != 1.0."
+            )
         if action not in self._data:
             raise KeyError(f"The action {action} is not exist in this policy model.")
         return self._data[action].avg_cost_value
 
     cdef public float probability(_CCPolicyModel self, Action action, State state):
+        if self._total_prob_is_not_one():
+            raise RuntimeError(
+                "Tried to get action probability when total probability != 1.0."
+            )
         if action not in self._data:
             raise KeyError(f"The action {action} is not exist in this policy model.")
         return self._data[action].prob
 
     cdef public Action sample(_CCPolicyModel self, State state):
+        if self._prob_sum != 1.0:
+            raise RuntimeError("Tried to sample with a total probability != 1.0.")
+
+        if len(self._data) == 1:
+            return list(self._data.keys())[0]
         return np.random.choice(np.array(list(self._data.keys()), dtype=object))
 
     def get_all_actions(self, state: Optional[State] = None, history: Optional[tuple] = None):
@@ -297,6 +323,7 @@ cdef class CCPOMCP(POMCP):
         # Initialize buffers.
         self._Q_lambda = Vector()
         self._Action_UCB = Vector()
+        self._greedy_policy_model = _CCPolicyModel()
 
     cpdef public Action plan(CCPOMCP self, Agent agent):
         cdef Action action
@@ -329,9 +356,9 @@ cdef class CCPOMCP(POMCP):
 
         # Then get the policy distribution, sample from it,
         # and update the cost constraint.
-        policy_dist, time_taken, sims_count = self._search()
-        action = policy_dist.sample(state=None)
-        self._update_cost_constraint(policy_dist, action)
+        _, time_taken, sims_count = self._search()
+        action = self._greedy_policy_model.sample(state=None)
+        self._update_cost_constraint(action)
 
         # Update stats.
         self._last_num_sims = sims_count
@@ -363,7 +390,7 @@ cdef class CCPOMCP(POMCP):
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
-    cpdef _CCPolicyModel _greedy_policy(
+    cpdef void _greedy_policy(
         CCPOMCP self,
         VNode vnode,
         double explore_const,
@@ -377,10 +404,9 @@ cdef class CCPOMCP(POMCP):
 
         # Compute Q_lambda.
         cdef double n_ccqnode_visits
-        cdef double best_q_lambda = DBL_MIN
-        cdef int best_q_index = 0
         cdef double logN = log(<double> vnode.num_visits + 1)
-        cdef double q_value
+        cdef double q_value = 0.
+        cdef double action_ucb = 0.
         cdef CCQNode ccqnode
         cdef Action action
         cdef int i = 0
@@ -388,11 +414,12 @@ cdef class CCPOMCP(POMCP):
         if n_actions == 0:
             raise RuntimeError("The number of actions is 0?")
 
-        self._Q_lambda.resize(n_actions)
-        self._Q_lambda.zeros()
-
-        self._Action_UCB.resize(n_actions)
-        self._Action_UCB.zeros()
+        if n_actions == self._Q_lambda.len():
+            self._Q_lambda.zeros()
+            self._Action_UCB.zeros()
+        else:
+            self._Q_lambda.resize(n_actions)
+            self._Action_UCB.resize(n_actions)
 
         for i in range(n_actions):
             ccqnode = vnode[action_list[i]]
@@ -405,63 +432,59 @@ cdef class CCPOMCP(POMCP):
                     n_ccqnode_visits,
                     explore_const
                 )
-                self._Action_UCB[i] = _compute_visits_ratio(
+                action_ucb = _compute_visits_ratio(
                     log(n_ccqnode_visits),
-                    n_ccqnode_visits
+                    n_ccqnode_visits,
+                    1.0
                 )
-
-            if q_value > best_q_lambda:
-                best_q_lambda = q_value
-                best_q_index = i
-
-            self._Q_lambda[i] = q_value
+                self._Action_UCB.set(i, action_ucb)
+            self._Q_lambda.set(i, q_value)
 
         # Compute a*, the best action(s).
         cdef list[Action] best_action_list = list()
-        cdef double best_ucb_add = self._Action_UCB[best_q_index]
+        cdef int best_q_index = self._Q_lambda.argmax()
+        cdef double best_ucb_add = self._Action_UCB.get(best_q_index)
+        cdef double best_q_lambda = self._Q_lambda.get(best_q_index)
         cdef double ucb_add, q_value_diff
         cdef bint add_to_best_action_list = False
 
         q_value = 0.0
-        best_q_lambda = self._Q_lambda[best_q_index]
 
         for i in range(n_actions):
-            action = action_list[i]
-            ccqnode = vnode[action]
-            q_value = self._Q_lambda[i]
+            q_value = self._Q_lambda.get(i)
 
             if q_value == best_q_lambda:
                 add_to_best_action_list = True
 
             else:
                 q_value_diff = abs(q_value - best_q_lambda)
-                ucb_add = nu * (self._Action_UCB[i] + best_ucb_add)
-                if q_value_diff <= ucb_add and action not in best_action_list:
+                ucb_add = nu * (self._Action_UCB.get(i) + best_ucb_add)
+                # The original statement also checks the condition:
+                #   "action not in best_action_list"
+                # But since actions in the list are unique, we do not need to perform it.
+                if q_value_diff <= ucb_add:
                     add_to_best_action_list = True
 
             if add_to_best_action_list:
-                best_action_list.append(action)
+                best_action_list.append(action_list[i])
 
         # Find the policy.
         cdef int n_best_actions = len(best_action_list)
-        cdef Action action_min, action_max
+        cdef int action_min_idx, action_max_idx
+        cdef Action action_max, action_min
         cdef CCQNode ccqnode_min, ccqnode_max
-        cdef double cost_constraint_scalar = self._cost_constraint[0]
+        cdef double cost_constraint_scalar = self._cost_constraint.get(0)
         cdef double max_cost_value, min_cost_value, min_prob, cost_value
         cdef dict[Action, _CCPolicyActionData] data
 
+        self._greedy_policy_model.clear()
+
         if n_best_actions == 0:
             raise RuntimeError("No best actions were found?!")
 
         elif n_best_actions == 1:
             action = best_action_list[0]
-            data = {
-                action: _CCPolicyActionData(
-                    1.0,
-                    vnode[action].cost_value,
-                    vnode[action].avg_cost_value
-                )
-            }
+            self._greedy_policy_model.add(action, 1.0, vnode[action])
 
         else:
             # TODO: Implement linear programming to handle multiple constraints.
@@ -484,15 +507,14 @@ cdef class CCPOMCP(POMCP):
             min_cost_value = DBL_MAX
 
             for i in range(n_best_actions):
-                action = best_action_list[i]
-                cost_value = _get_ccqnode_scalar_cost(vnode, action)
+                cost_value = _get_ccqnode_scalar_cost(vnode, best_action_list[i])
 
                 if cost_value < min_cost_value:
-                    action_min = action
+                    action_min_idx = i
                     min_cost_value = cost_value
 
                 if cost_value > max_cost_value:
-                    action_max = action
+                    action_max_idx = i
                     max_cost_value = cost_value
 
             # Sanity checks.
@@ -507,23 +529,16 @@ cdef class CCPOMCP(POMCP):
                     f"Note: there are {n_best_actions} best actions. An error exists!"
                 )
 
-            if max_cost_value <= cost_constraint_scalar or action_min == action_max:
-                data = {
-                    action_max: _CCPolicyActionData(
-                        1.0,
-                        vnode[action_max].cost_value,
-                        vnode[action_max].avg_cost_value
-                    )
-                }
+            if (
+                max_cost_value <= cost_constraint_scalar
+                or action_min_idx == action_max_idx
+            ):
+                action = best_action_list[action_max_idx]
+                self._greedy_policy_model.add(action, 1.0, vnode[action])
 
             elif min_cost_value <= cost_constraint_scalar:
-                data = {
-                    action_min: _CCPolicyActionData(
-                        1.0,
-                        vnode[action_min].cost_value,
-                        vnode[action_min].avg_cost_value
-                    )
-                }
+                action = best_action_list[action_min_idx]
+                self._greedy_policy_model.add(action, 1.0, vnode[action])
 
             else:
                 min_prob = (
@@ -531,22 +546,12 @@ cdef class CCPOMCP(POMCP):
                     / (max_cost_value - min_cost_value)
                 )
 
-                data = {
-                    action_min: _CCPolicyActionData(
-                        min_prob,
-                        vnode[action_min].cost_value,
-                        vnode[action_min].avg_cost_value
-                    ),
-                    action_max: _CCPolicyActionData(
-                        1.0 - min_prob,
-                        vnode[action_max].cost_value,
-                        vnode[action_max].avg_cost_value
-                    ),
-                }
-
-        return _CCPolicyModel(data)
-
-    cdef void _init_lambda_fn(CCPOMCP self):
+                action_min = best_action_list[action_min_idx]
+                action_max = best_action_list[action_max_idx]
+                self._greedy_policy_model.add(action_min, min_prob, vnode[action_min])
+                self._greedy_policy_model.add(action_max, 1.-min_prob, vnode[action_max])
+
+    cdef void _init_lambda_fn(CCPOMCP self) except *:
         if self._use_random_lambda:
             self._lambda = Vector(
                 np.random.uniform(
@@ -564,12 +569,8 @@ cdef class CCPOMCP(POMCP):
 
         # Sample using the greedy policy. This greedy policy corresponds to the first
         # call in the search(h_0) function.
-        policy_dist = self._greedy_policy(
-            self._agent.tree,
-            0.0,
-            0.0,
-        )
-        action = policy_dist.sample(state=state)
+        self._greedy_policy(self._agent.tree, 0.0, 0.0)
+        action = self._greedy_policy_model.sample(state=state)
 
         # Update lambda.
         self._lambda = self._lambda + self._alpha_n * (
@@ -579,7 +580,7 @@ cdef class CCPOMCP(POMCP):
             lambda_vec_max = self._r_diff / (
                     self._tau * (1.0 - self._discount_factor)
             )
-            self._lambda = Vector.clip(self._lambda, 0.0, lambda_vec_max)
+            self._lambda.clip(0.0, lambda_vec_max)
 
     cpdef _rollout(self, State state, tuple history, VNode root, int depth):
         cdef Action action
@@ -615,7 +616,7 @@ cdef class CCPOMCP(POMCP):
         cdef Action action
         cdef double time_taken
         cdef int sims_count
-        cdef PolicyModel policy_dist
+        # cdef PolicyModel policy_dist
 
         # Initialize the lambda vector.
         self._init_lambda_fn()
@@ -625,12 +626,13 @@ cdef class CCPOMCP(POMCP):
 
         # After the search times out, create a policy using the greedy method.
         # This greedy policy corresponds to the last call in the search(h_0) function.
-        policy_dist = self._greedy_policy(
-            self._agent.tree,
-            0.0,
-            self._nu,
-        )
-        return policy_dist, time_taken, sims_count
+        # policy_dist = self._greedy_policy(
+        #     self._agent.tree,
+        #     0.0,
+        #     self._nu,
+        # )
+        self._greedy_policy(self._agent.tree, 0.0, self._nu)
+        return None, time_taken, sims_count
 
     cpdef Response _simulate(
         CCPOMCP self,
@@ -669,12 +671,8 @@ cdef class CCPOMCP(POMCP):
 
         # This greedy policy corresponds to the call in the simulate(s, h, d) function
         # in the paper.
-        policy_dist = self._greedy_policy(
-            root,
-            self._exploration_const,
-            self._nu
-        )
-        action = policy_dist.sample(state=state)
+        self._greedy_policy(root, self._exploration_const, self._nu)
+        action = self._greedy_policy_model.sample(state)
         next_state, observation, response, nsteps = (
             sample_generative_model_with_response(
                 self._agent.transition_model,
@@ -728,45 +726,47 @@ cdef class CCPOMCP(POMCP):
     @cython.wraparound(False)
     cdef void _update_cost_constraint(
         CCPOMCP self,
-        _CCPolicyModel policy_dist,
         Action sampled_action
-    ):
+    ) except *:
         cdef double action_prob, prob_prime
-        cdef Vector action_avg_cost, cost_value
         cdef Action action_prime
         cdef list[Action] action_prime_list
         cdef int i = 0
         cdef int n_actions
 
-        action_prob = policy_dist.probability(
+        action_prob = self._greedy_policy_model.probability(
             action=sampled_action,
             state=None
         )
-        action_avg_cost = policy_dist.action_avg_cost(sampled_action)
-        self._cost_constraint -= (action_prob * action_avg_cost)
+        self._cost_constraint -= (
+            action_prob
+            * self._greedy_policy_model.action_avg_cost(sampled_action)
+        )
 
         if action_prob < 1.0:
-            action_prime_list = policy_dist.get_all_actions()
+            action_prime_list = self._greedy_policy_model.get_all_actions()
             n_actions = len(action_prime_list)
             for i in range(n_actions):
                 action_prime = action_prime_list[i]
                 if action_prime == sampled_action:
                     continue
 
-                prob_prime = policy_dist.probability(
+                prob_prime = self._greedy_policy_model.probability(
                     action=action_prime,
                     state=self._agent.history
                 )
-                cost_value = policy_dist.action_cost_value(sampled_action)
-                self._cost_constraint -= (prob_prime * cost_value)
+                self._cost_constraint -= (
+                    prob_prime
+                    * self._greedy_policy_model.action_cost_value(sampled_action)
+                )
         self._cost_constraint /= (self._discount_factor * action_prob)
 
 
 cdef double _compute_visits_ratio(
-        double visits_num,
-        double visits_denom,
-        double explore_const = 1.0,
-):
+    double visits_num,
+    double visits_denom,
+    double explore_const,
+) except *:
     if visits_denom == 0.0:
         return DBL_MIN
     else:
@@ -776,7 +776,7 @@ cdef double _compute_visits_ratio(
 cdef double _get_ccqnode_scalar_cost(
     VNode node,
     Action action
-):
+) except *:
     if action not in node:
         raise KeyError(f"Action {action} does not exist in node.")
     return node[action].cost_value[0]
diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd
index 9fececc..0a0a46d 100644
--- a/pomdp_py/utils/cvec.pxd
+++ b/pomdp_py/utils/cvec.pxd
@@ -1,40 +1,48 @@
-# cython: language_level=3
+# cython: language_level=3, boundscheck=False, wraparound=False
 
 from __future__ import annotations
-from libcpp.vector cimport vector
+cimport numpy as cnp
+cnp.import_array()
 
-ctypedef vector[double] vectord_t
+ctypedef cnp.ndarray Arrayf_t
 
 
-cdef vectord_t null_vector(unsigned int n_zeros) except *
-cpdef vectord_t list_to_vectord(list[float] values)
-cpdef list[float] vectord_to_list(vectord_t values)
+cdef Arrayf_t null_vector(unsigned int n_zeros)
+cpdef Arrayf_t list_to_vectord(list[float] values)
+cpdef list[float] vectord_to_list(Arrayf_t values)
+cdef bint vectors_are_not_same_size(double[:] v0, double[:] v1)
+cdef bint vector_size_is_zero(double[:] v)
 
-cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *
-cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *
-cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *
-cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *
-cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *
-cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *
-cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *
-cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *
+cdef double vector_dot_prod(double[:] v0, double[:] v1) except *
+cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *
+cdef void vector_adds(double[:] v, double scalar, double[:] res) except *
+cdef void vector_muls(double[:] v, double scalar, double[:] res) except *
+cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *
+cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *
+cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *
+cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *
 
-cdef double vector_max(const vectord_t& v) except *
-cdef double vector_min(const vectord_t& v) except *
-cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *
-cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *
+cdef unsigned int vector_argmax(double[:] v) except *
+cdef unsigned int vector_argmin(double[:] v) except *
+cdef void vector_clip(double[:] v, double min_value, double max_value) except *
+cdef void vector_copy(double[:] src, double[:] dst) except *
 
 
 cdef class Vector:
-    cdef vectord_t _vals
-    cdef vectord_t _res_buff
+    cdef cnp.ndarray _vals
+    cdef cnp.ndarray _res_buff
     cdef int _length
 
-    cdef bint _is_in_range(Vector self, int index)
+    cdef bint _index_is_out_of_range(Vector self, unsigned int index)
+    cpdef void clip(Vector self, double min_value, double max_value)
     cpdef Vector copy(Vector self)
     cpdef double dot(Vector self, Vector other)
     cpdef int len(Vector self)
-    cdef double max(Vector self)
-    cdef double min(Vector self)
+    cpdef unsigned int argmax(Vector self)
+    cpdef unsigned int argmin(Vector self)
+    cpdef double max(Vector self)
+    cpdef double min(Vector self)
     cdef void resize(Vector self, unsigned int new_size)
-    cdef void zeros(Vector self)
+    cpdef void zeros(Vector self)
+    cpdef double get(Vector self, unsigned int index)
+    cpdef void set(Vector self, unsigned int index, double value)
diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
index 4f09a3a..12a2172 100644
--- a/pomdp_py/utils/cvec.pyx
+++ b/pomdp_py/utils/cvec.pyx
@@ -1,190 +1,151 @@
-# cython: language_level=3 profile=True
+# cython: language_level=3, boundscheck=False, wraparound=False
 
 from __future__ import annotations
-cimport cython
 from libc.math cimport fmin, fmax
-from typing import Iterator
+import numpy as np
+cimport numpy as cnp
+from typing import Iterator, Iterable
+cnp.import_array()
 
-cdef vectord_t null_vector(unsigned int n_zeros) except *:
-    cdef vectord_t vec
-    vec.assign(n_zeros, 0.0)
-    return vec
+ArrayDtype_t = np.float64
 
+cdef inline Arrayf_t null_vector(unsigned int n_zeros):
+    return np.zeros((n_zeros,), ArrayDtype_t)
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cpdef vectord_t list_to_vectord(list[float] values):
-    cdef int length = len(values)
-    cdef unsigned int i = 0
-    cdef vectord_t rv = vectord_t(length)
 
-    if length > 0:
-        for i in range(length):
-            rv[i] = <double> values[i]
-    return rv
+cpdef inline Arrayf_t list_to_vectord(list[float] values):
+    return np.array(values, ArrayDtype_t)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cpdef list[float] vectord_to_list(vectord_t values):
-    cdef int length = len(values)
-    cdef unsigned int i = 0
-    cdef list[float] rv = list()
+cpdef inline list[float] vectord_to_list(Arrayf_t values):
+    return values.tolist()
+
+
+cdef inline bint vectors_are_not_same_size(double[:] v0, double[:] v1):
+    return v0.shape[0] != v1.shape[0]
 
-    if length > 0:
-        for i in range(length):
-            rv.append(float(values[i]))
-    return rv
 
+cdef inline bint vector_size_is_zero(double[:] v):
+    return v.shape[0] == 0
+
+
+cdef double vector_dot_prod(double[:] v0, double[:] v1) except *:
+    cdef unsigned int i = 0
+    cdef double _sum = 0.
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *:
-    if v0.size() != v1.size():
+    if vectors_are_not_same_size(v0, v1):
         raise ValueError("Both vectors must have the same size.")
-    if v0.size() == 0:
+    if vector_size_is_zero(v0):
         raise ValueError("Vectors should contain at least one value.")
 
-    cdef unsigned int i = 0
-    cdef double res = 0.0
-    for i in range(v0.size()):
-        res += (v0[i] * v1[i])
-    return res
+    for i in range(v0.shape[0]):
+        _sum += (v0[i] * v1[i])
+    return _sum
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *:
-    if v0.size() != v1.size():
+cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *:
+    if vectors_are_not_same_size(v0, v1):
         raise ValueError("Both vectors must have the same size.")
-    if v0.size() == 0:
+    if vector_size_is_zero(v0):
         raise ValueError("Vectors should contain at least one value.")
 
-    res = vectord_t(v0.size())
     cdef unsigned int i = 0
-    for i in range(v0.size()):
+    for i in range(v0.shape[0]):
         res[i] = v0[i] + v1[i]
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *:
-    if v.size() == 0:
+cdef void vector_adds(double[:] v, double scalar, double[:] res) except *:
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
-    res = vectord_t(v.size())
     cdef unsigned int i = 0
-    for i in range(v.size()):
+    for i in range(v.shape[0]):
         res[i] = v[i] + scalar
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef void vector_muls(double[:] v, double scalar,double[:] res) except *:
+    if vectors_are_not_same_size(v, res):
+        raise ValueError("Vectors v and res must be the same size.")
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
-    res = vectord_t(n_values)
     cdef unsigned int i = 0
-    for i in range(n_values):
+    for i in range(v.shape[0]):
         res[i] = v[i] * scalar
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *:
-    if v0.size() != v1.size():
+cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *:
+    if vectors_are_not_same_size(v0, v1):
         raise ValueError("Both vectors must have the same size.")
-    if v0.size() == 0:
+    if vector_size_is_zero(v0):
         raise ValueError("Vectors should contain at least one value.")
 
-    res = vectord_t(v0.size())
     cdef unsigned int i = 0
-    for i in range(v0.size()):
+    for i in range(v0.shape[0]):
         res[i] = v0[i] - v1[i]
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *:
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
-    res = vectord_t(n_values)
     cdef unsigned int i = 0
-    for i in range(n_values):
+    for i in range(v.shape[0]):
         res[i] = v[i] - scalar
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *:
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
-    res = vectord_t(n_values)
     cdef unsigned int i = 0
-    for i in range(n_values):
+    for i in range(v.shape[0]):
         res[i] = scalar - v[i]
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *:
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
     if scalar == 0.0:
         raise ZeroDivisionError("Scalar division by zero!")
 
-    res = vectord_t(n_values)
     cdef unsigned int i = 0
-    for i in range(n_values):
+    for i in range(v.shape[0]):
         res[i] = v[i] / scalar
 
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef double vector_max(const vectord_t& v) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef unsigned int vector_argmax(double[:] v) except *:
+    cdef int n_values = v.shape[0]
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
     if n_values == 1:
-        return v[0]
+        return 0
 
-    cdef double max_value = v[0]
+    cdef int max_idx = 0
     cdef int i = 0
     for i in range(1, n_values):
-        if v[i] > max_value:
-            max_value = v[i]
-    return max_value
+        if v[i] > v[max_idx]:
+            max_idx = i
+    return max_idx
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef double vector_min(const vectord_t& v) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef unsigned int vector_argmin(double[:] v) except *:
+    cdef int n_values = v.shape[0]
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
     if n_values == 1:
-        return v[0]
+        return 0
 
-    cdef double min_value = v[0]
+    cdef int min_idx = 0
     cdef int i = 0
     for i in range(1, n_values):
-        if v[i] < min_value:
-            min_value = v[i]
-    return min_value
+        if v[i] < v[min_idx]:
+            min_idx = i
+    return min_idx
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *:
-    cdef int n_values = v.size()
-    if n_values == 0:
+cdef void vector_clip(double[:] v, double min_value, double max_value) except *:
+    cdef int n_values = v.shape[0]
+    if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
     if min_value >= max_value:
         raise ValueError(
@@ -195,22 +156,18 @@ cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_v
         v[i] = fmax(min_value, fmin(max_value, v[i]))
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *:
-    cdef int n_values = src.size()
-    if n_values == 0:
+cdef void vector_copy(double[:] src, double[:] dst) except *:
+    if vector_size_is_zero(src):
         raise ValueError("Vector should contain at least one value.")
-    dst = vectord_t(n_values)
     cdef int i = 0
-    for i in range(n_values):
+    for i in range(src.shape[0]):
         dst[i] = src[i]
 
 
-cdef void vector_resize(vectord_t& v, unsigned int new_size):
+cdef void vector_resize(Arrayf_t v, unsigned int new_size):
     if new_size <= 0:
         raise ValueError("New vector size must be a positive integer.")
-    v.resize(new_size)
+    v = np.zeros((new_size,), dtype=v.dtype)
 
 
 cdef class Vector:
@@ -219,26 +176,14 @@ cdef class Vector:
     maintaining multiple values.
     """
 
-    def __init__(self, values: list | tuple = (0.0,)):
-        if not isinstance(values, (list, tuple)):
-            raise TypeError(f"Unhandled type: {type(values)}.")
-        if len(values) == 0:
-            raise ValueError("The length of values must have at least one value.")
-        if not all(isinstance(v, (float, int)) for v in values):
-            raise ValueError("All values must be type float or int.")
+    def __init__(self, values: Iterable[float] = (0.0,)):
+        # Perform a lazy conversion of the input values.
+        self._vals = list_to_vectord(list(values)).flatten()
+        self._res_buff = null_vector(self._vals.shape[0])
+        self._length = self._vals.shape[0]
 
-        cdef int i
-        cdef int n_values = len(values)
-        self._vals = vectord_t(n_values)
-        if n_values == 1:
-            self._vals[0] = values[0]
-        else:
-            for i in range(n_values):
-                self._vals[i] = values[i]
-        self._length = self._vals.size()
-
-    cdef bint _is_in_range(Vector self, int index):
-        return 0 <= index < self._length
+    cdef bint _index_is_out_of_range(Vector self, unsigned int index):
+        return index < 0 or self._length <= index
 
     def as_list(self) -> list[float]:
         """
@@ -246,34 +191,25 @@ cdef class Vector:
         """
         return vectord_to_list(self._vals)
 
-    def as_vector(self) -> vectord_t:
-        cdef vectord_t copy
-        vector_copy(self._vals, copy)
-        return copy
+    def as_vector(self) -> np.ndarray:
+        return self._vals[:]
 
-    @staticmethod
-    def clip(vec: Vector, min_value: float, max_value: float) -> Vector:
+    cpdef void clip(Vector self, double min_value, double max_value):
         """
         Clips the values within the value using the given min and max values.
         """
-        if not isinstance(vec, Vector):
-            raise TypeError("vec must be a Vector.")
-        cdef vectord_t rv = vec.as_vector()
-        vector_clip(rv, min_value, max_value)
-        return Vector(vectord_to_list(rv))
+        vector_clip(self._vals, min_value, max_value)
 
     cpdef Vector copy(Vector self):
         """
         Returns a copy of this vector.
         """
-        return Vector(self.as_list())
+        return Vector(self._vals)
 
     cpdef double dot(Vector self, Vector other):
         """
         Performs the dot product between two Vectors.
         """
-        if not isinstance(other, Vector):
-            raise TypeError("other must be type Vector.")
         return vector_dot_prod(self._vals, other._vals)
 
     @staticmethod
@@ -283,21 +219,28 @@ cdef class Vector:
     cpdef int len(Vector self):
         return self._length
 
-    cdef double max(Vector self):
-        return vector_max(self._vals)
+    cpdef unsigned int argmax(Vector self):
+        return vector_argmax(self._vals)
 
-    cdef double min(Vector self):
-        return vector_min(self._vals)
+    cpdef unsigned int argmin(Vector self):
+        return vector_argmin(self._vals)
+
+    cpdef double max(Vector self):
+        return self._vals[self.argmax()]
+
+    cpdef double min(Vector self):
+        return self._vals[self.argmin()]
 
     @staticmethod
     def null(n_zeros: int) -> Vector:
         return Vector.fill(0.0, n_zeros)
 
     cdef void resize(Vector self, unsigned int new_size):
-        vector_resize(self._vals, new_size)
-        self._length = self._vals.size()
+        self._vals = null_vector(new_size)
+        self._res_buff = null_vector(new_size)
+        self._length = self._vals.shape[0]
 
-    cdef void zeros(Vector self):
+    cpdef void zeros(Vector self):
         cdef int i
         if self._length == 1:
             self._vals[0] = 0.
@@ -305,20 +248,26 @@ cdef class Vector:
             for i in range(self._length):
                 self._vals[i] = 0.
 
-    def __getitem__(self, index: int) -> float:
-        index = int(index)
-        if not self._is_in_range(index):
-            raise IndexError(f"index is out-of-range.")
+    cpdef double get(Vector self, unsigned int index):
+        if self._index_is_out_of_range(index):
+            raise IndexError(
+                f"index ({index}) is out-of-range for length {self._length}."
+            )
         return self._vals[index]
 
-    def __setitem__(self, index: int, value: float) -> None:
-        index = int(index)
-        if not self._is_in_range(index):
-            raise IndexError(f"index is out-of-range.")
-        if not isinstance(value, float):
-            raise TypeError(f"value must be type float, but got type {type(value)}.")
+    cpdef void set(Vector self, unsigned int index, double value):
+        if self._index_is_out_of_range(index):
+            raise IndexError(
+                f"index ({index}) is out-of-range for length {self._length}."
+            )
         self._vals[index] = <double> value
 
+    def __getitem__(self, index: int) -> float:
+        return self.get(index)
+
+    def __setitem__(self, index: int, value: float) -> None:
+        self.set(index, value)
+
     def __iter__(self) -> Iterator:
         return iter(self._vals)
 

From 62fa04b487e63f0f869b0bdce7eb5eaebbeb307c Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 13:23:23 -0400
Subject: [PATCH 08/30] Updated and added tests.

---
 tests/test_util_vector_ops.py | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/tests/test_util_vector_ops.py b/tests/test_util_vector_ops.py
index 8cbdac5..5c66a5a 100644
--- a/tests/test_util_vector_ops.py
+++ b/tests/test_util_vector_ops.py
@@ -25,21 +25,25 @@ def test_as_list():
 
 def test_as_vector():
     v = Vector([1., 2., 3.])
-    assert v.as_vector() == [1., 2., 3.]
+    assert list(v.as_vector()) == [1., 2., 3.]
 
 
 def test_clip():
     v = Vector([2, 5, 7])
-    assert Vector.clip(v, 0, 10) == [2., 5., 7.]
+    v.clip(0, 10)
+    assert v == [2., 5., 7.]
 
     v = Vector([2, 5, 7])
-    assert Vector.clip(v, 0, 4) == [2., 4., 4.]
+    v.clip(0, 4)
+    assert v == [2., 4., 4.]
 
     v = Vector([2, 5, 7])
-    assert Vector.clip(v, 4, 10) == [4., 5., 7.]
+    v.clip(4, 10)
+    assert v == [4., 5., 7.]
 
     v = Vector([2, 5, 7])
-    assert Vector.clip(v, 3, 4) == [3., 4., 4.]
+    v.clip(3, 4)
+    assert v == [3., 4., 4.]
 
 
 def test_copy():
@@ -128,6 +132,22 @@ def test_str():
     assert str(v) == str([2., 4.])
 
 
+def test_min():
+    v = Vector([1., 2.])
+    assert v.min() == 1.
+
+    v = Vector([1., -90., 3.])
+    assert v.min() == -90.
+
+
+def test_max():
+    v = Vector([1., 2.])
+    assert v.max() == 2.
+
+    v = Vector([1., -90., 3.])
+    assert v.max() == 3.
+
+
 def run():
     test_assign()
     test_as_list()
@@ -145,6 +165,8 @@ def run():
     test_sub()
     test_truediv()
     test_str()
+    test_min()
+    test_max()
 
 
 if __name__ == "__main__":

From bd26db76f58649ec7af3dd139cd90f4e68a3f113 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 13:24:01 -0400
Subject: [PATCH 09/30] Added example problem for rocksample for CCPOMCP.

---
 pomdp_py/problems/cc_rocksample/__init__.py   |   0
 .../cc_rocksample/cc_rocksample_problem.py    | 235 ++++++++++++++++++
 2 files changed, 235 insertions(+)
 create mode 100644 pomdp_py/problems/cc_rocksample/__init__.py
 create mode 100644 pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py

diff --git a/pomdp_py/problems/cc_rocksample/__init__.py b/pomdp_py/problems/cc_rocksample/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
new file mode 100644
index 0000000..a859880
--- /dev/null
+++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
@@ -0,0 +1,235 @@
+from __future__ import annotations
+import copy
+import math
+import numpy as np
+import pomdp_py
+from pomdp_py.problems.rocksample.rocksample_problem import (
+    RockSampleProblem,
+    create_instance,
+    RSTransitionModel,
+    RSObservationModel,
+    RSPolicyModel,
+    CheckAction,
+    RSRewardModel,
+    init_particles_belief,
+    State,
+)
+
+
+class RSResponse(pomdp_py.Response):
+    """Reward/cost pair supporting ``+`` and scalar ``*`` (accumulation and discounting)."""
+    def __init__(self, reward: float = 0, cost: float = 0) -> None:
+        super().__init__()
+        self.reward = reward  # no int(): scaling by a float discount must not truncate
+        self.cost = cost  # no int(): e.g. int(1 * 0.95) would silently become 0
+
+    def __add__(self, other: RSResponse) -> RSResponse:
+        return RSResponse(self.reward + other.reward, self.cost + other.cost)
+
+    def __mul__(self, other: float | int) -> RSResponse:
+        return RSResponse(self.reward * other, self.cost * other)
+
+    def __str__(self) -> str:
+        return f"reward={self.reward}, cost={self.cost}"
+
+    # def copy(self) -> RSResponse:
+    #     return RSResponse(self.reward, self.cost)
+
+    @staticmethod
+    def null() -> RSResponse:
+        return RSResponse(reward=0, cost=0)
+
+
+class RSCostModel(pomdp_py.CostModel):
+
+    def sample(
+        self,
+        state: pomdp_py.State,
+        action: pomdp_py.Action,
+        next_state: pomdp_py.State,
+        reward: float = 0,
+        **kwargs,
+    ) -> int:
+        cost = 0
+        if reward < 0:
+            cost += 1
+        if isinstance(action, CheckAction):
+            cost += 1
+        return cost
+
+
+class RSResponseModel(pomdp_py.ResponseModel):
+    def __init__(
+        self,
+        reward_model: RSRewardModel,
+        cost_model: RSCostModel,
+    ):
+        super().__init__()
+        self._reward_model = reward_model
+        self._cost_model = cost_model
+
+    def null_response(self) -> RSResponse:
+        return RSResponse(reward=0, cost=0)
+
+    def sample(
+        self, state: pomdp_py.State, action: pomdp_py.Action, next_state: pomdp_py.State
+    ) -> RSResponse:
+        reward = self._reward_model.sample(
+            state=state, action=action, next_state=next_state
+        )
+        cost = self._cost_model.sample(
+            state=state, action=action, next_state=next_state, reward=reward
+        )
+        return RSResponse(reward, cost)
+
+
+class CCRockSampleProblem(RockSampleProblem):
+
+    def __init__(
+        self,
+        n_grid: int,
+        n_rocks: int,
+        init_state: State,
+        rock_locs: dict[tuple[int, int], int],
+        init_belief: pomdp_py.GenerativeDistribution,
+        half_efficiency_dist: int = 20,
+    ):
+        super().__init__(
+            n=n_grid,
+            k=n_rocks,
+            init_state=init_state,
+            rock_locs=rock_locs,
+            init_belief=init_belief,
+            half_efficiency_dist=half_efficiency_dist,
+        )
+
+    def build_agent(
+        self,
+        n: int,
+        k: int,
+        rock_locs: dict[tuple[int, int], int],
+        init_belief: pomdp_py.GenerativeDistribution,
+        half_efficiency_dist: int,
+    ) -> pomdp_py.ResponseAgent:
+        return pomdp_py.ResponseAgent(
+            init_belief=init_belief,
+            policy_model=RSPolicyModel(n, k),
+            transition_model=RSTransitionModel(n, rock_locs, self.in_exit_area),
+            observation_model=RSObservationModel(
+                rock_locs, half_efficiency_dist=half_efficiency_dist
+            ),
+            response_model=RSResponseModel(
+                reward_model=RSRewardModel(rock_locs, self.in_exit_area),
+                cost_model=RSCostModel(),
+
+            ),
+        )
+
+    def build_env(
+        self, n: int, init_state: State, rock_locs: dict[tuple[int, int], int]
+    ) -> pomdp_py.ResponseEnvironment:
+        return pomdp_py.ResponseEnvironment(
+            init_state=init_state,
+            transition_model=RSTransitionModel(n, rock_locs, self.in_exit_area),
+            response_model=RSResponseModel(
+                reward_model=RSRewardModel(rock_locs, self.in_exit_area),
+                cost_model=RSCostModel(),
+            ),
+        )
+
+
+def test_planner(
+    cc_rocksample: CCRockSampleProblem,
+    ccpomcp: pomdp_py.CCPOMCP,
+    nsteps: int = 3,
+    discount: float = 0.95,
+):
+    gamma: float = 1.0
+    total_response = RSResponse.null()
+    total_discounted_response = RSResponse.null()
+
+    for i in range(nsteps):
+        print("==== Step %d ====" % (i + 1))
+        action = ccpomcp.plan(cc_rocksample.agent)
+
+        true_state = copy.deepcopy(cc_rocksample.env.state)
+        env_response = cc_rocksample.env.state_transition(action, execute=True)
+
+        real_observation = cc_rocksample.env.provide_observation(
+            cc_rocksample.agent.observation_model, action
+        )
+        cc_rocksample.agent.update_history(action, real_observation)
+        ccpomcp.update(cc_rocksample.agent, action, real_observation)
+        total_response += env_response
+        total_discounted_response += (env_response * gamma)
+        gamma *= discount
+
+        print("True state: %s" % true_state)
+        print("Action: %s" % str(action))
+        print("Observation: %s" % str(real_observation))
+        print("Response: %s" % str(env_response))
+        print("Response (Cumulative): %s" % str(total_response))
+        print("Response (Cumulative Discounted): %s" % str(total_discounted_response))
+        print("__num_sims__: %d" % ccpomcp.last_num_sims)
+        print("__plan_time__: %.5f" % ccpomcp.last_planning_time)
+        print("World:")
+        cc_rocksample.print_state()
+
+        if cc_rocksample.in_exit_area(cc_rocksample.env.state.position):
+            break
+    return total_response, total_discounted_response
+
+
+def create_instance(n_grid: int, n_rocks: int) -> CCRockSampleProblem:
+    init_state, rock_locs = CCRockSampleProblem.generate_instance(n_grid, n_rocks)
+    belief = "uniform"
+    init_belief = init_particles_belief(n_rocks, 200, init_state, belief=belief)
+    return CCRockSampleProblem(
+        n_grid=n_grid,
+        n_rocks=n_rocks,
+        init_state=init_state,
+        rock_locs=rock_locs,
+        init_belief=init_belief,
+    )
+
+
+def main(n_grid: int = 7, n_rocks: int = 8) -> None:
+    cc_rocksample = create_instance(n_grid=n_grid, n_rocks=n_rocks)
+    cc_rocksample.print_state()
+
+    k_discount_factor = 0.95
+    k_max_depth = int(math.log(0.001) / math.log(k_discount_factor))
+    k_max_reward = 10
+    k_min_reward = -10
+
+    print("*** Testing CC-POMCP ***")
+    ccpomcp = pomdp_py.CCPOMCP(
+        r_diff=float(k_max_reward - k_min_reward),
+        alpha_n=1.0 / len(cc_rocksample.agent.cur_belief),
+        nu=1.0,
+        tau=1.0,
+        cost_constraint=1.0,
+        max_depth=k_max_depth,
+        discount_factor=k_discount_factor,
+        num_sims=10000,
+        exploration_const=20,
+        rollout_policy=cc_rocksample.agent.policy_model,
+        num_visits_init=1,
+    )
+    total_response, total_discounted_response = test_planner(
+        cc_rocksample=cc_rocksample,
+        ccpomcp=ccpomcp,
+        nsteps=10,
+        discount=k_discount_factor,
+    )
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ngrid", type=int, default=7)
+    parser.add_argument("--nrocks", type=int, default=8)
+    args = parser.parse_args()
+
+    main(n_grid=args.ngrid, n_rocks=args.nrocks)

From 2e68bfb57acc4b5625c96c7f1b3a506cf3e0d8b1 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 14:42:43 -0400
Subject: [PATCH 10/30] Added profiling for cython.

---
 pomdp_py/algorithms/ccpomcp.pxd       | 2 +-
 pomdp_py/algorithms/ccpomcp.pyx       | 2 --
 pomdp_py/framework/generalization.pxd | 2 +-
 pomdp_py/utils/cvec.pxd               | 2 +-
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd
index c2f657c..b37d027 100644
--- a/pomdp_py/algorithms/ccpomcp.pxd
+++ b/pomdp_py/algorithms/ccpomcp.pxd
@@ -1,4 +1,4 @@
-# cython: language_level=3
+# cython: language_level=3, profile=True
 
 from pomdp_py.algorithms.po_uct cimport QNode
 from pomdp_py.algorithms.pomcp cimport POMCP, VNode
diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
index 189a443..5e9ddf1 100644
--- a/pomdp_py/algorithms/ccpomcp.pyx
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 cimport cython
-from cython.parallel cimport prange
 from libc.math cimport log, sqrt, abs
 import math
 cimport numpy as cnp
@@ -327,7 +326,6 @@ cdef class CCPOMCP(POMCP):
 
     cpdef public Action plan(CCPOMCP self, Agent agent):
         cdef Action action
-        cdef _CCPolicyModel policy_dist
         cdef double time_taken
         cdef int sims_count
 
diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd
index cfba56c..99f254b 100644
--- a/pomdp_py/framework/generalization.pxd
+++ b/pomdp_py/framework/generalization.pxd
@@ -1,4 +1,4 @@
-# cython: language_level=3
+# cython: language_level=3, profile=True
 
 from __future__ import annotations
 from pomdp_py.framework.basics cimport (
diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd
index 0a0a46d..43e4dfe 100644
--- a/pomdp_py/utils/cvec.pxd
+++ b/pomdp_py/utils/cvec.pxd
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=False, wraparound=False
+# cython: language_level=3, boundscheck=False, wraparound=False, profile=True
 
 from __future__ import annotations
 cimport numpy as cnp

From 555bb686a0d479d9c8d2f6576d670348e06a531f Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 14:43:14 -0400
Subject: [PATCH 11/30] Limited nsteps for profiling in the cc_rocksample example.

---
 pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
index a859880..0492831 100644
--- a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
+++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
@@ -32,9 +32,6 @@ def __mul__(self, other: float | int) -> RSResponse:
     def __str__(self) -> str:
         return f"reward={self.reward}, cost={self.cost}"
 
-    # def copy(self) -> RSResponse:
-    #     return RSResponse(self.reward, self.cost)
-
     @staticmethod
     def null() -> RSResponse:
         return RSResponse(reward=0, cost=0)
@@ -219,7 +216,7 @@ def main(n_grid: int = 7, n_rocks: int = 8) -> None:
     total_response, total_discounted_response = test_planner(
         cc_rocksample=cc_rocksample,
         ccpomcp=ccpomcp,
-        nsteps=10,
+        nsteps=1,
         discount=k_discount_factor,
     )
 

From c759d42a2f3f41d070742ec1f3f73eb2a1acaf82 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 14:43:32 -0400
Subject: [PATCH 12/30] Limited nsteps for profiling in the rocksample example.

---
 pomdp_py/problems/rocksample/rocksample_problem.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py
index f1b7671..c920602 100644
--- a/pomdp_py/problems/rocksample/rocksample_problem.py
+++ b/pomdp_py/problems/rocksample/rocksample_problem.py
@@ -556,7 +556,8 @@ def main():
         rollout_policy=rocksample.agent.policy_model,
         num_visits_init=1,
     )
-    tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95)
+    # tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95)
+    tt, ttd = test_planner(rocksample, pomcp, nsteps=1, discount=0.95)
 
 
 if __name__ == "__main__":

From e89975403ff0cc67bc011ef52b77232b3f090dd7 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 14:43:59 -0400
Subject: [PATCH 13/30] Added code for profiling.

---
 pomdp_py/problems/cc_rocksample/profile_code.py | 11 +++++++++++
 pomdp_py/problems/rocksample/profile_code.py    | 11 +++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 pomdp_py/problems/cc_rocksample/profile_code.py
 create mode 100644 pomdp_py/problems/rocksample/profile_code.py

diff --git a/pomdp_py/problems/cc_rocksample/profile_code.py b/pomdp_py/problems/cc_rocksample/profile_code.py
new file mode 100644
index 0000000..bd9dfb4
--- /dev/null
+++ b/pomdp_py/problems/cc_rocksample/profile_code.py
@@ -0,0 +1,11 @@
+import pstats, cProfile
+
+import pyximport
+pyximport.install()
+
+import cc_rocksample_problem
+
+cProfile.runctx("cc_rocksample_problem.main()", globals(), locals(), "fastProfile.prof")
+
+s = pstats.Stats("fastProfile.prof")
+s.strip_dirs().sort_stats("tottime").print_stats()
diff --git a/pomdp_py/problems/rocksample/profile_code.py b/pomdp_py/problems/rocksample/profile_code.py
new file mode 100644
index 0000000..fc612f0
--- /dev/null
+++ b/pomdp_py/problems/rocksample/profile_code.py
@@ -0,0 +1,11 @@
+import pstats, cProfile
+
+import pyximport
+pyximport.install()
+
+import rocksample_problem
+
+cProfile.runctx("rocksample_problem.main()", globals(), locals(), "fastProfile.prof")
+
+s = pstats.Stats("fastProfile.prof")
+s.strip_dirs().sort_stats("tottime").print_stats()

From 5ddaaaec45cf937cfc14a1d27613e80af2bb6677 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:08:43 -0400
Subject: [PATCH 14/30] Removed except * from c functions.

---
 pomdp_py/utils/cvec.pyx | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
index 12a2172..fefd02e 100644
--- a/pomdp_py/utils/cvec.pyx
+++ b/pomdp_py/utils/cvec.pyx
@@ -29,7 +29,7 @@ cdef inline bint vector_size_is_zero(double[:] v):
     return v.shape[0] == 0
 
 
-cdef double vector_dot_prod(double[:] v0, double[:] v1) except *:
+cdef double vector_dot_prod(double[:] v0, double[:] v1):
     cdef unsigned int i = 0
     cdef double _sum = 0.
 
@@ -43,7 +43,7 @@ cdef double vector_dot_prod(double[:] v0, double[:] v1) except *:
     return _sum
 
 
-cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *:
+cdef void vector_add(double[:] v0, double[:] v1, double[:] res):
     if vectors_are_not_same_size(v0, v1):
         raise ValueError("Both vectors must have the same size.")
     if vector_size_is_zero(v0):
@@ -54,7 +54,7 @@ cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *:
         res[i] = v0[i] + v1[i]
 
 
-cdef void vector_adds(double[:] v, double scalar, double[:] res) except *:
+cdef void vector_adds(double[:] v, double scalar, double[:] res):
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
@@ -63,7 +63,7 @@ cdef void vector_adds(double[:] v, double scalar, double[:] res) except *:
         res[i] = v[i] + scalar
 
 
-cdef void vector_muls(double[:] v, double scalar,double[:] res) except *:
+cdef void vector_muls(double[:] v, double scalar,double[:] res):
     if vectors_are_not_same_size(v, res):
         raise ValueError("Vectors v and res must be the same size.")
     if vector_size_is_zero(v):
@@ -74,7 +74,7 @@ cdef void vector_muls(double[:] v, double scalar,double[:] res) except *:
         res[i] = v[i] * scalar
 
 
-cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *:
+cdef void vector_sub(double[:] v0, double[:] v1, double[:] res):
     if vectors_are_not_same_size(v0, v1):
         raise ValueError("Both vectors must have the same size.")
     if vector_size_is_zero(v0):
@@ -85,7 +85,7 @@ cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *:
         res[i] = v0[i] - v1[i]
 
 
-cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *:
+cdef void vector_subvs(double[:] v, double scalar, double[:] res):
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
@@ -94,7 +94,7 @@ cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *:
         res[i] = v[i] - scalar
 
 
-cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *:
+cdef void vector_subsv(double scalar, double[:] v, double[:] res):
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
 
@@ -103,7 +103,7 @@ cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *:
         res[i] = scalar - v[i]
 
 
-cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *:
+cdef void vector_scalar_div(double[:] v, double scalar, double[:] res):
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
     if scalar == 0.0:
@@ -113,7 +113,7 @@ cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *:
     for i in range(v.shape[0]):
         res[i] = v[i] / scalar
 
-cdef unsigned int vector_argmax(double[:] v) except *:
+cdef unsigned int vector_argmax(double[:] v):
     cdef int n_values = v.shape[0]
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
@@ -128,7 +128,7 @@ cdef unsigned int vector_argmax(double[:] v) except *:
     return max_idx
 
 
-cdef unsigned int vector_argmin(double[:] v) except *:
+cdef unsigned int vector_argmin(double[:] v):
     cdef int n_values = v.shape[0]
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
@@ -143,7 +143,7 @@ cdef unsigned int vector_argmin(double[:] v) except *:
     return min_idx
 
 
-cdef void vector_clip(double[:] v, double min_value, double max_value) except *:
+cdef void vector_clip(double[:] v, double min_value, double max_value):
     cdef int n_values = v.shape[0]
     if vector_size_is_zero(v):
         raise ValueError("Vector should contain at least one value.")
@@ -156,7 +156,7 @@ cdef void vector_clip(double[:] v, double min_value, double max_value) except *:
         v[i] = fmax(min_value, fmin(max_value, v[i]))
 
 
-cdef void vector_copy(double[:] src, double[:] dst) except *:
+cdef void vector_copy(double[:] src, double[:] dst):
     if vector_size_is_zero(src):
         raise ValueError("Vector should contain at least one value.")
     cdef int i = 0

From 0b8830774da18c7ea0c7b898e61efa7d14e9d0a5 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:44:15 -0400
Subject: [PATCH 15/30] Minor additions.

---
 pomdp_py/algorithms/ccpomcp.pyx | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
index 5e9ddf1..6c731f5 100644
--- a/pomdp_py/algorithms/ccpomcp.pyx
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3
+# cython: language_level=3, profile=True
 
 from __future__ import annotations
 cimport cython
@@ -152,6 +152,9 @@ cdef class _CCPolicyActionData:
     def avg_cost_value(self) -> Vector:
         return self._avg_cost_value
 
+    def __str__(self) -> str:
+        return f"prob: {self._prob}, cost: {self._cost_value}, avg_cost: {self._avg_cost_value}"
+
 
 cdef class _CCPolicyModel(PolicyModel):
     def __init__(self) -> None:
@@ -170,8 +173,13 @@ cdef class _CCPolicyModel(PolicyModel):
         )
         self._prob_sum += prob
         if self._prob_sum > 1.0:
+            error_str = ""
+            for action, datum in self._data.items():
+                error_str += f"  action={action} | datum={datum}\n"
             raise RuntimeError(
-                "Too much actions were added. The probability sum is greater than one!"
+                f"Too much actions were added. The probability sum {self._prob_sum} is greater than one! "
+                "Actions added:\n"
+                + error_str
             )
 
     cpdef void clear(_CCPolicyModel self):
@@ -549,7 +557,7 @@ cdef class CCPOMCP(POMCP):
                 self._greedy_policy_model.add(action_min, min_prob, vnode[action_min])
                 self._greedy_policy_model.add(action_max, 1.-min_prob, vnode[action_max])
 
-    cdef void _init_lambda_fn(CCPOMCP self) except *:
+    cdef void _init_lambda_fn(CCPOMCP self):
         if self._use_random_lambda:
             self._lambda = Vector(
                 np.random.uniform(
@@ -563,6 +571,9 @@ cdef class CCPOMCP(POMCP):
             self._lambda.zeros()
 
     cpdef _perform_simulation(self, state):
+        cdef double lambda_vec_max
+        cdef Action action
+
         super(CCPOMCP, self)._perform_simulation(state=state)
 
         # Sample using the greedy policy. This greedy policy corresponds to the first
@@ -725,7 +736,7 @@ cdef class CCPOMCP(POMCP):
     cdef void _update_cost_constraint(
         CCPOMCP self,
         Action sampled_action
-    ) except *:
+    ):
         cdef double action_prob, prob_prime
         cdef Action action_prime
         cdef list[Action] action_prime_list
@@ -764,7 +775,7 @@ cdef double _compute_visits_ratio(
     double visits_num,
     double visits_denom,
     double explore_const,
-) except *:
+):
     if visits_denom == 0.0:
         return DBL_MIN
     else:
@@ -774,7 +785,7 @@ cdef double _compute_visits_ratio(
 cdef double _get_ccqnode_scalar_cost(
     VNode node,
     Action action
-) except *:
+):
     if action not in node:
         raise KeyError(f"Action {action} does not exist in node.")
     return node[action].cost_value[0]

From dd4705ff23ee9ebfd976ab9c3738e9e3e275cde5 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:44:33 -0400
Subject: [PATCH 16/30] Enabled Cython profiling in generalization.pyx.

---
 pomdp_py/framework/generalization.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx
index 223482b..3ed27c8 100644
--- a/pomdp_py/framework/generalization.pyx
+++ b/pomdp_py/framework/generalization.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3
+# cython: language_level=3, profile=True
 
 from __future__ import annotations
 from pomdp_py.framework.basics cimport (

From b946db07cd8abeb08c11ff37ff38a9ba62d4a5cd Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:44:55 -0400
Subject: [PATCH 17/30] Enabled Cython profiling in cvec.pyx.

---
 pomdp_py/utils/cvec.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
index fefd02e..ab3ed30 100644
--- a/pomdp_py/utils/cvec.pyx
+++ b/pomdp_py/utils/cvec.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=False, wraparound=False
+# cython: language_level=3, boundscheck=False, wraparound=False, profile=True
 
 from __future__ import annotations
 from libc.math cimport fmin, fmax

From f166ad87792c78345ccb983b25a84183046c251e Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:45:39 -0400
Subject: [PATCH 18/30] Minor changes.

---
 pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
index 0492831..d674b59 100644
--- a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
+++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
@@ -216,7 +216,7 @@ def main(n_grid: int = 7, n_rocks: int = 8) -> None:
     total_response, total_discounted_response = test_planner(
         cc_rocksample=cc_rocksample,
         ccpomcp=ccpomcp,
-        nsteps=1,
+        nsteps=100,
         discount=k_discount_factor,
     )
 

From fa8dac1366499fd048745898cd49b681f72405e9 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 18:44:50 -0400
Subject: [PATCH 19/30] Added the comments to function calls.

---
 pomdp_py/utils/cvec.pyx | 199 +++++++++++++++++++++++++++++++---------
 1 file changed, 155 insertions(+), 44 deletions(-)

diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
index ab3ed30..84e18cb 100644
--- a/pomdp_py/utils/cvec.pyx
+++ b/pomdp_py/utils/cvec.pyx
@@ -9,45 +9,74 @@ cnp.import_array()
 
 ArrayDtype_t = np.float64
 
+
 cdef inline Arrayf_t null_vector(unsigned int n_zeros):
     return np.zeros((n_zeros,), ArrayDtype_t)
 
 
 cpdef inline Arrayf_t list_to_vectord(list[float] values):
+    """
+    Converts a list of numbers to an underlying array data structure.
+    
+    Args:
+        values (list[float]): The values to place in the array. 
+
+    Returns:
+        An array.
+    """
     return np.array(values, ArrayDtype_t)
 
 
 cpdef inline list[float] vectord_to_list(Arrayf_t values):
-    return values.tolist()
-
+    """
+    Converts an array to a list of float.
+    
+    Args:
+        values (array): The values to place in a list. 
 
-cdef inline bint vectors_are_not_same_size(double[:] v0, double[:] v1):
-    return v0.shape[0] != v1.shape[0]
+    Returns:
+        A list of float.
+    """
+    return values.tolist()
 
 
 cdef inline bint vector_size_is_zero(double[:] v):
     return v.shape[0] == 0
 
 
+cdef inline bint vectors2_are_not_valid(double[:] v0, double[:] v1):
+    # True when the two sizes differ or when v0 is empty.
+    # Logical `or` is deliberate: bitwise `|` binds tighter than `!=`, so
+    # `a != b | c` would be parsed as `a != (b | c)`.
+    return v0.shape[0] != v1.shape[0] or vector_size_is_zero(v0)
+
+
+cdef inline bint vectors3_are_not_valid(double[:] v0, double[:] v1, double[:] v2):
+    # True when any size differs or v0 is empty. Logical `or` is required:
+    # with `|`, the test parses as a chained `v0 != (v1 | v1) != (v2 | empty)`,
+    # which reports a mismatch only when both adjacent pairs differ.
+    return (v0.shape[0] != v1.shape[0] or v1.shape[0] != v2.shape[0]
+            or vector_size_is_zero(v0))
+
+
 cdef double vector_dot_prod(double[:] v0, double[:] v1):
+    if vectors2_are_not_valid(v0, v1):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
+
     cdef unsigned int i = 0
     cdef double _sum = 0.
-
-    if vectors_are_not_same_size(v0, v1):
-        raise ValueError("Both vectors must have the same size.")
-    if vector_size_is_zero(v0):
-        raise ValueError("Vectors should contain at least one value.")
-
     for i in range(v0.shape[0]):
         _sum += (v0[i] * v1[i])
     return _sum
 
 
 cdef void vector_add(double[:] v0, double[:] v1, double[:] res):
-    if vectors_are_not_same_size(v0, v1):
-        raise ValueError("Both vectors must have the same size.")
-    if vector_size_is_zero(v0):
-        raise ValueError("Vectors should contain at least one value.")
+    if vectors3_are_not_valid(v0, v1, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v0.shape[0]):
@@ -55,19 +84,21 @@ cdef void vector_add(double[:] v0, double[:] v1, double[:] res):
 
 
 cdef void vector_adds(double[:] v, double scalar, double[:] res):
-    if vector_size_is_zero(v):
-        raise ValueError("Vector should contain at least one value.")
+    if vectors2_are_not_valid(v, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v.shape[0]):
         res[i] = v[i] + scalar
 
 
-cdef void vector_muls(double[:] v, double scalar,double[:] res):
-    if vectors_are_not_same_size(v, res):
-        raise ValueError("Vectors v and res must be the same size.")
-    if vector_size_is_zero(v):
-        raise ValueError("Vector should contain at least one value.")
+cdef void vector_muls(double[:] v, double scalar, double[:] res):
+    if vectors2_are_not_valid(v, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v.shape[0]):
@@ -75,10 +106,10 @@ cdef void vector_muls(double[:] v, double scalar,double[:] res):
 
 
 cdef void vector_sub(double[:] v0, double[:] v1, double[:] res):
-    if vectors_are_not_same_size(v0, v1):
-        raise ValueError("Both vectors must have the same size.")
-    if vector_size_is_zero(v0):
-        raise ValueError("Vectors should contain at least one value.")
+    if vectors3_are_not_valid(v0, v1, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v0.shape[0]):
@@ -86,8 +117,10 @@ cdef void vector_sub(double[:] v0, double[:] v1, double[:] res):
 
 
 cdef void vector_subvs(double[:] v, double scalar, double[:] res):
-    if vector_size_is_zero(v):
-        raise ValueError("Vector should contain at least one value.")
+    if vectors2_are_not_valid(v, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v.shape[0]):
@@ -95,8 +128,10 @@ cdef void vector_subvs(double[:] v, double scalar, double[:] res):
 
 
 cdef void vector_subsv(double scalar, double[:] v, double[:] res):
-    if vector_size_is_zero(v):
-        raise ValueError("Vector should contain at least one value.")
+    if vectors2_are_not_valid(v, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v.shape[0]):
@@ -104,10 +139,10 @@ cdef void vector_subsv(double scalar, double[:] v, double[:] res):
 
 
 cdef void vector_scalar_div(double[:] v, double scalar, double[:] res):
-    if vector_size_is_zero(v):
-        raise ValueError("Vector should contain at least one value.")
-    if scalar == 0.0:
-        raise ZeroDivisionError("Scalar division by zero!")
+    if vectors2_are_not_valid(v, res):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
 
     cdef unsigned int i = 0
     for i in range(v.shape[0]):
@@ -157,19 +192,15 @@ cdef void vector_clip(double[:] v, double min_value, double max_value):
 
 
 cdef void vector_copy(double[:] src, double[:] dst):
-    if vector_size_is_zero(src):
-        raise ValueError("Vector should contain at least one value.")
+    if vectors2_are_not_valid(src, dst):
+        raise ValueError(
+            "All vectors must have the same size and contain at least one value."
+        )
     cdef int i = 0
     for i in range(src.shape[0]):
         dst[i] = src[i]
 
 
-cdef void vector_resize(Arrayf_t v, unsigned int new_size):
-    if new_size <= 0:
-        raise ValueError("New vector size must be a positive integer.")
-    v = np.zeros((new_size,), dtype=v.dtype)
-
-
 cdef class Vector:
     """
     The Vector class. Provides an implementation of a vector for
@@ -192,11 +223,18 @@ cdef class Vector:
         return vectord_to_list(self._vals)
 
     def as_vector(self) -> np.ndarray:
+        """
+        Returns the internal array of values.
+        """
         return self._vals[:]
 
     cpdef void clip(Vector self, double min_value, double max_value):
         """
         Clips the values within the value using the given min and max values.
+        
+        Args:
+            min_value (double): The minimum value that will be in the vector.
+            max_value (double): The maximum value that will be in the vector.
         """
         vector_clip(self._vals, min_value, max_value)
 
@@ -209,46 +247,112 @@ cdef class Vector:
     cpdef double dot(Vector self, Vector other):
         """
         Performs the dot product between two Vectors.
+        
+        Args:
+            other (Vector): The other vector to perform the dot product with.
+
+        Returns:
+            The dot product of this and the other Vector.
         """
         return vector_dot_prod(self._vals, other._vals)
 
     @staticmethod
     def fill(value: float, n_values: int) -> Vector:
+        """
+        Creates a new Vector and fills it with the given value.
+
+        Args:
+            value (float): The value to fill the Vector with.
+            n_values (int): The number of values to fill the Vector with.
+
+        Returns:
+            A new Vector containing n_values copies of value.
+        """
         return Vector([value] * n_values)
 
     cpdef int len(Vector self):
+        """
+        Returns the length of the Vector.
+        """
         return self._length
 
     cpdef unsigned int argmax(Vector self):
+        """
+        Returns the index of the max value.
+        """
         return vector_argmax(self._vals)
 
     cpdef unsigned int argmin(Vector self):
+        """
+        Returns the index of the min value.
+        """
         return vector_argmin(self._vals)
 
     cpdef double max(Vector self):
+        """
+        Returns the max value.
+        """
         return self._vals[self.argmax()]
 
     cpdef double min(Vector self):
+        """
+        Returns the min value.
+        """
         return self._vals[self.argmin()]
 
     @staticmethod
     def null(n_zeros: int) -> Vector:
+        """
+        Creates a null Vector with N zeros.
+
+        Args:
+            n_zeros (int): The number of zeros to place in the Vector.
+
+        Returns:
+            A Vector with all zeros.
+        """
         return Vector.fill(0.0, n_zeros)
 
     cdef void resize(Vector self, unsigned int new_size):
-        self._vals = null_vector(new_size)
-        self._res_buff = null_vector(new_size)
-        self._length = self._vals.shape[0]
+        """
+        Resizes the Vector to the given size; all values are reset to zero.
+
+        When new_size equals the current length the existing buffers are
+        zeroed in place; otherwise new zero-filled buffers are allocated.
+
+        Args:
+            new_size (unsigned int): The new size of the Vector.
+        """
+        if new_size == self._length:
+            self.zeros()
+        else:
+            self._vals = null_vector(new_size)
+            self._res_buff = null_vector(new_size)
+            self._length = self._vals.shape[0]
 
     cpdef void zeros(Vector self):
+        """
+        Overwrites the Vector with all zeros.
+        """
         cdef int i
         if self._length == 1:
             self._vals[0] = 0.
+            self._res_buff[0] = 0.
         else:
             for i in range(self._length):
                 self._vals[i] = 0.
+                self._res_buff[i] = 0.
 
     cpdef double get(Vector self, unsigned int index):
+        """
+        Retrieves the value at the given index.
+        
+        Args:
+            index (unsigned int): The index.
+
+        Returns:
+            The value at the index.
+        """
         if self._index_is_out_of_range(index):
             raise IndexError(
                 f"index ({index}) is out-of-range for length {self._length}."
@@ -256,6 +360,13 @@ cdef class Vector:
         return self._vals[index]
 
     cpdef void set(Vector self, unsigned int index, double value):
+        """
+        Sets the value at the given index.
+        
+        Args:
+            index (unsigned int): The index. 
+            value (double): The value to set.
+        """
         if self._index_is_out_of_range(index):
             raise IndexError(
                 f"index ({index}) is out-of-range for length {self._length}."

From 19e779ae899cc3785d549b227c19c3cf20fdd00b Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Thu, 18 Apr 2024 18:45:17 -0400
Subject: [PATCH 20/30] Removed except * from function names.

---
 pomdp_py/utils/cvec.pxd | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd
index 43e4dfe..dbf48af 100644
--- a/pomdp_py/utils/cvec.pxd
+++ b/pomdp_py/utils/cvec.pxd
@@ -10,22 +10,23 @@ ctypedef cnp.ndarray Arrayf_t
 cdef Arrayf_t null_vector(unsigned int n_zeros)
 cpdef Arrayf_t list_to_vectord(list[float] values)
 cpdef list[float] vectord_to_list(Arrayf_t values)
-cdef bint vectors_are_not_same_size(double[:] v0, double[:] v1)
 cdef bint vector_size_is_zero(double[:] v)
-
-cdef double vector_dot_prod(double[:] v0, double[:] v1) except *
-cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *
-cdef void vector_adds(double[:] v, double scalar, double[:] res) except *
-cdef void vector_muls(double[:] v, double scalar, double[:] res) except *
-cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *
-cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *
-cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *
-cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *
-
-cdef unsigned int vector_argmax(double[:] v) except *
-cdef unsigned int vector_argmin(double[:] v) except *
-cdef void vector_clip(double[:] v, double min_value, double max_value) except *
-cdef void vector_copy(double[:] src, double[:] dst) except *
+cdef bint vectors2_are_not_valid(double[:] v0, double[:] v1)
+cdef bint vectors3_are_not_valid(double[:] v0, double[:] v1, double[:] v2)
+
+cdef double vector_dot_prod(double[:] v0, double[:] v1)
+cdef void vector_add(double[:] v0, double[:] v1, double[:] res)
+cdef void vector_adds(double[:] v, double scalar, double[:] res)
+cdef void vector_muls(double[:] v, double scalar, double[:] res)
+cdef void vector_sub(double[:] v0, double[:] v1, double[:] res)
+cdef void vector_subvs(double[:] v, double scalar, double[:] res)
+cdef void vector_subsv(double scalar, double[:] v, double[:] res)
+cdef void vector_scalar_div(double[:] v, double scalar, double[:] res)
+
+cdef unsigned int vector_argmax(double[:] v)
+cdef unsigned int vector_argmin(double[:] v)
+cdef void vector_clip(double[:] v, double min_value, double max_value)
+cdef void vector_copy(double[:] src, double[:] dst)
 
 
 cdef class Vector:

From a7e666cba9dfc2f0f072b964eabfb59fdc502ae6 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:43:35 -0400
Subject: [PATCH 21/30] Added _create_qnode function to reduce code.

---
 pomdp_py/algorithms/ccpomcp.pxd |   4 +-
 pomdp_py/algorithms/ccpomcp.pyx | 204 ++++++++++++++++++++++----------
 pomdp_py/algorithms/po_uct.pxd  |   1 +
 pomdp_py/algorithms/po_uct.pyx  |  22 +++-
 4 files changed, 161 insertions(+), 70 deletions(-)

diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd
index b37d027..230716f 100644
--- a/pomdp_py/algorithms/ccpomcp.pxd
+++ b/pomdp_py/algorithms/ccpomcp.pxd
@@ -1,4 +1,4 @@
-# cython: language_level=3, profile=True
+# cython: language_level=3
 
 from pomdp_py.algorithms.po_uct cimport QNode
 from pomdp_py.algorithms.pomcp cimport POMCP, VNode
@@ -52,7 +52,7 @@ cdef class CCPOMCP(POMCP):
     cdef _CCPolicyModel _greedy_policy_model
 
     cpdef public Action plan(CCPOMCP self, Agent agent)
-    cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *)
+    cpdef QNode _create_qnode(self, tuple qnode_params = *)
     cpdef void _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu)
     cdef void _init_lambda_fn(CCPOMCP self)
     cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action)
diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx
index 6c731f5..f90d3ae 100644
--- a/pomdp_py/algorithms/ccpomcp.pyx
+++ b/pomdp_py/algorithms/ccpomcp.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, profile=True
+# cython: language_level=3
 
 from __future__ import annotations
 cimport cython
@@ -21,20 +21,22 @@ from typing import Optional
 cnp.import_array()
 
 
-cdef double DBL_MIN = <double> -1e200
-cdef double DBL_MAX = <double> 1e200
+cdef double NEG_INFINITY = <double> -1e10
+cdef double POS_INFINITY = <double> 1e10
 
 
 cdef class CostModel:
     """
+    A CostModel models the distribution :math:`\Pr(c|s,a,s')` where
+    :math:`c\in\mathbb{C}`.
     """
 
     def probability(
-            self,
-            cost: float | Vector,
-            state: State,
-            action: Action,
-            next_state: State
+        self,
+        cost: float | Vector,
+        state: State,
+        action: Action,
+        next_state: State
     ) -> float:
         """
         probability(self, cost, state, action, next_state)
@@ -52,11 +54,11 @@ cdef class CostModel:
         raise NotImplementedError
 
     def sample(
-            self,
-            state: State,
-            action: Action,
-            next_state: State,
-            **kwargs,
+        self,
+        state: State,
+        action: Action,
+        next_state: State,
+        **kwargs,
     ) -> float | Vector:
         """
         sample(self, state, action, next_state)
@@ -103,6 +105,9 @@ cdef class CCQNode(QNode):
 
     @property
     def avg_cost_value(self) -> Vector:
+        """
+        The average cost value (Vector).
+        """
         return self._avg_cost_value
 
     @avg_cost_value.setter
@@ -116,6 +121,9 @@ cdef class CCQNode(QNode):
 
     @property
     def cost_value(self) -> Vector:
+        """
+        The cost value.
+        """
         return self._cost_value
 
     @cost_value.setter
@@ -135,6 +143,10 @@ cdef class CCQNode(QNode):
         )
 
 cdef class _CCPolicyActionData:
+    """
+    A data structure used internally within _CCPolicyModel. Stores the probability, cost
+    value, and average cost value for a particular CCQNode.
+    """
     def __init__(self, double prob, Vector cost_value, Vector avg_cost_value):
         self._prob = prob
         self._cost_value = cost_value
@@ -142,21 +154,31 @@ cdef class _CCPolicyActionData:
 
     @property
     def prob(self) -> float:
+        """The probability of a corresponding action."""
         return self._prob
 
     @property
     def cost_value(self) -> Vector:
+        """The cost value for a corresponding action."""
         return self._cost_value
 
     @property
     def avg_cost_value(self) -> Vector:
+        """The average cost value for a corresponding action."""
         return self._avg_cost_value
 
     def __str__(self) -> str:
-        return f"prob: {self._prob}, cost: {self._cost_value}, avg_cost: {self._avg_cost_value}"
+        return (
+            f"prob: {self._prob}, cost: {self._cost_value}, "
+            f"avg_cost: {self._avg_cost_value}"
+        )
 
 
 cdef class _CCPolicyModel(PolicyModel):
+    """
+    A derived policy class used internally within the CCPOMCP algorithm for sampling
+    actions and updating the cost constraint.
+    """
     def __init__(self) -> None:
         super().__init__()
         self._data = dict()
@@ -166,6 +188,16 @@ cdef class _CCPolicyModel(PolicyModel):
         return self._prob_sum != 1.0
 
     cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node):
+        """
+        Adds an action, its probability, and the cost information from a CCQNode.
+        Raises a RuntimeError if the accumulated probability becomes greater
+        than one.
+        
+        Args:
+            action (Action): The action. 
+            prob (double): The probability. 
+            node (CCQNode): The CCQNode.
+        """
         self._data[action] = _CCPolicyActionData(
             prob=prob,
             cost_value=node.cost_value,
@@ -177,16 +209,27 @@ cdef class _CCPolicyModel(PolicyModel):
             for action, datum in self._data.items():
                 error_str += f"  action={action} | datum={datum}\n"
             raise RuntimeError(
-                f"Too much actions were added. The probability sum {self._prob_sum} is greater than one! "
+                "Too much actions were added. "
+                f"The probability sum {self._prob_sum} is greater than one! "
                 "Actions added:\n"
                 + error_str
             )
 
     cpdef void clear(_CCPolicyModel self):
+        """Clears the internal data structures."""
         self._data.clear()
         self._prob_sum = 0.0
 
     cpdef Vector action_avg_cost(_CCPolicyModel self, Action action):
+        """
+        Returns the average cost value for a given action.
+        
+        Args:
+            action (Action): The action.
+
+        Returns:
+            The average cost value (Vector).
+        """
         if self._total_prob_is_not_one():
             raise RuntimeError(
                 "Tried to get action avg cost when total probability != 1.0."
@@ -196,6 +239,15 @@ cdef class _CCPolicyModel(PolicyModel):
         return self._data[action].cost_value
 
     cpdef Vector action_cost_value(_CCPolicyModel self, Action action):
+        """
+        Returns the cost value for a given action.
+
+        Args:
+            action (Action): The action.
+
+        Returns:
+            The cost value (Vector).
+        """
         if self._total_prob_is_not_one():
             raise RuntimeError(
                 "Tried to get action cost value when total probability != 1.0."
@@ -205,6 +257,16 @@ cdef class _CCPolicyModel(PolicyModel):
         return self._data[action].avg_cost_value
 
     cdef public float probability(_CCPolicyModel self, Action action, State state):
+        """
+        Returns the probability for a given action.
+
+        Args:
+            action (Action): The action.
+            state (State): Currently ignored.
+
+        Returns:
+            The probability (float).
+        """
         if self._total_prob_is_not_one():
             raise RuntimeError(
                 "Tried to get action probability when total probability != 1.0."
@@ -214,6 +276,15 @@ cdef class _CCPolicyModel(PolicyModel):
         return self._data[action].prob
 
     cdef public Action sample(_CCPolicyModel self, State state):
+        """
+        Samples an action using the underlying probability distribution.
+        
+        Args:
+            state (State): Currently ignored.
+
+        Returns:
+            The sampled action (Action).
+        """
         if self._prob_sum != 1.0:
             raise RuntimeError("Tried to sample with a total probability != 1.0.")
 
@@ -221,7 +292,21 @@ cdef class _CCPolicyModel(PolicyModel):
             return list(self._data.keys())[0]
         return np.random.choice(np.array(list(self._data.keys()), dtype=object))
 
-    def get_all_actions(self, state: Optional[State] = None, history: Optional[tuple] = None):
+    def get_all_actions(
+        self,
+        state: Optional[State] = None,
+        history: Optional[tuple] = None
+    ):
+        """
+        Returns all the stored actions.
+
+        Args:
+            state (State): Currently ignored.
+            history (tuple): Currently ignored.
+
+        Returns:
+            The list of actions (list[Action]).
+        """
         return list(self._data.keys())
 
 
@@ -307,16 +392,15 @@ cdef class CCPOMCP(POMCP):
 
         # Initialize lambda, cost constraint, and cost value init.
         if isinstance(cost_constraint, list):
-            self._n_constraints = len(cost_constraint)
             if len(cost_value_init) != len(cost_value_init):
                 raise ValueError(
                     "The cost constraint and cost value init must have the same length."
                 )
         else:
-            self._n_constraints = 1
             cost_constraint = [cost_constraint]
             cost_value_init = [cost_value_init] if cost_value_init is not None else [0.0]
 
+        self._n_constraints = len(cost_constraint)
         self._lambda = Vector.null(self._n_constraints)
         self._cost_value_init = list(cost_value_init)
         self._cost_constraint = Vector(cost_constraint)
@@ -333,6 +417,15 @@ cdef class CCPOMCP(POMCP):
         self._greedy_policy_model = _CCPolicyModel()
 
     cpdef public Action plan(CCPOMCP self, Agent agent):
+        """
+        Determines the next action to perform.
+        
+        Args:
+            agent (ResponseAgent): The agent used to plan. 
+
+        Returns:
+            The next action.
+        """
         cdef Action action
         cdef double time_taken
         cdef int sims_count
@@ -362,8 +455,7 @@ cdef class CCPOMCP(POMCP):
 
         # Then get the policy distribution, sample from it,
         # and update the cost constraint.
-        _, time_taken, sims_count = self._search()
-        action = self._greedy_policy_model.sample(state=None)
+        action, time_taken, sims_count = self._search()
         self._update_cost_constraint(action)
 
         # Update stats.
@@ -372,27 +464,20 @@ cdef class CCPOMCP(POMCP):
 
         return action
 
-    cpdef _expand_vnode(
-        CCPOMCP self,
-        VNode vnode,
-        tuple history,
-        State state = None,
+    cpdef QNode _create_qnode(
+        self,
+        tuple qnode_params = tuple()
     ):
-        cdef Action action
+        cdef int num_visits_init
+        cdef double value_init
+        cdef list[float] cost_value_init
 
-        for action in self._agent.valid_actions(state=state, history=history):
-            if vnode[action] is None:
-                vnode[action] = CCQNode(
-                    self._num_visits_init, self._value_init, self._cost_value_init
-                )
+        if len(qnode_params) == 3:
+            # Expand the tuple and set the new CCQNode.
+            num_visits_init, value_init, cost_value_init = qnode_params
+            return CCQNode(num_visits_init, value_init, cost_value_init)
 
-        if self._action_prior is not None:
-            # Using action prior; special values are set;
-            for preference in self._action_prior.get_preferred_actions(state, history):
-                action, num_visits_init, value_init = preference
-                vnode[action] = CCQNode(
-                    self._num_visits_init, self._value_init, self._cost_value_init
-                )
+        return CCQNode(self._num_visits_init, self._value_init, self._cost_value_init)
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
@@ -509,8 +594,8 @@ cdef class CCPOMCP(POMCP):
 
             # Find a_max and a_min, the actions with the max and min scalar costs
             # from the list of best actions.
-            max_cost_value = DBL_MIN
-            min_cost_value = DBL_MAX
+            max_cost_value = NEG_INFINITY
+            min_cost_value = POS_INFINITY
 
             for i in range(n_best_actions):
                 cost_value = _get_ccqnode_scalar_cost(vnode, best_action_list[i])
@@ -524,14 +609,14 @@ cdef class CCPOMCP(POMCP):
                     max_cost_value = cost_value
 
             # Sanity checks.
-            if max_cost_value == DBL_MIN:
+            if max_cost_value == NEG_INFINITY:
                 raise RuntimeError(
-                    f"Max cost value ({max_cost_value}) must be more than {DBL_MIN}. "
+                    f"Max cost value ({max_cost_value}) must be more than {NEG_INFINITY}. "
                     f"Note: there are {n_best_actions} best actions. An error exists!"
                 )
-            if min_cost_value == DBL_MAX:
+            if min_cost_value == POS_INFINITY:
                 raise RuntimeError(
-                    f"Min cost value ({min_cost_value}) must be less than {DBL_MAX}. "
+                    f"Min cost value ({min_cost_value}) must be less than {POS_INFINITY}. "
                     f"Note: there are {n_best_actions} best actions. An error exists!"
                 )
 
@@ -558,15 +643,14 @@ cdef class CCPOMCP(POMCP):
                 self._greedy_policy_model.add(action_max, 1.-min_prob, vnode[action_max])
 
     cdef void _init_lambda_fn(CCPOMCP self):
+        cdef cnp.ndarray rand_vec
+        cdef double value
+        cdef int i
         if self._use_random_lambda:
-            self._lambda = Vector(
-                np.random.uniform(
-                    0.00001,
-                    1.0,
-                    size=self._cost_constraint.len()
-                ).tolist()
-            )
-
+            rand_vec = np.random.uniform(0.00001, 1.0, size=self._n_constraints)
+            for i in range(self._n_constraints):
+                value = rand_vec[i]
+                self._lambda.set(i, value)
         else:
             self._lambda.zeros()
 
@@ -625,23 +709,18 @@ cdef class CCPOMCP(POMCP):
         cdef Action action
         cdef double time_taken
         cdef int sims_count
-        # cdef PolicyModel policy_dist
 
         # Initialize the lambda vector.
         self._init_lambda_fn()
 
-        # Run the _search(...) method in the super class.
-        action, time_taken, sims_count = super(CCPOMCP, self)._search()
+        # Run the _search(...) method in the super class. Ignore this action.
+        _, time_taken, sims_count = super(CCPOMCP, self)._search()
 
         # After the search times out, create a policy using the greedy method.
         # This greedy policy corresponds to the last call in the search(h_0) function.
-        # policy_dist = self._greedy_policy(
-        #     self._agent.tree,
-        #     0.0,
-        #     self._nu,
-        # )
         self._greedy_policy(self._agent.tree, 0.0, self._nu)
-        return None, time_taken, sims_count
+        action = self._greedy_policy_model.sample(state=None)
+        return action, time_taken, sims_count
 
     cpdef Response _simulate(
         CCPOMCP self,
@@ -656,7 +735,6 @@ cdef class CCPOMCP(POMCP):
         cdef int nsteps = 1
         cdef Action action
         cdef State next_state
-        cdef _CCPolicyModel policy_dist
 
         if depth > self._max_depth:
             return self._null_response
@@ -777,7 +855,7 @@ cdef double _compute_visits_ratio(
     double explore_const,
 ):
     if visits_denom == 0.0:
-        return DBL_MIN
+        return NEG_INFINITY
     else:
         return explore_const * sqrt(visits_num / visits_denom)
 
diff --git a/pomdp_py/algorithms/po_uct.pxd b/pomdp_py/algorithms/po_uct.pxd
index 6f66fff..f2b83cd 100644
--- a/pomdp_py/algorithms/po_uct.pxd
+++ b/pomdp_py/algorithms/po_uct.pxd
@@ -42,6 +42,7 @@ cdef class POUCT(Planner):
                     State state, tuple history, VNode root, QNode parent,
                     Observation observation, int depth)
 
+    cpdef QNode _create_qnode(self, tuple qnode_params = *)
     cpdef _expand_vnode(self, VNode vnode, tuple history, State state=*)
     cpdef _rollout(self, State state, tuple history, VNode root, int depth)
     cpdef Action _ucb(self, VNode root)
diff --git a/pomdp_py/algorithms/po_uct.pyx b/pomdp_py/algorithms/po_uct.pyx
index c0f0266..f11533d 100644
--- a/pomdp_py/algorithms/po_uct.pyx
+++ b/pomdp_py/algorithms/po_uct.pyx
@@ -295,6 +295,20 @@ cdef class POUCT(Planner):
         """
         self._rollout_policy = rollout_policy
 
+    cpdef QNode _create_qnode(
+        self,
+        tuple qnode_params = tuple()  # optional (num_visits_init, value_init) pair
+    ):  # Factory hook: builds a QNode from qnode_params or from this planner's defaults.
+        cdef int num_visits_init
+        cdef double value_init
+
+        if len(qnode_params) == 2:
+            # Explicit initial values were supplied (e.g. by an action prior): unpack and use them.
+            num_visits_init, value_init = qnode_params
+            return QNode(num_visits_init, value_init)
+
+        return QNode(self._num_visits_init, self._value_init)  # any other length: planner defaults
+
     cpdef _expand_vnode(self, VNode vnode, tuple history, State state=None):
         cdef Action action
         cdef tuple preference
@@ -303,17 +317,15 @@ cdef class POUCT(Planner):
 
         for action in self._agent.valid_actions(state=state, history=history):
             if vnode[action] is None:
-                history_action_node = QNode(self._num_visits_init,
-                                            self._value_init)
+                history_action_node = self._create_qnode()
                 vnode[action] = history_action_node
 
         if self._action_prior is not None:
             # Using action prior; special values are set;
             for preference in \
                 self._action_prior.get_preferred_actions(state, history):
-                action, num_visits_init, value_init = preference
-                history_action_node = QNode(num_visits_init,
-                                            value_init)
+                action = preference[0]
+                history_action_node = self._create_qnode(preference[1:])
                 vnode[action] = history_action_node
 
     cpdef _search(self):

From 1a99ff765cbbbe643d0fca5d598bb9247b770dde Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:43:57 -0400
Subject: [PATCH 22/30] Minor update.

---
 pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
index d674b59..fcb0252 100644
--- a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
+++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 import copy
 import math
-import numpy as np
 import pomdp_py
 from pomdp_py.problems.rocksample.rocksample_problem import (
     RockSampleProblem,

From 57c0568809388505999d3b10fe45721ca4b1d741 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:44:49 -0400
Subject: [PATCH 23/30] Removed unneeded test.

---
 tests/test_framework_generalization.py | 29 --------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 tests/test_framework_generalization.py

diff --git a/tests/test_framework_generalization.py b/tests/test_framework_generalization.py
deleted file mode 100644
index eda6069..0000000
--- a/tests/test_framework_generalization.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from pomdp_py.framework.generalization import Vector, RewardCost
-
-description = "testing framework generalization"
-
-
-def test_assign():
-    v = Vector()
-    assert v == [0.]
-
-    v = Vector((2, 4, 8))
-    assert v == [2., 4., 8.]
-
-    v = Vector()
-    assert v != [1.]
-
-
-def test_add():
-    r = RewardCost(0., Vector([0., 10.])) + RewardCost(10., Vector([90., 13.]))
-    assert r.reward == 10.
-    assert r.cost == [90., 23.]
-    
-
-def run():
-    test_assign()
-    test_add()
-
-
-if __name__ == "__main__":
-    run()
\ No newline at end of file

From ce008e8845d6d5f6b36de3d17deb82d2c1210a42 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:58:25 -0400
Subject: [PATCH 24/30] Added code comments.

---
 pomdp_py/framework/generalization.pxd |  2 +-
 pomdp_py/framework/generalization.pyx | 54 +++++++++++++++++++++++----
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd
index 99f254b..cfba56c 100644
--- a/pomdp_py/framework/generalization.pxd
+++ b/pomdp_py/framework/generalization.pxd
@@ -1,4 +1,4 @@
-# cython: language_level=3, profile=True
+# cython: language_level=3
 
 from __future__ import annotations
 from pomdp_py.framework.basics cimport (
diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx
index 3ed27c8..baac0c3 100644
--- a/pomdp_py/framework/generalization.pyx
+++ b/pomdp_py/framework/generalization.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, profile=True
+# cython: language_level=3
 
 from __future__ import annotations
 from pomdp_py.framework.basics cimport (
@@ -28,10 +28,12 @@ cdef class Response:
     """
 
     def copy(self) -> Response:
+        """Returns a copy of a Response."""
         raise NotImplementedError
 
     @staticmethod
     def null() -> Response:
+        """Returns a null Response. This is equivalent to a 'zero' reward."""
         raise NotImplementedError
 
     def __add__(self, other: Response) -> Response:
@@ -78,16 +80,28 @@ cdef class ResponseModel:
         pass
 
     def null_response(self) -> Response:
+        """Returns a null Response."""
         raise NotImplementedError
 
     def sample(self, state: State, action: Action, next_state: State) -> Response:
+        """
+        Samples a response given the state, action, and next state.
+
+        Args:
+            state (State): The state.
+            action (Action): The action.
+            next_state (State): The next state.
+
+        Returns:
+            The sampled response (Response).
+        """
         raise NotImplementedError
 
 
 cdef class ResponseAgent(Agent):
     """
     A `ResponseAgent` behaves the same as an `Agent` with one difference: a
-    `ReponseAgent` adds a `ResponseModel`. The `ResponseAgent` also provides direct
+    `ResponseAgent` adds a `ResponseModel`. The `ResponseAgent` can also provide direct
     access to the models maintained in the `ResponseModel` to reduce the wordiness of
     the code.
     """
@@ -131,6 +145,7 @@ cdef class ResponseAgent(Agent):
 
     @property
     def response_model(self) -> ResponseModel:
+        """Returns the response model."""
         if self._response_model is None:
             raise ValueError(
                 "response_model is None. Call set_response_model to set a model."
@@ -138,6 +153,12 @@ cdef class ResponseAgent(Agent):
         return self._response_model
 
     def set_response_model(self, response_model: ResponseModel) -> None:
+        """
+        Sets the response model.
+
+        Args:
+            response_model (ResponseModel): The response model.
+        """
         if not isinstance(response_model, ResponseModel):
             raise TypeError(
                 f"model must be type ResponseModel, but got type {type(response_model)}."
@@ -146,6 +167,12 @@ cdef class ResponseAgent(Agent):
 
 
 cdef class ResponseEnvironment(Environment):
+    """
+    A `ResponseEnvironment` is the same as an `Environment` with one difference: a
+    `ResponseEnvironment` adds a `ResponseModel`. The `ResponseEnvironment` can also
+    provide direct access to the models maintained in the `ResponseModel` to reduce
+    the wordiness of the code.
+    """
 
     def __init__(
         self,
@@ -175,8 +202,7 @@ cdef class ResponseEnvironment(Environment):
     @property
     def response_model(self) -> ResponseModel:
         """
-        Returns:
-            The ResponseModel.
+        Returns the ResponseModel.
         """
         return self._response_model
 
@@ -194,9 +220,6 @@ cdef class ResponseEnvironment(Environment):
             response_model (ResponseModel): The response model.
             blackbox_model (BlackboxModel): Provided when the transition model and
                 response model are not available.
-
-        Returns:
-            None
         """
         super().set_models(
             transition_model=transition_model,
@@ -257,6 +280,23 @@ cpdef tuple[State, Observation, Response, int] sample_generative_model_with_resp
     Response null_response,
     float discount_factor = 1.0
 ):
+    """
+    Samples the next state, observation, and response from the underlying models. It also
+    returns the number of steps performed during sampling.
+    
+    Args:
+        T (TransitionModel): The transition model. 
+        O (ObservationModel): The observation model.
+        R (ResponseModel): The response model.
+        state (State): The current state. 
+        action (Action): The action. 
+        null_response (Response): A null response.
+        discount_factor (float): The discount factor. Default = 1.
+
+    Returns:
+        A tuple of the next state (State), observation (Observation), 
+        response (Response), and the number of steps performed (int).
+    """
     cdef State next_state
     cdef Observation observation
     cdef Response response = null_response

From 915b4c0737ae02086f329f10ba898461e5a26e40 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:59:05 -0400
Subject: [PATCH 25/30] Removed profiling.

---
 pomdp_py/utils/cvec.pxd | 2 +-
 pomdp_py/utils/cvec.pyx | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd
index dbf48af..acd8c56 100644
--- a/pomdp_py/utils/cvec.pxd
+++ b/pomdp_py/utils/cvec.pxd
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=False, wraparound=False, profile=True
+# cython: language_level=3, boundscheck=False, wraparound=False
 
 from __future__ import annotations
 cimport numpy as cnp
diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx
index 84e18cb..36a603b 100644
--- a/pomdp_py/utils/cvec.pyx
+++ b/pomdp_py/utils/cvec.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=False, wraparound=False, profile=True
+# cython: language_level=3, boundscheck=False, wraparound=False
 
 from __future__ import annotations
 from libc.math cimport fmin, fmax

From 900b6a8ed3ca87bf6e1cce44c67aa4c946ec7d56 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 00:59:53 -0400
Subject: [PATCH 26/30] Removed profiling code.

---
 pomdp_py/problems/cc_rocksample/profile_code.py | 11 -----------
 pomdp_py/problems/rocksample/profile_code.py    | 11 -----------
 2 files changed, 22 deletions(-)
 delete mode 100644 pomdp_py/problems/cc_rocksample/profile_code.py
 delete mode 100644 pomdp_py/problems/rocksample/profile_code.py

diff --git a/pomdp_py/problems/cc_rocksample/profile_code.py b/pomdp_py/problems/cc_rocksample/profile_code.py
deleted file mode 100644
index bd9dfb4..0000000
--- a/pomdp_py/problems/cc_rocksample/profile_code.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import pstats, cProfile
-
-import pyximport
-pyximport.install()
-
-import cc_rocksample_problem
-
-cProfile.runctx("cc_rocksample_problem.main()", globals(), locals(), "fastProfile.prof")
-
-s = pstats.Stats("fastProfile.prof")
-s.strip_dirs().sort_stats("tottime").print_stats()
diff --git a/pomdp_py/problems/rocksample/profile_code.py b/pomdp_py/problems/rocksample/profile_code.py
deleted file mode 100644
index fc612f0..0000000
--- a/pomdp_py/problems/rocksample/profile_code.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import pstats, cProfile
-
-import pyximport
-pyximport.install()
-
-import rocksample_problem
-
-cProfile.runctx("rocksample_problem.main()", globals(), locals(), "fastProfile.prof")
-
-s = pstats.Stats("fastProfile.prof")
-s.strip_dirs().sort_stats("tottime").print_stats()

From cc2e21808e16927392b8202ba90c6a2b20220324 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 01:09:01 -0400
Subject: [PATCH 27/30] Changed nsteps to 100.

---
 pomdp_py/problems/rocksample/rocksample_problem.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py
index c920602..f1b7671 100644
--- a/pomdp_py/problems/rocksample/rocksample_problem.py
+++ b/pomdp_py/problems/rocksample/rocksample_problem.py
@@ -556,8 +556,7 @@ def main():
         rollout_policy=rocksample.agent.policy_model,
         num_visits_init=1,
     )
-    # tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95)
-    tt, ttd = test_planner(rocksample, pomcp, nsteps=1, discount=0.95)
+    tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95)
 
 
 if __name__ == "__main__":

From cad3e92a502d1a0f6fef5883e0cdfa98d43d7938 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 01:28:06 -0400
Subject: [PATCH 28/30] Corrected the description for the Response class.

---
 pomdp_py/framework/generalization.pyx | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx
index baac0c3..6008de6 100644
--- a/pomdp_py/framework/generalization.pyx
+++ b/pomdp_py/framework/generalization.pyx
@@ -19,12 +19,10 @@ from typing import Optional
 
 cdef class Response:
     """
-    A Response class maintains variables within a dictionary.
-    However, subclasses of Response can provide access to the
-    dictionary variables using the dot (.) operator. Currently,
-    this class can handle arithmetic and comparison operations.
-    However, if special operations will need to be performed,
-    these operations need to be handled in the subclass.
+    A Response class serves as the output of ResponseModel. This 
+    class should be derived for specific problems. All operations,
+    including arithmetic, comparison, null, and copy must be
+    implemented in subclasses.
     """
 
     def copy(self) -> Response:

From 835449de18ac20d99faf043c1f6b9921c7583547 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 19 Apr 2024 01:30:39 -0400
Subject: [PATCH 29/30] Removed print statement used for debugging.

---
 pomdp_py/problems/rocksample/rocksample_problem.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py
index f1b7671..c2ab772 100644
--- a/pomdp_py/problems/rocksample/rocksample_problem.py
+++ b/pomdp_py/problems/rocksample/rocksample_problem.py
@@ -330,7 +330,6 @@ class RSPolicyModel(pomdp_py.RolloutPolicy):
 
     def __init__(self, n, k):
         check_actions = set({CheckAction(rock_id) for rock_id in range(k)})
-        print(check_actions)
         self._move_actions = {MoveEast, MoveWest, MoveNorth, MoveSouth}
         self._other_actions = {SampleAction()} | check_actions
         self._all_actions = self._move_actions | self._other_actions

From cf334207c93469a339c2f7d1ca1fac60c0bc91d4 Mon Sep 17 00:00:00 2001
From: Troi Williams <40696868+troiwill@users.noreply.github.com>
Date: Fri, 17 May 2024 12:48:04 -0400
Subject: [PATCH 30/30] Fixed issue with missing numpy dependency during pip
 install.

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2a27e90..61e5270 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools", "wheel", "Cython>=3.0.8"]
+requires = ["setuptools", "wheel", "Cython>=3.0.8", "numpy>=1.18.0"]
 build-backend = "setuptools.build_meta"
 
 [project]