From 034cd45ebb63ccd01da1d1b057f0d6892344908b Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:31:14 -0400 Subject: [PATCH 01/30] Initial commit for Vector, GenericResponse, RewardCost, and an initial test file. --- pomdp_py/framework/generalization.pyx | 146 +++++++++++++++++++++++++ tests/test_framework_generalization.py | 29 +++++ 2 files changed, 175 insertions(+) create mode 100644 pomdp_py/framework/generalization.pyx create mode 100644 tests/test_framework_generalization.py diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx new file mode 100644 index 0000000..04c3e1a --- /dev/null +++ b/pomdp_py/framework/generalization.pyx @@ -0,0 +1,146 @@ +from __future__ import annotations +from functools import cached_property +from typing import Iterable, Iterator, Union + + +cdef class Vector: + """ + The Vector class. Provides an implementation of a vector for maintaining multiple values. + """ + cdef list[float] vals + + def __init__(self, values: float | int | Iterable[float | int] = list([0.])): + cdef list _vec = list() + if isinstance(values, (float, int)): + _vec.append(values) + elif isinstance(values, (list, tuple)): + _vec += list(values) + else: + raise TypeError(f"values must be type int, float, list, or tuple, but got {type(values)}.") + + # Store the values as an array of floats. 
+ self.vals = list(float(v) for v in _vec) + + @cached_property + def values(self) -> list[float]: + return self.vals.copy() + + def __iter__(self) -> Iterator: + return iter(self.vals) + + def __len__(self) -> int: + return len(self.vals) + + def __eq__(self, other: Vector | list) -> bool: + if not isinstance(other, (Vector, list)): + raise TypeError(f"other must be type Vector or list, but got {type(other)}.") + return len(self) == len(other) and all(v0 == v1 for v0, v1 in zip(self, other)) + + def __add__(self, other: Vector | list | float | int) -> Vector: + if isinstance(other, (float, int)): + vec = [other] * len(self) + elif isinstance(other, Vector): + vec = other + else: + raise TypeError(f"other must be type Vector, float, or int, but got {type(other)}.") + return Vector([v0 + v1 for v0, v1 in zip(self, vec)]) + + def __radd__(self, other): + return self.__add__(other) + + def __mul__(self, other): + if not isinstance(other, (float, int)): + raise TypeError(f"other must be type float or int, but got {type(other)}.") + return Vector([v * other for v in self]) + + def __rmul__(self, other): + return self.__mul__(other) + + def __str__(self) -> str: + return str(self.vals) + + +ResponseVariableType = Union[float, Vector] + + +cdef class GenericResponse: + """ + A GenericResponse class maintains variables within a dictionary. However, subclasses of GenericResponse + can provide access to the dictionary variables using the dot (.) operator. Currently, this class can + handle arithmetic and comparison operations. However, if special operations will need to be performed, + these operations need to be handled in the subclass. 
+ """ + cdef dict __dict__ + + def __init__(self, dict response_dict = dict()): + self.__dict__.update(response_dict.copy()) + + cpdef void _check_reward_compatibility(self, value): + if not isinstance(value, GenericResponse): + raise TypeError(f"other must be type GenericResponse, float, or int, but got {type(value)}.") + + cdef dict add_response(self, GenericResponse other): + self._check_reward_compatibility(other) + cdef dict rv = dict() + for name, value in self.__dict__.items(): + rv.update({name: value + other.__dict__[name]}) + return rv + + def __add__(self, other: GenericResponse) -> GenericResponse: + return GenericResponse(self.add_response(other)) + + def __radd__(self, other: GenericResponse) -> GenericResponse: + return self.__add__(other) + + cpdef dict mul_scalar(self, float other): + if not isinstance(other, float): + raise TypeError("other must be type float or int.") + cdef dict rv = dict() + for name, value in self.__dict__.items(): + rv.update({name: value * other}) + return rv + + def __mul__(self, other: float | int) -> GenericResponse: + return GenericResponse(self.mul_scalar(other)) + + def __rmul__(self, other) -> GenericResponse: + return self.__mul__(other) + + def __eq__(self, other: GenericResponse) -> bool: + self._check_reward_compatibility(other) + return all(value == other.__dict__[name] for name, value in self.__dict__.items()) + + def __ne__(self, other) -> bool: + self._check_reward_compatibility(other) + return all(value != other.__dict__[name] for name, value in self.__dict__.items()) + + def __lt__(self, other) -> bool: + self._check_reward_compatibility(other) + return all(value < other.__dict__[name] for name, value in self.__dict__.items()) + + def __le__(self, other) -> bool: + self._check_reward_compatibility(other) + return all(value <= other.__dict__[name] for name, value in self.__dict__.items()) + + def __gt__(self, other) -> bool: + self._check_reward_compatibility(other) + return all(value > other.__dict__[name] 
for name, value in self.__dict__.items()) + + def __ge__(self, other) -> bool: + self._check_reward_compatibility(other) + return all(value >= other.__dict__[name] for name, value in self.__dict__.items()) + + def __str__(self) -> str: + return ", ".join([f"{name}={values}" for name, values in self.__dict__.items()]) + + +cdef class RewardCost(GenericResponse): + + def __init__(self, float reward=0.0, Vector cost=Vector()): + super().__init__({"reward": reward, "cost": cost}) + + def __add__(self, other: RewardCost) -> RewardCost: + return RewardCost(**self.add_response(other)) + + def __mul__(self, other: float) -> RewardCost: + return RewardCost(**self.mul_scalar(other)) diff --git a/tests/test_framework_generalization.py b/tests/test_framework_generalization.py new file mode 100644 index 0000000..eda6069 --- /dev/null +++ b/tests/test_framework_generalization.py @@ -0,0 +1,29 @@ +from pomdp_py.framework.generalization import Vector, RewardCost + +description = "testing framework generalization" + + +def test_assign(): + v = Vector() + assert v == [0.] + + v = Vector((2, 4, 8)) + assert v == [2., 4., 8.] + + v = Vector() + assert v != [1.] + + +def test_add(): + r = RewardCost(0., Vector([0., 10.])) + RewardCost(10., Vector([90., 13.])) + assert r.reward == 10. + assert r.cost == [90., 23.] + + +def run(): + test_assign() + test_add() + + +if __name__ == "__main__": + run() \ No newline at end of file From 5b3321a7db659e22f7a82456bb24f8bebd86044f Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Tue, 16 Apr 2024 16:52:09 -0400 Subject: [PATCH 02/30] Updated ignore file. 
--- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index a287569..5971777 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,8 @@ Thumbs.db *.pg .DS_Store + +# Cython debugging files +######################## +*.c +*.html From 9223f9421840e92f4143f6e7ae67b0aafbf81c2b Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Tue, 16 Apr 2024 17:03:23 -0400 Subject: [PATCH 03/30] Added CCPOMCP algorithm and dependencies; added test script for Vector; added example problem for CCPOMCP. --- pomdp_py/__init__.py | 6 + pomdp_py/__main__.py | 7 +- pomdp_py/algorithms/ccpomcp.pxd | 57 ++ pomdp_py/algorithms/ccpomcp.pyx | 770 ++++++++++++++++++ pomdp_py/algorithms/pomcp.pxd | 5 +- pomdp_py/framework/generalization.pxd | 32 + pomdp_py/framework/generalization.pyx | 403 ++++++--- .../problems/rocksample/rocksample_problem.py | 21 +- pomdp_py/utils/cvec.pxd | 38 + pomdp_py/utils/cvec.pyx | 352 ++++++++ setup.py | 4 +- tests/test_util_vector_ops.py | 151 ++++ 12 files changed, 1715 insertions(+), 131 deletions(-) create mode 100644 pomdp_py/algorithms/ccpomcp.pxd create mode 100644 pomdp_py/algorithms/ccpomcp.pyx create mode 100644 pomdp_py/framework/generalization.pxd create mode 100644 pomdp_py/utils/cvec.pxd create mode 100644 pomdp_py/utils/cvec.pyx create mode 100644 tests/test_util_vector_ops.py diff --git a/pomdp_py/__init__.py b/pomdp_py/__init__.py index 057c552..80471d3 100644 --- a/pomdp_py/__init__.py +++ b/pomdp_py/__init__.py @@ -2,6 +2,7 @@ # Framework from pomdp_py.framework.basics import * +from pomdp_py.framework.generalization import * from pomdp_py.framework.oopomdp import * from pomdp_py.framework.planner import * @@ -22,6 +23,10 @@ # Algorithms from pomdp_py.algorithms.value_iteration import ValueIteration # Cython compiled from pomdp_py.algorithms.value_function import value, qvalue, belief_update +from pomdp_py.algorithms.ccpomcp import ( + CostModel, + CCPOMCP, +) from 
pomdp_py.algorithms.pomcp import POMCP from pomdp_py.algorithms.po_rollout import PORollout from pomdp_py.algorithms.po_uct import ( @@ -38,3 +43,4 @@ # Templates & Utilities from pomdp_py.utils.templates import * from pomdp_py.utils.debugging import TreeDebugger +from pomdp_py.utils.cvec import Vector diff --git a/pomdp_py/__main__.py b/pomdp_py/__main__.py index 22c04a7..059d075 100644 --- a/pomdp_py/__main__.py +++ b/pomdp_py/__main__.py @@ -1,6 +1,6 @@ import argparse -available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload"] +available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload", "ccrocksample"] def parse_args(): @@ -45,6 +45,11 @@ def parse_args(): main() + elif args.run.lower() == "ccrocksample": + from pomdp_py.problems.cc_rocksample.cc_rocksample_problem import main + + main() + else: print("Unrecognized pomdp: {}".format(args.run)) diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd new file mode 100644 index 0000000..0c5235c --- /dev/null +++ b/pomdp_py/algorithms/ccpomcp.pxd @@ -0,0 +1,57 @@ +from pomdp_py.algorithms.po_uct cimport QNode +from pomdp_py.algorithms.pomcp cimport POMCP, VNode +from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation +from pomdp_py.framework.generalization cimport Response +from pomdp_py.utils.cvec cimport Vector + + +cdef class CostModel: + pass + + +cdef class CCQNode(QNode): + cdef Vector _cost_value + cdef Vector _avg_cost_value + + +cdef class _CCPolicyActionData: + cdef double _prob + cdef Vector _cost_value + cdef Vector _avg_cost_value + + +cdef class _CCPolicyModel(PolicyModel): + cdef dict[Action, _CCPolicyActionData] _data + + cpdef Vector action_avg_cost(_CCPolicyModel self, Action action) + cpdef Vector action_cost_value(_CCPolicyModel self, Action action) + cdef public float probability(_CCPolicyModel self, Action action, State state) + cdef public Action sample(_CCPolicyModel self, State state) + + +cdef class 
CCPOMCP(POMCP): + cdef double _r_diff + cdef double _tau + cdef double _alpha_n + cdef Vector _lambda + cdef Vector _cost_constraint + cdef Response _null_response + cdef bint _use_random_lambda + cdef bint _clip_lambda + cdef double _nu + cdef list[float] _cost_value_init + cdef unsigned int _n_constraints + + cpdef public Action plan(CCPOMCP self, Agent agent) + cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *) + cpdef _CCPolicyModel _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu) + cdef void _init_lambda_fn(CCPOMCP self) + cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action) + cpdef _search(CCPOMCP self) + cpdef Response _simulate(CCPOMCP self, State state, tuple history, VNode root, QNode parent, + Observation observation, int depth) + cdef void _update_cost_constraint(CCPOMCP self, _CCPolicyModel policy_dist, Action sampled_action) + + +cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const = *) +cdef double _get_ccqnode_scalar_cost(VNode node, Action action) diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx new file mode 100644 index 0000000..a0fde23 --- /dev/null +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -0,0 +1,770 @@ +# cython: profile=True + +from __future__ import annotations +cimport cython +from libc.math cimport log, sqrt, exp, abs +import math +cimport numpy as cnp +import numpy as np +from pomdp_py.algorithms.po_uct cimport QNode, ActionPrior +from pomdp_py.algorithms.pomcp cimport POMCP +from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation +from pomdp_py.framework.generalization cimport ( + Response, + ResponseAgent, + sample_generative_model_with_response +) +from pomdp_py.representations.distribution.histogram cimport Histogram +from pomdp_py.representations.distribution.particles cimport Particles +from pomdp_py.utils import typ +from 
pomdp_py.utils.cvec cimport Vector +from typing import Optional +cnp.import_array() + + +cdef double DBL_MIN = -1e200 +cdef double DBL_MAX = 1e200 + + +cdef class CostModel: + """ + """ + + def probability( + self, + cost: float | Vector, + state: State, + action: Action, + next_state: State + ) -> float: + """ + probability(self, cost, state, action, next_state) + Returns the probability of :math:`\Pr(c|s,a,s')`. + + Args: + cost (float or ~pomdp_py.framework.generalization.Vector): the cost :math:`c` + state (~pomdp_py.framework.basics.State): the state :math:`s` + action (~pomdp_py.framework.basics.Action): the action :math:`a` + next_state (State): the next state :math:`s'` + + Returns: + float: the probability :math:`\Pr(c|s,a,s')` + """ + raise NotImplementedError + + def sample( + self, + state: State, + action: Action, + next_state: State, + **kwargs, + ) -> float | Vector: + """ + sample(self, state, action, next_state) + Returns a cost randomly sampled according to the + distribution of this cost model. This is required for cost-aware planners. + + Args: + state (~pomdp_py.framework.basics.State): the next state :math:`s` + action (~pomdp_py.framework.basics.Action): the action :math:`a` + next_state (State): the next state :math:`s'` + + Returns: + float or ~pomdp_py.framework.generalization.Vector: the cost :math:`c` + """ + raise NotImplementedError + + def argmax(self, state: State, action: Action, next_state: State) -> float | Vector: + """ + argmax(self, state, action, next_state) + Returns the most likely cost. This is optional. + """ + raise NotImplementedError + + def get_distribution(self, state: State, action: Action, next_state: State): + """ + get_distribution(self, state, action, next_state) + Returns the underlying distribution of the model. 
+ """ + raise NotImplementedError + + +cdef class CCQNode(QNode): + def __init__( + self, + num_visits: int, + value: float, + cost_value: list[float], + ) -> None: + super().__init__(num_visits=num_visits, value=value) + if len(cost_value) == 0: + raise ValueError("len(cost_value) must be positive.") + self._cost_value = Vector(cost_value) + self._avg_cost_value = Vector.null(self._cost_value.len()) + + @property + def avg_cost_value(self) -> Vector: + return self._avg_cost_value.copy() + + @avg_cost_value.setter + def avg_cost_value(self, avg_cost_value: Vector) -> None: + if not isinstance(avg_cost_value, Vector): + raise TypeError( + "avg_cost_value must be type Vector, " + f"but got {type(avg_cost_value)}." + ) + self._avg_cost_value = avg_cost_value.copy() + + @property + def cost_value(self) -> Vector: + return self._cost_value.copy() + + @cost_value.setter + def cost_value(self, cost_value: Vector) -> None: + if not isinstance(cost_value, Vector): + raise TypeError( + "cost_value must be type Vector, " + f"but got {type(cost_value)}." 
+ ) + self._cost_value = cost_value.copy() + + def __str__(self) -> str: + return ( + typ.red("CCQNode") + + f"(n={self.num_visits}, v={self.value:.3f}, c={self.cost_value} " + + f"c_bar={self.avg_cost_value} | children=[{', '.join(list(self.children.keys()))}])" + ) + +cdef class _CCPolicyActionData: + def __init__(self, double prob, Vector cost_value, Vector avg_cost_value): + self._prob = prob + self._cost_value = cost_value.copy() + self._avg_cost_value = avg_cost_value.copy() + + @property + def prob(self) -> float: + return self._prob + + @property + def cost_value(self) -> Vector: + return self._cost_value + + @property + def avg_cost_value(self) -> Vector: + return self._avg_cost_value + + +cdef class _CCPolicyModel(PolicyModel): + def __init__(self, dict[Action, _CCPolicyActionData] data_dict) -> None: + super().__init__() + cdef Action action + cdef _CCPolicyActionData datum + cdef double prob_sum = 0.0 + + for action, datum in data_dict.items(): + if not isinstance(action, Action): + raise TypeError("action must be type Action.") + prob_sum += datum.prob + + if prob_sum != 1.0: + raise ValueError(f"The probabilities must sum to 1.0, but got {prob_sum}.") + self._data = data_dict.copy() + + cpdef Vector action_avg_cost(_CCPolicyModel self, Action action): + if action not in self._data: + raise KeyError(f"The action {action} is not exist in this policy model.") + return self._data[action].cost_value.copy() + + cpdef Vector action_cost_value(_CCPolicyModel self, Action action): + if action not in self._data: + raise KeyError(f"The action {action} is not exist in this policy model.") + return self._data[action].avg_cost_value.copy() + + cdef public float probability(_CCPolicyModel self, Action action, State state): + if action not in self._data: + raise KeyError(f"The action {action} is not exist in this policy model.") + return self._data[action].prob + + cdef public Action sample(_CCPolicyModel self, State state): + return 
np.random.choice(np.array(list(self._data.keys()), dtype=object)) + + def get_all_actions(self, state: Optional[State] = None, history: Optional[tuple] = None): + return list(self._data.keys()) + + +cdef class CCPOMCP(POMCP): + """ + The cost-constrained POMCP (CCPOMCP) is POMCP + cost constraints. + The current implementation assumes the cost constraint is 1D. + """ + + def __init__( + self, + r_diff: float, + tau: float, + alpha_n: float, + cost_constraint: list[float] | float, + clip_lambda: bool = True, + nu: float = 1.0, + max_depth: int = 5, + planning_time: float = -1.0, + num_sims: int = -1, + discount_factor: float = 0.9, + exploration_const: float = math.sqrt(2.0), + num_visits_init: int = 0, + value_init: int = 0, + cost_value_init: Optional[list[float] | float] = None, + use_random_lambda: bool = True, + rollout_policy: Optional[PolicyModel] = None, + action_prior: Optional[ActionPrior] = None, + show_progress: bool = False, + pbar_update_interval: int = 5 + ): + super(CCPOMCP, self).__init__( + max_depth=max_depth, + planning_time=planning_time, + num_sims=num_sims, + discount_factor=discount_factor, + exploration_const=exploration_const, + num_visits_init=num_visits_init, + value_init=value_init, + rollout_policy=rollout_policy, + action_prior=action_prior, + show_progress=show_progress, + pbar_update_interval=pbar_update_interval + ) + # Sanity checks and set the parameters. 
+ if not isinstance(r_diff, float): + raise TypeError(f"r_diff must be type float, but got {type(r_diff)}.") + if r_diff < 0.0: + raise ValueError("r_diff must be a non-negative float.") + if not isinstance(tau, float): + raise TypeError(f"tau must be type float, but got {type(tau)}.") + if not isinstance(alpha_n, float): + raise TypeError(f"alpha_n must be type float, but got {type(alpha_n)}.") + if alpha_n < 0.0 or 1.0 < alpha_n: + raise ValueError("alpha_n must be in range [0.0, 1.0].") + if not isinstance(cost_constraint, (list, float)): + raise TypeError( + "cost_constraint must be a Vector or float " + f"but got type {type(cost_constraint)}." + ) + if not isinstance(clip_lambda, bool): + raise TypeError( + f"clip_lambda must be a Boolean, but got type {type(clip_lambda)}." + ) + if not isinstance(nu, float): + raise TypeError(f"nu must be type float, but got {type(nu)}.") + if not isinstance(use_random_lambda, bool): + raise TypeError( + "use_random_lambda must be type bool, " + f"but got {type(use_random_lambda)}." + ) + + if cost_value_init is not None: + if not isinstance(cost_value_init, (list, float)): + raise TypeError( + "cost_value_init must be type Vector or float, " + f"but got {type(cost_value_init)}." + ) + if type(cost_value_init) != type(cost_constraint): + raise TypeError( + "cost_value_init and cost_constraint must be the same type." + ) + + # Initialize lambda, cost constraint, and cost value init. + if isinstance(cost_constraint, list): + self._n_constraints = len(cost_constraint) + if len(cost_value_init) != len(cost_value_init): + raise ValueError( + "The cost constraint and cost value init must have the same length." 
+ ) + else: + self._n_constraints = 1 + cost_constraint = [cost_constraint] + cost_value_init = [cost_value_init] if cost_value_init is not None else [0.0] + + self._lambda = Vector.null(self._n_constraints) + self._cost_value_init = list(cost_value_init) + self._cost_constraint = Vector(cost_constraint) + self._r_diff = r_diff + self._tau = tau + self._alpha_n = alpha_n + self._clip_lambda = clip_lambda + self._nu = nu + self._use_random_lambda = use_random_lambda + + cpdef public Action plan(CCPOMCP self, Agent agent): + cdef Action action + cdef _CCPolicyModel policy_dist + cdef double time_taken + cdef int sims_count + + if not isinstance(agent.belief, Particles): + raise TypeError( + "Agent's belief is not represented in particles. " + "CCPOMCP not usable. Please convert it to particles." + ) + + if self._rollout_policy is None: + raise ValueError( + "rollout_policy unset. Please call set_rollout_policy, " + "or pass in a rollout_policy upon initialization." + ) + + if not isinstance(agent, ResponseAgent): + raise TypeError( + f"agent must be type ResponseAgent, but got type {type(agent)}." + ) + + # Set the current agent being used for planning. + self._agent = agent + self._null_response = self._agent.response_model.null_response() + if not hasattr(self._agent, "tree"): + self._agent.add_attr("tree", None) + + # Then get the policy distribution, sample from it, + # and update the cost constraint. + policy_dist, time_taken, sims_count = self._search() + action = policy_dist.sample(state=None) + self._update_cost_constraint(policy_dist, action) + + # Update stats. 
+ self._last_num_sims = sims_count + self._last_planning_time = time_taken + + return action + + cpdef _expand_vnode( + CCPOMCP self, + VNode vnode, + tuple history, + State state = None, + ): + cdef Action action + + for action in self._agent.valid_actions(state=state, history=history): + if vnode[action] is None: + vnode[action] = CCQNode( + self._num_visits_init, self._value_init, self._cost_value_init + ) + + if self._action_prior is not None: + # Using action prior; special values are set; + for preference in self._action_prior.get_preferred_actions(state, history): + action, num_visits_init, value_init = preference + vnode[action] = CCQNode( + self._num_visits_init, self._value_init, self._cost_value_init + ) + + @cython.boundscheck(False) + cpdef _CCPolicyModel _greedy_policy( + CCPOMCP self, + VNode vnode, + double explore_const, + double nu, + ): + cdef list[Action] action_list = list(vnode.children.keys()) + cdef int n_actions = len(action_list) + + if n_actions == 0: + raise RuntimeError("The vnode has no visited actions?!") + + # Compute Q_lambda. + cdef double n_ccqnode_visits + cdef double best_q_lambda = DBL_MIN + cdef int best_q_index = 0 + cdef double logN = log( vnode.num_visits + 1) + cdef double q_value + cdef Vector + cdef CCQNode ccqnode + cdef Action action + cdef int i = 0 + + Q_lambda = Vector.null(n_actions) + Action_UCB = Vector.null(n_actions) + + for i in range(n_actions): + ccqnode = vnode[action_list[i]] + q_value = ccqnode.value - self._lambda.dot(ccqnode.cost_value) + + if ccqnode.num_visits > 0: + n_ccqnode_visits = ccqnode.num_visits + 1.0 + q_value += _compute_visits_ratio( + logN, + n_ccqnode_visits, + explore_const + ) + Action_UCB[i] = _compute_visits_ratio( + log(n_ccqnode_visits), + n_ccqnode_visits + ) + + if q_value > best_q_lambda: + best_q_lambda = q_value + best_q_index = i + + Q_lambda[i] = q_value + + # Compute a*, the best action(s). 
+ cdef list[Action] best_action_list = list() + cdef double best_ucb_add = Action_UCB[best_q_index] + cdef double ucb_add, q_value_diff + cdef bint add_to_best_action_list = False + + q_value = 0.0 + best_q_lambda = Q_lambda[best_q_index] + + for i in range(n_actions): + action = action_list[i] + ccqnode = vnode[action] + q_value = Q_lambda[i] + + if q_value == best_q_lambda: + add_to_best_action_list = True + + else: + q_value_diff = abs(q_value - best_q_lambda) + ucb_add = nu * (Action_UCB[i] + best_ucb_add) + if q_value_diff <= ucb_add and action not in best_action_list: + add_to_best_action_list = True + + if add_to_best_action_list: + best_action_list.append(action) + + # Find the policy. + cdef int n_best_actions = len(best_action_list) + cdef Action action_min, action_max + cdef CCQNode ccqnode_min, ccqnode_max + cdef double cost_constraint_scalar = self._cost_constraint[0] + cdef double max_cost_value, min_cost_value, min_prob, cost_value + cdef dict[Action, _CCPolicyActionData] data + + if n_best_actions == 0: + raise RuntimeError("No best actions were found?!") + + elif n_best_actions == 1: + action = best_action_list[0] + data = { + action: _CCPolicyActionData( + 1.0, + vnode[action].cost_value, + vnode[action].avg_cost_value + ) + } + + else: + # TODO: Implement linear programming to handle multiple constraints. + # The code below can only handle ONE constraint. + if self._cost_constraint.len() > 1: + raise NotImplementedError( + f"This algorithm can only handle one constraint for now." + ) + # if self._lambda[0] <= 0.0: + # raise RuntimeError( + # "The scalar lambda must be positive to continue. " + # "See the Appendix G in the Supplementary Materials for the paper " + # "titled 'Monte-Carlo Tree Search for Constrained POMDPs' " + # "by Lee et. al (2018)." + # ) + + # Find a_max and a_min, the actions with the max and min scalar costs + # from the list of best actions. 
+ max_cost_value = DBL_MIN + min_cost_value = DBL_MAX + + for i in range(n_best_actions): + action = best_action_list[i] + cost_value = _get_ccqnode_scalar_cost(vnode, action) + + if cost_value < min_cost_value: + action_min = action + min_cost_value = cost_value + + if cost_value > max_cost_value: + action_max = action + max_cost_value = cost_value + + # Sanity checks. + if max_cost_value == DBL_MIN: + raise RuntimeError( + f"Max cost value ({max_cost_value}) must be more than {DBL_MIN}. " + f"Note: there are {n_best_actions} best actions. An error exists!" + ) + if min_cost_value == DBL_MAX: + raise RuntimeError( + f"Min cost value ({min_cost_value}) must be less than {DBL_MAX}. " + f"Note: there are {n_best_actions} best actions. An error exists!" + ) + + if max_cost_value <= cost_constraint_scalar or action_min == action_max: + data = { + action_max: _CCPolicyActionData( + 1.0, + vnode[action_max].cost_value, + vnode[action_max].avg_cost_value + ) + } + + elif min_cost_value <= cost_constraint_scalar: + data = { + action_min: _CCPolicyActionData( + 1.0, + vnode[action_min].cost_value, + vnode[action_min].avg_cost_value + ) + } + + else: + min_prob = ( + (max_cost_value - cost_constraint_scalar) + / (max_cost_value - min_cost_value) + ) + + data = { + action_min: _CCPolicyActionData( + min_prob, + vnode[action_min].cost_value, + vnode[action_min].avg_cost_value + ), + action_max: _CCPolicyActionData( + 1.0 - min_prob, + vnode[action_max].cost_value, + vnode[action_max].avg_cost_value + ), + } + + return _CCPolicyModel(data) + + cdef void _init_lambda_fn(CCPOMCP self): + if self._use_random_lambda: + self._lambda = Vector( + np.random.uniform( + 0.00001, + 1.0, + size=self._cost_constraint.len() + ).tolist() + ) + + else: + self._lambda = Vector.null(self._n_constraints) + + cpdef _perform_simulation(self, state): + super(CCPOMCP, self)._perform_simulation(state=state) + + # Sample using the greedy policy. 
This greedy policy corresponds to the first + # call in the search(h_0) function. + policy_dist = self._greedy_policy( + self._agent.tree, + 0.0, + 0.0, + ) + action = policy_dist.sample(state=state) + + # Update lambda. + self._lambda = self._lambda + self._alpha_n * ( + self._agent.tree[action].cost_value - self._cost_constraint + ) + if self._clip_lambda: + lambda_vec_max = self._r_diff / ( + self._tau * (1.0 - self._discount_factor) + ) + self._lambda = Vector.clip(self._lambda, 0.0, lambda_vec_max) + + cpdef _rollout(self, State state, tuple history, VNode root, int depth): + cdef Action action + cdef float discount = 1.0 + cdef State next_state + cdef Observation observation + cdef Response response, total_discounted_response + cdef int nsteps + + total_discounted_response = self._null_response + while depth < self._max_depth: + action = self._rollout_policy.rollout(state, history) + next_state, observation, response, nsteps = ( + sample_generative_model_with_response( + self._agent.transition_model, + self._agent.observation_model, + self._agent.response_model, + state, + action, + self._null_response, + ) + ) + history = history + ((action, observation),) + depth += nsteps + total_discounted_response = ( + total_discounted_response + (response * discount) + ) + discount *= (self._discount_factor ** nsteps) + state = next_state + return total_discounted_response + + cpdef _search(CCPOMCP self): + cdef Action action + cdef double time_taken + cdef int sims_count + cdef PolicyModel policy_dist + + # Initialize the lambda vector. + self._init_lambda_fn() + + # Run the _search(...) method in the super class. + action, time_taken, sims_count = super(CCPOMCP, self)._search() + + # After the search times out, create a policy using the greedy method. + # This greedy policy corresponds to the last call in the search(h_0) function. 
+ policy_dist = self._greedy_policy( + self._agent.tree, + 0.0, + self._nu, + ) + return policy_dist, time_taken, sims_count + + cpdef Response _simulate( + CCPOMCP self, + State state, + tuple history, + VNode root, + QNode parent, + Observation observation, + int depth + ): + cdef Response response, total_response + cdef int nsteps = 1 + cdef Action action + cdef State next_state + cdef _CCPolicyModel policy_dist + + if depth > self._max_depth: + return self._null_response + + if root is None: + if self._agent.tree is None: + root = self._VNode(root=True) + self._agent.tree = root + if self._agent.tree.history != self._agent.history: + raise ValueError("Unable to plan for the given history.") + + else: + root = self._VNode() + + if parent is not None: + parent[observation] = root + + self._expand_vnode(root, history, state=state) + response = self._rollout(state, history, root, depth) + return response + + # This greedy policy corresponds to the call in the simulate(s, h, d) function + # in the paper. 
+ policy_dist = self._greedy_policy( + root, + self._exploration_const, + self._nu + ) + action = policy_dist.sample(state=state) + next_state, observation, response, nsteps = ( + sample_generative_model_with_response( + self._agent.transition_model, + self._agent.observation_model, + self._agent.response_model, + state, + action, + self._null_response, + ) + ) + + if nsteps == 0: + return response + + total_response = ( + response + + (self._discount_factor ** nsteps) + * self._simulate( + next_state, + history + ((action, observation),), + root[action][observation], + root[action], + observation, + depth + nsteps + ) + ) + + root.num_visits += 1 + root[action].num_visits += 1 + root[action].value = ( + root[action].value + + (total_response.reward - root[action].value) / root[action].num_visits + ) + + root[action].cost_value = ( + root[action].cost_value + + (total_response.cost - root[action].cost_value) / root[action].num_visits + ) + + root[action].avg_cost_value = ( + root[action].avg_cost_value + + (response.cost - root[action].avg_cost_value) / root[action].num_visits + ) + + if depth == 1 and root is not None: + root.belief.add(state) + + return total_response + + cdef void _update_cost_constraint( + CCPOMCP self, + _CCPolicyModel policy_dist, + Action sampled_action + ): + cdef double action_prob, prob_prime + cdef Vector chat_minus_avg_cost, action_avg_cost, cost_value, cost_sum + cdef Action action_prime + cdef list[Action] action_prime_list + cdef int i = 0 + + action_prob = policy_dist.probability( + action=sampled_action, + state=None + ) + action_avg_cost = policy_dist.action_avg_cost(sampled_action) + self._cost_constraint -= (action_prob * action_avg_cost) + + if action_prob < 1.0: + cost_sum = Vector.null(self._n_constraints) + action_prime_list = policy_dist.get_all_actions() + for i in range(len(action_prime_list)): + action_prime = action_prime_list[i] + if action_prime == sampled_action: + continue + + prob_prime = policy_dist.probability( + 
action=action_prime, + state=self._agent.history + ) + cost_value = policy_dist.action_cost_value(sampled_action) + cost_sum[i] += (prob_prime * cost_value) + self._cost_constraint -= cost_sum + self._cost_constraint /= (self._discount_factor * action_prob) + + +cdef double _compute_visits_ratio( + double visits_num, + double visits_denom, + double explore_const = 1.0, +): + if visits_denom == 0.0: + return DBL_MIN + else: + return explore_const * sqrt(visits_num / visits_denom) + + +cdef double _get_ccqnode_scalar_cost( + VNode node, + Action action +): + if action not in node: + raise KeyError(f"Action {action} does not exist in node.") + return node[action].cost_value[0] diff --git a/pomdp_py/algorithms/pomcp.pxd b/pomdp_py/algorithms/pomcp.pxd index b3d48fe..e0d1ff8 100644 --- a/pomdp_py/algorithms/pomcp.pxd +++ b/pomdp_py/algorithms/pomcp.pxd @@ -1,7 +1,10 @@ -from pomdp_py.algorithms.po_uct cimport VNode, RootVNode +from pomdp_py.algorithms.po_uct cimport VNode, RootVNode, POUCT from pomdp_py.representations.distribution.particles cimport Particles cdef class VNodeParticles(VNode): cdef public Particles belief cdef class RootVNodeParticles(RootVNode): cdef public Particles belief + +cdef class POMCP(POUCT): + pass diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd new file mode 100644 index 0000000..d81bef4 --- /dev/null +++ b/pomdp_py/framework/generalization.pxd @@ -0,0 +1,32 @@ +from __future__ import annotations +from pomdp_py.framework.basics cimport ( + Agent, + Environment, + Observation, + State, + Action, + TransitionModel, + ObservationModel +) + + +cdef class Response: + pass + + +cdef class ResponseModel: + cdef Response _null_response + + +cdef class ResponseAgent(Agent): + cdef ResponseModel _response_model + + +cdef class ResponseEnvironment(Environment): + cdef ResponseModel _response_model + + +cpdef tuple[State, Observation, Response, int] sample_generative_model_with_response( + TransitionModel T, 
ObservationModel O, ResponseModel R, State state, Action action, + Response null_response, float discount_factor = * +) diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx index 04c3e1a..705bff8 100644 --- a/pomdp_py/framework/generalization.pyx +++ b/pomdp_py/framework/generalization.pyx @@ -1,146 +1,307 @@ -from __future__ import annotations -from functools import cached_property -from typing import Iterable, Iterator, Union - - -cdef class Vector: - """ - The Vector class. Provides an implementation of a vector for maintaining multiple values. - """ - cdef list[float] vals - - def __init__(self, values: float | int | Iterable[float | int] = list([0.])): - cdef list _vec = list() - if isinstance(values, (float, int)): - _vec.append(values) - elif isinstance(values, (list, tuple)): - _vec += list(values) - else: - raise TypeError(f"values must be type int, float, list, or tuple, but got {type(values)}.") - - # Store the values as an array of floats. 
- self.vals = list(float(v) for v in _vec) - - @cached_property - def values(self) -> list[float]: - return self.vals.copy() - - def __iter__(self) -> Iterator: - return iter(self.vals) - - def __len__(self) -> int: - return len(self.vals) - - def __eq__(self, other: Vector | list) -> bool: - if not isinstance(other, (Vector, list)): - raise TypeError(f"other must be type Vector or list, but got {type(other)}.") - return len(self) == len(other) and all(v0 == v1 for v0, v1 in zip(self, other)) - - def __add__(self, other: Vector | list | float | int) -> Vector: - if isinstance(other, (float, int)): - vec = [other] * len(self) - elif isinstance(other, Vector): - vec = other - else: - raise TypeError(f"other must be type Vector, float, or int, but got {type(other)}.") - return Vector([v0 + v1 for v0, v1 in zip(self, vec)]) - - def __radd__(self, other): - return self.__add__(other) - - def __mul__(self, other): - if not isinstance(other, (float, int)): - raise TypeError(f"other must be type float or int, but got {type(other)}.") - return Vector([v * other for v in self]) - - def __rmul__(self, other): - return self.__mul__(other) - - def __str__(self) -> str: - return str(self.vals) - - -ResponseVariableType = Union[float, Vector] +# cython: profile=True - -cdef class GenericResponse: +from __future__ import annotations +from pomdp_py.framework.basics cimport ( + Agent, + GenerativeDistribution, + PolicyModel, + TransitionModel, + ObservationModel, + BlackboxModel, + Action, + Observation, + State, + Environment, + Option +) +from typing import Optional + + +cdef class Response: """ - A GenericResponse class maintains variables within a dictionary. However, subclasses of GenericResponse - can provide access to the dictionary variables using the dot (.) operator. Currently, this class can - handle arithmetic and comparison operations. However, if special operations will need to be performed, + A Response class maintains variables within a dictionary. 
+ However, subclasses of Response can provide access to the + dictionary variables using the dot (.) operator. Currently, + this class can handle arithmetic and comparison operations. + However, if special operations will need to be performed, these operations need to be handled in the subclass. """ - cdef dict __dict__ - def __init__(self, dict response_dict = dict()): - self.__dict__.update(response_dict.copy()) + def copy(self) -> Response: + raise NotImplementedError - cpdef void _check_reward_compatibility(self, value): - if not isinstance(value, GenericResponse): - raise TypeError(f"other must be type GenericResponse, float, or int, but got {type(value)}.") + @staticmethod + def null() -> Response: + raise NotImplementedError - cdef dict add_response(self, GenericResponse other): - self._check_reward_compatibility(other) - cdef dict rv = dict() - for name, value in self.__dict__.items(): - rv.update({name: value + other.__dict__[name]}) - return rv + def __add__(self, other: Response) -> Response: + raise NotImplementedError - def __add__(self, other: GenericResponse) -> GenericResponse: - return GenericResponse(self.add_response(other)) - - def __radd__(self, other: GenericResponse) -> GenericResponse: + def __radd__(self, other: Response) -> Response: return self.__add__(other) - cpdef dict mul_scalar(self, float other): - if not isinstance(other, float): - raise TypeError("other must be type float or int.") - cdef dict rv = dict() - for name, value in self.__dict__.items(): - rv.update({name: value * other}) - return rv - - def __mul__(self, other: float | int) -> GenericResponse: - return GenericResponse(self.mul_scalar(other)) + def __mul__(self, other: float | int) -> Response: + raise NotImplementedError - def __rmul__(self, other) -> GenericResponse: + def __rmul__(self, other: float | int) -> Response: return self.__mul__(other) - def __eq__(self, other: GenericResponse) -> bool: - self._check_reward_compatibility(other) - return all(value == 
other.__dict__[name] for name, value in self.__dict__.items()) + def __eq__(self, other: Response) -> bool: + raise NotImplementedError - def __ne__(self, other) -> bool: - self._check_reward_compatibility(other) - return all(value != other.__dict__[name] for name, value in self.__dict__.items()) + def __ne__(self, other: Response) -> bool: + raise NotImplementedError - def __lt__(self, other) -> bool: - self._check_reward_compatibility(other) - return all(value < other.__dict__[name] for name, value in self.__dict__.items()) + def __lt__(self, other: Response) -> bool: + raise NotImplementedError - def __le__(self, other) -> bool: - self._check_reward_compatibility(other) - return all(value <= other.__dict__[name] for name, value in self.__dict__.items()) + def __le__(self, other: Response) -> bool: + raise NotImplementedError - def __gt__(self, other) -> bool: - self._check_reward_compatibility(other) - return all(value > other.__dict__[name] for name, value in self.__dict__.items()) + def __gt__(self, other: Response) -> bool: + raise NotImplementedError - def __ge__(self, other) -> bool: - self._check_reward_compatibility(other) - return all(value >= other.__dict__[name] for name, value in self.__dict__.items()) + def __ge__(self, other: Response) -> bool: + raise NotImplementedError def __str__(self) -> str: - return ", ".join([f"{name}={values}" for name, values in self.__dict__.items()]) + raise NotImplementedError -cdef class RewardCost(GenericResponse): +cdef class ResponseModel: + """ + A ResponseModel returns a real or simulated response after the agent interacts with + the real or a simulated environment. The implementation of this model contains a + collection of more specific models such as reward and cost models. + """ + + def __init__(self, null_response: Response) -> None: + if not isinstance(null_response, Response): + raise TypeError( + "null_response must be type Response, " + f"but got {type(null_response)}." 
+ ) + self._null_response = null_response.copy() + + def null_response(self) -> Response: + return self._null_response.copy() + + def sample(self, state: State, action: Action, next_state: State) -> Response: + raise NotImplementedError - def __init__(self, float reward=0.0, Vector cost=Vector()): - super().__init__({"reward": reward, "cost": cost}) - def __add__(self, other: RewardCost) -> RewardCost: - return RewardCost(**self.add_response(other)) +cdef class ResponseAgent(Agent): + """ + A `ResponseAgent` behaves the same as an `Agent` with one difference: a + `ReponseAgent` adds a `ResponseModel`. The `ResponseAgent` also provides direct + access to the models maintained in the `ResponseModel` to reduce the wordiness of + the code. + """ - def __mul__(self, other: float) -> RewardCost: - return RewardCost(**self.mul_scalar(other)) + def __init__( + self, + init_belief: GenerativeDistribution, + policy_model: Optional[PolicyModel] = None, + transition_model: Optional[TransitionModel] = None, + observation_model: Optional[ObservationModel] = None, + response_model: Optional[ResponseModel] = None, + blackbox_model: Optional[BlackboxModel] = None, + name: Optional[str] = None + ): + super().__init__( + init_belief=init_belief, + policy_model=policy_model, + transition_model=transition_model, + observation_model=observation_model, + reward_model=None, + blackbox_model=blackbox_model, + ) + + if ( + not isinstance(response_model, ResponseModel) + and response_model is not None + ): + raise TypeError( + "response_model must be type ResponseModel, " + f"but got type {type(response_model)}." + ) + self._response_model = None + if response_model is not None: + self.set_response_model(response_model) + + @property + def reward_model(self): + raise AttributeError( + "Use the response_model property to access the reward model." + ) + + @property + def response_model(self) -> ResponseModel: + if self._response_model is None: + raise ValueError( + "response_model is None. 
Call set_response_model to set a model." + ) + return self._response_model + + def set_response_model(self, response_model: ResponseModel) -> None: + if not isinstance(response_model, ResponseModel): + raise TypeError( + f"model must be type ResponseModel, but got type {type(response_model)}." + ) + self._response_model = response_model + + +cdef class ResponseEnvironment(Environment): + + def __init__( + self, + init_state: State, + transition_model: Optional[TransitionModel] = None, + response_model: Optional[ResponseModel] = None, + blackbox_model: Optional[BlackboxModel] = None + ) -> None: + super().__init__( + init_state=init_state, + transition_model=transition_model, + reward_model=None, + blackbox_model=blackbox_model, + ) + if response_model is not None and blackbox_model is not None: + raise ValueError( + "Cannot specify a response and blackbox model at the same time." + ) + self._response_model = response_model + + @property + def reward_model(self): + raise AttributeError( + "Use the response_model property to access the reward model." + ) + + @property + def response_model(self) -> ResponseModel: + """ + Returns: + The ResponseModel. + """ + return self._response_model + + def set_models( + self, + transition_model: Optional[TransitionModel] = None, + response_model: Optional[ResponseModel] = None, + blackbox_model: Optional[BlackboxModel] = None, + ) -> None: + """ + Reassigns the models to be the ones given. + + Args: + transition_model (TransitionModel): The transition model. + response_model (ResponseModel): The response model. + blackbox_model (BlackboxModel): Provided when the transition model and + response model are not available. + + Returns: + None + """ + super().set_models( + transition_model=transition_model, + reward_model=None, + blackbox_model=blackbox_model, + ) + if response_model is not None and blackbox_model is not None: + raise ValueError( + "Cannot specify a response and blackbox model at the same time." 
+ ) + self._response_model = response_model + + def state_transition( + self, + action: Action, + execute: bool = True, + discount_factor: float = 1.0 + ) -> Response | tuple[State, Response]: + """ + Simulates a state transition given `action`. If `execute` is set to True, + then the resulting state will be the new current state of the environment. + + Args: + action (Action): action that triggers the state transition. + execute (bool): If True, the resulting state of the transition will become + the current state. + discount_factor (float): Only necessary if action is an Option. It is the + discount factor when executing actions following an option's policy + until reaching terminal condition. + + Returns: + Response or tuple[State, Response]: reward as a result of `action` and state + transition, if `execute` is True (next_state, reward) if `execute` is False. + """ + next_state, response, _ = sample_generative_model_with_response( + T=self.transition_model, + O=None, + R=self.response_model, + state=self.state, + action=action, + null_response=self.response_model.null_response(), + discount_factor=discount_factor + ) + + if execute: + self.apply_transition(next_state) + return response + else: + return next_state, response + + +cpdef tuple[State, Observation, Response, int] sample_generative_model_with_response( + TransitionModel T, + ObservationModel O, + ResponseModel R, + State state, + Action action, + Response null_response, + float discount_factor = 1.0 +): + cdef State next_state + cdef Observation observation + cdef Response response = null_response.copy() + cdef Option option + cdef int nsteps = 0 + + if isinstance(action, Option): + # The action is an option; simulate a rollout of the option + option = action + if not option.initiation(state): + # state is not in the initiation set of the option. This is + # similar to the case when you are in a particular (e.g. 
terminal) + # state and certain action cannot be performed, which will still + # be added to the PO-MCTS tree because some other state with the + # same history will allow this action. In this case, that certain + # action will lead to no state change, no observation, and 0 reward, + # because nothing happened. + if O is not None: + return state, None, 0, 0 + else: + return state, 0, 0 + + step_discount_factor = 1.0 + while not option.termination(state): + action = option.sample(state) + next_state = T.sample(state, action) + # For now, we don't care about intermediate observations (future work?). + response += step_discount_factor * R.sample(state, action, next_state) + step_discount_factor *= discount_factor + state = next_state + nsteps += 1 + # sample observation at the end, where action is the last action. + # (doesn't quite make sense to just use option as the action at this point.) + else: + next_state = T.sample(state, action) + response = R.sample(state, action, next_state) + nsteps += 1 + if O is not None: + observation = O.sample(next_state, action) + return next_state, observation, response, nsteps + else: + return next_state, response, nsteps diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index 2980af5..f1b7671 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -330,6 +330,7 @@ class RSPolicyModel(pomdp_py.RolloutPolicy): def __init__(self, n, k): check_actions = set({CheckAction(rock_id) for rock_id in range(k)}) + print(check_actions) self._move_actions = {MoveEast, MoveWest, MoveNorth, MoveSouth} self._other_actions = {SampleAction()} | check_actions self._all_actions = self._move_actions | self._other_actions @@ -430,22 +431,28 @@ def print_state(self): string += "\n" print(string) - def __init__( - self, n, k, init_state, rock_locs, init_belief, half_efficiency_dist=20 - ): - self._n, self._k = n, k - agent 
= pomdp_py.Agent( + def build_agent(self, n, k, rock_locs, init_belief, half_efficiency_dist): + return pomdp_py.Agent( init_belief, RSPolicyModel(n, k), RSTransitionModel(n, rock_locs, self.in_exit_area), RSObservationModel(rock_locs, half_efficiency_dist=half_efficiency_dist), RSRewardModel(rock_locs, self.in_exit_area), ) - env = pomdp_py.Environment( + + def build_env(self, n, init_state, rock_locs): + return pomdp_py.Environment( init_state, RSTransitionModel(n, rock_locs, self.in_exit_area), RSRewardModel(rock_locs, self.in_exit_area), ) + + def __init__( + self, n, k, init_state, rock_locs, init_belief, half_efficiency_dist=20 + ): + self._n, self._k = n, k + agent = self.build_agent(n, k, rock_locs, init_belief, half_efficiency_dist) + env = self.build_env(n, init_state, rock_locs) self._rock_locs = rock_locs super().__init__(agent, env, name="RockSampleProblem") @@ -537,7 +544,7 @@ def create_instance(n, k, **kwargs): def main(): - rocksample = debug_instance() # create_instance(7, 8) + rocksample = create_instance(7, 8) rocksample.print_state() print("*** Testing POMCP ***") diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd new file mode 100644 index 0000000..e85eb8e --- /dev/null +++ b/pomdp_py/utils/cvec.pxd @@ -0,0 +1,38 @@ +# cython: language_level=3 + +from __future__ import annotations +from libcpp.vector cimport vector + +ctypedef vector[double] vectord_t + + +cdef vectord_t null_vector(unsigned int n_zeros) except * +cpdef vectord_t list_to_vectord(list[float] values) +cpdef list[float] vectord_to_list(vectord_t values) + +cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except * +cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except * +cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except * +cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except * +cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, 
vectord_t& res) except * +cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except * +cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except * +cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except * + +cdef double vector_max(const vectord_t& v) except * +cdef double vector_min(const vectord_t& v) except * +cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except * +cdef void vector_copy(const vectord_t& src, vectord_t& dst) except * + + +cdef class Vector: + cdef vectord_t _vals + cdef vectord_t _res_buff + cdef int _length + + cdef bint _is_in_range(Vector self, int index) + cpdef Vector copy(Vector self) + cpdef double dot(Vector self, Vector other) + cpdef int len(Vector self) + cdef double max(Vector self) + cdef double min(Vector self) diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx new file mode 100644 index 0000000..5464232 --- /dev/null +++ b/pomdp_py/utils/cvec.pyx @@ -0,0 +1,352 @@ +# cython: profile=True + +from __future__ import annotations +cimport cython +from libc.math cimport fmin, fmax +from typing import Iterator + + +cdef vectord_t null_vector(unsigned int n_zeros) except *: + cdef vectord_t vec + vec.assign(n_zeros, 0.0) + return vec + + +@cython.boundscheck(False) +cpdef vectord_t list_to_vectord(list[float] values): + cdef int length = len(values) + cdef unsigned int i = 0 + cdef vectord_t rv = vectord_t(length) + + if length > 0: + for i in range(length): + rv[i] = values[i] + return rv + + +@cython.boundscheck(False) +cpdef list[float] vectord_to_list(vectord_t values): + cdef int length = len(values) + cdef unsigned int i = 0 + cdef list[float] rv = list() + + if length > 0: + for i in range(length): + rv.append(float(values[i])) + return rv + + +@cython.boundscheck(False) +cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *: + if v0.size() != v1.size(): + raise 
ValueError("Both vectors must have the same size.") + if v0.size() == 0: + raise ValueError("Vectors should contain at least one value.") + + cdef unsigned int i = 0 + cdef double res = 0.0 + for i in range(v0.size()): + res += (v0[i] * v1[i]) + return res + + +@cython.boundscheck(False) +cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *: + if v0.size() != v1.size(): + raise ValueError("Both vectors must have the same size.") + if v0.size() == 0: + raise ValueError("Vectors should contain at least one value.") + + res = vectord_t(v0.size()) + cdef unsigned int i = 0 + for i in range(v0.size()): + res[i] = v0[i] + v1[i] + + +@cython.boundscheck(False) +cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *: + if v.size() == 0: + raise ValueError("Vector should contain at least one value.") + + res = vectord_t(v.size()) + cdef unsigned int i = 0 + for i in range(v.size()): + res[i] = v[i] + scalar + + +@cython.boundscheck(False) +cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + + res = vectord_t(n_values) + cdef unsigned int i = 0 + for i in range(n_values): + res[i] = v[i] * scalar + + +@cython.boundscheck(False) +cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *: + if v0.size() != v1.size(): + raise ValueError("Both vectors must have the same size.") + if v0.size() == 0: + raise ValueError("Vectors should contain at least one value.") + + res = vectord_t(v0.size()) + cdef unsigned int i = 0 + for i in range(v0.size()): + res[i] = v0[i] - v1[i] + + +@cython.boundscheck(False) +cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + + res = 
vectord_t(n_values) + cdef unsigned int i = 0 + for i in range(n_values): + res[i] = v[i] - scalar + + +@cython.boundscheck(False) +cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + + res = vectord_t(n_values) + cdef unsigned int i = 0 + for i in range(n_values): + res[i] = scalar - v[i] + + +@cython.boundscheck(False) +cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + if scalar == 0.0: + raise ZeroDivisionError("Scalar division by zero!") + + res = vectord_t(n_values) + cdef unsigned int i = 0 + for i in range(n_values): + res[i] = v[i] / scalar + + +@cython.boundscheck(False) +cdef double vector_max(const vectord_t& v) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + if n_values == 1: + return v[0] + + cdef double max_value = v[0] + cdef int i = 0 + for i in range(1, n_values): + if v[i] > max_value: + max_value = v[i] + return max_value + + +@cython.boundscheck(False) +cdef double vector_min(const vectord_t& v) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + if n_values == 1: + return v[0] + + cdef double min_value = v[0] + cdef int i = 0 + for i in range(1, n_values): + if v[i] < min_value: + min_value = v[i] + return min_value + + +@cython.boundscheck(False) +cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *: + cdef int n_values = v.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + if min_value >= max_value: + raise ValueError( + f"Min value ({min_value}) must be less than max value ({max_value})." 
+ ) + cdef int i = 0 + for i in range(n_values): + v[i] = fmax(min_value, fmin(max_value, v[i])) + + +@cython.boundscheck(False) +cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *: + cdef int n_values = src.size() + if n_values == 0: + raise ValueError("Vector should contain at least one value.") + dst = vectord_t(n_values) + cdef int i = 0 + for i in range(n_values): + dst[i] = src[i] + + +cdef class Vector: + """ + The Vector class. Provides an implementation of a vector for + maintaining multiple values. + """ + + def __init__(self, values: list | tuple): + if not isinstance(values, (list, tuple)): + raise TypeError(f"Unhandled type: {type(values)}.") + if len(values) == 0: + raise ValueError("The length of values must have at least one value.") + if not all(isinstance(v, (float, int)) for v in values): + raise ValueError("All values must be type float or int.") + + self._vals = list_to_vectord(values) + self._length = self._vals.size() + + cdef bint _is_in_range(Vector self, int index): + return 0 <= index < self._length + + def as_list(self) -> list[float]: + """ + Returns a list of the internal values. + """ + return vectord_to_list(self._vals) + + def as_vector(self) -> vectord_t: + cdef vectord_t copy + vector_copy(self._vals, copy) + return copy + + @staticmethod + def clip(vec: Vector, min_value: float, max_value: float) -> Vector: + """ + Clips the values within the value using the given min and max values. + """ + if not isinstance(vec, Vector): + raise TypeError("vec must be a Vector.") + cdef vectord_t rv = vec.as_vector() + vector_clip(rv, min_value, max_value) + return Vector(vectord_to_list(rv)) + + cpdef Vector copy(Vector self): + """ + Returns a copy of this vector. + """ + return Vector(self.as_list()) + + cpdef double dot(Vector self, Vector other): + """ + Performs the dot product between two Vectors. 
+ """ + if not isinstance(other, Vector): + raise TypeError("other must be type Vector.") + return vector_dot_prod(self._vals, other._vals) + + @staticmethod + def fill(value: float, n_values: int) -> Vector: + return Vector([value] * n_values) + + cpdef int len(Vector self): + return self._length + + cdef double max(Vector self): + return vector_max(self._vals) + + cdef double min(Vector self): + return vector_min(self._vals) + + @staticmethod + def null(n_zeros: int) -> Vector: + return Vector.fill(0.0, n_zeros) + + def __getitem__(self, index: int) -> float: + index = int(index) + if not self._is_in_range(index): + raise IndexError(f"index is out-of-range.") + return self._vals[index] + + def __setitem__(self, index: int, value: float) -> None: + index = int(index) + if not self._is_in_range(index): + raise IndexError(f"index is out-of-range.") + if not isinstance(value, float): + raise TypeError(f"value must be type float, but got type {type(value)}.") + self._vals[index] = value + + def __iter__(self) -> Iterator: + return iter(self._vals) + + def __len__(self) -> int: + return self._length + + def __eq__(self, other: Vector | list | tuple) -> bool: + if not isinstance(other, (Vector, list, tuple)): + raise TypeError( + f"other must be type Vector, list, or tuple, but got {type(other)}." + ) + if self._length != len(other): + return False + return all(v0 == v1 for v0, v1 in zip(self, other)) + + def __add__(self, other: Vector | float | int) -> Vector: + if isinstance(other, (float, int)): + vector_adds(self._vals, other, self._res_buff) + elif isinstance(other, Vector): + vector_add(self._vals, other.as_vector(), self._res_buff) + else: + raise TypeError( + "other must be type Vector with the same length, " + f"float, or int, but got {type(other)}." 
+ ) + return Vector(self._res_buff) + + def __radd__(self, other: Vector | float | int) -> Vector: + return self.__add__(other) + + def __mul__(self, other: float | int) -> Vector: + if not isinstance(other, (float, int)): + raise TypeError(f"other must be type float or int, but got {type(other)}.") + vector_muls(self._vals, other, self._res_buff) + return Vector(self._res_buff) + + def __rmul__(self, other: float | int) -> Vector: + return self.__mul__(other) + + def __sub__(self, other: Vector | float | int) -> Vector: + if isinstance(other, (float, int)): + vector_subvs(self._vals, other, self._res_buff) + elif isinstance(other, Vector): + vector_sub(self._vals, other.as_vector(), self._res_buff) + else: + raise TypeError( + "other must be type Vector with the same length, " + f"float, or int, but got {type(other)}." + ) + return Vector(self._res_buff) + + def __rsub__(self, other: Vector | float | int) -> Vector: + if isinstance(other, (float, int)): + vector_subsv(other, self._vals, self._res_buff) + elif isinstance(other, Vector): + vector_sub(other.as_vector(), self._vals, self._res_buff) + else: + raise TypeError( + "other must be type Vector with the same length, " + f"float, or int, but got {type(other)}." 
+ ) + return Vector(self._res_buff) + + def __truediv__(self, other: float | int) -> Vector: + vector_scalar_div(self._vals, other, self._res_buff) + return Vector(self._res_buff) + + def __str__(self) -> str: + return str(vectord_to_list(self._vals)) diff --git a/setup.py b/setup.py index c519ea9..44f4d13 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ from setuptools import setup, Extension, find_packages from Cython.Build import cythonize +import numpy import os.path @@ -19,7 +20,7 @@ def build_extensions(pkg_name, major_submodules): filename = os.path.splitext(f)[0] ext_name = f"{pkg_name}.{subm}.{filename}" ext_path = os.path.join(pkg_name, subm.replace(".", "/"), f) - extensions.append(Extension(ext_name, [ext_path])) + extensions.append(Extension(ext_name, [ext_path], language="c++")) return extensions @@ -45,5 +46,6 @@ def build_extensions(pkg_name, major_submodules): package_data={ "pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"], }, + include_dirs=[numpy.get_include()], zip_safe=False, ) diff --git a/tests/test_util_vector_ops.py b/tests/test_util_vector_ops.py new file mode 100644 index 0000000..8cbdac5 --- /dev/null +++ b/tests/test_util_vector_ops.py @@ -0,0 +1,151 @@ +from pomdp_py.utils.cvec import Vector + + +description = "testing utils cvec" + + +def test_assign(): + v = Vector([0]) + assert v == [0.] + + v = Vector([2, 4, 8]) + assert v == [2., 4., 8.] + + v = Vector([0]) + assert v != [1.] + + +def test_as_list(): + v = Vector([10., 3., 3.]) + assert v.as_list() == [10., 3., 3.] + + v = Vector([1., 5., 9., 11., 6.]) + assert v.as_list() == [1., 5., 9., 11., 6.] + + +def test_as_vector(): + v = Vector([1., 2., 3.]) + assert v.as_vector() == [1., 2., 3.] + + +def test_clip(): + v = Vector([2, 5, 7]) + assert Vector.clip(v, 0, 10) == [2., 5., 7.] + + v = Vector([2, 5, 7]) + assert Vector.clip(v, 0, 4) == [2., 4., 4.] + + v = Vector([2, 5, 7]) + assert Vector.clip(v, 4, 10) == [4., 5., 7.] 
+ + v = Vector([2, 5, 7]) + assert Vector.clip(v, 3, 4) == [3., 4., 4.] + + +def test_copy(): + v = Vector([1., 2., 3.]) + assert v.copy() == [1., 2., 3.] + + +def test_dot(): + v0 = Vector([1., 3., 5., 7.]) + v1 = Vector([0., 13., 0., 10.]) + assert v0.dot(v1) == 109. + + +def test_fill(): + v0 = Vector.fill(10., 5) + assert v0 == [10., 10., 10., 10., 10.] + + v1 = Vector.fill(3., 2) + assert v1 == [3., 3.] + + +def test_len(): + v = Vector([1., 2.]) + assert v.len() == 2 + + v = Vector([5., 7., 2.]) + assert v.len() == 3 + + +def test_null(): + v = Vector.null(4) + assert v == [0., 0., 0., 0.] + + +def test_get_and_set_item(): + v = Vector.null(3) + v[0] = 1. + v[2] = 1999. + + assert v == [1., 0., 1999.] + assert v[0] == 1. + assert v[1] == 0. + assert v[2] == 1999. + + +def test_iter(): + v = Vector([1., 2., 4., 8.]) + for value0, value1 in zip(v, [1., 2., 4., 8.]): + assert value0 == value1 + + +def test_add(): + v0 = Vector([1, 2, 3]) + v1 = Vector([10, 22, 55]) + + assert v0 + 4. == [5., 6., 7.] + assert 4. + v0 == [5., 6., 7.] + assert v0 + v1 == [11., 24., 58.] + assert v1 + v0 == [11., 24., 58.] + + +def test_mul(): + v = Vector([9., 8.]) + assert v * 5. == [45., 40.] + assert v * 10. == [90., 80.] + + +def test_sub(): + v0 = Vector([1, 2, 3]) + v1 = Vector([10, 22, 55]) + + assert v0 - v1 == [-9., -20., -52.] + assert v1 - v0 == [9., 20., 52.] + assert v1 - 10. == [0., 12., 45.] + assert v0 - 0. == [1., 2., 3.] + + +def test_truediv(): + v = Vector([10., 20., 50.]) + assert v / 2. == [5., 10., 25.] + assert v / 20. 
== [0.5, 1.0, 2.5] + + +def test_str(): + v = Vector([2., 4.]) + assert str(v) == str([2., 4.]) + + +def run(): + test_assign() + test_as_list() + test_as_vector() + test_clip() + test_copy() + test_dot() + test_fill() + test_len() + test_null() + test_get_and_set_item() + + test_add() + test_mul() + test_sub() + test_truediv() + test_str() + + +if __name__ == "__main__": + run() \ No newline at end of file From 0571618cba3b49b149513fa3cd9b323791a6da99 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Tue, 16 Apr 2024 17:05:41 -0400 Subject: [PATCH 04/30] Fixed error. --- pomdp_py/algorithms/ccpomcp.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx index a0fde23..5fa3b66 100644 --- a/pomdp_py/algorithms/ccpomcp.pyx +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -376,7 +376,7 @@ cdef class CCPOMCP(POMCP): cdef int best_q_index = 0 cdef double logN = log( vnode.num_visits + 1) cdef double q_value - cdef Vector + cdef Vector Q_lambda, Action_UCB cdef CCQNode ccqnode cdef Action action cdef int i = 0 From 954c404af64af9058dd7b6c9ccb0fcd80f6a7db5 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Wed, 17 Apr 2024 00:22:36 -0400 Subject: [PATCH 05/30] Updated code to improve speed. 
--- pomdp_py/algorithms/ccpomcp.pxd | 2 ++ pomdp_py/algorithms/ccpomcp.pyx | 54 ++++++++++++++++++++------------- pomdp_py/utils/cvec.pxd | 2 ++ pomdp_py/utils/cvec.pyx | 46 +++++++++++++++++++++++++--- 4 files changed, 79 insertions(+), 25 deletions(-) diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd index 0c5235c..e945cb4 100644 --- a/pomdp_py/algorithms/ccpomcp.pxd +++ b/pomdp_py/algorithms/ccpomcp.pxd @@ -41,6 +41,8 @@ cdef class CCPOMCP(POMCP): cdef double _nu cdef list[float] _cost_value_init cdef unsigned int _n_constraints + # Buffers + cdef Vector _Q_lambda, _Action_UCB cpdef public Action plan(CCPOMCP self, Agent agent) cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *) diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx index a0fde23..b193968 100644 --- a/pomdp_py/algorithms/ccpomcp.pyx +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -104,7 +104,7 @@ cdef class CCQNode(QNode): @property def avg_cost_value(self) -> Vector: - return self._avg_cost_value.copy() + return self._avg_cost_value @avg_cost_value.setter def avg_cost_value(self, avg_cost_value: Vector) -> None: @@ -117,7 +117,7 @@ cdef class CCQNode(QNode): @property def cost_value(self) -> Vector: - return self._cost_value.copy() + return self._cost_value @cost_value.setter def cost_value(self, cost_value: Vector) -> None: @@ -138,8 +138,8 @@ cdef class CCQNode(QNode): cdef class _CCPolicyActionData: def __init__(self, double prob, Vector cost_value, Vector avg_cost_value): self._prob = prob - self._cost_value = cost_value.copy() - self._avg_cost_value = avg_cost_value.copy() + self._cost_value = cost_value + self._avg_cost_value = avg_cost_value @property def prob(self) -> float: @@ -173,12 +173,12 @@ cdef class _CCPolicyModel(PolicyModel): cpdef Vector action_avg_cost(_CCPolicyModel self, Action action): if action not in self._data: raise KeyError(f"The action {action} is not exist in this policy model.") - return 
self._data[action].cost_value.copy() + return self._data[action].cost_value cpdef Vector action_cost_value(_CCPolicyModel self, Action action): if action not in self._data: raise KeyError(f"The action {action} is not exist in this policy model.") - return self._data[action].avg_cost_value.copy() + return self._data[action].avg_cost_value cdef public float probability(_CCPolicyModel self, Action action, State state): if action not in self._data: @@ -294,6 +294,10 @@ cdef class CCPOMCP(POMCP): self._nu = nu self._use_random_lambda = use_random_lambda + # Initialize buffers. + self._Q_lambda = Vector() + self._Action_UCB = Vector() + cpdef public Action plan(CCPOMCP self, Agent agent): cdef Action action cdef _CCPolicyModel policy_dist @@ -358,6 +362,7 @@ cdef class CCPOMCP(POMCP): ) @cython.boundscheck(False) + @cython.wraparound(False) cpdef _CCPolicyModel _greedy_policy( CCPOMCP self, VNode vnode, @@ -376,13 +381,18 @@ cdef class CCPOMCP(POMCP): cdef int best_q_index = 0 cdef double logN = log( vnode.num_visits + 1) cdef double q_value - cdef Vector cdef CCQNode ccqnode cdef Action action cdef int i = 0 - Q_lambda = Vector.null(n_actions) - Action_UCB = Vector.null(n_actions) + if n_actions == 0: + raise RuntimeError("The number of actions is 0?") + + self._Q_lambda.resize(n_actions) + self._Q_lambda.zeros() + + self._Action_UCB.resize(n_actions) + self._Action_UCB.zeros() for i in range(n_actions): ccqnode = vnode[action_list[i]] @@ -395,7 +405,7 @@ cdef class CCPOMCP(POMCP): n_ccqnode_visits, explore_const ) - Action_UCB[i] = _compute_visits_ratio( + self._Action_UCB[i] = _compute_visits_ratio( log(n_ccqnode_visits), n_ccqnode_visits ) @@ -404,28 +414,28 @@ cdef class CCPOMCP(POMCP): best_q_lambda = q_value best_q_index = i - Q_lambda[i] = q_value + self._Q_lambda[i] = q_value # Compute a*, the best action(s). 
cdef list[Action] best_action_list = list() - cdef double best_ucb_add = Action_UCB[best_q_index] + cdef double best_ucb_add = self._Action_UCB[best_q_index] cdef double ucb_add, q_value_diff cdef bint add_to_best_action_list = False q_value = 0.0 - best_q_lambda = Q_lambda[best_q_index] + best_q_lambda = self._Q_lambda[best_q_index] for i in range(n_actions): action = action_list[i] ccqnode = vnode[action] - q_value = Q_lambda[i] + q_value = self._Q_lambda[i] if q_value == best_q_lambda: add_to_best_action_list = True else: q_value_diff = abs(q_value - best_q_lambda) - ucb_add = nu * (Action_UCB[i] + best_ucb_add) + ucb_add = nu * (self._Action_UCB[i] + best_ucb_add) if q_value_diff <= ucb_add and action not in best_action_list: add_to_best_action_list = True @@ -547,7 +557,7 @@ cdef class CCPOMCP(POMCP): ) else: - self._lambda = Vector.null(self._n_constraints) + self._lambda.zeros() cpdef _perform_simulation(self, state): super(CCPOMCP, self)._perform_simulation(state=state) @@ -714,16 +724,19 @@ cdef class CCPOMCP(POMCP): return total_response + @cython.boundscheck(False) + @cython.wraparound(False) cdef void _update_cost_constraint( CCPOMCP self, _CCPolicyModel policy_dist, Action sampled_action ): cdef double action_prob, prob_prime - cdef Vector chat_minus_avg_cost, action_avg_cost, cost_value, cost_sum + cdef Vector action_avg_cost, cost_value cdef Action action_prime cdef list[Action] action_prime_list cdef int i = 0 + cdef int n_actions action_prob = policy_dist.probability( action=sampled_action, @@ -733,9 +746,9 @@ cdef class CCPOMCP(POMCP): self._cost_constraint -= (action_prob * action_avg_cost) if action_prob < 1.0: - cost_sum = Vector.null(self._n_constraints) action_prime_list = policy_dist.get_all_actions() - for i in range(len(action_prime_list)): + n_actions = len(action_prime_list) + for i in range(n_actions): action_prime = action_prime_list[i] if action_prime == sampled_action: continue @@ -745,8 +758,7 @@ cdef class CCPOMCP(POMCP): 
state=self._agent.history ) cost_value = policy_dist.action_cost_value(sampled_action) - cost_sum[i] += (prob_prime * cost_value) - self._cost_constraint -= cost_sum + self._cost_constraint -= (prob_prime * cost_value) self._cost_constraint /= (self._discount_factor * action_prob) diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd index e85eb8e..9fececc 100644 --- a/pomdp_py/utils/cvec.pxd +++ b/pomdp_py/utils/cvec.pxd @@ -36,3 +36,5 @@ cdef class Vector: cpdef int len(Vector self) cdef double max(Vector self) cdef double min(Vector self) + cdef void resize(Vector self, unsigned int new_size) + cdef void zeros(Vector self) diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx index 5464232..4f09a3a 100644 --- a/pomdp_py/utils/cvec.pyx +++ b/pomdp_py/utils/cvec.pyx @@ -1,11 +1,10 @@ -# cython: profile=True +# cython: language_level=3 profile=True from __future__ import annotations cimport cython from libc.math cimport fmin, fmax from typing import Iterator - cdef vectord_t null_vector(unsigned int n_zeros) except *: cdef vectord_t vec vec.assign(n_zeros, 0.0) @@ -13,6 +12,7 @@ cdef vectord_t null_vector(unsigned int n_zeros) except *: @cython.boundscheck(False) +@cython.wraparound(False) cpdef vectord_t list_to_vectord(list[float] values): cdef int length = len(values) cdef unsigned int i = 0 @@ -25,6 +25,7 @@ cpdef vectord_t list_to_vectord(list[float] values): @cython.boundscheck(False) +@cython.wraparound(False) cpdef list[float] vectord_to_list(vectord_t values): cdef int length = len(values) cdef unsigned int i = 0 @@ -37,6 +38,7 @@ cpdef list[float] vectord_to_list(vectord_t values): @cython.boundscheck(False) +@cython.wraparound(False) cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *: if v0.size() != v1.size(): raise ValueError("Both vectors must have the same size.") @@ -51,6 +53,7 @@ cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *: @cython.boundscheck(False) 
+@cython.wraparound(False) cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *: if v0.size() != v1.size(): raise ValueError("Both vectors must have the same size.") @@ -64,6 +67,7 @@ cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) e @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *: if v.size() == 0: raise ValueError("Vector should contain at least one value.") @@ -75,6 +79,7 @@ cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *: cdef int n_values = v.size() if n_values == 0: @@ -87,6 +92,7 @@ cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *: if v0.size() != v1.size(): raise ValueError("Both vectors must have the same size.") @@ -100,6 +106,7 @@ cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) e @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *: cdef int n_values = v.size() if n_values == 0: @@ -112,6 +119,7 @@ cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *: cdef int n_values = v.size() if n_values == 0: @@ -124,6 +132,7 @@ cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *: cdef int n_values = 
v.size() if n_values == 0: @@ -138,6 +147,7 @@ cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& @cython.boundscheck(False) +@cython.wraparound(False) cdef double vector_max(const vectord_t& v) except *: cdef int n_values = v.size() if n_values == 0: @@ -154,6 +164,7 @@ cdef double vector_max(const vectord_t& v) except *: @cython.boundscheck(False) +@cython.wraparound(False) cdef double vector_min(const vectord_t& v) except *: cdef int n_values = v.size() if n_values == 0: @@ -170,6 +181,7 @@ cdef double vector_min(const vectord_t& v) except *: @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *: cdef int n_values = v.size() if n_values == 0: @@ -184,6 +196,7 @@ cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_v @cython.boundscheck(False) +@cython.wraparound(False) cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *: cdef int n_values = src.size() if n_values == 0: @@ -194,13 +207,19 @@ cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *: dst[i] = src[i] +cdef void vector_resize(vectord_t& v, unsigned int new_size): + if new_size <= 0: + raise ValueError("New vector size must be a positive integer.") + v.resize(new_size) + + cdef class Vector: """ The Vector class. Provides an implementation of a vector for maintaining multiple values. 
""" - def __init__(self, values: list | tuple): + def __init__(self, values: list | tuple = (0.0,)): if not isinstance(values, (list, tuple)): raise TypeError(f"Unhandled type: {type(values)}.") if len(values) == 0: @@ -208,7 +227,14 @@ cdef class Vector: if not all(isinstance(v, (float, int)) for v in values): raise ValueError("All values must be type float or int.") - self._vals = list_to_vectord(values) + cdef int i + cdef int n_values = len(values) + self._vals = vectord_t(n_values) + if n_values == 1: + self._vals[0] = values[0] + else: + for i in range(n_values): + self._vals[i] = values[i] self._length = self._vals.size() cdef bint _is_in_range(Vector self, int index): @@ -267,6 +293,18 @@ cdef class Vector: def null(n_zeros: int) -> Vector: return Vector.fill(0.0, n_zeros) + cdef void resize(Vector self, unsigned int new_size): + vector_resize(self._vals, new_size) + self._length = self._vals.size() + + cdef void zeros(Vector self): + cdef int i + if self._length == 1: + self._vals[0] = 0. + else: + for i in range(self._length): + self._vals[i] = 0. + def __getitem__(self, index: int) -> float: index = int(index) if not self._is_in_range(index): From a6610eb63cc9a8a01b39480c22d78eaf5efccb57 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:19:12 -0400 Subject: [PATCH 06/30] Removed complex way of handling null responses. 
--- pomdp_py/framework/generalization.pxd | 2 ++ pomdp_py/framework/generalization.pyx | 18 ++++++------------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd index d81bef4..cfba56c 100644 --- a/pomdp_py/framework/generalization.pxd +++ b/pomdp_py/framework/generalization.pxd @@ -1,3 +1,5 @@ +# cython: language_level=3 + from __future__ import annotations from pomdp_py.framework.basics cimport ( Agent, diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx index 705bff8..223482b 100644 --- a/pomdp_py/framework/generalization.pyx +++ b/pomdp_py/framework/generalization.pyx @@ -1,4 +1,4 @@ -# cython: profile=True +# cython: language_level=3 from __future__ import annotations from pomdp_py.framework.basics cimport ( @@ -74,18 +74,12 @@ cdef class ResponseModel: the real or a simulated environment. The implementation of this model contains a collection of more specific models such as reward and cost models. """ - - def __init__(self, null_response: Response) -> None: - if not isinstance(null_response, Response): - raise TypeError( - "null_response must be type Response, " - f"but got {type(null_response)}." 
- ) - self._null_response = null_response.copy() + def __init__(self): + pass def null_response(self) -> Response: - return self._null_response.copy() - + raise NotImplementedError + def sample(self, state: State, action: Action, next_state: State) -> Response: raise NotImplementedError @@ -265,7 +259,7 @@ cpdef tuple[State, Observation, Response, int] sample_generative_model_with_resp ): cdef State next_state cdef Observation observation - cdef Response response = null_response.copy() + cdef Response response = null_response cdef Option option cdef int nsteps = 0 From 6dbcbf73f9918a94a004df717790fe0053b4ff78 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:22:33 -0400 Subject: [PATCH 07/30] Implemented NumPy vectors and reduced Python references. --- pomdp_py/algorithms/ccpomcp.pxd | 13 +- pomdp_py/algorithms/ccpomcp.pyx | 242 +++++++++++++------------- pomdp_py/utils/cvec.pxd | 56 ++++--- pomdp_py/utils/cvec.pyx | 289 +++++++++++++------------------- 4 files changed, 282 insertions(+), 318 deletions(-) diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd index e945cb4..c2f657c 100644 --- a/pomdp_py/algorithms/ccpomcp.pxd +++ b/pomdp_py/algorithms/ccpomcp.pxd @@ -1,3 +1,5 @@ +# cython: language_level=3 + from pomdp_py.algorithms.po_uct cimport QNode from pomdp_py.algorithms.pomcp cimport POMCP, VNode from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation @@ -22,7 +24,11 @@ cdef class _CCPolicyActionData: cdef class _CCPolicyModel(PolicyModel): cdef dict[Action, _CCPolicyActionData] _data + cdef double _prob_sum + cdef bint _total_prob_is_not_one(_CCPolicyModel self) + cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node) + cpdef void clear(_CCPolicyModel self) cpdef Vector action_avg_cost(_CCPolicyModel self, Action action) cpdef Vector action_cost_value(_CCPolicyModel self, Action action) cdef public float 
probability(_CCPolicyModel self, Action action, State state) @@ -43,17 +49,18 @@ cdef class CCPOMCP(POMCP): cdef unsigned int _n_constraints # Buffers cdef Vector _Q_lambda, _Action_UCB + cdef _CCPolicyModel _greedy_policy_model cpdef public Action plan(CCPOMCP self, Agent agent) cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *) - cpdef _CCPolicyModel _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu) + cpdef void _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu) cdef void _init_lambda_fn(CCPOMCP self) cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action) cpdef _search(CCPOMCP self) cpdef Response _simulate(CCPOMCP self, State state, tuple history, VNode root, QNode parent, Observation observation, int depth) - cdef void _update_cost_constraint(CCPOMCP self, _CCPolicyModel policy_dist, Action sampled_action) + cdef void _update_cost_constraint(CCPOMCP self, Action sampled_action) -cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const = *) +cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const) cdef double _get_ccqnode_scalar_cost(VNode node, Action action) diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx index b193968..189a443 100644 --- a/pomdp_py/algorithms/ccpomcp.pyx +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -1,8 +1,9 @@ -# cython: profile=True +# cython: language_level=3 from __future__ import annotations cimport cython -from libc.math cimport log, sqrt, exp, abs +from cython.parallel cimport prange +from libc.math cimport log, sqrt, abs import math cimport numpy as cnp import numpy as np @@ -14,7 +15,6 @@ from pomdp_py.framework.generalization cimport ( ResponseAgent, sample_generative_model_with_response ) -from pomdp_py.representations.distribution.histogram cimport Histogram from 
pomdp_py.representations.distribution.particles cimport Particles from pomdp_py.utils import typ from pomdp_py.utils.cvec cimport Vector @@ -155,37 +155,63 @@ cdef class _CCPolicyActionData: cdef class _CCPolicyModel(PolicyModel): - def __init__(self, dict[Action, _CCPolicyActionData] data_dict) -> None: + def __init__(self) -> None: super().__init__() - cdef Action action - cdef _CCPolicyActionData datum - cdef double prob_sum = 0.0 + self._data = dict() + self.clear() + + cdef bint _total_prob_is_not_one(_CCPolicyModel self): + return self._prob_sum != 1.0 - for action, datum in data_dict.items(): - if not isinstance(action, Action): - raise TypeError("action must be type Action.") - prob_sum += datum.prob + cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node): + self._data[action] = _CCPolicyActionData( + prob=prob, + cost_value=node.cost_value, + avg_cost_value=node.avg_cost_value + ) + self._prob_sum += prob + if self._prob_sum > 1.0: + raise RuntimeError( + "Too much actions were added. The probability sum is greater than one!" + ) - if prob_sum != 1.0: - raise ValueError(f"The probabilities must sum to 1.0, but got {prob_sum}.") - self._data = data_dict.copy() + cpdef void clear(_CCPolicyModel self): + self._data.clear() + self._prob_sum = 0.0 cpdef Vector action_avg_cost(_CCPolicyModel self, Action action): + if self._total_prob_is_not_one(): + raise RuntimeError( + "Tried to get action avg cost when total probability != 1.0." + ) if action not in self._data: raise KeyError(f"The action {action} is not exist in this policy model.") return self._data[action].cost_value cpdef Vector action_cost_value(_CCPolicyModel self, Action action): + if self._total_prob_is_not_one(): + raise RuntimeError( + "Tried to get action cost value when total probability != 1.0." 
+ ) if action not in self._data: raise KeyError(f"The action {action} is not exist in this policy model.") return self._data[action].avg_cost_value cdef public float probability(_CCPolicyModel self, Action action, State state): + if self._total_prob_is_not_one(): + raise RuntimeError( + "Tried to get action probability when total probability != 1.0." + ) if action not in self._data: raise KeyError(f"The action {action} is not exist in this policy model.") return self._data[action].prob cdef public Action sample(_CCPolicyModel self, State state): + if self._prob_sum != 1.0: + raise RuntimeError("Tried to sample with a total probability != 1.0.") + + if len(self._data) == 1: + return list(self._data.keys())[0] return np.random.choice(np.array(list(self._data.keys()), dtype=object)) def get_all_actions(self, state: Optional[State] = None, history: Optional[tuple] = None): @@ -297,6 +323,7 @@ cdef class CCPOMCP(POMCP): # Initialize buffers. self._Q_lambda = Vector() self._Action_UCB = Vector() + self._greedy_policy_model = _CCPolicyModel() cpdef public Action plan(CCPOMCP self, Agent agent): cdef Action action @@ -329,9 +356,9 @@ cdef class CCPOMCP(POMCP): # Then get the policy distribution, sample from it, # and update the cost constraint. - policy_dist, time_taken, sims_count = self._search() - action = policy_dist.sample(state=None) - self._update_cost_constraint(policy_dist, action) + _, time_taken, sims_count = self._search() + action = self._greedy_policy_model.sample(state=None) + self._update_cost_constraint(action) # Update stats. self._last_num_sims = sims_count @@ -363,7 +390,7 @@ cdef class CCPOMCP(POMCP): @cython.boundscheck(False) @cython.wraparound(False) - cpdef _CCPolicyModel _greedy_policy( + cpdef void _greedy_policy( CCPOMCP self, VNode vnode, double explore_const, @@ -377,10 +404,9 @@ cdef class CCPOMCP(POMCP): # Compute Q_lambda. 
cdef double n_ccqnode_visits - cdef double best_q_lambda = DBL_MIN - cdef int best_q_index = 0 cdef double logN = log( vnode.num_visits + 1) - cdef double q_value + cdef double q_value = 0. + cdef double action_ucb = 0. cdef CCQNode ccqnode cdef Action action cdef int i = 0 @@ -388,11 +414,12 @@ cdef class CCPOMCP(POMCP): if n_actions == 0: raise RuntimeError("The number of actions is 0?") - self._Q_lambda.resize(n_actions) - self._Q_lambda.zeros() - - self._Action_UCB.resize(n_actions) - self._Action_UCB.zeros() + if n_actions == self._Q_lambda.len(): + self._Q_lambda.zeros() + self._Action_UCB.zeros() + else: + self._Q_lambda.resize(n_actions) + self._Action_UCB.resize(n_actions) for i in range(n_actions): ccqnode = vnode[action_list[i]] @@ -405,63 +432,59 @@ cdef class CCPOMCP(POMCP): n_ccqnode_visits, explore_const ) - self._Action_UCB[i] = _compute_visits_ratio( + action_ucb = _compute_visits_ratio( log(n_ccqnode_visits), - n_ccqnode_visits + n_ccqnode_visits, + 1.0 ) - - if q_value > best_q_lambda: - best_q_lambda = q_value - best_q_index = i - - self._Q_lambda[i] = q_value + self._Action_UCB.set(i, action_ucb) + self._Q_lambda.set(i, q_value) # Compute a*, the best action(s). 
cdef list[Action] best_action_list = list() - cdef double best_ucb_add = self._Action_UCB[best_q_index] + cdef int best_q_index = self._Q_lambda.argmax() + cdef double best_ucb_add = self._Action_UCB.get(best_q_index) + cdef double best_q_lambda = self._Q_lambda.get(best_q_index) cdef double ucb_add, q_value_diff cdef bint add_to_best_action_list = False q_value = 0.0 - best_q_lambda = self._Q_lambda[best_q_index] for i in range(n_actions): - action = action_list[i] - ccqnode = vnode[action] - q_value = self._Q_lambda[i] + q_value = self._Q_lambda.get(i) if q_value == best_q_lambda: add_to_best_action_list = True else: q_value_diff = abs(q_value - best_q_lambda) - ucb_add = nu * (self._Action_UCB[i] + best_ucb_add) - if q_value_diff <= ucb_add and action not in best_action_list: + ucb_add = nu * (self._Action_UCB.get(i) + best_ucb_add) + # The original statement also checks the condition: + # "action not in best_action_list" + # But since actions in the list are unique, we do not need to perform it. + if q_value_diff <= ucb_add: add_to_best_action_list = True if add_to_best_action_list: - best_action_list.append(action) + best_action_list.append(action_list[i]) # Find the policy. 
cdef int n_best_actions = len(best_action_list) - cdef Action action_min, action_max + cdef int action_min_idx, action_max_idx + cdef Action action_max, action_min cdef CCQNode ccqnode_min, ccqnode_max - cdef double cost_constraint_scalar = self._cost_constraint[0] + cdef double cost_constraint_scalar = self._cost_constraint.get(0) cdef double max_cost_value, min_cost_value, min_prob, cost_value cdef dict[Action, _CCPolicyActionData] data + self._greedy_policy_model.clear() + if n_best_actions == 0: raise RuntimeError("No best actions were found?!") elif n_best_actions == 1: action = best_action_list[0] - data = { - action: _CCPolicyActionData( - 1.0, - vnode[action].cost_value, - vnode[action].avg_cost_value - ) - } + self._greedy_policy_model.add(action, 1.0, vnode[action]) else: # TODO: Implement linear programming to handle multiple constraints. @@ -484,15 +507,14 @@ cdef class CCPOMCP(POMCP): min_cost_value = DBL_MAX for i in range(n_best_actions): - action = best_action_list[i] - cost_value = _get_ccqnode_scalar_cost(vnode, action) + cost_value = _get_ccqnode_scalar_cost(vnode, best_action_list[i]) if cost_value < min_cost_value: - action_min = action + action_min_idx = i min_cost_value = cost_value if cost_value > max_cost_value: - action_max = action + action_max_idx = i max_cost_value = cost_value # Sanity checks. @@ -507,23 +529,16 @@ cdef class CCPOMCP(POMCP): f"Note: there are {n_best_actions} best actions. An error exists!" 
) - if max_cost_value <= cost_constraint_scalar or action_min == action_max: - data = { - action_max: _CCPolicyActionData( - 1.0, - vnode[action_max].cost_value, - vnode[action_max].avg_cost_value - ) - } + if ( + max_cost_value <= cost_constraint_scalar + or action_min_idx == action_max_idx + ): + action = best_action_list[action_max_idx] + self._greedy_policy_model.add(action, 1.0, vnode[action]) elif min_cost_value <= cost_constraint_scalar: - data = { - action_min: _CCPolicyActionData( - 1.0, - vnode[action_min].cost_value, - vnode[action_min].avg_cost_value - ) - } + action = best_action_list[action_min_idx] + self._greedy_policy_model.add(action, 1.0, vnode[action]) else: min_prob = ( @@ -531,22 +546,12 @@ cdef class CCPOMCP(POMCP): / (max_cost_value - min_cost_value) ) - data = { - action_min: _CCPolicyActionData( - min_prob, - vnode[action_min].cost_value, - vnode[action_min].avg_cost_value - ), - action_max: _CCPolicyActionData( - 1.0 - min_prob, - vnode[action_max].cost_value, - vnode[action_max].avg_cost_value - ), - } - - return _CCPolicyModel(data) - - cdef void _init_lambda_fn(CCPOMCP self): + action_min = best_action_list[action_min_idx] + action_max = best_action_list[action_max_idx] + self._greedy_policy_model.add(action_min, min_prob, vnode[action_min]) + self._greedy_policy_model.add(action_max, 1.-min_prob, vnode[action_max]) + + cdef void _init_lambda_fn(CCPOMCP self) except *: if self._use_random_lambda: self._lambda = Vector( np.random.uniform( @@ -564,12 +569,8 @@ cdef class CCPOMCP(POMCP): # Sample using the greedy policy. This greedy policy corresponds to the first # call in the search(h_0) function. - policy_dist = self._greedy_policy( - self._agent.tree, - 0.0, - 0.0, - ) - action = policy_dist.sample(state=state) + self._greedy_policy(self._agent.tree, 0.0, 0.0) + action = self._greedy_policy_model.sample(state=state) # Update lambda. 
self._lambda = self._lambda + self._alpha_n * ( @@ -579,7 +580,7 @@ cdef class CCPOMCP(POMCP): lambda_vec_max = self._r_diff / ( self._tau * (1.0 - self._discount_factor) ) - self._lambda = Vector.clip(self._lambda, 0.0, lambda_vec_max) + self._lambda.clip(0.0, lambda_vec_max) cpdef _rollout(self, State state, tuple history, VNode root, int depth): cdef Action action @@ -615,7 +616,7 @@ cdef class CCPOMCP(POMCP): cdef Action action cdef double time_taken cdef int sims_count - cdef PolicyModel policy_dist + # cdef PolicyModel policy_dist # Initialize the lambda vector. self._init_lambda_fn() @@ -625,12 +626,13 @@ cdef class CCPOMCP(POMCP): # After the search times out, create a policy using the greedy method. # This greedy policy corresponds to the last call in the search(h_0) function. - policy_dist = self._greedy_policy( - self._agent.tree, - 0.0, - self._nu, - ) - return policy_dist, time_taken, sims_count + # policy_dist = self._greedy_policy( + # self._agent.tree, + # 0.0, + # self._nu, + # ) + self._greedy_policy(self._agent.tree, 0.0, self._nu) + return None, time_taken, sims_count cpdef Response _simulate( CCPOMCP self, @@ -669,12 +671,8 @@ cdef class CCPOMCP(POMCP): # This greedy policy corresponds to the call in the simulate(s, h, d) function # in the paper. 
- policy_dist = self._greedy_policy( - root, - self._exploration_const, - self._nu - ) - action = policy_dist.sample(state=state) + self._greedy_policy(root, self._exploration_const, self._nu) + action = self._greedy_policy_model.sample(state) next_state, observation, response, nsteps = ( sample_generative_model_with_response( self._agent.transition_model, @@ -728,45 +726,47 @@ cdef class CCPOMCP(POMCP): @cython.wraparound(False) cdef void _update_cost_constraint( CCPOMCP self, - _CCPolicyModel policy_dist, Action sampled_action - ): + ) except *: cdef double action_prob, prob_prime - cdef Vector action_avg_cost, cost_value cdef Action action_prime cdef list[Action] action_prime_list cdef int i = 0 cdef int n_actions - action_prob = policy_dist.probability( + action_prob = self._greedy_policy_model.probability( action=sampled_action, state=None ) - action_avg_cost = policy_dist.action_avg_cost(sampled_action) - self._cost_constraint -= (action_prob * action_avg_cost) + self._cost_constraint -= ( + action_prob + * self._greedy_policy_model.action_avg_cost(sampled_action) + ) if action_prob < 1.0: - action_prime_list = policy_dist.get_all_actions() + action_prime_list = self._greedy_policy_model.get_all_actions() n_actions = len(action_prime_list) for i in range(n_actions): action_prime = action_prime_list[i] if action_prime == sampled_action: continue - prob_prime = policy_dist.probability( + prob_prime = self._greedy_policy_model.probability( action=action_prime, state=self._agent.history ) - cost_value = policy_dist.action_cost_value(sampled_action) - self._cost_constraint -= (prob_prime * cost_value) + self._cost_constraint -= ( + prob_prime + * self._greedy_policy_model.action_cost_value(sampled_action) + ) self._cost_constraint /= (self._discount_factor * action_prob) cdef double _compute_visits_ratio( - double visits_num, - double visits_denom, - double explore_const = 1.0, -): + double visits_num, + double visits_denom, + double explore_const, +) except *: if 
visits_denom == 0.0: return DBL_MIN else: @@ -776,7 +776,7 @@ cdef double _compute_visits_ratio( cdef double _get_ccqnode_scalar_cost( VNode node, Action action -): +) except *: if action not in node: raise KeyError(f"Action {action} does not exist in node.") return node[action].cost_value[0] diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd index 9fececc..0a0a46d 100644 --- a/pomdp_py/utils/cvec.pxd +++ b/pomdp_py/utils/cvec.pxd @@ -1,40 +1,48 @@ -# cython: language_level=3 +# cython: language_level=3, boundscheck=False, wraparound=False from __future__ import annotations -from libcpp.vector cimport vector +cimport numpy as cnp +cnp.import_array() -ctypedef vector[double] vectord_t +ctypedef cnp.ndarray Arrayf_t -cdef vectord_t null_vector(unsigned int n_zeros) except * -cpdef vectord_t list_to_vectord(list[float] values) -cpdef list[float] vectord_to_list(vectord_t values) +cdef Arrayf_t null_vector(unsigned int n_zeros) +cpdef Arrayf_t list_to_vectord(list[float] values) +cpdef list[float] vectord_to_list(Arrayf_t values) +cdef bint vectors_are_not_same_size(double[:] v0, double[:] v1) +cdef bint vector_size_is_zero(double[:] v) -cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except * -cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except * -cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except * -cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except * -cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except * -cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except * -cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except * -cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except * +cdef double vector_dot_prod(double[:] v0, double[:] v1) except * +cdef void vector_add(double[:] v0, double[:] v1, 
double[:] res) except * +cdef void vector_adds(double[:] v, double scalar, double[:] res) except * +cdef void vector_muls(double[:] v, double scalar, double[:] res) except * +cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except * +cdef void vector_subvs(double[:] v, double scalar, double[:] res) except * +cdef void vector_subsv(double scalar, double[:] v, double[:] res) except * +cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except * -cdef double vector_max(const vectord_t& v) except * -cdef double vector_min(const vectord_t& v) except * -cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except * -cdef void vector_copy(const vectord_t& src, vectord_t& dst) except * +cdef unsigned int vector_argmax(double[:] v) except * +cdef unsigned int vector_argmin(double[:] v) except * +cdef void vector_clip(double[:] v, double min_value, double max_value) except * +cdef void vector_copy(double[:] src, double[:] dst) except * cdef class Vector: - cdef vectord_t _vals - cdef vectord_t _res_buff + cdef cnp.ndarray _vals + cdef cnp.ndarray _res_buff cdef int _length - cdef bint _is_in_range(Vector self, int index) + cdef bint _index_is_out_of_range(Vector self, unsigned int index) + cpdef void clip(Vector self, double min_value, double max_value) cpdef Vector copy(Vector self) cpdef double dot(Vector self, Vector other) cpdef int len(Vector self) - cdef double max(Vector self) - cdef double min(Vector self) + cpdef unsigned int argmax(Vector self) + cpdef unsigned int argmin(Vector self) + cpdef double max(Vector self) + cpdef double min(Vector self) cdef void resize(Vector self, unsigned int new_size) - cdef void zeros(Vector self) + cpdef void zeros(Vector self) + cpdef double get(Vector self, unsigned int index) + cpdef void set(Vector self, unsigned int index, double value) diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx index 4f09a3a..12a2172 100644 --- a/pomdp_py/utils/cvec.pyx +++ 
b/pomdp_py/utils/cvec.pyx @@ -1,190 +1,151 @@ -# cython: language_level=3 profile=True +# cython: language_level=3, boundscheck=False, wraparound=False from __future__ import annotations -cimport cython from libc.math cimport fmin, fmax -from typing import Iterator +import numpy as np +cimport numpy as cnp +from typing import Iterator, Iterable +cnp.import_array() -cdef vectord_t null_vector(unsigned int n_zeros) except *: - cdef vectord_t vec - vec.assign(n_zeros, 0.0) - return vec +ArrayDtype_t = np.float64 +cdef inline Arrayf_t null_vector(unsigned int n_zeros): + return np.zeros((n_zeros,), ArrayDtype_t) -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef vectord_t list_to_vectord(list[float] values): - cdef int length = len(values) - cdef unsigned int i = 0 - cdef vectord_t rv = vectord_t(length) - if length > 0: - for i in range(length): - rv[i] = values[i] - return rv +cpdef inline Arrayf_t list_to_vectord(list[float] values): + return np.array(values, ArrayDtype_t) -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef list[float] vectord_to_list(vectord_t values): - cdef int length = len(values) - cdef unsigned int i = 0 - cdef list[float] rv = list() +cpdef inline list[float] vectord_to_list(Arrayf_t values): + return values.tolist() + + +cdef inline bint vectors_are_not_same_size(double[:] v0, double[:] v1): + return v0.shape[0] != v1.shape[0] - if length > 0: - for i in range(length): - rv.append(float(values[i])) - return rv +cdef inline bint vector_size_is_zero(double[:] v): + return v.shape[0] == 0 + + +cdef double vector_dot_prod(double[:] v0, double[:] v1) except *: + cdef unsigned int i = 0 + cdef double _sum = 0. 
-@cython.boundscheck(False) -@cython.wraparound(False) -cdef double vector_dot_prod(const vectord_t& v0, const vectord_t& v1) except *: - if v0.size() != v1.size(): + if vectors_are_not_same_size(v0, v1): raise ValueError("Both vectors must have the same size.") - if v0.size() == 0: + if vector_size_is_zero(v0): raise ValueError("Vectors should contain at least one value.") - cdef unsigned int i = 0 - cdef double res = 0.0 - for i in range(v0.size()): - res += (v0[i] * v1[i]) - return res + for i in range(v0.shape[0]): + _sum += (v0[i] * v1[i]) + return _sum -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_add(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *: - if v0.size() != v1.size(): +cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *: + if vectors_are_not_same_size(v0, v1): raise ValueError("Both vectors must have the same size.") - if v0.size() == 0: + if vector_size_is_zero(v0): raise ValueError("Vectors should contain at least one value.") - res = vectord_t(v0.size()) cdef unsigned int i = 0 - for i in range(v0.size()): + for i in range(v0.shape[0]): res[i] = v0[i] + v1[i] -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_adds(const vectord_t& v, const double& scalar, vectord_t& res) except *: - if v.size() == 0: +cdef void vector_adds(double[:] v, double scalar, double[:] res) except *: + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") - res = vectord_t(v.size()) cdef unsigned int i = 0 - for i in range(v.size()): + for i in range(v.shape[0]): res[i] = v[i] + scalar -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_muls(const vectord_t& v, const double& scalar, vectord_t& res) except *: - cdef int n_values = v.size() - if n_values == 0: +cdef void vector_muls(double[:] v, double scalar,double[:] res) except *: + if vectors_are_not_same_size(v, res): + raise ValueError("Vectors v and res must be the same 
size.") + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") - res = vectord_t(n_values) cdef unsigned int i = 0 - for i in range(n_values): + for i in range(v.shape[0]): res[i] = v[i] * scalar -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_sub(const vectord_t& v0, const vectord_t& v1, vectord_t& res) except *: - if v0.size() != v1.size(): +cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *: + if vectors_are_not_same_size(v0, v1): raise ValueError("Both vectors must have the same size.") - if v0.size() == 0: + if vector_size_is_zero(v0): raise ValueError("Vectors should contain at least one value.") - res = vectord_t(v0.size()) cdef unsigned int i = 0 - for i in range(v0.size()): + for i in range(v0.shape[0]): res[i] = v0[i] - v1[i] -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_subvs(const vectord_t& v, const double& scalar, vectord_t& res) except *: - cdef int n_values = v.size() - if n_values == 0: +cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *: + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") - res = vectord_t(n_values) cdef unsigned int i = 0 - for i in range(n_values): + for i in range(v.shape[0]): res[i] = v[i] - scalar -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_subsv(const double& scalar, const vectord_t& v, vectord_t& res) except *: - cdef int n_values = v.size() - if n_values == 0: +cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *: + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") - res = vectord_t(n_values) cdef unsigned int i = 0 - for i in range(n_values): + for i in range(v.shape[0]): res[i] = scalar - v[i] -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_scalar_div(const vectord_t& v, const double& scalar, vectord_t& res) except *: - cdef int n_values 
= v.size() - if n_values == 0: +cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *: + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") if scalar == 0.0: raise ZeroDivisionError("Scalar division by zero!") - res = vectord_t(n_values) cdef unsigned int i = 0 - for i in range(n_values): + for i in range(v.shape[0]): res[i] = v[i] / scalar - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef double vector_max(const vectord_t& v) except *: - cdef int n_values = v.size() - if n_values == 0: +cdef unsigned int vector_argmax(double[:] v) except *: + cdef int n_values = v.shape[0] + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") if n_values == 1: - return v[0] + return 0 - cdef double max_value = v[0] + cdef int max_idx = 0 cdef int i = 0 for i in range(1, n_values): - if v[i] > max_value: - max_value = v[i] - return max_value + if v[i] > v[max_idx]: + max_idx = i + return max_idx -@cython.boundscheck(False) -@cython.wraparound(False) -cdef double vector_min(const vectord_t& v) except *: - cdef int n_values = v.size() - if n_values == 0: +cdef unsigned int vector_argmin(double[:] v) except *: + cdef int n_values = v.shape[0] + if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") if n_values == 1: - return v[0] + return 0 - cdef double min_value = v[0] + cdef int min_idx = 0 cdef int i = 0 for i in range(1, n_values): - if v[i] < min_value: - min_value = v[i] - return min_value + if v[i] < v[min_idx]: + min_idx = i + return min_idx -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_value) except *: - cdef int n_values = v.size() - if n_values == 0: +cdef void vector_clip(double[:] v, double min_value, double max_value) except *: + cdef int n_values = v.shape[0] + if vector_size_is_zero(v): raise ValueError("Vector should contain at least 
one value.") if min_value >= max_value: raise ValueError( @@ -195,22 +156,18 @@ cdef void vector_clip(vectord_t& v, const double& min_value, const double& max_v v[i] = fmax(min_value, fmin(max_value, v[i])) -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void vector_copy(const vectord_t& src, vectord_t& dst) except *: - cdef int n_values = src.size() - if n_values == 0: +cdef void vector_copy(double[:] src, double[:] dst) except *: + if vector_size_is_zero(src): raise ValueError("Vector should contain at least one value.") - dst = vectord_t(n_values) cdef int i = 0 - for i in range(n_values): + for i in range(src.shape[0]): dst[i] = src[i] -cdef void vector_resize(vectord_t& v, unsigned int new_size): +cdef void vector_resize(Arrayf_t v, unsigned int new_size): if new_size <= 0: raise ValueError("New vector size must be a positive integer.") - v.resize(new_size) + v = np.zeros((new_size,), dtype=v.dtype) cdef class Vector: @@ -219,26 +176,14 @@ cdef class Vector: maintaining multiple values. """ - def __init__(self, values: list | tuple = (0.0,)): - if not isinstance(values, (list, tuple)): - raise TypeError(f"Unhandled type: {type(values)}.") - if len(values) == 0: - raise ValueError("The length of values must have at least one value.") - if not all(isinstance(v, (float, int)) for v in values): - raise ValueError("All values must be type float or int.") + def __init__(self, values: Iterable[float] = (0.0,)): + # Perform a lazy conversion of the input values. 
+ self._vals = list_to_vectord(list(values)).flatten() + self._res_buff = null_vector(self._vals.shape[0]) + self._length = self._vals.shape[0] - cdef int i - cdef int n_values = len(values) - self._vals = vectord_t(n_values) - if n_values == 1: - self._vals[0] = values[0] - else: - for i in range(n_values): - self._vals[i] = values[i] - self._length = self._vals.size() - - cdef bint _is_in_range(Vector self, int index): - return 0 <= index < self._length + cdef bint _index_is_out_of_range(Vector self, unsigned int index): + return index < 0 or self._length <= index def as_list(self) -> list[float]: """ @@ -246,34 +191,25 @@ cdef class Vector: """ return vectord_to_list(self._vals) - def as_vector(self) -> vectord_t: - cdef vectord_t copy - vector_copy(self._vals, copy) - return copy + def as_vector(self) -> np.ndarray: + return self._vals[:] - @staticmethod - def clip(vec: Vector, min_value: float, max_value: float) -> Vector: + cpdef void clip(Vector self, double min_value, double max_value): """ Clips the values within the value using the given min and max values. """ - if not isinstance(vec, Vector): - raise TypeError("vec must be a Vector.") - cdef vectord_t rv = vec.as_vector() - vector_clip(rv, min_value, max_value) - return Vector(vectord_to_list(rv)) + vector_clip(self._vals, min_value, max_value) cpdef Vector copy(Vector self): """ Returns a copy of this vector. """ - return Vector(self.as_list()) + return Vector(self._vals) cpdef double dot(Vector self, Vector other): """ Performs the dot product between two Vectors. 
""" - if not isinstance(other, Vector): - raise TypeError("other must be type Vector.") return vector_dot_prod(self._vals, other._vals) @staticmethod @@ -283,21 +219,28 @@ cdef class Vector: cpdef int len(Vector self): return self._length - cdef double max(Vector self): - return vector_max(self._vals) + cpdef unsigned int argmax(Vector self): + return vector_argmax(self._vals) - cdef double min(Vector self): - return vector_min(self._vals) + cpdef unsigned int argmin(Vector self): + return vector_argmin(self._vals) + + cpdef double max(Vector self): + return self._vals[self.argmax()] + + cpdef double min(Vector self): + return self._vals[self.argmin()] @staticmethod def null(n_zeros: int) -> Vector: return Vector.fill(0.0, n_zeros) cdef void resize(Vector self, unsigned int new_size): - vector_resize(self._vals, new_size) - self._length = self._vals.size() + self._vals = null_vector(new_size) + self._res_buff = null_vector(new_size) + self._length = self._vals.shape[0] - cdef void zeros(Vector self): + cpdef void zeros(Vector self): cdef int i if self._length == 1: self._vals[0] = 0. @@ -305,20 +248,26 @@ cdef class Vector: for i in range(self._length): self._vals[i] = 0. - def __getitem__(self, index: int) -> float: - index = int(index) - if not self._is_in_range(index): - raise IndexError(f"index is out-of-range.") + cpdef double get(Vector self, unsigned int index): + if self._index_is_out_of_range(index): + raise IndexError( + f"index ({index}) is out-of-range for length {self._length}." 
+ ) return self._vals[index] - def __setitem__(self, index: int, value: float) -> None: - index = int(index) - if not self._is_in_range(index): - raise IndexError(f"index is out-of-range.") - if not isinstance(value, float): - raise TypeError(f"value must be type float, but got type {type(value)}.") + cpdef void set(Vector self, unsigned int index, double value): + if self._index_is_out_of_range(index): + raise IndexError( + f"index ({index}) is out-of-range for length {self._length}." + ) self._vals[index] = value + def __getitem__(self, index: int) -> float: + return self.get(index) + + def __setitem__(self, index: int, value: float) -> None: + self.set(index, value) + def __iter__(self) -> Iterator: return iter(self._vals) From 62fa04b487e63f0f869b0bdce7eb5eaebbeb307c Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:23:23 -0400 Subject: [PATCH 08/30] Updated and added tests. --- tests/test_util_vector_ops.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/test_util_vector_ops.py b/tests/test_util_vector_ops.py index 8cbdac5..5c66a5a 100644 --- a/tests/test_util_vector_ops.py +++ b/tests/test_util_vector_ops.py @@ -25,21 +25,25 @@ def test_as_list(): def test_as_vector(): v = Vector([1., 2., 3.]) - assert v.as_vector() == [1., 2., 3.] + assert list(v.as_vector()) == [1., 2., 3.] def test_clip(): v = Vector([2, 5, 7]) - assert Vector.clip(v, 0, 10) == [2., 5., 7.] + v.clip(0, 10) + assert v == [2., 5., 7.] v = Vector([2, 5, 7]) - assert Vector.clip(v, 0, 4) == [2., 4., 4.] + v.clip(0, 4) + assert v == [2., 4., 4.] v = Vector([2, 5, 7]) - assert Vector.clip(v, 4, 10) == [4., 5., 7.] + v.clip(4, 10) + assert v == [4., 5., 7.] v = Vector([2, 5, 7]) - assert Vector.clip(v, 3, 4) == [3., 4., 4.] + v.clip(3, 4) + assert v == [3., 4., 4.] 
def test_copy(): @@ -128,6 +132,22 @@ def test_str(): assert str(v) == str([2., 4.]) +def test_min(): + v = Vector([1., 2.]) + assert v.min() == 1. + + v = Vector([1., -90., 3.]) + assert v.min() == -90. + + +def test_max(): + v = Vector([1., 2.]) + assert v.max() == 2. + + v = Vector([1., -90., 3.]) + assert v.max() == 3. + + def run(): test_assign() test_as_list() @@ -145,6 +165,8 @@ def run(): test_sub() test_truediv() test_str() + test_min() + test_max() if __name__ == "__main__": From bd26db76f58649ec7af3dd139cd90f4e68a3f113 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:24:01 -0400 Subject: [PATCH 09/30] Added example problem for rocksample for CCPOMCP. --- pomdp_py/problems/cc_rocksample/__init__.py | 0 .../cc_rocksample/cc_rocksample_problem.py | 235 ++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 pomdp_py/problems/cc_rocksample/__init__.py create mode 100644 pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py diff --git a/pomdp_py/problems/cc_rocksample/__init__.py b/pomdp_py/problems/cc_rocksample/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py new file mode 100644 index 0000000..a859880 --- /dev/null +++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py @@ -0,0 +1,235 @@ +from __future__ import annotations +import copy +import math +import numpy as np +import pomdp_py +from pomdp_py.problems.rocksample.rocksample_problem import ( + RockSampleProblem, + create_instance, + RSTransitionModel, + RSObservationModel, + RSPolicyModel, + CheckAction, + RSRewardModel, + init_particles_belief, + State, +) + + +class RSResponse(pomdp_py.Response): + + def __init__(self, reward: int = 0, cost: int = 0) -> None: + super().__init__() + self.reward = int(reward) + self.cost = int(cost) + + def __add__(self, other: RSResponse) -> 
RSResponse: + return RSResponse(self.reward + other.reward, self.cost + other.cost) + + def __mul__(self, other: float | int) -> RSResponse: + return RSResponse(self.reward * other, self.cost * other) + + def __str__(self) -> str: + return f"reward={self.reward}, cost={self.cost}" + + # def copy(self) -> RSResponse: + # return RSResponse(self.reward, self.cost) + + @staticmethod + def null() -> RSResponse: + return RSResponse(reward=0, cost=0) + + +class RSCostModel(pomdp_py.CostModel): + + def sample( + self, + state: pomdp_py.State, + action: pomdp_py.Action, + next_state: pomdp_py.State, + reward: float = 0, + **kwargs, + ) -> int: + cost = 0 + if reward < 0: + cost += 1 + if isinstance(action, CheckAction): + cost += 1 + return cost + + +class RSResponseModel(pomdp_py.ResponseModel): + def __init__( + self, + reward_model: RSRewardModel, + cost_model: RSCostModel, + ): + super().__init__() + self._reward_model = reward_model + self._cost_model = cost_model + + def null_response(self) -> RSResponse: + return RSResponse(reward=0, cost=0) + + def sample( + self, state: pomdp_py.State, action: pomdp_py.Action, next_state: pomdp_py.State + ) -> RSResponse: + reward = self._reward_model.sample( + state=state, action=action, next_state=next_state + ) + cost = self._cost_model.sample( + state=state, action=action, next_state=next_state, reward=reward + ) + return RSResponse(reward, cost) + + +class CCRockSampleProblem(RockSampleProblem): + + def __init__( + self, + n_grid: int, + n_rocks: int, + init_state: State, + rock_locs: dict[tuple[int, int], int], + init_belief: pomdp_py.GenerativeDistribution, + half_efficiency_dist: int = 20, + ): + super().__init__( + n=n_grid, + k=n_rocks, + init_state=init_state, + rock_locs=rock_locs, + init_belief=init_belief, + half_efficiency_dist=half_efficiency_dist, + ) + + def build_agent( + self, + n: int, + k: int, + rock_locs: dict[tuple[int, int], int], + init_belief: pomdp_py.GenerativeDistribution, + half_efficiency_dist: int, 
+ ) -> pomdp_py.ResponseAgent: + return pomdp_py.ResponseAgent( + init_belief=init_belief, + policy_model=RSPolicyModel(n, k), + transition_model=RSTransitionModel(n, rock_locs, self.in_exit_area), + observation_model=RSObservationModel( + rock_locs, half_efficiency_dist=half_efficiency_dist + ), + response_model=RSResponseModel( + reward_model=RSRewardModel(rock_locs, self.in_exit_area), + cost_model=RSCostModel(), + + ), + ) + + def build_env( + self, n: int, init_state: State, rock_locs: dict[tuple[int, int], int] + ) -> pomdp_py.ResponseEnvironment: + return pomdp_py.ResponseEnvironment( + init_state=init_state, + transition_model=RSTransitionModel(n, rock_locs, self.in_exit_area), + response_model=RSResponseModel( + reward_model=RSRewardModel(rock_locs, self.in_exit_area), + cost_model=RSCostModel(), + ), + ) + + +def test_planner( + cc_rocksample: CCRockSampleProblem, + ccpomcp: pomdp_py.CCPOMCP, + nsteps: int = 3, + discount: float = 0.95, +): + gamma: float = 1.0 + total_response = RSResponse.null() + total_discounted_response = RSResponse.null() + + for i in range(nsteps): + print("==== Step %d ====" % (i + 1)) + action = ccpomcp.plan(cc_rocksample.agent) + + true_state = copy.deepcopy(cc_rocksample.env.state) + env_response = cc_rocksample.env.state_transition(action, execute=True) + + real_observation = cc_rocksample.env.provide_observation( + cc_rocksample.agent.observation_model, action + ) + cc_rocksample.agent.update_history(action, real_observation) + ccpomcp.update(cc_rocksample.agent, action, real_observation) + total_response += env_response + total_discounted_response += (env_response * gamma) + gamma *= discount + + print("True state: %s" % true_state) + print("Action: %s" % str(action)) + print("Observation: %s" % str(real_observation)) + print("Response: %s" % str(env_response)) + print("Response (Cumulative): %s" % str(total_response)) + print("Response (Cumulative Discounted): %s" % str(total_discounted_response)) + print("__num_sims__: %d" 
% ccpomcp.last_num_sims) + print("__plan_time__: %.5f" % ccpomcp.last_planning_time) + print("World:") + cc_rocksample.print_state() + + if cc_rocksample.in_exit_area(cc_rocksample.env.state.position): + break + return total_response, total_discounted_response + + +def create_instance(n_grid: int, n_rocks: int) -> CCRockSampleProblem: + init_state, rock_locs = CCRockSampleProblem.generate_instance(n_grid, n_rocks) + belief = "uniform" + init_belief = init_particles_belief(n_rocks, 200, init_state, belief=belief) + return CCRockSampleProblem( + n_grid=n_grid, + n_rocks=n_rocks, + init_state=init_state, + rock_locs=rock_locs, + init_belief=init_belief, + ) + + +def main(n_grid: int = 7, n_rocks: int = 8) -> None: + cc_rocksample = create_instance(n_grid=n_grid, n_rocks=n_rocks) + cc_rocksample.print_state() + + k_discount_factor = 0.95 + k_max_depth = int(math.log(0.001) / math.log(k_discount_factor)) + k_max_reward = 10 + k_min_reward = -10 + + print("*** Testing CC-POMCP ***") + ccpomcp = pomdp_py.CCPOMCP( + r_diff=float(k_max_reward - k_min_reward), + alpha_n=1.0 / len(cc_rocksample.agent.cur_belief), + nu=1.0, + tau=1.0, + cost_constraint=1.0, + max_depth=k_max_depth, + discount_factor=k_discount_factor, + num_sims=10000, + exploration_const=20, + rollout_policy=cc_rocksample.agent.policy_model, + num_visits_init=1, + ) + total_response, total_discounted_response = test_planner( + cc_rocksample=cc_rocksample, + ccpomcp=ccpomcp, + nsteps=10, + discount=k_discount_factor, + ) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--ngrid", type=int, default=7) + parser.add_argument("--nrocks", type=int, default=8) + args = parser.parse_args() + + main(n_grid=args.ngrid, n_rocks=args.nrocks) From 2e68bfb57acc4b5625c96c7f1b3a506cf3e0d8b1 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 14:42:43 -0400 Subject: [PATCH 10/30] Added profiling for 
cython. --- pomdp_py/algorithms/ccpomcp.pxd | 2 +- pomdp_py/algorithms/ccpomcp.pyx | 2 -- pomdp_py/framework/generalization.pxd | 2 +- pomdp_py/utils/cvec.pxd | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd index c2f657c..b37d027 100644 --- a/pomdp_py/algorithms/ccpomcp.pxd +++ b/pomdp_py/algorithms/ccpomcp.pxd @@ -1,4 +1,4 @@ -# cython: language_level=3 +# cython: language_level=3, profile=True from pomdp_py.algorithms.po_uct cimport QNode from pomdp_py.algorithms.pomcp cimport POMCP, VNode diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx index 189a443..5e9ddf1 100644 --- a/pomdp_py/algorithms/ccpomcp.pyx +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -2,7 +2,6 @@ from __future__ import annotations cimport cython -from cython.parallel cimport prange from libc.math cimport log, sqrt, abs import math cimport numpy as cnp @@ -327,7 +326,6 @@ cdef class CCPOMCP(POMCP): cpdef public Action plan(CCPOMCP self, Agent agent): cdef Action action - cdef _CCPolicyModel policy_dist cdef double time_taken cdef int sims_count diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd index cfba56c..99f254b 100644 --- a/pomdp_py/framework/generalization.pxd +++ b/pomdp_py/framework/generalization.pxd @@ -1,4 +1,4 @@ -# cython: language_level=3 +# cython: language_level=3, profile=True from __future__ import annotations from pomdp_py.framework.basics cimport ( diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd index 0a0a46d..43e4dfe 100644 --- a/pomdp_py/utils/cvec.pxd +++ b/pomdp_py/utils/cvec.pxd @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=False, wraparound=False +# cython: language_level=3, boundscheck=False, wraparound=False, profile=True from __future__ import annotations cimport numpy as cnp From 555bb686a0d479d9c8d2f6576d670348e06a531f Mon Sep 17 00:00:00 2001 From: Troi Williams 
<40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 14:43:14 -0400 Subject: [PATCH 11/30] Limited nsteps for profiling. --- pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py index a859880..0492831 100644 --- a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py +++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py @@ -32,9 +32,6 @@ def __mul__(self, other: float | int) -> RSResponse: def __str__(self) -> str: return f"reward={self.reward}, cost={self.cost}" - # def copy(self) -> RSResponse: - # return RSResponse(self.reward, self.cost) - @staticmethod def null() -> RSResponse: return RSResponse(reward=0, cost=0) @@ -219,7 +216,7 @@ def main(n_grid: int = 7, n_rocks: int = 8) -> None: total_response, total_discounted_response = test_planner( cc_rocksample=cc_rocksample, ccpomcp=ccpomcp, - nsteps=10, + nsteps=1, discount=k_discount_factor, ) From c759d42a2f3f41d070742ec1f3f73eb2a1acaf82 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 14:43:32 -0400 Subject: [PATCH 12/30] Limited nsteps for profiling. 
--- pomdp_py/problems/rocksample/rocksample_problem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index f1b7671..c920602 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -556,7 +556,8 @@ def main(): rollout_policy=rocksample.agent.policy_model, num_visits_init=1, ) - tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) + # tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) + tt, ttd = test_planner(rocksample, pomcp, nsteps=1, discount=0.95) if __name__ == "__main__": From e89975403ff0cc67bc011ef52b77232b3f090dd7 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 14:43:59 -0400 Subject: [PATCH 13/30] Added code for profiling. --- pomdp_py/problems/cc_rocksample/profile_code.py | 11 +++++++++++ pomdp_py/problems/rocksample/profile_code.py | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 pomdp_py/problems/cc_rocksample/profile_code.py create mode 100644 pomdp_py/problems/rocksample/profile_code.py diff --git a/pomdp_py/problems/cc_rocksample/profile_code.py b/pomdp_py/problems/cc_rocksample/profile_code.py new file mode 100644 index 0000000..bd9dfb4 --- /dev/null +++ b/pomdp_py/problems/cc_rocksample/profile_code.py @@ -0,0 +1,11 @@ +import pstats, cProfile + +import pyximport +pyximport.install() + +import cc_rocksample_problem + +cProfile.runctx("cc_rocksample_problem.main()", globals(), locals(), "fastProfile.prof") + +s = pstats.Stats("fastProfile.prof") +s.strip_dirs().sort_stats("tottime").print_stats() diff --git a/pomdp_py/problems/rocksample/profile_code.py b/pomdp_py/problems/rocksample/profile_code.py new file mode 100644 index 0000000..fc612f0 --- /dev/null +++ b/pomdp_py/problems/rocksample/profile_code.py @@ -0,0 +1,11 @@ +import pstats, 
cProfile + +import pyximport +pyximport.install() + +import rocksample_problem + +cProfile.runctx("rocksample_problem.main()", globals(), locals(), "fastProfile.prof") + +s = pstats.Stats("fastProfile.prof") +s.strip_dirs().sort_stats("tottime").print_stats() From 5ddaaaec45cf937cfc14a1d27613e80af2bb6677 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:08:43 -0400 Subject: [PATCH 14/30] Removed except * from c functions. --- pomdp_py/utils/cvec.pyx | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx index 12a2172..fefd02e 100644 --- a/pomdp_py/utils/cvec.pyx +++ b/pomdp_py/utils/cvec.pyx @@ -29,7 +29,7 @@ cdef inline bint vector_size_is_zero(double[:] v): return v.shape[0] == 0 -cdef double vector_dot_prod(double[:] v0, double[:] v1) except *: +cdef double vector_dot_prod(double[:] v0, double[:] v1): cdef unsigned int i = 0 cdef double _sum = 0. 
@@ -43,7 +43,7 @@ cdef double vector_dot_prod(double[:] v0, double[:] v1) except *: return _sum -cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *: +cdef void vector_add(double[:] v0, double[:] v1, double[:] res): if vectors_are_not_same_size(v0, v1): raise ValueError("Both vectors must have the same size.") if vector_size_is_zero(v0): @@ -54,7 +54,7 @@ cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except *: res[i] = v0[i] + v1[i] -cdef void vector_adds(double[:] v, double scalar, double[:] res) except *: +cdef void vector_adds(double[:] v, double scalar, double[:] res): if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") @@ -63,7 +63,7 @@ cdef void vector_adds(double[:] v, double scalar, double[:] res) except *: res[i] = v[i] + scalar -cdef void vector_muls(double[:] v, double scalar,double[:] res) except *: +cdef void vector_muls(double[:] v, double scalar,double[:] res): if vectors_are_not_same_size(v, res): raise ValueError("Vectors v and res must be the same size.") if vector_size_is_zero(v): @@ -74,7 +74,7 @@ cdef void vector_muls(double[:] v, double scalar,double[:] res) except *: res[i] = v[i] * scalar -cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *: +cdef void vector_sub(double[:] v0, double[:] v1, double[:] res): if vectors_are_not_same_size(v0, v1): raise ValueError("Both vectors must have the same size.") if vector_size_is_zero(v0): @@ -85,7 +85,7 @@ cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except *: res[i] = v0[i] - v1[i] -cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *: +cdef void vector_subvs(double[:] v, double scalar, double[:] res): if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") @@ -94,7 +94,7 @@ cdef void vector_subvs(double[:] v, double scalar, double[:] res) except *: res[i] = v[i] - scalar -cdef void vector_subsv(double scalar, double[:] v, double[:] 
res) except *: +cdef void vector_subsv(double scalar, double[:] v, double[:] res): if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") @@ -103,7 +103,7 @@ cdef void vector_subsv(double scalar, double[:] v, double[:] res) except *: res[i] = scalar - v[i] -cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *: +cdef void vector_scalar_div(double[:] v, double scalar, double[:] res): if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") if scalar == 0.0: @@ -113,7 +113,7 @@ cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except *: for i in range(v.shape[0]): res[i] = v[i] / scalar -cdef unsigned int vector_argmax(double[:] v) except *: +cdef unsigned int vector_argmax(double[:] v): cdef int n_values = v.shape[0] if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") @@ -128,7 +128,7 @@ cdef unsigned int vector_argmax(double[:] v) except *: return max_idx -cdef unsigned int vector_argmin(double[:] v) except *: +cdef unsigned int vector_argmin(double[:] v): cdef int n_values = v.shape[0] if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") @@ -143,7 +143,7 @@ cdef unsigned int vector_argmin(double[:] v) except *: return min_idx -cdef void vector_clip(double[:] v, double min_value, double max_value) except *: +cdef void vector_clip(double[:] v, double min_value, double max_value): cdef int n_values = v.shape[0] if vector_size_is_zero(v): raise ValueError("Vector should contain at least one value.") @@ -156,7 +156,7 @@ cdef void vector_clip(double[:] v, double min_value, double max_value) except *: v[i] = fmax(min_value, fmin(max_value, v[i])) -cdef void vector_copy(double[:] src, double[:] dst) except *: +cdef void vector_copy(double[:] src, double[:] dst): if vector_size_is_zero(src): raise ValueError("Vector should contain at least one value.") cdef int i = 0 From 
0b8830774da18c7ea0c7b898e61efa7d14e9d0a5 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:44:15 -0400 Subject: [PATCH 15/30] Minor additions. --- pomdp_py/algorithms/ccpomcp.pyx | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx index 5e9ddf1..6c731f5 100644 --- a/pomdp_py/algorithms/ccpomcp.pyx +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3 +# cython: language_level=3, profile=True from __future__ import annotations cimport cython @@ -152,6 +152,9 @@ cdef class _CCPolicyActionData: def avg_cost_value(self) -> Vector: return self._avg_cost_value + def __str__(self) -> str: + return f"prob: {self._prob}, cost: {self._cost_value}, avg_cost: {self._avg_cost_value}" + cdef class _CCPolicyModel(PolicyModel): def __init__(self) -> None: @@ -170,8 +173,13 @@ cdef class _CCPolicyModel(PolicyModel): ) self._prob_sum += prob if self._prob_sum > 1.0: + error_str = "" + for action, datum in self._data.items(): + error_str += f" action={action} | datum={datum}\n" raise RuntimeError( - "Too much actions were added. The probability sum is greater than one!" + f"Too much actions were added. The probability sum {self._prob_sum} is greater than one! 
" + "Actions added:\n" + + error_str ) cpdef void clear(_CCPolicyModel self): @@ -549,7 +557,7 @@ cdef class CCPOMCP(POMCP): self._greedy_policy_model.add(action_min, min_prob, vnode[action_min]) self._greedy_policy_model.add(action_max, 1.-min_prob, vnode[action_max]) - cdef void _init_lambda_fn(CCPOMCP self) except *: + cdef void _init_lambda_fn(CCPOMCP self): if self._use_random_lambda: self._lambda = Vector( np.random.uniform( @@ -563,6 +571,9 @@ cdef class CCPOMCP(POMCP): self._lambda.zeros() cpdef _perform_simulation(self, state): + cdef double lambda_vec_max + cdef Action action + super(CCPOMCP, self)._perform_simulation(state=state) # Sample using the greedy policy. This greedy policy corresponds to the first @@ -725,7 +736,7 @@ cdef class CCPOMCP(POMCP): cdef void _update_cost_constraint( CCPOMCP self, Action sampled_action - ) except *: + ): cdef double action_prob, prob_prime cdef Action action_prime cdef list[Action] action_prime_list @@ -764,7 +775,7 @@ cdef double _compute_visits_ratio( double visits_num, double visits_denom, double explore_const, -) except *: +): if visits_denom == 0.0: return DBL_MIN else: @@ -774,7 +785,7 @@ cdef double _compute_visits_ratio( cdef double _get_ccqnode_scalar_cost( VNode node, Action action -) except *: +): if action not in node: raise KeyError(f"Action {action} does not exist in node.") return node[action].cost_value[0] From dd4705ff23ee9ebfd976ab9c3738e9e3e275cde5 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:44:33 -0400 Subject: [PATCH 16/30] Added profiling. 
--- pomdp_py/framework/generalization.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx index 223482b..3ed27c8 100644 --- a/pomdp_py/framework/generalization.pyx +++ b/pomdp_py/framework/generalization.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3 +# cython: language_level=3, profile=True from __future__ import annotations from pomdp_py.framework.basics cimport ( From b946db07cd8abeb08c11ff37ff38a9ba62d4a5cd Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:44:55 -0400 Subject: [PATCH 17/30] Added profiling. --- pomdp_py/utils/cvec.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx index fefd02e..ab3ed30 100644 --- a/pomdp_py/utils/cvec.pyx +++ b/pomdp_py/utils/cvec.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=False, wraparound=False +# cython: language_level=3, boundscheck=False, wraparound=False, profile=True from __future__ import annotations from libc.math cimport fmin, fmax From f166ad87792c78345ccb983b25a84183046c251e Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:45:39 -0400 Subject: [PATCH 18/30] Minor changes. 
--- pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py index 0492831..d674b59 100644 --- a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py +++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py @@ -216,7 +216,7 @@ def main(n_grid: int = 7, n_rocks: int = 8) -> None: total_response, total_discounted_response = test_planner( cc_rocksample=cc_rocksample, ccpomcp=ccpomcp, - nsteps=1, + nsteps=100, discount=k_discount_factor, ) From fa8dac1366499fd048745898cd49b681f72405e9 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 18:44:50 -0400 Subject: [PATCH 19/30] Added the comments to function calls. --- pomdp_py/utils/cvec.pyx | 199 +++++++++++++++++++++++++++++++--------- 1 file changed, 155 insertions(+), 44 deletions(-) diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx index ab3ed30..84e18cb 100644 --- a/pomdp_py/utils/cvec.pyx +++ b/pomdp_py/utils/cvec.pyx @@ -9,45 +9,74 @@ cnp.import_array() ArrayDtype_t = np.float64 + cdef inline Arrayf_t null_vector(unsigned int n_zeros): return np.zeros((n_zeros,), ArrayDtype_t) cpdef inline Arrayf_t list_to_vectord(list[float] values): + """ + Converts a list of numbers to an underlying array data structure. + + Args: + values (list[float]): The values to place in the array. + + Returns: + An array. + """ return np.array(values, ArrayDtype_t) cpdef inline list[float] vectord_to_list(Arrayf_t values): - return values.tolist() - + """ + Converts an array to a list of float. + + Args: + values (array): The values to place in a list. -cdef inline bint vectors_are_not_same_size(double[:] v0, double[:] v1): - return v0.shape[0] != v1.shape[0] + Returns: + A list of float. 
+ """ + return values.tolist() cdef inline bint vector_size_is_zero(double[:] v): return v.shape[0] == 0 +cdef inline bint vectors2_are_not_valid(double[:] v0, double[:] v1): + return ( + v0.shape[0] != v1.shape[0] + | vector_size_is_zero(v0) + ) + + +cdef inline bint vectors3_are_not_valid(double[:] v0, double[:] v1, double[:] v2): + return ( + v0.shape[0] != v1.shape[0] + | v1.shape[0] != v2.shape[0] + | vector_size_is_zero(v0) + ) + + cdef double vector_dot_prod(double[:] v0, double[:] v1): + if vectors2_are_not_valid(v0, v1): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) + cdef unsigned int i = 0 cdef double _sum = 0. - - if vectors_are_not_same_size(v0, v1): - raise ValueError("Both vectors must have the same size.") - if vector_size_is_zero(v0): - raise ValueError("Vectors should contain at least one value.") - for i in range(v0.shape[0]): _sum += (v0[i] * v1[i]) return _sum cdef void vector_add(double[:] v0, double[:] v1, double[:] res): - if vectors_are_not_same_size(v0, v1): - raise ValueError("Both vectors must have the same size.") - if vector_size_is_zero(v0): - raise ValueError("Vectors should contain at least one value.") + if vectors3_are_not_valid(v0, v1, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) cdef unsigned int i = 0 for i in range(v0.shape[0]): @@ -55,19 +84,21 @@ cdef void vector_add(double[:] v0, double[:] v1, double[:] res): cdef void vector_adds(double[:] v, double scalar, double[:] res): - if vector_size_is_zero(v): - raise ValueError("Vector should contain at least one value.") + if vectors2_are_not_valid(v, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." 
+ ) cdef unsigned int i = 0 for i in range(v.shape[0]): res[i] = v[i] + scalar -cdef void vector_muls(double[:] v, double scalar,double[:] res): - if vectors_are_not_same_size(v, res): - raise ValueError("Vectors v and res must be the same size.") - if vector_size_is_zero(v): - raise ValueError("Vector should contain at least one value.") +cdef void vector_muls(double[:] v, double scalar, double[:] res): + if vectors2_are_not_valid(v, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) cdef unsigned int i = 0 for i in range(v.shape[0]): @@ -75,10 +106,10 @@ cdef void vector_muls(double[:] v, double scalar,double[:] res): cdef void vector_sub(double[:] v0, double[:] v1, double[:] res): - if vectors_are_not_same_size(v0, v1): - raise ValueError("Both vectors must have the same size.") - if vector_size_is_zero(v0): - raise ValueError("Vectors should contain at least one value.") + if vectors3_are_not_valid(v0, v1, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) cdef unsigned int i = 0 for i in range(v0.shape[0]): @@ -86,8 +117,10 @@ cdef void vector_sub(double[:] v0, double[:] v1, double[:] res): cdef void vector_subvs(double[:] v, double scalar, double[:] res): - if vector_size_is_zero(v): - raise ValueError("Vector should contain at least one value.") + if vectors2_are_not_valid(v, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) cdef unsigned int i = 0 for i in range(v.shape[0]): @@ -95,8 +128,10 @@ cdef void vector_subvs(double[:] v, double scalar, double[:] res): cdef void vector_subsv(double scalar, double[:] v, double[:] res): - if vector_size_is_zero(v): - raise ValueError("Vector should contain at least one value.") + if vectors2_are_not_valid(v, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." 
+ ) cdef unsigned int i = 0 for i in range(v.shape[0]): @@ -104,10 +139,10 @@ cdef void vector_subsv(double scalar, double[:] v, double[:] res): cdef void vector_scalar_div(double[:] v, double scalar, double[:] res): - if vector_size_is_zero(v): - raise ValueError("Vector should contain at least one value.") - if scalar == 0.0: - raise ZeroDivisionError("Scalar division by zero!") + if vectors2_are_not_valid(v, res): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) cdef unsigned int i = 0 for i in range(v.shape[0]): @@ -157,19 +192,15 @@ cdef void vector_clip(double[:] v, double min_value, double max_value): cdef void vector_copy(double[:] src, double[:] dst): - if vector_size_is_zero(src): - raise ValueError("Vector should contain at least one value.") + if vectors2_are_not_valid(src, dst): + raise ValueError( + "All vectors must have the same size and contain at least one value." + ) cdef int i = 0 for i in range(src.shape[0]): dst[i] = src[i] -cdef void vector_resize(Arrayf_t v, unsigned int new_size): - if new_size <= 0: - raise ValueError("New vector size must be a positive integer.") - v = np.zeros((new_size,), dtype=v.dtype) - - cdef class Vector: """ The Vector class. Provides an implementation of a vector for @@ -192,11 +223,18 @@ cdef class Vector: return vectord_to_list(self._vals) def as_vector(self) -> np.ndarray: + """ + Returns the internal array of values. + """ return self._vals[:] cpdef void clip(Vector self, double min_value, double max_value): """ Clips the values within the value using the given min and max values. + + Args: + min_value (double): The minimum value that will be in the vector. + max_value (double): The maximum value that will be in the vector. """ vector_clip(self._vals, min_value, max_value) @@ -209,46 +247,112 @@ cdef class Vector: cpdef double dot(Vector self, Vector other): """ Performs the dot product between two Vectors. 
+ + Args: + other (Vector): The other vector to perform the dot product with. + + Returns: + The dot product of this and the other Vector. """ return vector_dot_prod(self._vals, other._vals) @staticmethod def fill(value: float, n_values: int) -> Vector: + """ + Creates a new Vector and fills it with the given value. + + Args: + value (float): The value to fill the Vector with. + n_values (int): The number of values to fill the Vector with. + + Returns: + A new Vector with n_values value's. + """ return Vector([value] * n_values) cpdef int len(Vector self): + """ + Returns the length of the Vector. + """ return self._length cpdef unsigned int argmax(Vector self): + """ + Returns the index of the max value. + """ return vector_argmax(self._vals) cpdef unsigned int argmin(Vector self): + """ + Returns the index of the min value. + """ return vector_argmin(self._vals) cpdef double max(Vector self): + """ + Returns the max value. + """ return self._vals[self.argmax()] cpdef double min(Vector self): + """ + Returns the min value. + """ return self._vals[self.argmin()] @staticmethod def null(n_zeros: int) -> Vector: + """ + Creates a null Vector with N zeros. + + Args: + n_zeros (int): The number of zeros to place in the Vector. + + Returns: + A Vector with all zeros. + """ return Vector.fill(0.0, n_zeros) cdef void resize(Vector self, unsigned int new_size): - self._vals = null_vector(new_size) - self._res_buff = null_vector(new_size) - self._length = self._vals.shape[0] + """ + Resizes the Vector with the new specified size. + + Args: + new_size (unsigned int): The new size of the Vector. + + Returns: + + """ + if new_size == self._length: + self.zeros() + else: + self._vals = null_vector(new_size) + self._res_buff = null_vector(new_size) + self._length = self._vals.shape[0] cpdef void zeros(Vector self): + """ + Overwrites the Vector with all zeros. + """ cdef int i if self._length == 1: self._vals[0] = 0. + self._res_buff[0] = 0. 
else: for i in range(self._length): self._vals[i] = 0. + self._res_buff[i] = 0. cpdef double get(Vector self, unsigned int index): + """ + Retrieves the value at the given index. + + Args: + index (unsigned int): The index. + + Returns: + The value at the index. + """ if self._index_is_out_of_range(index): raise IndexError( f"index ({index}) is out-of-range for length {self._length}." @@ -256,6 +360,13 @@ cdef class Vector: return self._vals[index] cpdef void set(Vector self, unsigned int index, double value): + """ + Sets the value at the given index. + + Args: + index (unsigned int): The index. + value (double): The value to set. + """ if self._index_is_out_of_range(index): raise IndexError( f"index ({index}) is out-of-range for length {self._length}." From 19e779ae899cc3785d549b227c19c3cf20fdd00b Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Thu, 18 Apr 2024 18:45:17 -0400 Subject: [PATCH 20/30] Removed except * from function names. --- pomdp_py/utils/cvec.pxd | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd index 43e4dfe..dbf48af 100644 --- a/pomdp_py/utils/cvec.pxd +++ b/pomdp_py/utils/cvec.pxd @@ -10,22 +10,23 @@ ctypedef cnp.ndarray Arrayf_t cdef Arrayf_t null_vector(unsigned int n_zeros) cpdef Arrayf_t list_to_vectord(list[float] values) cpdef list[float] vectord_to_list(Arrayf_t values) -cdef bint vectors_are_not_same_size(double[:] v0, double[:] v1) cdef bint vector_size_is_zero(double[:] v) - -cdef double vector_dot_prod(double[:] v0, double[:] v1) except * -cdef void vector_add(double[:] v0, double[:] v1, double[:] res) except * -cdef void vector_adds(double[:] v, double scalar, double[:] res) except * -cdef void vector_muls(double[:] v, double scalar, double[:] res) except * -cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) except * -cdef void vector_subvs(double[:] v, double scalar, 
double[:] res) except * -cdef void vector_subsv(double scalar, double[:] v, double[:] res) except * -cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) except * - -cdef unsigned int vector_argmax(double[:] v) except * -cdef unsigned int vector_argmin(double[:] v) except * -cdef void vector_clip(double[:] v, double min_value, double max_value) except * -cdef void vector_copy(double[:] src, double[:] dst) except * +cdef bint vectors2_are_not_valid(double[:] v0, double[:] v1) +cdef bint vectors3_are_not_valid(double[:] v0, double[:] v1, double[:] v2) + +cdef double vector_dot_prod(double[:] v0, double[:] v1) +cdef void vector_add(double[:] v0, double[:] v1, double[:] res) +cdef void vector_adds(double[:] v, double scalar, double[:] res) +cdef void vector_muls(double[:] v, double scalar, double[:] res) +cdef void vector_sub(double[:] v0, double[:] v1, double[:] res) +cdef void vector_subvs(double[:] v, double scalar, double[:] res) +cdef void vector_subsv(double scalar, double[:] v, double[:] res) +cdef void vector_scalar_div(double[:] v, double scalar, double[:] res) + +cdef unsigned int vector_argmax(double[:] v) +cdef unsigned int vector_argmin(double[:] v) +cdef void vector_clip(double[:] v, double min_value, double max_value) +cdef void vector_copy(double[:] src, double[:] dst) cdef class Vector: From a7e666cba9dfc2f0f072b964eabfb59fdc502ae6 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:43:35 -0400 Subject: [PATCH 21/30] Added _create_qnode function to reduce code. 
--- pomdp_py/algorithms/ccpomcp.pxd | 4 +- pomdp_py/algorithms/ccpomcp.pyx | 204 ++++++++++++++++++++++---------- pomdp_py/algorithms/po_uct.pxd | 1 + pomdp_py/algorithms/po_uct.pyx | 22 +++- 4 files changed, 161 insertions(+), 70 deletions(-) diff --git a/pomdp_py/algorithms/ccpomcp.pxd b/pomdp_py/algorithms/ccpomcp.pxd index b37d027..230716f 100644 --- a/pomdp_py/algorithms/ccpomcp.pxd +++ b/pomdp_py/algorithms/ccpomcp.pxd @@ -1,4 +1,4 @@ -# cython: language_level=3, profile=True +# cython: language_level=3 from pomdp_py.algorithms.po_uct cimport QNode from pomdp_py.algorithms.pomcp cimport POMCP, VNode @@ -52,7 +52,7 @@ cdef class CCPOMCP(POMCP): cdef _CCPolicyModel _greedy_policy_model cpdef public Action plan(CCPOMCP self, Agent agent) - cpdef _expand_vnode(self, VNode vnode, tuple history, State state = *) + cpdef QNode _create_qnode(self, tuple qnode_params = *) cpdef void _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu) cdef void _init_lambda_fn(CCPOMCP self) cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action) diff --git a/pomdp_py/algorithms/ccpomcp.pyx b/pomdp_py/algorithms/ccpomcp.pyx index 6c731f5..f90d3ae 100644 --- a/pomdp_py/algorithms/ccpomcp.pyx +++ b/pomdp_py/algorithms/ccpomcp.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, profile=True +# cython: language_level=3 from __future__ import annotations cimport cython @@ -21,20 +21,22 @@ from typing import Optional cnp.import_array() -cdef double DBL_MIN = -1e200 -cdef double DBL_MAX = 1e200 +cdef double NEG_INFINITY = -1e10 +cdef double POS_INFINITY = 1e10 cdef class CostModel: """ + A CostModel models the distribution :math:`\Pr(c|s,a,s')` where + :math:`c\in\mathbb{C}`. 
""" def probability( - self, - cost: float | Vector, - state: State, - action: Action, - next_state: State + self, + cost: float | Vector, + state: State, + action: Action, + next_state: State ) -> float: """ probability(self, cost, state, action, next_state) @@ -52,11 +54,11 @@ cdef class CostModel: raise NotImplementedError def sample( - self, - state: State, - action: Action, - next_state: State, - **kwargs, + self, + state: State, + action: Action, + next_state: State, + **kwargs, ) -> float | Vector: """ sample(self, state, action, next_state) @@ -103,6 +105,9 @@ cdef class CCQNode(QNode): @property def avg_cost_value(self) -> Vector: + """ + The average cost value (Vector). + """ return self._avg_cost_value @avg_cost_value.setter @@ -116,6 +121,9 @@ cdef class CCQNode(QNode): @property def cost_value(self) -> Vector: + """ + The cost value. + """ return self._cost_value @cost_value.setter @@ -135,6 +143,10 @@ cdef class CCQNode(QNode): ) cdef class _CCPolicyActionData: + """ + A data structure used internally within _CCPolicyModel. Stores the probability, cost + value, and average cost value for a particular CCQNode. 
+ """ def __init__(self, double prob, Vector cost_value, Vector avg_cost_value): self._prob = prob self._cost_value = cost_value @@ -142,21 +154,31 @@ cdef class _CCPolicyActionData: @property def prob(self) -> float: + """The probability of a corresponding action.""" return self._prob @property def cost_value(self) -> Vector: + """The cost value for a corresponding action.""" return self._cost_value @property def avg_cost_value(self) -> Vector: + """The average cost value for a corresponding action.""" return self._avg_cost_value def __str__(self) -> str: - return f"prob: {self._prob}, cost: {self._cost_value}, avg_cost: {self._avg_cost_value}" + return ( + f"prob: {self._prob}, cost: {self._cost_value}, " + f"avg_cost: {self._avg_cost_value}" + ) cdef class _CCPolicyModel(PolicyModel): + """ + A derived policy class used internally within the CCPOMCP algorithm for sampling + actions and updating the cost constraint. + """ def __init__(self) -> None: super().__init__() self._data = dict() @@ -166,6 +188,16 @@ cdef class _CCPolicyModel(PolicyModel): return self._prob_sum != 1.0 cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node): + """ + Adds an action, its probability, and appropriate information from a CCQNode. + This method also raises an exception if the sum of the probabilities sum over + 1. + + Args: + action (Action): The action. + prob (double): The probability. + node (CCQNode): The CCQNode. + """ self._data[action] = _CCPolicyActionData( prob=prob, cost_value=node.cost_value, @@ -177,16 +209,27 @@ cdef class _CCPolicyModel(PolicyModel): for action, datum in self._data.items(): error_str += f" action={action} | datum={datum}\n" raise RuntimeError( - f"Too much actions were added. The probability sum {self._prob_sum} is greater than one! " + "Too much actions were added. " + f"The probability sum {self._prob_sum} is greater than one! 
" "Actions added:\n" + error_str ) cpdef void clear(_CCPolicyModel self): + """Clears the internal data structures.""" self._data.clear() self._prob_sum = 0.0 cpdef Vector action_avg_cost(_CCPolicyModel self, Action action): + """ + Returns the average cost value for a given action. + + Args: + action (Action): The action. + + Returns: + The average cost value (Vector). + """ if self._total_prob_is_not_one(): raise RuntimeError( "Tried to get action avg cost when total probability != 1.0." @@ -196,6 +239,15 @@ cdef class _CCPolicyModel(PolicyModel): return self._data[action].cost_value cpdef Vector action_cost_value(_CCPolicyModel self, Action action): + """ + Returns the cost value for a given action. + + Args: + action (Action): The action. + + Returns: + The cost value (Vector). + """ if self._total_prob_is_not_one(): raise RuntimeError( "Tried to get action cost value when total probability != 1.0." @@ -205,6 +257,16 @@ cdef class _CCPolicyModel(PolicyModel): return self._data[action].avg_cost_value cdef public float probability(_CCPolicyModel self, Action action, State state): + """ + Returns the probability for a given action. + + Args: + action (Action): The action. + state (State): Currently ignored. + + Returns: + The probability (float). + """ if self._total_prob_is_not_one(): raise RuntimeError( "Tried to get action probability when total probability != 1.0." @@ -214,6 +276,15 @@ cdef class _CCPolicyModel(PolicyModel): return self._data[action].prob cdef public Action sample(_CCPolicyModel self, State state): + """ + Samples an action using the underlying probability distribution. + + Args: + state (State): Currently ignored. + + Returns: + The sampled action (Action). 
+ """ if self._prob_sum != 1.0: raise RuntimeError("Tried to sample with a total probability != 1.0.") @@ -221,7 +292,21 @@ cdef class _CCPolicyModel(PolicyModel): return list(self._data.keys())[0] return np.random.choice(np.array(list(self._data.keys()), dtype=object)) - def get_all_actions(self, state: Optional[State] = None, history: Optional[tuple] = None): + def get_all_actions( + self, + state: Optional[State] = None, + history: Optional[tuple] = None + ): + """ + Returns all the stored actions. + + Args: + state (State): Currently ignored. + history (tuple): Currently ignored. + + Returns: + The list of actions (list[Action]). + """ return list(self._data.keys()) @@ -307,16 +392,15 @@ cdef class CCPOMCP(POMCP): # Initialize lambda, cost constraint, and cost value init. if isinstance(cost_constraint, list): - self._n_constraints = len(cost_constraint) if len(cost_value_init) != len(cost_value_init): raise ValueError( "The cost constraint and cost value init must have the same length." ) else: - self._n_constraints = 1 cost_constraint = [cost_constraint] cost_value_init = [cost_value_init] if cost_value_init is not None else [0.0] + self._n_constraints = len(cost_constraint) self._lambda = Vector.null(self._n_constraints) self._cost_value_init = list(cost_value_init) self._cost_constraint = Vector(cost_constraint) @@ -333,6 +417,15 @@ cdef class CCPOMCP(POMCP): self._greedy_policy_model = _CCPolicyModel() cpdef public Action plan(CCPOMCP self, Agent agent): + """ + Determines the next action to perform. + + Args: + agent (ResponseAgent): The agent used to plan. + + Returns: + The next action. + """ cdef Action action cdef double time_taken cdef int sims_count @@ -362,8 +455,7 @@ cdef class CCPOMCP(POMCP): # Then get the policy distribution, sample from it, # and update the cost constraint. 
- _, time_taken, sims_count = self._search() - action = self._greedy_policy_model.sample(state=None) + action, time_taken, sims_count = self._search() self._update_cost_constraint(action) # Update stats. @@ -372,27 +464,20 @@ cdef class CCPOMCP(POMCP): return action - cpdef _expand_vnode( - CCPOMCP self, - VNode vnode, - tuple history, - State state = None, + cpdef QNode _create_qnode( + self, + tuple qnode_params = tuple() ): - cdef Action action + cdef int num_visits_init + cdef double value_init + cdef list[float] cost_value_init - for action in self._agent.valid_actions(state=state, history=history): - if vnode[action] is None: - vnode[action] = CCQNode( - self._num_visits_init, self._value_init, self._cost_value_init - ) + if len(qnode_params) == 3: + # Expand the tuple and set the new CCQNode. + num_visits_init, value_init, cost_value_init = qnode_params + return CCQNode(num_visits_init, value_init, cost_value_init) - if self._action_prior is not None: - # Using action prior; special values are set; - for preference in self._action_prior.get_preferred_actions(state, history): - action, num_visits_init, value_init = preference - vnode[action] = CCQNode( - self._num_visits_init, self._value_init, self._cost_value_init - ) + return CCQNode(self._num_visits_init, self._value_init, self._cost_value_init) @cython.boundscheck(False) @cython.wraparound(False) @@ -509,8 +594,8 @@ cdef class CCPOMCP(POMCP): # Find a_max and a_min, the actions with the max and min scalar costs # from the list of best actions. - max_cost_value = DBL_MIN - min_cost_value = DBL_MAX + max_cost_value = NEG_INFINITY + min_cost_value = POS_INFINITY for i in range(n_best_actions): cost_value = _get_ccqnode_scalar_cost(vnode, best_action_list[i]) @@ -524,14 +609,14 @@ cdef class CCPOMCP(POMCP): max_cost_value = cost_value # Sanity checks. - if max_cost_value == DBL_MIN: + if max_cost_value == NEG_INFINITY: raise RuntimeError( - f"Max cost value ({max_cost_value}) must be more than {DBL_MIN}. 
" + f"Max cost value ({max_cost_value}) must be more than {NEG_INFINITY}. " f"Note: there are {n_best_actions} best actions. An error exists!" ) - if min_cost_value == DBL_MAX: + if min_cost_value == POS_INFINITY: raise RuntimeError( - f"Min cost value ({min_cost_value}) must be less than {DBL_MAX}. " + f"Min cost value ({min_cost_value}) must be less than {POS_INFINITY}. " f"Note: there are {n_best_actions} best actions. An error exists!" ) @@ -558,15 +643,14 @@ cdef class CCPOMCP(POMCP): self._greedy_policy_model.add(action_max, 1.-min_prob, vnode[action_max]) cdef void _init_lambda_fn(CCPOMCP self): + cdef cnp.ndarray rand_vec + cdef double value + cdef int i if self._use_random_lambda: - self._lambda = Vector( - np.random.uniform( - 0.00001, - 1.0, - size=self._cost_constraint.len() - ).tolist() - ) - + rand_vec = np.random.uniform(0.00001, 1.0, size=self._n_constraints) + for i in range(self._n_constraints): + value = rand_vec[i] + self._lambda.set(i, value) else: self._lambda.zeros() @@ -625,23 +709,18 @@ cdef class CCPOMCP(POMCP): cdef Action action cdef double time_taken cdef int sims_count - # cdef PolicyModel policy_dist # Initialize the lambda vector. self._init_lambda_fn() - # Run the _search(...) method in the super class. - action, time_taken, sims_count = super(CCPOMCP, self)._search() + # Run the _search(...) method in the super class. Ignore this action. + _, time_taken, sims_count = super(CCPOMCP, self)._search() # After the search times out, create a policy using the greedy method. # This greedy policy corresponds to the last call in the search(h_0) function. 
- # policy_dist = self._greedy_policy( - # self._agent.tree, - # 0.0, - # self._nu, - # ) self._greedy_policy(self._agent.tree, 0.0, self._nu) - return None, time_taken, sims_count + action = self._greedy_policy_model.sample(state=None) + return action, time_taken, sims_count cpdef Response _simulate( CCPOMCP self, @@ -656,7 +735,6 @@ cdef class CCPOMCP(POMCP): cdef int nsteps = 1 cdef Action action cdef State next_state - cdef _CCPolicyModel policy_dist if depth > self._max_depth: return self._null_response @@ -777,7 +855,7 @@ cdef double _compute_visits_ratio( double explore_const, ): if visits_denom == 0.0: - return DBL_MIN + return NEG_INFINITY else: return explore_const * sqrt(visits_num / visits_denom) diff --git a/pomdp_py/algorithms/po_uct.pxd b/pomdp_py/algorithms/po_uct.pxd index 6f66fff..f2b83cd 100644 --- a/pomdp_py/algorithms/po_uct.pxd +++ b/pomdp_py/algorithms/po_uct.pxd @@ -42,6 +42,7 @@ cdef class POUCT(Planner): State state, tuple history, VNode root, QNode parent, Observation observation, int depth) + cpdef QNode _create_qnode(self, tuple qnode_params = *) cpdef _expand_vnode(self, VNode vnode, tuple history, State state=*) cpdef _rollout(self, State state, tuple history, VNode root, int depth) cpdef Action _ucb(self, VNode root) diff --git a/pomdp_py/algorithms/po_uct.pyx b/pomdp_py/algorithms/po_uct.pyx index c0f0266..f11533d 100644 --- a/pomdp_py/algorithms/po_uct.pyx +++ b/pomdp_py/algorithms/po_uct.pyx @@ -295,6 +295,20 @@ cdef class POUCT(Planner): """ self._rollout_policy = rollout_policy + cpdef QNode _create_qnode( + self, + tuple qnode_params = tuple() + ): + cdef int num_visits_init + cdef double value_init + + if len(qnode_params) == 2: + # Expand the tuple and set the new QNode. 
+ num_visits_init, value_init = qnode_params + return QNode(num_visits_init, value_init) + + return QNode(self._num_visits_init, self._value_init) + cpdef _expand_vnode(self, VNode vnode, tuple history, State state=None): cdef Action action cdef tuple preference @@ -303,17 +317,15 @@ cdef class POUCT(Planner): for action in self._agent.valid_actions(state=state, history=history): if vnode[action] is None: - history_action_node = QNode(self._num_visits_init, - self._value_init) + history_action_node = self._create_qnode() vnode[action] = history_action_node if self._action_prior is not None: # Using action prior; special values are set; for preference in \ self._action_prior.get_preferred_actions(state, history): - action, num_visits_init, value_init = preference - history_action_node = QNode(num_visits_init, - value_init) + action = preference[0] + history_action_node = self._create_qnode(preference[1:]) vnode[action] = history_action_node cpdef _search(self): From 1a99ff765cbbbe643d0fca5d598bb9247b770dde Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:43:57 -0400 Subject: [PATCH 22/30] Minor update. --- pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py index d674b59..fcb0252 100644 --- a/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py +++ b/pomdp_py/problems/cc_rocksample/cc_rocksample_problem.py @@ -1,7 +1,6 @@ from __future__ import annotations import copy import math -import numpy as np import pomdp_py from pomdp_py.problems.rocksample.rocksample_problem import ( RockSampleProblem, From 57c0568809388505999d3b10fe45721ca4b1d741 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:44:49 -0400 Subject: [PATCH 23/30] Removed unneeded test. 
--- tests/test_framework_generalization.py | 29 -------------------------- 1 file changed, 29 deletions(-) delete mode 100644 tests/test_framework_generalization.py diff --git a/tests/test_framework_generalization.py b/tests/test_framework_generalization.py deleted file mode 100644 index eda6069..0000000 --- a/tests/test_framework_generalization.py +++ /dev/null @@ -1,29 +0,0 @@ -from pomdp_py.framework.generalization import Vector, RewardCost - -description = "testing framework generalization" - - -def test_assign(): - v = Vector() - assert v == [0.] - - v = Vector((2, 4, 8)) - assert v == [2., 4., 8.] - - v = Vector() - assert v != [1.] - - -def test_add(): - r = RewardCost(0., Vector([0., 10.])) + RewardCost(10., Vector([90., 13.])) - assert r.reward == 10. - assert r.cost == [90., 23.] - - -def run(): - test_assign() - test_add() - - -if __name__ == "__main__": - run() \ No newline at end of file From ce008e8845d6d5f6b36de3d17deb82d2c1210a42 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:58:25 -0400 Subject: [PATCH 24/30] Added code comments. 
--- pomdp_py/framework/generalization.pxd | 2 +- pomdp_py/framework/generalization.pyx | 54 +++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/pomdp_py/framework/generalization.pxd b/pomdp_py/framework/generalization.pxd index 99f254b..cfba56c 100644 --- a/pomdp_py/framework/generalization.pxd +++ b/pomdp_py/framework/generalization.pxd @@ -1,4 +1,4 @@ -# cython: language_level=3, profile=True +# cython: language_level=3 from __future__ import annotations from pomdp_py.framework.basics cimport ( diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx index 3ed27c8..baac0c3 100644 --- a/pomdp_py/framework/generalization.pyx +++ b/pomdp_py/framework/generalization.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, profile=True +# cython: language_level=3 from __future__ import annotations from pomdp_py.framework.basics cimport ( @@ -28,10 +28,12 @@ cdef class Response: """ def copy(self) -> Response: + """Returns a copy of a Response.""" raise NotImplementedError @staticmethod def null() -> Response: + """Returns a null Response. This is equivalent to a 'zero' reward.""" raise NotImplementedError def __add__(self, other: Response) -> Response: @@ -78,16 +80,28 @@ cdef class ResponseModel: pass def null_response(self) -> Response: + """Returns a null Response.""" raise NotImplementedError def sample(self, state: State, action: Action, next_state: State) -> Response: + """ + Samples a response given the state, action, and next state. + + Args: + state (State): The state. + action (Action): The action. + next_state (State): The next state. + + Returns: + The sampled response (Response). + """ raise NotImplementedError cdef class ResponseAgent(Agent): """ A `ResponseAgent` behaves the same as an `Agent` with one difference: a - `ReponseAgent` adds a `ResponseModel`. The `ResponseAgent` also provides direct + `ResponseAgent` adds a `ResponseModel`. 
The `ResponseAgent` can also provide direct access to the models maintained in the `ResponseModel` to reduce the wordiness of the code. """ @@ -131,6 +145,7 @@ cdef class ResponseAgent(Agent): @property def response_model(self) -> ResponseModel: + """Returns the response model.""" if self._response_model is None: raise ValueError( "response_model is None. Call set_response_model to set a model." @@ -138,6 +153,12 @@ cdef class ResponseAgent(Agent): return self._response_model def set_response_model(self, response_model: ResponseModel) -> None: + """ + Sets the response model. + + Args: + response_model (ResponseModel): The response model. + """ if not isinstance(response_model, ResponseModel): raise TypeError( f"model must be type ResponseModel, but got type {type(response_model)}." @@ -146,6 +167,12 @@ cdef class ResponseAgent(Agent): cdef class ResponseEnvironment(Environment): + """ + A `ResponseEnvironment` is the same as an `Environment` with one difference: a + `ResponseEnvironment` adds a `ResponseModel`. The `ResponseEnvironment` can also + provide direct access to the models maintained in the `ResponseModel` to reduce + the wordiness of the code. + """ def __init__( self, @@ -175,8 +202,7 @@ cdef class ResponseEnvironment(Environment): @property def response_model(self) -> ResponseModel: """ - Returns: - The ResponseModel. + Returns the ResponseModel. """ return self._response_model @@ -194,9 +220,6 @@ cdef class ResponseEnvironment(Environment): response_model (ResponseModel): The response model. blackbox_model (BlackboxModel): Provided when the transition model and response model are not available. - - Returns: - None """ super().set_models( transition_model=transition_model, @@ -257,6 +280,23 @@ cpdef tuple[State, Observation, Response, int] sample_generative_model_with_resp Response null_response, float discount_factor = 1.0 ): + """ + Samples the next state, observation, and response from the underlying models. 
It also + returns the number of steps performed during sampling. + + Args: + T (TransitionModel): The transition model. + O (ObservationModel): The observation model. + R (ResponseModel): The response model. + state (State): The current state. + action (Action): The action. + null_response (Response): A null response. + discount_factor (float): The discount factor. Default = 1. + + Returns: + A tuple of the next state (State), observation (Observation), + response (Response), and the number of steps performed (int). + """ cdef State next_state cdef Observation observation cdef Response response = null_response From 915b4c0737ae02086f329f10ba898461e5a26e40 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:59:05 -0400 Subject: [PATCH 25/30] Removed profiling. --- pomdp_py/utils/cvec.pxd | 2 +- pomdp_py/utils/cvec.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pomdp_py/utils/cvec.pxd b/pomdp_py/utils/cvec.pxd index dbf48af..acd8c56 100644 --- a/pomdp_py/utils/cvec.pxd +++ b/pomdp_py/utils/cvec.pxd @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=False, wraparound=False, profile=True +# cython: language_level=3, boundscheck=False, wraparound=False from __future__ import annotations cimport numpy as cnp diff --git a/pomdp_py/utils/cvec.pyx b/pomdp_py/utils/cvec.pyx index 84e18cb..36a603b 100644 --- a/pomdp_py/utils/cvec.pyx +++ b/pomdp_py/utils/cvec.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=False, wraparound=False, profile=True +# cython: language_level=3, boundscheck=False, wraparound=False from __future__ import annotations from libc.math cimport fmin, fmax From 900b6a8ed3ca87bf6e1cce44c67aa4c946ec7d56 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 00:59:53 -0400 Subject: [PATCH 26/30] Removed profiling code. 
--- pomdp_py/problems/cc_rocksample/profile_code.py | 11 ----------- pomdp_py/problems/rocksample/profile_code.py | 11 ----------- 2 files changed, 22 deletions(-) delete mode 100644 pomdp_py/problems/cc_rocksample/profile_code.py delete mode 100644 pomdp_py/problems/rocksample/profile_code.py diff --git a/pomdp_py/problems/cc_rocksample/profile_code.py b/pomdp_py/problems/cc_rocksample/profile_code.py deleted file mode 100644 index bd9dfb4..0000000 --- a/pomdp_py/problems/cc_rocksample/profile_code.py +++ /dev/null @@ -1,11 +0,0 @@ -import pstats, cProfile - -import pyximport -pyximport.install() - -import cc_rocksample_problem - -cProfile.runctx("cc_rocksample_problem.main()", globals(), locals(), "fastProfile.prof") - -s = pstats.Stats("fastProfile.prof") -s.strip_dirs().sort_stats("tottime").print_stats() diff --git a/pomdp_py/problems/rocksample/profile_code.py b/pomdp_py/problems/rocksample/profile_code.py deleted file mode 100644 index fc612f0..0000000 --- a/pomdp_py/problems/rocksample/profile_code.py +++ /dev/null @@ -1,11 +0,0 @@ -import pstats, cProfile - -import pyximport -pyximport.install() - -import rocksample_problem - -cProfile.runctx("rocksample_problem.main()", globals(), locals(), "fastProfile.prof") - -s = pstats.Stats("fastProfile.prof") -s.strip_dirs().sort_stats("tottime").print_stats() From cc2e21808e16927392b8202ba90c6a2b20220324 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 01:09:01 -0400 Subject: [PATCH 27/30] Changed nsteps to 100. 
--- pomdp_py/problems/rocksample/rocksample_problem.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index c920602..f1b7671 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -556,8 +556,7 @@ def main(): rollout_policy=rocksample.agent.policy_model, num_visits_init=1, ) - # tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) - tt, ttd = test_planner(rocksample, pomcp, nsteps=1, discount=0.95) + tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) if __name__ == "__main__": From cad3e92a502d1a0f6fef5883e0cdfa98d43d7938 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 01:28:06 -0400 Subject: [PATCH 28/30] Corrected the description for the Response class. --- pomdp_py/framework/generalization.pyx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pomdp_py/framework/generalization.pyx b/pomdp_py/framework/generalization.pyx index baac0c3..6008de6 100644 --- a/pomdp_py/framework/generalization.pyx +++ b/pomdp_py/framework/generalization.pyx @@ -19,12 +19,10 @@ from typing import Optional cdef class Response: """ - A Response class maintains variables within a dictionary. - However, subclasses of Response can provide access to the - dictionary variables using the dot (.) operator. Currently, - this class can handle arithmetic and comparison operations. - However, if special operations will need to be performed, - these operations need to be handled in the subclass. + A Response class serves as the output of ResponseModel. This + class should be derived for specific problems. All operations, + including arithmetic, comparison, null, and copy must be + implemented in subclasses. 
""" def copy(self) -> Response: From 835449de18ac20d99faf043c1f6b9921c7583547 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 19 Apr 2024 01:30:39 -0400 Subject: [PATCH 29/30] Removed print statement used for debugging. --- pomdp_py/problems/rocksample/rocksample_problem.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index f1b7671..c2ab772 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -330,7 +330,6 @@ class RSPolicyModel(pomdp_py.RolloutPolicy): def __init__(self, n, k): check_actions = set({CheckAction(rock_id) for rock_id in range(k)}) - print(check_actions) self._move_actions = {MoveEast, MoveWest, MoveNorth, MoveSouth} self._other_actions = {SampleAction()} | check_actions self._all_actions = self._move_actions | self._other_actions From cf334207c93469a339c2f7d1ca1fac60c0bc91d4 Mon Sep 17 00:00:00 2001 From: Troi Williams <40696868+troiwill@users.noreply.github.com> Date: Fri, 17 May 2024 12:48:04 -0400 Subject: [PATCH 30/30] Fixed issue with missing numpy dependency during pip install. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a27e90..61e5270 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools", "wheel", "Cython>=3.0.8"] +requires = ["setuptools", "wheel", "Cython>=3.0.8", "numpy>=1.18.0"] build-backend = "setuptools.build_meta" [project]