diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml new file mode 100644 index 00000000..6e540c97 --- /dev/null +++ b/.github/workflows/build_wheel.yml @@ -0,0 +1,45 @@ +name: pomdp-py build dev-latest wheel + +on: + push: + tags: + - 'vdev-latest' # trigger on the tag vdev-latest + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: true + steps: + + - name: Set release name with date + id: set_release_name + run: echo "RELEASE_NAME=pomdp-py (dev-latest $(date +'%Y-%m-%d'))" >> $GITHUB_ENV + + - uses: actions/checkout@v4 + with: + ref: 'dev-latest' + + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + + - name: Run release script + working-directory: . + run: | + cd scripts + ./pomdp_py_release.sh + + - name: Release artifact + uses: softprops/action-gh-release@v1 + with: + name: ${{ env.RELEASE_NAME }} + files: dist/*.whl diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index cf046b3f..38fdf8f1 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -2,9 +2,9 @@ name: pomdp-py repo macOS build on: push: - branches: [ main ] + branches: [ main, dev-latest ] pull_request: - branches: [ main ] + branches: [ main, dev-latest ] concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/pre_commit.yml b/.github/workflows/pre_commit.yml index 8cb7ed2c..b0029f92 100644 --- a/.github/workflows/pre_commit.yml +++ b/.github/workflows/pre_commit.yml @@ -7,7 +7,7 @@ on: pull_request: branches: - main - + - dev-latest jobs: pre-commit: runs-on: ubuntu-latest diff --git a/.github/workflows/pypi_deployment.yml b/.github/workflows/pypi_deployment.yml new file mode 100644 index 00000000..a074385e --- /dev/null +++ b/.github/workflows/pypi_deployment.yml @@ -0,0 +1,74 @@ +name: pomdp-py package pypi deployment + +on: + push: + tags: + - 'v*' + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + pypi-publish: + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + environment: + name: testpypi + url: https://test.pypi.org/p/pomdp-py/ + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + + steps: + - name: Set Variables + if: startsWith(github.ref, 'refs/tags/v') + run: | # Run the command within a multiline string + echo "VERSION=\"Version ${{ github.ref_name }}\"" >> $GITHUB_ENV + echo "TRIGGER_FOR_TEST_PYPI=${{ github.ref == 'refs/tags/vdev-latest' }}" >> $GITHUB_ENV + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/checkout@v4 + with: + ref: 'dev-latest' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + + - name: Run release script + working-directory: . 
+ run: | + cd scripts + ./pomdp_py_release.sh ${{ vars.RELEASE_ATTEMPT_COUNT }} + + - name: Upload to Test PyPi + uses: pypa/gh-action-pypi-publish@release/v1 + if: ${{ env.TRIGGER_FOR_TEST_PYPI == 'true' }} + with: + # This will upload the wheels under dist/ to TestPyPI + user: __token__ + password: ${{ secrets.TEST_PYPI_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + skip-existing: true + + - name: Upload to PyPi + uses: pypa/gh-action-pypi-publish@release/v1 + if: ${{ env.TRIGGER_FOR_TEST_PYPI != 'true' }} + with: + # This will upload the wheels under dist/ to PyPI + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} + repository_url: https://upload.pypi.org/legacy/ + skip-existing: true + + - name: Release artifact + uses: softprops/action-gh-release@v1 + with: + name: ${{ env.VERSION }} + files: dist/*.whl diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 15c55d0e..f976d61c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,9 +5,9 @@ name: pomdp-py package on: push: - branches: [ "main" ] + branches: [ "main", "dev-latest" ] pull_request: - branches: [ "main" ] + branches: [ "main", "dev-latest" ] jobs: build: @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 42bad4e1..b30e0d6e 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -2,9 +2,9 @@ name: pomdp-py repo ubuntu build on: push: - branches: [ main ] + branches: [ main, dev-latest ] pull_request: - branches: [ main ] + branches: [ main, dev-latest ] concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 694f23d1..b81c31ec 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,20 @@ Changelog ========= Best viewed on `the website `_. +Version 1.3.5 (03/12/2024) +-------------------------- +* Refactored :code:`_search` in :py:mod:`~pomdp_py.algorithms.po_uct.POUCT` to be more modular +* More fields made accessible for :py:mod:`~pomdp_py.algorithms.po_uct.POUCT` +* Allows initializing :py:mod:`~pomdp_py.framework.basics.Agent` and :py:mod:`~pomdp_py.framework.basics.Environment` without all models ready, and providing those models later (or updating them) via :code:`set_models` +* Minor function renaming for :py:mod:`~pomdp_py.framework.basics.Option` +* Added :py:mod:`~pomdp_py.framework.oopomdp.DictState` +* Improved modularity and documentation for the RockSample example +* :py:mod:`~pomdp_py.representations.distribution.particles.WeightedParticles` now hashable if frozen. +* Changed :code:`det_dict_hash` to not use sorting +* Added a GHA workflow for automatically building and releasing wheels and uploading them to PyPI +* Repo convention: default branch renamed from :code:`master` to :code:`main`, and :code:`dev-latest` is the development branch.
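As a quick illustration of the deferred-model workflow highlighted in the 1.3.5 notes above, here is a minimal sketch (the two-state transition table and its probabilities are made up for illustration, mirroring tests/test_framework_basics.py added later in this diff):

import pomdp_py

# An Agent can now be constructed from just an initial belief...
agent = pomdp_py.Agent(pomdp_py.Histogram({"hungry": 0.5, "full": 0.5}))

# ...and its models can be supplied (or replaced) afterwards via set_models.
transition = pomdp_py.TabularTransitionModel({
    # (state, action, next_state) -> probability; illustrative numbers only
    ("hungry", "eat", "full"): 0.7,
    ("hungry", "eat", "hungry"): 0.3,
    ("full", "eat", "full"): 0.9,
    ("full", "eat", "hungry"): 0.1,
})
agent.set_models(
    transition_model=transition,
    policy_model=pomdp_py.UniformPolicyModel(["eat", "sleep"]),
)
assert agent.transition_model.probability("full", "hungry", "eat") == 0.7  # next_state, state, action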
+ + Version 1.3.4 (01/26/2024) -------------------------- * Modernize repo without introducing new features. diff --git a/MANIFEST.in b/MANIFEST.in index f0f5e423..c9d53d8c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,26 +1,27 @@ -include ./pomdp_py/utils/cython_utils.pxd -include ./pomdp_py/algorithms/po_uct.pxd -include ./pomdp_py/algorithms/po_rollout.pxd -include ./pomdp_py/algorithms/pomcp.pxd -include ./pomdp_py/algorithms/value_iteration.pxd -include ./pomdp_py/representations/belief/particles.pxd include ./pomdp_py/representations/distribution/particles.pxd -include ./pomdp_py/representations/distribution/gaussian.pxd include ./pomdp_py/representations/distribution/histogram.pxd +include ./pomdp_py/representations/distribution/gaussian.pxd +include ./pomdp_py/representations/belief/particles.pxd +include ./pomdp_py/utils/cython_utils.pxd +include ./pomdp_py/framework/oopomdp.pxd include ./pomdp_py/framework/planner.pxd include ./pomdp_py/framework/basics.pxd -include ./pomdp_py/framework/oopomdp.pxd -include ./pomdp_py/utils/cython_utils.pyx +include ./pomdp_py/algorithms/po_rollout.pxd +include ./pomdp_py/algorithms/pomcp.pxd +include ./pomdp_py/algorithms/po_uct.pxd +include ./pomdp_py/algorithms/value_iteration.pxd include ./pomdp_py/problems/rocksample/cythonize/rocksample_problem.pyx include ./pomdp_py/problems/tiger/cythonize/tiger_problem.pyx -include ./pomdp_py/algorithms/value_iteration.pyx -include ./pomdp_py/algorithms/pomcp.pyx -include ./pomdp_py/algorithms/po_rollout.pyx -include ./pomdp_py/algorithms/po_uct.pyx -include ./pomdp_py/representations/belief/particles.pyx -include ./pomdp_py/representations/distribution/particles.pyx include ./pomdp_py/representations/distribution/histogram.pyx include ./pomdp_py/representations/distribution/gaussian.pyx -include ./pomdp_py/framework/basics.pyx +include ./pomdp_py/representations/distribution/particles.pyx +include ./pomdp_py/representations/belief/particles.pyx +include ./pomdp_py/utils/cython_utils.pyx include ./pomdp_py/framework/planner.pyx +include ./pomdp_py/framework/basics.pyx include ./pomdp_py/framework/oopomdp.pyx +include ./pomdp_py/algorithms/po_rollout.pyx +include ./pomdp_py/algorithms/value_iteration.pyx +include ./pomdp_py/algorithms/pomcp.pyx +include ./pomdp_py/algorithms/po_uct.pyx +include ./thirdparty/pomdp-solve/src/pomdp-solve diff --git a/pomdp_py/algorithms/po_uct.pxd b/pomdp_py/algorithms/po_uct.pxd index 85ba55dd..6f66fffd 100644 --- a/pomdp_py/algorithms/po_uct.pxd +++ b/pomdp_py/algorithms/po_uct.pxd @@ -32,6 +32,12 @@ cdef class POUCT(Planner): cdef int _pbar_update_interval cpdef _search(self) + cdef _initialize_progress_bar(self) + cpdef _perform_simulation(self, state) + cdef bint _should_stop(self, int sims_count, double start_time) + cdef _update_progress(self, pbar, int sims_count, double start_time) + cdef _finalize_progress_bar(self, pbar) + cpdef _simulate(POUCT self, State state, tuple history, VNode root, QNode parent, Observation observation, int depth) diff --git a/pomdp_py/algorithms/po_uct.pyx b/pomdp_py/algorithms/po_uct.pyx index 2b16bbcb..c0f02665 100644 --- a/pomdp_py/algorithms/po_uct.pyx +++ b/pomdp_py/algorithms/po_uct.pyx @@ -186,7 +186,7 @@ cdef class POUCT(Planner): max_depth=5, planning_time=-1., num_sims=-1, discount_factor=0.9, exploration_const=math.sqrt(2), num_visits_init=0, value_init=0, - rollout_policy=RandomRollout(), + rollout_policy=None, action_prior=None, show_progress=False, pbar_update_interval=5): self._max_depth = max_depth self._planning_time = 
planning_time @@ -222,11 +222,39 @@ cdef class POUCT(Planner): """Returns the amount of time (seconds) ran for the last `plan` call.""" return self._last_planning_time + @property + def max_depth(self): + return self._max_depth + + @property + def num_visits_init(self): + return self._num_visits_init + + @property + def discount_factor(self): + return self._discount_factor + + @property + def value_init(self): + return self._value_init + + @property + def action_prior(self): + return self._action_prior + + @property + def rollout_policy(self): + return self._rollout_policy + cpdef public plan(self, Agent agent): cdef Action action cdef float time_taken cdef int sims_count + if self._rollout_policy is None: + raise ValueError("rollout_policy unset. Please call set_rollout_policy, " + "or pass in a rollout_policy upon initialization") + self._agent = agent # switch focus on planning for the given agent if not hasattr(self._agent, "tree"): self._agent.add_attr("tree", None) @@ -288,55 +316,46 @@ cdef class POUCT(Planner): value_init) vnode[action] = history_action_node - cpdef _search(self): - cdef State state - cdef Action best_action cdef int sims_count = 0 - cdef float time_taken = 0 - cdef float best_value - cdef bint stop_by_sims = self._num_sims > 0 - cdef object pbar + cdef double start_time, time_taken + pbar = self._initialize_progress_bar() + start_time = time.time() + while not self._should_stop(sims_count, start_time): + state = self._agent.sample_belief() + self._perform_simulation(state) + sims_count += 1 + self._update_progress(pbar, sims_count, start_time) + + self._finalize_progress_bar(pbar) + best_action = self._agent.tree.argmax() + time_taken = time.time() - start_time + return best_action, time_taken, sims_count + + cdef _initialize_progress_bar(self): if self._show_progress: - if stop_by_sims: - total = int(self._num_sims) - else: - total = self._planning_time - pbar = tqdm(total=total) + total = self._num_sims if self._num_sims > 0 else self._planning_time + return tqdm(total=total) - start_time = time.time() - while True: - ## Note: the tree node with () history will have - ## the init belief given to the agent. 
- state = self._agent.sample_belief() - self._simulate(state, self._agent.history, self._agent.tree, - None, None, 0) - sims_count +=1 - time_taken = time.time() - start_time - - if self._show_progress and sims_count % self._pbar_update_interval == 0: - if stop_by_sims: - pbar.n = sims_count - else: - pbar.n = time_taken - pbar.refresh() - - if stop_by_sims: - if sims_count >= self._num_sims: - break - else: - if time_taken > self._planning_time: - if self._show_progress: - pbar.n = self._planning_time - pbar.refresh() - break + cpdef _perform_simulation(self, state): + self._simulate(state, self._agent.history, self._agent.tree, None, None, 0) + + cdef bint _should_stop(self, int sims_count, double start_time): + cdef float time_taken = time.time() - start_time + if self._num_sims > 0: + return sims_count >= self._num_sims + else: + return time_taken > self._planning_time + cdef _update_progress(self, pbar, int sims_count, double start_time): if self._show_progress: - pbar.close() + pbar.n = sims_count if self._num_sims > 0 else round(time.time() - start_time, 2) + pbar.refresh() - best_action = self._agent.tree.argmax() - return best_action, time_taken, sims_count + cdef _finalize_progress_bar(self, pbar): + if self._show_progress: + pbar.close() cpdef _simulate(POUCT self, State state, tuple history, VNode root, QNode parent, @@ -345,7 +364,7 @@ cdef class POUCT(Planner): return 0 if root is None: if self._agent.tree is None: - root = self._VNode(agent=self._agent, root=True) + root = self._VNode(root=True) self._agent.tree = root if self._agent.tree.history != self._agent.history: raise ValueError("Unable to plan for the given history.") @@ -427,7 +446,7 @@ cdef class POUCT(Planner): reward = self._agent.reward_model.sample(state, action, next_state) return next_state, observation, reward - def _VNode(self, agent=None, root=False, **kwargs): + def _VNode(self, root=False, **kwargs): """Returns a VNode with default values; The function naming makes it clear that this function is about creating a VNode object.""" if root: diff --git a/pomdp_py/algorithms/pomcp.pyx b/pomdp_py/algorithms/pomcp.pyx index 47b5ffa6..349b8127 100644 --- a/pomdp_py/algorithms/pomcp.pyx +++ b/pomdp_py/algorithms/pomcp.pyx @@ -133,18 +133,14 @@ cdef class POMCP(POUCT): root.belief.add(state) # belief update happens as simulation goes. return total_reward - def _VNode(self, agent=None, root=False, **kwargs): + def _VNode(self, root=False, **kwargs): """Returns a VNode with default values; The function naming makes it clear that this function is about creating a VNode object.""" if root: # agent cannot be None. 
return RootVNodeParticles(self._num_visits_init, - agent.history, - belief=copy.deepcopy(agent.belief)) + self._agent.history, + belief=copy.deepcopy(self._agent.belief)) else: - if agent is None: - return VNodeParticles(self._num_visits_init, - belief=Particles([])) - else: - return VNodeParticles(self._num_visits_init, - belief=copy.deepcopy(agent.belief)) + return VNodeParticles(self._num_visits_init, + belief=Particles([])) diff --git a/pomdp_py/framework/basics.pyx b/pomdp_py/framework/basics.pyx index 478f58ce..d53c0b35 100644 --- a/pomdp_py/framework/basics.pyx +++ b/pomdp_py/framework/basics.pyx @@ -194,7 +194,7 @@ cdef class BlackboxModel: def sample(self, state, action): """ sample(self, state, action) - Sample (s',o,r) ~ G(s',o,r)""" + Sample (s',o,r) ~ G(s,a)""" raise NotImplementedError def argmax(self, state, action): @@ -333,7 +333,7 @@ cdef class Agent: blackbox_model=None) """ def __init__(self, init_belief, - policy_model, + policy_model=None, transition_model=None, observation_model=None, reward_model=None, @@ -345,11 +345,6 @@ cdef class Agent: self._observation_model = observation_model self._reward_model = reward_model self._blackbox_model = blackbox_model - # It cannot be the case that both explicit models and blackbox model are None. - if self._blackbox_model is None: - assert self._transition_model is not None\ - and self._observation_model is not None\ - and self._reward_model is not None # For online planning self._cur_belief = init_belief @@ -419,6 +414,20 @@ cdef class Agent: def generative_model(self): return self.blackbox_model + def set_models(self, transition_model=None, observation_model=None, + reward_model=None, blackbox_model=None, policy_model=None): + """Re-assign the models to be the ones given.""" + if transition_model is not None: + self._transition_model = transition_model + if observation_model is not None: + self._observation_model = observation_model + if reward_model is not None: + self._reward_model = reward_model + if blackbox_model is not None: + self._blackbox_model = blackbox_model + if policy_model is not None: + self._policy_model = policy_model + def add_attr(self, attr_name, attr_value): """ add_attr(self, attr_name, attr_value) @@ -482,11 +491,6 @@ cdef class Environment: self._reward_model = reward_model self._blackbox_model = blackbox_model - # It cannot be the case that both explicit models and blackbox model are None. 
- if self._blackbox_model is None: - assert self._transition_model is not None\ - and self._reward_model is not None - @property def state(self): """Synonym for :meth:`cur_state`""" return self._cur_state @@ -512,6 +516,15 @@ """The :class:`BlackboxModel` underlying the environment""" return self._blackbox_model + def set_models(self, transition_model=None, reward_model=None, blackbox_model=None): + """Re-assign the models to be the ones given.""" + if transition_model is not None: + self._transition_model = transition_model + if reward_model is not None: + self._reward_model = reward_model + if blackbox_model is not None: + self._blackbox_model = blackbox_model + def state_transition(self, action, execute=True, discount_factor=1.0): """ state_transition(self, action, execute=True) @@ -574,13 +587,13 @@ cdef class Option(Action): Described in `Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning` """ - def initiation(self, state): + def initiate(self, state): """ initiation(self, state) Returns True if the given parameters satisfy the initiation set""" raise NotImplementedError - def termination(self, state): + def terminate(self, state): """termination(self, state) Returns a boolean of whether state satisfies the termination condition; Technically returning a float between 0 and 1 is also allowed.""" diff --git a/pomdp_py/framework/oopomdp.pxd b/pomdp_py/framework/oopomdp.pxd index 2392a1e8..320842c0 100644 --- a/pomdp_py/framework/oopomdp.pxd +++ b/pomdp_py/framework/oopomdp.pxd @@ -25,3 +25,6 @@ cdef class OOObservationModel(ObservationModel): cdef class OOBelief(GenerativeDistribution): cdef dict _object_beliefs + +cdef class DictState(ObjectState): + pass diff --git a/pomdp_py/framework/oopomdp.pyx b/pomdp_py/framework/oopomdp.pyx index 5dc5172c..020772c5 100644 --- a/pomdp_py/framework/oopomdp.pyx +++ b/pomdp_py/framework/oopomdp.pyx @@ -405,3 +405,9 @@ cdef class OOBelief(GenerativeDistribution): def b(self, objid): """convenient alias function call""" return self._object_beliefs[objid] + +cdef class DictState(ObjectState): + """This is synonymous with ObjectState, but does not convey 'objectness' + of the information being described.""" + def __init__(self, attributes): + super().__init__(None, attributes) diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index 2db7e90d..2980af5a 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -1,5 +1,4 @@ -""" -RockSample(n,k) problem +"""RockSample(n,k) problem Origin: Heuristic Search Value Iteration for POMDPs (UAI 2004) @@ -24,12 +23,16 @@ Check_i: receives a noisy observation about RockType_i (noise determined by eta (:math:`\eta`). eta=1 -> perfect sensor; eta=0 -> uniform) -Observation: observes the property of rock i when taking Check_i. +Observation: observes the property of rock i when taking Check_i. The + observation may be noisy, depending on an efficiency parameter which + decreases exponentially as the distance increases between the rover and + rock i. 'half_efficiency_dist' influences this parameter (the larger it is, the more reliable the sensor remains at a distance) Reward: +10 for Sample a good rock. -10 for Sampling a bad rock. Move to exit area +10. Other actions have no cost or reward. Initial belief: every rock has equal probability of being Good or Bad.
+ """ import pomdp_py @@ -427,13 +430,15 @@ def print_state(self): string += "\n" print(string) - def __init__(self, n, k, init_state, rock_locs, init_belief): + def __init__( + self, n, k, init_state, rock_locs, init_belief, half_efficiency_dist=20 + ): self._n, self._k = n, k agent = pomdp_py.Agent( init_belief, RSPolicyModel(n, k), RSTransitionModel(n, rock_locs, self.in_exit_area), - RSObservationModel(rock_locs), + RSObservationModel(rock_locs, half_efficiency_dist=half_efficiency_dist), RSRewardModel(rock_locs, self.in_exit_area), ) env = pomdp_py.Environment( @@ -502,48 +507,50 @@ def init_particles_belief(k, num_particles, init_state, belief="uniform"): return init_belief -def main(): - n, k = 5, 5 - init_state, rock_locs = RockSampleProblem.generate_instance(n, k) - # # For debugging purpose - # n, k = 2,2 - # rover_position = (0, 0) - # rock_locs = {} # map from rock location to rock id - # rock_locs[(0,1)] = 0 - # rock_locs[(1,1)] = 1 - # rocktypes = ('good', 'good') +def minimal_instance(**kwargs): + # A particular instance for debugging purpose + n, k = 2, 2 + rover_position = (0, 0) + rock_locs = {} # map from rock location to rock id + rock_locs[(0, 1)] = 0 + rock_locs[(1, 1)] = 1 + rocktypes = ("good", "good") # Ground truth state - # init_state = State(rover_position, rocktypes, False) - # belief = "uniform" - + init_state = State(rover_position, rocktypes, False) belief = "uniform" + init_belief = init_particles_belief(k, 200, init_state, belief=belief) + rocksample = RockSampleProblem(n, k, init_state, rock_locs, init_belief, **kwargs) + return rocksample - init_state_copy = copy.deepcopy(init_state) + +def create_instance(n, k, **kwargs): + init_state, rock_locs = RockSampleProblem.generate_instance(n, k) + + belief = "uniform" # init belief (uniform), represented in particles; # We don't factor the state here; We are also not doing any action prior. 
init_belief = init_particles_belief(k, 200, init_state, belief=belief) - rocksample = RockSampleProblem(n, k, init_state, rock_locs, init_belief) + rocksample = RockSampleProblem(n, k, init_state, rock_locs, init_belief, **kwargs) + return rocksample + + +def main(): + rocksample = minimal_instance() # or: create_instance(7, 8) rocksample.print_state() print("*** Testing POMCP ***") pomcp = pomdp_py.POMCP( - max_depth=12, + max_depth=30, discount_factor=0.95, num_sims=10000, - exploration_const=20, + exploration_const=5, rollout_policy=rocksample.agent.policy_model, num_visits_init=1, ) tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) - rocksample.env.state.position = init_state_copy.position - rocksample.env.state.rocktypes = init_state_copy.rocktypes - rocksample.env.state.terminal = False - init_belief = init_particles_belief(k, 200, rocksample.env.state, belief=belief) - rocksample.agent.set_belief(init_belief) - if __name__ == "__main__": main() diff --git a/pomdp_py/representations/distribution/particles.pxd b/pomdp_py/representations/distribution/particles.pxd index a8226be5..e2d0b3f5 100644 --- a/pomdp_py/representations/distribution/particles.pxd +++ b/pomdp_py/representations/distribution/particles.pxd @@ -8,6 +8,8 @@ cdef class WeightedParticles(GenerativeDistribution): cdef object _distance_func cdef dict _hist cdef bint _hist_valid + cdef bint _frozen + cdef int _hashcode cpdef dict get_histogram(self) diff --git a/pomdp_py/representations/distribution/particles.pyx b/pomdp_py/representations/distribution/particles.pyx index b462c50e..7997dd94 100644 --- a/pomdp_py/representations/distribution/particles.pyx +++ b/pomdp_py/representations/distribution/particles.pyx @@ -1,4 +1,5 @@ from pomdp_py.framework.basics cimport GenerativeDistribution +from pomdp_py.utils.cython_utils cimport det_dict_hash import random cdef class WeightedParticles(GenerativeDistribution): @@ -19,8 +20,10 @@ cdef class WeightedParticles(GenerativeDistribution): are comparable; "none" if no approximation, return 0. distance_func: Used when approx_method is 'nearest'. Returns a number given two values in this particle set. + frozen: if true, then this WeightedParticles object cannot be modified. This + makes it hashable.
""" - def __init__(self, list particles, str approx_method="none", distance_func=None): + def __init__(self, list particles, str approx_method="none", distance_func=None, frozen=False): self._values = [value for value, _ in particles] self._weights = [weight for _, weight in particles] self._particles = particles @@ -30,6 +33,9 @@ cdef class WeightedParticles(GenerativeDistribution): self._approx_method = approx_method self._distance_func = distance_func + self._frozen = frozen + if self._frozen: + self._hashcode = det_dict_hash(self._hist) @property def particles(self): @@ -43,9 +49,15 @@ cdef class WeightedParticles(GenerativeDistribution): def weights(self): return self._weights + @property + def frozen(self): + return self._frozen + def add(self, particle): """add(self, particle) particle: (value, weight) tuple""" + if self._frozen: + raise NotImplementedError("weighted particles is frozen and cannot be modified") self._particles.append(particle) s, w = particle self._values.append(s) @@ -58,6 +70,16 @@ cdef class WeightedParticles(GenerativeDistribution): def __len__(self): return len(self._particles) + def __hash__(self): + if self._frozen: + return self._hashcode + raise NotImplementedError + + def __eq__(self, other): + if isinstance(other, WeightedParticles): + return self._hist == other._hist + return False + def __getitem__(self, value): """Returns the probability of `value`; normalized""" if len(self.particles) == 0: @@ -126,13 +148,13 @@ cdef class WeightedParticles(GenerativeDistribution): return hist @classmethod - def from_histogram(cls, histogram): + def from_histogram(cls, histogram, frozen=False): """Given a pomdp_py.Histogram return a particle representation of it, which is an approximation""" particles = [] for v in histogram: particles.append((v, histogram[v])) - return WeightedParticles(particles) + return WeightedParticles(particles, frozen=frozen) def condense(self): """ diff --git a/pomdp_py/utils/cython_utils.pyx b/pomdp_py/utils/cython_utils.pyx index 613ff4f3..ba326254 100644 --- a/pomdp_py/utils/cython_utils.pyx +++ b/pomdp_py/utils/cython_utils.pyx @@ -3,7 +3,13 @@ import hashlib cpdef det_dict_hash(dct, keep=9): - """deterministic hash of a dictionary.""" - content = str(list(sorted(dct.items()))).encode() - hashcode = int(str(int(hashlib.sha1(content).hexdigest(), 16))[:keep]) + """Deterministic hash of a dictionary without sorting.""" + hash_accumulator = 0 + for key, value in dct.items(): + pair_str = f"{key}:{value}".encode() + pair_hash = hashlib.sha1(pair_str).hexdigest() + hash_accumulator += int(pair_hash, 16) + + # Convert the accumulated hash back to a string, take the first 'keep' digits, and convert to an integer + hashcode = int(str(hash_accumulator)[:keep]) return hashcode diff --git a/pyproject.toml b/pyproject.toml index bd28e85c..ff42c20b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pomdp-py" -version = "1.3.4" +version = "1.3.5" description = "Python POMDP Library." authors = [{name = "Kaiyu Zheng", email = "kzheng10@cs.brown.edu"}] license = {file = "LICENSE"} diff --git a/scripts/pomdp_py_release.sh b/scripts/pomdp_py_release.sh new file mode 100755 index 00000000..1d2c4f55 --- /dev/null +++ b/scripts/pomdp_py_release.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# +# Creates a wheel for pomdp-py and prompts user how to release it on +# pypi. 
Note that the wheel does not contain thirdparty binaries such +# as pomdp-solve because we cannot ensure compatibility of the +# pre-built binary. If needed, the user can clone the pomdp-py repo +# and build those binaries from within the thirdparty folder. +# +# Example usage: +# +# ./pomdp_py_release.sh [release-attempt-count] + +# Write MANIFEST.in entries for every .pxd and .pyx file under the given directory. +find_pxd_files_and_write_manifest() { + dir=$1 + output_file=$2 + find "$dir" -name '*.pxd'| while read -r line; do + echo "include $line" + done > "$output_file" + find "$dir" -name '*.pyx'| while read -r line; do + echo "include $line" + done >> "$output_file" + # Include pomdp-solve as well + echo "include ./thirdparty/pomdp-solve/src/pomdp-solve" >> "$output_file" +} + +# Return success if the git repo at the given path is currently on the given branch. +is_git_repo_on_branch() { + repo_path=$1 + branch_name=$2 + + # Move to the git repository + cd "$repo_path" || return 1 + + # Get the current branch name + current_branch=$(git rev-parse --abbrev-ref HEAD) + + # Check if the current branch is the expected one + if [ "$current_branch" = "$branch_name" ]; then + true && return + else + false + fi +} + +does_docker_image_exist() { + image_name=$1 + + # Check if the Docker image exists locally + if [[ "$(docker images -q "$image_name" 2> /dev/null)" == "" ]]; then + # Image not found locally; return failure so the caller can pull it + false + else + true && return + fi +} + +get_python_version() { + version_string=$(python3 --version 2>&1) + version=$(echo "$version_string" | awk '{print $2}') + major_version=${version%%.*} + minor_version=$(echo "$version" | cut -d. -f2) + echo "cp${major_version}${minor_version}" +} + +extract_package_version() { + local pyproject_path="$1" + local version_line=$(grep '^version\s*=\s*"' "$pyproject_path" | head -n 1) + local version=$(echo "$version_line" | sed -E 's/version\s*=\s*"([^"]+)"/\1/') + echo "$version" +} + +user_pwd=$PWD + +# Write the MANIFEST.in file +SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +pomdp_py_path=$SCRIPTPATH/../ +cd $pomdp_py_path +find_pxd_files_and_write_manifest ./ MANIFEST.in + +# Check if pomdp-py is on the right branch +version=$(extract_package_version "$pomdp_py_path/pyproject.toml") +attempt_count="" +if [ $# -gt 0 ]; then + attempt_count="-$1-" +fi +echo $version +if ! is_git_repo_on_branch $pomdp_py_path dev-$version; then + if ! is_git_repo_on_branch $pomdp_py_path dev-latest; then + echo "pomdp-py repo must be either on dev-latest or dev-$version, but not $current_branch. Abort" + exit 1 + fi +fi + +echo -e "========= making release for pomdp-py $version =========" + +pip install setuptools +pip install Cython + +# Note that we are building with pyproject.toml +python3 setup.py build_ext --inplace +pip install build +python -m build + +# pull the manylinux image if it is not available locally +linux_dist=manylinux2014_x86_64 +manylinux_image=quay.io/pypa/$linux_dist +if !
does_docker_image_exist $manylinux_image; then + docker pull $manylinux_image +fi +cpv=$(get_python_version) +wheel_name="pomdp_py-$version-$cpv-${cpv}-linux_x86_64.whl" +command="auditwheel repair io/dist/${wheel_name} -w /io/wheelhouse/" +docker run --user $(id -u):$(id -g) --mount type=bind,source=${pomdp_py_path},target=/io $manylinux_image bash -c "$command" +rm $pomdp_py_path/dist/$wheel_name +fixed_wheel_name="pomdp_py-${version}-${cpv}-${cpv}-manylinux_2_17_x86_64.$linux_dist.whl" +fixed_wheel_name_with_attempt="pomdp_py-${version}${attempt_count}${cpv}-${cpv}-manylinux_2_17_x86_64.$linux_dist.whl" +mv "$pomdp_py_path/wheelhouse/$fixed_wheel_name" "$pomdp_py_path/dist/$fixed_wheel_name_with_attempt" +rm -r $pomdp_py_path/wheelhouse + +# Verification (wheel) +echo -e "------------ verification: wheel ---------" +pip uninstall pomdp_py +pip install --force-reinstall "$pomdp_py_path/dist/$fixed_wheel_name_with_attempt" +python $pomdp_py_path/tests/test_all.py + +# Verification (source) +echo -e "------------ verification: source ---------" +pip uninstall pomdp_py +cd $pomdp_py_path/dist +pip install --force-reinstall pomdp-py-$version.tar.gz +python $pomdp_py_path/tests/test_all.py + +pip install twine +echo -e "If successful, run" +echo -e " python3 -m twine upload --repository pypi $pomdp_py_path/dist/*" +echo -e "to upload the release to PyPI." + + +cd $user_pwd diff --git a/setup.py b/setup.py index 70655dda..c519ea91 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,6 @@ def build_extensions(pkg_name, major_submodules): packages=find_packages(exclude=["thirdparty", "thirdparty.*"]), package_data={ "pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"], - "pomdp_problems": ["*.pxd", "*.pyx", "*.so", "*.c"], }, zip_safe=False, ) diff --git a/tests/test_framework_basics.py b/tests/test_framework_basics.py new file mode 100644 index 00000000..a99c0abe --- /dev/null +++ b/tests/test_framework_basics.py @@ -0,0 +1,51 @@ +import pomdp_py + +description = "testing framework basics" + +TRANSITION = pomdp_py.TabularTransitionModel( + { + # state, action, next_state + ("hungry", "eat", "full"): 0.7, + ("hungry", "eat", "hungry"): 0.3, + ("hungry", "sleep", "full"): 0.01, + ("hungry", "sleep", "hungry"): 0.99, + ("full", "eat", "full"): 0.9, + ("full", "eat", "hungry"): 0.1, + ("full", "sleep", "full"): 0.5, + ("full", "sleep", "hungry"): 0.5, + } +) + + +def test_agent_set_model() -> None: + b0 = pomdp_py.Histogram({"hungry": 0.5, "full": 0.5}) + + # test that agent can be created with incomplete models + # and we can set the agent's model after its creation + agent = pomdp_py.Agent(b0) + + agent.set_models(transition_model=TRANSITION) + # next_state, state, action + assert agent.transition_model.probability("full", "hungry", "eat") == 0.7 + + policy = pomdp_py.UniformPolicyModel(["eat", "sleep"]) + agent.set_models(policy_model=policy) + assert agent.policy_model.sample(b0.random()) in ["eat", "sleep"] + + +def test_env_set_model() -> None: + # test that agent can be created with incomplete models + # and we can set the agent's model after its creation + env = pomdp_py.Environment(pomdp_py.SimpleState("hungry")) + env.set_models(transition_model=TRANSITION) + # next_state, state, action + assert env.transition_model.probability("full", "hungry", "eat") == 0.7 + + +def run() -> None: + test_agent_set_model() + test_env_set_model() + + +if __name__ == "__main__": + run() diff --git a/tests/test_particles.py b/tests/test_particles.py index 5470176c..bb0a5f80 100644 --- a/tests/test_particles.py +++ 
b/tests/test_particles.py @@ -41,17 +41,16 @@ def test_weighted_particles(): for v in range(5): random_dist[f"x{v}"] = random.uniform(0, 1) total_prob += random_dist[f"x{v}"] - for v in random_dist: - random_dist[v] /= total_prob - particles = pomdp_py.WeightedParticles.from_histogram( - pomdp_py.Histogram(random_dist) - ) + particles = pomdp_py.WeightedParticles.from_histogram(random_dist) + particles_frozen = pomdp_py.WeightedParticles(particles.particles, frozen=True) assert abs(sum(particles[v] for v, _ in particles) - 1.0) <= 1e-6 + assert abs(sum(particles_frozen[v] for v, _ in particles_frozen) - 1.0) <= 1e-6 for v in random_dist: - assert abs(particles[v] - random_dist[v]) <= 2e-3 + assert abs(particles[v] - random_dist[v] / total_prob) <= 2e-3 + assert abs(particles_frozen[v] - random_dist[v] / total_prob) <= 2e-3 counts = {} total = int(1e6) @@ -61,9 +60,11 @@ def test_weighted_particles(): for v in counts: counts[v] /= total for v in random_dist: - assert abs(counts[v] - random_dist[v]) <= 2e-3 + assert abs(counts[v] - random_dist[v] / total_prob) <= 2e-3 assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe() + assert particles_frozen.mpe() == pomdp_py.Histogram(random_dist).mpe() + hash(particles_frozen) def run():
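For reference, a small sketch of the frozen WeightedParticles behavior that the test above exercises (the particle values and weights here are made up):

import pomdp_py

particles = pomdp_py.WeightedParticles([("a", 0.6), ("b", 0.4)], frozen=True)
assert particles.frozen

# A frozen particle set is hashable (via det_dict_hash), so it can be used as a dict key...
cache = {particles: "ok"}

# ...but it can no longer be mutated.
try:
    particles.add(("c", 0.1))
except NotImplementedError:
    print("frozen WeightedParticles cannot be modified")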