diff --git a/rllib/BUILD b/rllib/BUILD index 37fe7b255ca35..043bbf4a3022d 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -326,7 +326,7 @@ py_test( name = "learning_tests_pendulum_cql_old_api_stack", main = "tests/run_regression_tests.py", tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous", "learning_tests_with_ray_data"], - size = "medium", + size = "large", srcs = ["tests/run_regression_tests.py"], # Include the zipped json data file as well. data = [ @@ -839,6 +839,14 @@ py_test( ) # BC +py_test( + name = "test_bc_old_stack", + tags = ["team:rllib", "algorithms_dir"], + size = "medium", + # Include the json data file. + data = ["tests/data/cartpole/large.json"], + srcs = ["algorithms/bc/tests/test_bc_old_stack.py"] +) py_test( name = "test_bc", tags = ["team:rllib", "algorithms_dir"], @@ -1574,6 +1582,16 @@ py_test( srcs = ["offline/estimators/tests/test_dr_learning.py"], ) +py_test( + name = "test_offline_data", + tags = ["team:rllib", "offline"], + size = "small", + srcs = ["offline/tests/test_offline_data.py"], + data = [ + "tests/data/cartpole/cartpole-v1_large", + ], +) + # -------------------------------------------------------------------- # Policies # rllib/policy/ diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py index fd19a63d9bc65..f423c6f9ac4a4 100644 --- a/rllib/algorithms/algorithm.py +++ b/rllib/algorithms/algorithm.py @@ -658,12 +658,50 @@ def setup(self, config: AlgorithmConfig) -> None: validate_env=self.validate_env, default_policy_class=self.get_default_policy_class(self.config), config=self.config, - num_env_runners=self.config.num_env_runners, + num_env_runners=( + 0 + if ( + self.config.input_ + and ( + isinstance(self.config.input_, str) + or ( + isinstance(self.config.input_, list) + and isinstance(self.config.input_[0], str) + ) + ) + and self.config.input_ != "sampler" + and self.config.enable_rl_module_and_learner + and 
self.config.enable_env_runner_and_connector_v2 + ) + else self.config.num_env_runners + ), local_env_runner=True, logdir=self.logdir, tune_trial_id=self.trial_id, ) + # If an input path is available and we are on the new API stack generate + # an `OfflineData` instance. + if ( + self.config.input_ + and ( + isinstance(self.config.input_, str) + or ( + isinstance(self.config.input_, list) + and isinstance(self.config.input_[0], str) + ) + ) + and self.config.input_ != "sampler" + and self.config.enable_rl_module_and_learner + and self.config.enable_env_runner_and_connector_v2 + ): + from ray.rllib.offline.offline_data import OfflineData + + self.offline_data = OfflineData(self.config) + # Otherwise set the attribute to `None`. + else: + self.offline_data = None + # Compile, validate, and freeze an evaluation config. self.evaluation_config = self.config.get_evaluation_config_object() self.evaluation_config.validate() @@ -743,7 +781,7 @@ def setup(self, config: AlgorithmConfig) -> None: # TODO (Rohan138): Refactor this and remove deprecated methods # Need to add back method_type in case Algorithm is restored from checkpoint method_config["type"] = method_type - + self.learner_group = None if self.config.enable_rl_module_and_learner: local_worker = self.workers.local_worker() env = spaces = None @@ -819,6 +857,29 @@ def setup(self, config: AlgorithmConfig) -> None: ), ) + if self.offline_data: + # If the learners are remote we need to provide specific + # information and the learner's actor handles. + if self.learner_group.is_remote: + # If learners run on different nodes, locality hints help + # to use the nearest learner in the workers that do the + # data preprocessing. + learner_node_ids = self.learner_group.foreach_learner( + lambda l: ray.get_runtime_context().get_node_id() + ) + self.offline_data.locality_hints = [ + node_id.get() for node_id in learner_node_ids + ] + # Provide the actor handles for the learners for module + # updating during preprocessing. 
+ self.offline_data.learner_handles = self.learner_group._workers + # Provide the module_spec. Note, in the remote case this is needed + # because the learner module cannot be copied, but must be built. + self.offline_data.module_spec = module_spec + # Otherwise we can simply pass in the local learner. + else: + self.offline_data.learner_handles = [self.learner_group._learner] + # Run `on_algorithm_init` callback after initialization is done. self.callbacks.on_algorithm_init(algorithm=self, metrics_logger=self.metrics) diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index dd62bd3152bb0..3f64471d159b6 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -430,6 +430,10 @@ def __init__(self, algo_class: Optional[type] = None): # `self.offline_data()` self.input_ = "sampler" + self.input_read_method = "read_parquet" + self.input_read_method_kwargs = {} + self.prelearner_module_synch_period = 10 + self.dataset_num_iters_per_learner = None self.input_config = {} self.actions_in_input_normalized = False self.postprocess_inputs = False @@ -2376,6 +2380,10 @@ def offline_data( self, *, input_=NotProvided, + input_read_method=NotProvided, + input_read_method_kwargs=NotProvided, + prelearner_module_synch_period=NotProvided, + dataset_num_iters_per_learner=NotProvided, input_config=NotProvided, actions_in_input_normalized=NotProvided, input_evaluation=NotProvided, @@ -2400,7 +2408,24 @@ def offline_data( - A callable that takes an `IOContext` object as only arg and returns a ray.rllib.offline.InputReader. - A string key that indexes a callable with tune.registry.register_input - input_config: Arguments that describe the settings for reading the input. + input_read_method: Read method for the `ray.data.Dataset` to read in the + offline data from `input_`. The default is `read_parquet` for Parquet files. 
+ See https://docs.ray.io/en/latest/data/api/input_output.html for more + info about available read methods in `ray.data`. + input_read_method_kwargs: kwargs for the `input_read_method`. These will be + passed into the read method without checking. + prelearner_module_synch_period: The period (number of batches converted) + after which the `RLModule` held by the `PreLearner` should sync weights. + The `PreLearner` is used to preprocess batches for the learners. The + higher this value the more off-policy the `PreLearner`'s module will be. + Values too small will force the `PreLearner` to sync a lot with the + `Learner` and will slow down the data pipeline. The default value chosen + by the `OfflinePreLearner` is 10. + dataset_num_iters_per_learner: Number of iterations to run in each learner + during a single training iteration. If `None`, each learner runs a + complete epoch over its data block (the dataset is partitioned into + as many blocks as there are learners). The default is `None`. + input_config: Arguments that describe the settings for reading the input. If input is `sample`, this will be environment configuation, e.g. `env_name` and `env_config`, etc. See `EnvContext` for more info. If the input is `dataset`, this will be e.g. `format`, `path`. 
@@ -2438,6 +2463,14 @@ def offline_data( """ if input_ is not NotProvided: self.input_ = input_ + if input_read_method is not NotProvided: + self.input_read_method = input_read_method + if input_read_method_kwargs is not NotProvided: + self.input_read_method_kwargs = input_read_method_kwargs + if prelearner_module_synch_period is not NotProvided: + self.prelearner_module_synch_period = prelearner_module_synch_period + if dataset_num_iters_per_learner is not NotProvided: + self.dataset_num_iters_per_learner = dataset_num_iters_per_learner if input_config is not NotProvided: if not isinstance(input_config, dict): raise ValueError( diff --git a/rllib/algorithms/bc/bc.py b/rllib/algorithms/bc/bc.py index 37fb59a504771..bcf2539fed35a 100644 --- a/rllib/algorithms/bc/bc.py +++ b/rllib/algorithms/bc/bc.py @@ -8,12 +8,19 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.metrics import ( ALL_MODULES, + LEARNER_RESULTS, + LEARNER_UPDATE_TIMER, + OFFLINE_SAMPLING_TIMER, NUM_AGENT_STEPS_SAMPLED, NUM_AGENT_STEPS_TRAINED, NUM_ENV_STEPS_SAMPLED, NUM_ENV_STEPS_TRAINED, + NUM_ENV_STEPS_TRAINED_LIFETIME, + NUM_MODULE_STEPS_TRAINED, + NUM_MODULE_STEPS_TRAINED_LIFETIME, SAMPLE_TIMER, SYNCH_WORKER_WEIGHTS_TIMER, + TIMERS, ) from ray.rllib.utils.typing import RLModuleSpec, ResultDict @@ -74,8 +81,10 @@ def __init__(self, algo_class=None): # Advantages (calculated during postprocessing) # not important for behavioral cloning. self.postprocess_inputs = False - # Set RLModule as default. - self.api_stack(enable_rl_module_and_learner=True) + # Set RLModule as default if the `EnvRunner`s are used. + if self.enable_env_runner_and_connector_v2: + self.api_stack(enable_rl_module_and_learner=True) + # __sphinx_doc_end__ # fmt: on @@ -139,86 +148,165 @@ def get_default_config(cls) -> AlgorithmConfig: @override(MARWIL) def training_step(self) -> ResultDict: - if not self.config.enable_rl_module_and_learner: - # Using ModelV2. 
- return super().training_step() + # Check, which stack is run. + if self.config.enable_env_runner_and_connector_v2: + # Using `EnvRunner`s, `OfflineData` and `RLModule`s. + return self._training_step_new_stack() + elif self.config.enable_rl_module_and_learner: + # Using `RLModule`s, but `RolloutWorker`s. + return self._training_step_hybrid_stack() else: - # Implement logic using RLModule and Learner API. - # TODO (sven): Remove RolloutWorkers/EnvRunners for - # datasets. Use RolloutWorker/EnvRunner only for - # env stepping. - # TODO (simon): Take care of sampler metrics: right - # now all rewards are `nan`, which possibly confuses - # the user that sth. is not right, although it is as - # we do not step the env. - with self._timers[SAMPLE_TIMER]: - # Sampling from offline data. - # TODO (simon): We have to remove the `RolloutWorker` - # here and just use the already distributed `dataset` - # for sampling. Only in online evaluation - # `RolloutWorker/EnvRunner` should be used. - if self.config.count_steps_by == "agent_steps": - train_batch = synchronous_parallel_sample( - worker_set=self.workers, - max_agent_steps=self.config.train_batch_size, - sample_timeout_s=self.config.sample_timeout_s, - ) - else: - train_batch = synchronous_parallel_sample( - worker_set=self.workers, - max_env_steps=self.config.train_batch_size, - sample_timeout_s=self.config.sample_timeout_s, - ) + # Using ModelV2 and `RolloutWorker`s. + return super().training_step() + + def _training_step_new_stack(self) -> ResultDict: + """Implements training logic for the new stack - # TODO (sven): Use metrics API as soon as we moved to new API stack - # (from currently hybrid stack). 
- # self.metrics.log_dict( - # { - # NUM_AGENT_STEPS_SAMPLED_LIFETIME: len(train_batch), - # NUM_ENV_STEPS_SAMPLED_LIFETIME: len(train_batch), - # }, - # reduce="sum", - # ) - self._counters[NUM_AGENT_STEPS_SAMPLED] += len(train_batch) - self._counters[NUM_ENV_STEPS_SAMPLED] += len(train_batch) + Note, this includes so far training with the `OfflineData` + class (multi-/single-learner setup) and evaluation on + `EnvRunner`s. Note further, evaluation on the dataset itself + using estimators is not implemented, yet. + """ + # Implement logic using RLModule and Learner API. + # TODO (simon): Take care of sampler metrics: right + # now all rewards are `nan`, which possibly confuses + # the user that sth. is not right, although it is as + # we do not step the env. + with self.metrics.log_time((TIMERS, OFFLINE_SAMPLING_TIMER)): + # Sampling from offline data. + batch = self.offline_data.sample( + num_samples=self.config.train_batch_size_per_learner, + num_shards=self.config.num_learners, + return_iterator=True if self.config.num_learners > 1 else False, + ) + with self.metrics.log_time((TIMERS, LEARNER_UPDATE_TIMER)): # Updating the policy. - train_results = self.learner_group.update_from_batch( - batch=train_batch.as_multi_agent( - module_id=list(self.config.policies)[0] - ) + # TODO (simon, sven): Check, if we should execute directly s.th. like + # update_from_iterator. + learner_results = self.learner_group.update_from_batch( + batch, + minibatch_size=self.config.train_batch_size_per_learner, + num_iters=self.config.dataset_num_iters_per_learner, ) + + # Log training results. 
+ self.metrics.merge_and_log_n_dicts(learner_results, key=LEARNER_RESULTS) + self.metrics.log_value( + NUM_ENV_STEPS_TRAINED_LIFETIME, + self.metrics.peek( + (LEARNER_RESULTS, ALL_MODULES, NUM_ENV_STEPS_TRAINED) + ), + reduce="sum", + ) + self.metrics.log_dict( + { + (LEARNER_RESULTS, mid, NUM_MODULE_STEPS_TRAINED_LIFETIME): ( + stats[NUM_MODULE_STEPS_TRAINED] + ) + for mid, stats in self.metrics.peek(LEARNER_RESULTS).items() + }, + reduce="sum", + ) + # Synchronize weights. + # As the results contain for each policy the loss and in addition the + # total loss over all policies is returned, this total loss has to be + # removed. + modules_to_update = set(learner_results[0].keys()) - {ALL_MODULES} + + # Update weights - after learning on the local worker - + # on all remote workers. + with self.metrics.log_time((TIMERS, SYNCH_WORKER_WEIGHTS_TIMER)): + self.workers.sync_weights( + # Sync weights from learner_group to all EnvRunners. + from_worker_or_learner_group=self.learner_group, + policies=modules_to_update, + inference_only=True, + ) + + return self.metrics.reduce() + + def _training_step_hybrid_stack(self) -> ResultDict: + """Implements training logic for the hybrid stack. + + Note, the hybrid stack cannot fall back on MARWIL b/c MARWIL + is still on the old stack. Instead it needs to use `RolloutWorkers` + for evaluation and the `RLModule`s for inference and training. + Specifically it cannot use the new `OfflineData` class for + training. + """ + # Implement logic using RLModule and Learner API. + # TODO (sven): Remove RolloutWorkers/EnvRunners for + # datasets. Use RolloutWorker/EnvRunner only for + # env stepping. + # TODO (simon): Take care of sampler metrics: right + # now all rewards are `nan`, which possibly confuses + # the user that sth. is not right, although it is as + # we do not step the env. + with self._timers[SAMPLE_TIMER]: + # Sampling from offline data. 
+ # TODO (simon): We have to remove the `RolloutWorker` + # here and just use the already distributed `dataset` + # for sampling. Only in online evaluation + # `RolloutWorker/EnvRunner` should be used. + if self.config.count_steps_by == "agent_steps": + train_batch = synchronous_parallel_sample( + worker_set=self.workers, + max_agent_steps=self.config.train_batch_size, + ) + else: + train_batch = synchronous_parallel_sample( + worker_set=self.workers, + max_env_steps=self.config.train_batch_size, + ) + # TODO (sven): Use metrics API as soon as we moved to new API stack # (from currently hybrid stack). # self.metrics.log_dict( # { - # NUM_AGENT_STEPS_TRAINED_LIFETIME: len(train_batch), - # NUM_ENV_STEPS_TRAINED_LIFETIME: len(train_batch), + # NUM_AGENT_STEPS_SAMPLED_LIFETIME: len(train_batch), + # NUM_ENV_STEPS_SAMPLED_LIFETIME: len(train_batch), # }, # reduce="sum", # ) - self._counters[NUM_AGENT_STEPS_TRAINED] += len(train_batch) - self._counters[NUM_ENV_STEPS_TRAINED] += len(train_batch) - - # Synchronize weights. - # As the results contain for each policy the loss and in addition the - # total loss over all policies is returned, this total loss has to be - # removed. - policies_to_update = set(train_results.keys()) - {ALL_MODULES} - - # with self.metrics.log_time((TIMERS, SYNCH_WORKER_WEIGHTS_TIMER)): - with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]: - if self.workers.num_remote_workers() > 0: - self.workers.sync_weights( - from_worker_or_learner_group=self.learner_group, - policies=policies_to_update, - ) - # Get weights from Learner to local worker. - else: - self.workers.local_worker().set_weights( - self.learner_group.get_weights() - ) + self._counters[NUM_AGENT_STEPS_SAMPLED] += len(train_batch) + self._counters[NUM_ENV_STEPS_SAMPLED] += len(train_batch) - # TODO (sven): Use metrics API as soon as we moved to new API stack - # (from currently hybrid stack). - return train_results + # Updating the policy. 
+ train_results = self.learner_group.update_from_batch( + batch=train_batch.as_multi_agent(module_id=list(self.config.policies)[0]) + ) + # TODO (sven): Use metrics API as soon as we moved to new API stack + # (from currently hybrid stack). + # self.metrics.log_dict( + # { + # NUM_AGENT_STEPS_TRAINED_LIFETIME: len(train_batch), + # NUM_ENV_STEPS_TRAINED_LIFETIME: len(train_batch), + # }, + # reduce="sum", + # ) + self._counters[NUM_AGENT_STEPS_TRAINED] += len(train_batch) + self._counters[NUM_ENV_STEPS_TRAINED] += len(train_batch) + + # Synchronize weights. + # As the results contain for each policy the loss and in addition the + # total loss over all policies is returned, this total loss has to be + # removed. + policies_to_update = set(train_results.keys()) - {ALL_MODULES} + + # with self.metrics.log_time((TIMERS, SYNCH_WORKER_WEIGHTS_TIMER)): + with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]: + if self.workers.num_remote_workers() > 0: + self.workers.sync_weights( + from_worker_or_learner_group=self.learner_group, + policies=policies_to_update, + ) + # Get weights from Learner to local worker. + else: + self.workers.local_worker().set_weights( + self.learner_group.get_weights() + ) + + # TODO (sven): Use metrics API as soon as we moved to new API stack + # (from currently hybrid stack). 
+ return train_results diff --git a/rllib/algorithms/bc/bc_learner.py b/rllib/algorithms/bc/bc_learner.py new file mode 100644 index 0000000000000..ab3c983628953 --- /dev/null +++ b/rllib/algorithms/bc/bc_learner.py @@ -0,0 +1,28 @@ +from ray.rllib.core.learner.learner import Learner +from ray.rllib.connectors.common.add_observations_from_episodes_to_batch import ( + AddObservationsFromEpisodesToBatch, +) +from ray.rllib.connectors.learner.add_next_observations_from_episodes_to_train_batch import ( # noqa + AddNextObservationsFromEpisodesToTrainBatch, +) +from ray.rllib.utils.annotations import ( + override, + OverrideToImplementCustomLogic_CallToSuperRecommended, +) + + +class BCLearner(Learner): + @OverrideToImplementCustomLogic_CallToSuperRecommended + @override(Learner) + def build(self) -> None: + super().build() + # Prepend a NEXT_OBS from episodes to train batch connector piece (right + # after the observation default piece). + if ( + self.config.add_default_connectors_to_learner_pipeline + and self.config.enable_env_runner_and_connector_v2 + ): + self._learner_connector.insert_after( + AddObservationsFromEpisodesToBatch, + AddNextObservationsFromEpisodesToTrainBatch(), + ) diff --git a/rllib/algorithms/bc/tests/test_bc.py b/rllib/algorithms/bc/tests/test_bc.py index a6ac43d31b4be..dd7329a084298 100644 --- a/rllib/algorithms/bc/tests/test_bc.py +++ b/rllib/algorithms/bc/tests/test_bc.py @@ -1,103 +1,81 @@ -import os from pathlib import Path import unittest - import ray -import ray.rllib.algorithms.bc as bc -from ray.rllib.utils.metrics import ( - ENV_RUNNER_RESULTS, - EPISODE_RETURN_MEAN, -) -from ray.rllib.utils.test_utils import ( - check_compute_single_action, - check_train_results, - framework_iterator, -) + +from ray.rllib.algorithms.bc import BCConfig +from ray.rllib.utils.metrics import ENV_RUNNER_RESULTS, EPISODE_RETURN_MEAN class TestBC(unittest.TestCase): @classmethod - def setUpClass(cls): + def setUpClass(cls) -> None: ray.init() @classmethod - 
def tearDownClass(cls): + def tearDownClass(cls) -> None: ray.shutdown() def test_bc_compilation_and_learning_from_offline_file(self): - """Test whether BC can be built with all frameworks. - - And learns from a historic-data file (while being evaluated on an - actual env using evaluation_num_env_runners > 0). - """ - rllib_dir = Path(__file__).parents[3] - print("rllib_dir={}".format(rllib_dir)) - # This has still to be done until `pathlib` will be used in the readers. - data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json") - print(f"data_file={data_file} exists={os.path.isfile(data_file)}") - + # Define the data paths. + data_path = "tests/data/cartpole/cartpole-v1_large" + base_path = Path(__file__).parents[3] + print(f"base_path={base_path}") + data_path = "local://" + base_path.joinpath(data_path).as_posix() + print(f"data_path={data_path}") + + # Define the BC config. config = ( - bc.BCConfig() + BCConfig() + .environment(env="CartPole-v1") + .api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) + .learners( + num_learners=0, + ) .evaluation( evaluation_interval=3, evaluation_num_env_runners=1, evaluation_duration=5, evaluation_parallel_to_training=True, - evaluation_config=bc.BCConfig.overrides(input_="sampler"), ) - .offline_data(input_=[data_file]) + # Note, the `input_` argument is the major argument for the + # new offline API. + .offline_data(input_=[data_path]) + .training( + lr=0.0008, + train_batch_size_per_learner=2000, + ) ) - num_iterations = 350 - min_return_to_reach = 75.0 - # Test for RLModule API and ModelV2. 
- for rl_modules in [True, False]: - config.api_stack(enable_rl_module_and_learner=rl_modules) - # Old and new stack support different frameworks - if rl_modules: - frameworks_to_test = ("torch", "tf2") - else: - frameworks_to_test = ("torch", "tf") - - for _ in framework_iterator(config, frameworks=frameworks_to_test): - for recurrent in [True, False]: - # We only test recurrent networks with RLModules. - if recurrent: - # TODO (Artur): We read input data without a time-dimensions. - # In order for a recurrent offline learning RL Module to - # work, the input data needs to be transformed do add a - # time-dimension. - continue - - config.training(model={"use_lstm": recurrent}) - algo = config.build(env="CartPole-v1") - learnt = False - for i in range(num_iterations): - results = algo.train() - check_train_results(results) - print(results) - - eval_results = results.get("evaluation") - if eval_results: - mean_return = eval_results[ENV_RUNNER_RESULTS][ - EPISODE_RETURN_MEAN - ] - print("iter={} R={}".format(i, mean_return)) - # Learn until good reward is reached in the actual env. - if mean_return > min_return_to_reach: - print("learnt!") - learnt = True - break - - if not learnt: - raise ValueError( - "`BC` did not reach {} reward from expert offline " - "data!".format(min_return_to_reach) - ) - - check_compute_single_action(algo, include_prev_action_reward=True) + num_iterations = 350 + min_reward = 120.0 + + # TODO (simon): Add support for recurrent modules. 
+ algo = config.build() + learnt = False + for i in range(num_iterations): + results = algo.train() + print(results) + + eval_results = results.get("evaluation", {}) + if eval_results: + episode_return_mean = eval_results[ENV_RUNNER_RESULTS][ + EPISODE_RETURN_MEAN + ] + print(f"iter={i}, R={episode_return_mean}") + if episode_return_mean > min_reward: + print("BC has learnt the task!") + learnt = True + break + + if not learnt: + raise ValueError( + f"`BC` did not reach {min_reward} reward from expert offline data!" + ) - algo.stop() + algo.stop() if __name__ == "__main__": diff --git a/rllib/algorithms/bc/tests/test_bc_old_stack.py b/rllib/algorithms/bc/tests/test_bc_old_stack.py new file mode 100644 index 0000000000000..a6ac43d31b4be --- /dev/null +++ b/rllib/algorithms/bc/tests/test_bc_old_stack.py @@ -0,0 +1,107 @@ +import os +from pathlib import Path +import unittest + +import ray +import ray.rllib.algorithms.bc as bc +from ray.rllib.utils.metrics import ( + ENV_RUNNER_RESULTS, + EPISODE_RETURN_MEAN, +) +from ray.rllib.utils.test_utils import ( + check_compute_single_action, + check_train_results, + framework_iterator, +) + + +class TestBC(unittest.TestCase): + @classmethod + def setUpClass(cls): + ray.init() + + @classmethod + def tearDownClass(cls): + ray.shutdown() + + def test_bc_compilation_and_learning_from_offline_file(self): + """Test whether BC can be built with all frameworks. + + And learns from a historic-data file (while being evaluated on an + actual env using evaluation_num_env_runners > 0). + """ + rllib_dir = Path(__file__).parents[3] + print("rllib_dir={}".format(rllib_dir)) + # This has still to be done until `pathlib` will be used in the readers. 
+ data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json") + print(f"data_file={data_file} exists={os.path.isfile(data_file)}") + + config = ( + bc.BCConfig() + .evaluation( + evaluation_interval=3, + evaluation_num_env_runners=1, + evaluation_duration=5, + evaluation_parallel_to_training=True, + evaluation_config=bc.BCConfig.overrides(input_="sampler"), + ) + .offline_data(input_=[data_file]) + ) + num_iterations = 350 + min_return_to_reach = 75.0 + + # Test for RLModule API and ModelV2. + for rl_modules in [True, False]: + config.api_stack(enable_rl_module_and_learner=rl_modules) + # Old and new stack support different frameworks + if rl_modules: + frameworks_to_test = ("torch", "tf2") + else: + frameworks_to_test = ("torch", "tf") + + for _ in framework_iterator(config, frameworks=frameworks_to_test): + for recurrent in [True, False]: + # We only test recurrent networks with RLModules. + if recurrent: + # TODO (Artur): We read input data without a time-dimensions. + # In order for a recurrent offline learning RL Module to + # work, the input data needs to be transformed do add a + # time-dimension. + continue + + config.training(model={"use_lstm": recurrent}) + algo = config.build(env="CartPole-v1") + learnt = False + for i in range(num_iterations): + results = algo.train() + check_train_results(results) + print(results) + + eval_results = results.get("evaluation") + if eval_results: + mean_return = eval_results[ENV_RUNNER_RESULTS][ + EPISODE_RETURN_MEAN + ] + print("iter={} R={}".format(i, mean_return)) + # Learn until good reward is reached in the actual env. 
+ if mean_return > min_return_to_reach: + print("learnt!") + learnt = True + break + + if not learnt: + raise ValueError( + "`BC` did not reach {} reward from expert offline " + "data!".format(min_return_to_reach) + ) + + check_compute_single_action(algo, include_prev_action_reward=True) + + algo.stop() + + +if __name__ == "__main__": + import pytest + import sys + + sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/algorithms/bc/torch/bc_torch_learner.py b/rllib/algorithms/bc/torch/bc_torch_learner.py index 6e5cdea99a611..93d8f1db34d14 100644 --- a/rllib/algorithms/bc/torch/bc_torch_learner.py +++ b/rllib/algorithms/bc/torch/bc_torch_learner.py @@ -2,6 +2,7 @@ from typing import Dict from ray.rllib.algorithms.bc.bc import BCConfig +from ray.rllib.algorithms.bc.bc_learner import BCLearner from ray.rllib.core.learner.learner import POLICY_LOSS_KEY from ray.rllib.core.learner.torch.torch_learner import TorchLearner from ray.rllib.policy.sample_batch import SampleBatch @@ -14,7 +15,7 @@ logger = logging.getLogger(__file__) -class BCTorchLearner(TorchLearner): +class BCTorchLearner(TorchLearner, BCLearner): """Implements torch-specific BC loss logic.""" @override(TorchLearner) diff --git a/rllib/algorithms/marwil/marwil.py b/rllib/algorithms/marwil/marwil.py index 9f8c8c19e05fa..9fad382941e45 100644 --- a/rllib/algorithms/marwil/marwil.py +++ b/rllib/algorithms/marwil/marwil.py @@ -207,6 +207,12 @@ def validate(self) -> None: "`config.offline_data(postprocess_inputs=True)`." ) + # Assert that for a local learner the number of iterations is 1. Note, + # this is needed because we have no iterators, but instead a single + # batch returned directly from the `OfflineData.sample` method. 
+ if self.num_learners == 0 and not self.dataset_num_iters_per_learner: + self.dataset_num_iters_per_learner = 1 + @property def _model_auto_keys(self): return super()._model_auto_keys | {"beta": self.beta} diff --git a/rllib/core/learner/learner.py b/rllib/core/learner/learner.py index d1cecc8248ed0..c1b9b8223be02 100644 --- a/rllib/core/learner/learner.py +++ b/rllib/core/learner/learner.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from functools import partial import logging +import numpy from typing import ( Any, Callable, @@ -1134,6 +1135,95 @@ def _set_optimizer_state(self, state: StateDict) -> None: """ raise NotImplementedError + def update_from_iterator( + self, + iterator, + *, + timesteps: Optional[Dict[str, Any]] = None, + minibatch_size: Optional[int] = None, + num_iters: int = 1, + ): + self._check_is_built() + minibatch_size = minibatch_size or 32 + + # Call `before_gradient_based_update` to allow for non-gradient based + # preparations-, logging-, and update logic to happen. + self.before_gradient_based_update(timesteps=timesteps or {}) + + def _finalize_fn(batch: Dict[str, numpy.ndarray]) -> Dict[str, Any]: + # Note, the incoming batch is a dictionary with a numpy array + # holding the `MultiAgentBatch`. + batch = self._convert_batch_type(batch["batch"][0]) + return {"batch": self._set_slicing_by_batch_id(batch, value=True)} + + i = 0 + for batch in iterator.iter_batches( + batch_size=minibatch_size, + _finalize_fn=_finalize_fn, + prefetch_batches=2, + local_shuffle_buffer_size=minibatch_size * 10, + ): + # Update the iteration counter. + i += 1 + + # Note, `_finalize_fn` must return a dictionary. + batch = batch["batch"] + # Check the MultiAgentBatch, whether our RLModule contains all ModuleIDs + # found in this batch. If not, throw an error. 
+ unknown_module_ids = set(batch.policy_batches.keys()) - set( + self.module.keys() + ) + if len(unknown_module_ids) > 0: + raise ValueError( + "Batch contains one or more ModuleIDs that are not in this " + f"Learner! Found IDs: {unknown_module_ids}" + ) + + # Log metrics. + self.metrics.log_dict( + { + (ALL_MODULES, NUM_ENV_STEPS_TRAINED): batch.env_steps(), + (ALL_MODULES, NUM_MODULE_STEPS_TRAINED): batch.agent_steps(), + **{ + (mid, NUM_MODULE_STEPS_TRAINED): len(b) + for mid, b in batch.policy_batches.items() + }, + }, + reduce="sum", + clear_on_reduce=True, + ) + + # Make the actual in-graph/traced `_update` call. This should return + # all tensor values (no numpy). + fwd_out, loss_per_module, tensor_metrics = self._update( + batch.policy_batches + ) + + self._set_slicing_by_batch_id(batch, value=False) + # If `num_iters` is reached break and return. + if num_iters and i == num_iters: + break + + logger.info(f"[Learner] Iterations run in epoch: {i}") + # Convert logged tensor metrics (logged during tensor-mode of MetricsLogger) + # to actual (numpy) values. + self.metrics.tensors_to_numpy(tensor_metrics) + + # Log all individual RLModules' loss terms and its registered optimizers' + # current learning rates. + for mid, loss in convert_to_numpy(loss_per_module).items(): + self.metrics.log_value( + key=(mid, self.TOTAL_LOSS_KEY), + value=loss, + window=1, + ) + # Call `after_gradient_based_update` to allow for non-gradient based + # cleanups-, logging-, and update logic to happen. + self.after_gradient_based_update(timesteps=timesteps or {}) + + # Reduce results across all minibatch update steps. + return self.metrics.reduce() + def _update_from_batch_or_episodes( self, *, @@ -1167,9 +1257,9 @@ def _update_from_batch_or_episodes( episodes = tree.flatten(episodes) # Call the learner connector. - shared_data = {} if self._learner_connector is not None and episodes is not None: # Call the learner connector pipeline. 
+ shared_data = {} batch = self._learner_connector( rl_module=self.module, data=batch if batch is not None else {}, diff --git a/rllib/core/learner/learner_group.py b/rllib/core/learner/learner_group.py index 60989dfb5f155..7109aa95dbbc3 100644 --- a/rllib/core/learner/learner_group.py +++ b/rllib/core/learner/learner_group.py @@ -373,7 +373,18 @@ def _learner_update( _min_total_mini_batches=0, **_kwargs, ): - if _batch_shard is not None: + # If the batch shard is an `DataIterator` we have an offline + # multi-learner setup and `update_from_iterator` needs to + # handle updating. + if isinstance(_batch_shard, ray.data.DataIterator): + result = _learner.update_from_iterator( + iterator=_batch_shard, + timesteps=_timesteps, + minibatch_size=minibatch_size, + num_iters=num_iters, + **_kwargs, + ) + elif _batch_shard is not None: result = _learner.update_from_batch( batch=_batch_shard, timesteps=_timesteps, @@ -427,7 +438,20 @@ def _learner_update( # "lockstep"), the `ShardBatchIterator` should not be used. # Then again, we might move into a world where Learner always # receives Episodes, never batches. - if batch is not None: + if isinstance(batch, list) and isinstance(batch[0], ray.data.DataIterator): + partials = [ + partial( + _learner_update, + _batch_shard=iterator, + _return_state=(return_state and i == 0), + _timesteps=timesteps, + **kwargs, + ) + # Note, `OfflineData` defines exactly as many iterators as there + # are learners. + for i, iterator in enumerate(batch) + ] + elif batch is not None: partials = [ partial( _learner_update, @@ -456,45 +480,57 @@ def _learner_update( # Single- or MultiAgentEpisodes: Shard into equal pieces (only roughly equal # in case of multi-agent). else: - eps_shards = list(ShardEpisodesIterator(episodes, len(self._workers))) - # In the multi-agent case AND `minibatch_size` AND num_workers > 1, we - # compute a max iteration counter such that the different Learners will - # not go through a different number of iterations. 
- min_total_mini_batches = 0 - if ( - isinstance(episodes[0], MultiAgentEpisode) - and minibatch_size - and len(self._workers) > 1 - ): - # Find episode w/ the largest single-agent episode in it, then - # compute this single-agent episode's total number of mini batches - # (if we iterated over it num_sgd_iter times with the mini batch - # size). - longest_ts = 0 - per_mod_ts = defaultdict(int) - for i, shard in enumerate(eps_shards): - for ma_episode in shard: - for sa_episode in ma_episode.agent_episodes.values(): - key = (i, sa_episode.module_id) - per_mod_ts[key] += len(sa_episode) - if per_mod_ts[key] > longest_ts: - longest_ts = per_mod_ts[key] - min_total_mini_batches = self._compute_num_total_mini_batches( - batch_size=longest_ts, - mini_batch_size=minibatch_size, - num_iters=num_iters, - ) - partials = [ - partial( - _learner_update, - _episodes_shard=eps_shard, - _timesteps=timesteps, - _return_state=(return_state and i == 0), - _min_total_mini_batches=min_total_mini_batches, - **kwargs, + from ray.data.iterator import DataIterator + + if isinstance(episodes[0], DataIterator): + min_total_mini_batches = 0 + partials = [ + partial( + _learner_update, + _episodes_shard=episodes_shard, + _min_total_mini_batches=min_total_mini_batches, + ) + for episodes_shard in episodes + ] + else: + eps_shards = list( + ShardEpisodesIterator(episodes, len(self._workers)) ) - for i, eps_shard in enumerate(eps_shards) - ] + # In the multi-agent case AND `minibatch_size` AND num_workers + # > 1, we compute a max iteration counter such that the different + # Learners will not go through a different number of iterations. + min_total_mini_batches = 0 + if ( + isinstance(episodes[0], MultiAgentEpisode) + and minibatch_size + and len(self._workers) > 1 + ): + # Find episode w/ the largest single-agent episode in it, then + # compute this single-agent episode's total number of mini + # batches (if we iterated over it num_sgd_iter times with the + # mini batch size). 
+ longest_ts = 0 + per_mod_ts = defaultdict(int) + for i, shard in enumerate(eps_shards): + for ma_episode in shard: + for sa_episode in ma_episode.agent_episodes.values(): + key = (i, sa_episode.module_id) + per_mod_ts[key] += len(sa_episode) + if per_mod_ts[key] > longest_ts: + longest_ts = per_mod_ts[key] + min_total_mini_batches = self._compute_num_total_mini_batches( + batch_size=longest_ts, + mini_batch_size=minibatch_size, + num_iters=num_iters, + ) + partials = [ + partial( + _learner_update, + _episodes_shard=eps_shard, + _min_total_mini_batches=min_total_mini_batches, + ) + for eps_shard in eps_shards + ] if async_update: # Retrieve all ready results (kicked off by prior calls to this method). diff --git a/rllib/evaluation/tests/test_rollout_worker.py b/rllib/evaluation/tests/test_rollout_worker.py index f95ea610a3423..a0eae87cafc64 100644 --- a/rllib/evaluation/tests/test_rollout_worker.py +++ b/rllib/evaluation/tests/test_rollout_worker.py @@ -646,6 +646,7 @@ def test_vector_env_support(self): for _ in range(8): batch = ev.sample() self.assertEqual(batch.count, 10) + result = collect_metrics(ws, []) self.assertEqual(result["episodes_this_iter"], 0) for _ in range(8): diff --git a/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py b/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py index a6e468771834a..0b1a55c7fc77e 100644 --- a/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py +++ b/rllib/examples/offline_rl/pretrain_bc_single_agent_evaluate_as_multi_agent.py @@ -99,6 +99,9 @@ observation_space=dummy_env.observation_space, action_space=dummy_env.action_space, ) + .api_stack( + enable_rl_module_and_learner=True, + ) .offline_data( input_=offline_file, ) diff --git a/rllib/offline/offline_data.py b/rllib/offline/offline_data.py new file mode 100644 index 0000000000000..20f5f15fa6334 --- /dev/null +++ b/rllib/offline/offline_data.py @@ -0,0 +1,334 @@ +import logging +import 
numpy as np +from pathlib import Path +import random +import ray +from ray.actor import ActorHandle +from typing import Any, Dict, List, Optional, Union + +from ray.rllib.algorithms.algorithm_config import AlgorithmConfig +from ray.rllib.core import COMPONENT_RL_MODULE +from ray.rllib.core.columns import Columns +from ray.rllib.core.learner import Learner +from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec +from ray.rllib.env.single_agent_episode import SingleAgentEpisode +from ray.rllib.policy.sample_batch import MultiAgentBatch, SampleBatch +from ray.rllib.utils.compression import unpack_if_needed +from ray.rllib.utils.typing import EpisodeType, ModuleID + +logger = logging.getLogger(__name__) + +# TODO (simon): Implement schema mapping for users, i.e., users define +# which row name to map to which default schema name below. +SCHEMA = [ + Columns.EPS_ID, + Columns.AGENT_ID, + Columns.MODULE_ID, + Columns.OBS, + Columns.ACTIONS, + Columns.REWARDS, + Columns.INFOS, + Columns.NEXT_OBS, + Columns.TERMINATEDS, + Columns.TRUNCATEDS, + Columns.T, + # TODO (simon): Remove as soon as we are new stack only. + "agent_index", + "dones", + "unroll_id", +] + + +class OfflineData: + def __init__(self, config: AlgorithmConfig): + + self.config = config + self.is_multi_agent = config.is_multi_agent() + self.path = ( + config.get("input_") + if isinstance(config.get("input_"), list) + else Path(config.get("input_")) + ) + # Use `read_json` as default data read method. + self.data_read_method = config.input_read_method + # Override default arguments for the data read method. + self.data_read_method_kwargs = ( + self.default_read_method_kwargs | config.input_read_method_kwargs + ) + try: + # Load the dataset.
+ self.data = getattr(ray.data, self.data_read_method)( + self.path, **self.data_read_method_kwargs + ) + logger.info("Reading data from {}".format(self.path)) + logger.info(self.data.schema()) + except Exception as e: + logger.error(e) + # Avoids reinstantiating the batch iterator each time we sample. + self.batch_iterator = None + # For remote learner setups. + self.locality_hints = None + self.learner_handles = None + self.module_spec = None + + def sample( + self, + num_samples: int, + return_iterator: bool = False, + num_shards: int = 1, + ): + if ( + not return_iterator + or return_iterator + and num_shards <= 1 + and not self.batch_iterator + ): + # If no iterator should be returned, or if we want to return a single + # batch iterator, we instantiate the batch iterator once, here. + # TODO (simon, sven): The iterator depends on the `num_samples`, i.e., + # sampling later with a different batch size would need a + # reinstantiation of the iterator. + self.batch_iterator = self.data.map_batches( + OfflinePreLearner, + fn_constructor_kwargs={ + "config": self.config, + "learner": self.learner_handles[0], + }, + concurrency=2, + batch_size=num_samples, + ).iter_batches( + batch_size=num_samples, + prefetch_batches=2, + local_shuffle_buffer_size=num_samples * 10, + ) + + # Do we want to return an iterator or a single batch? + if return_iterator: + # In case of multiple shards, we return multiple + # `StreamingSplitIterator` instances. + if num_shards > 1: + # Call here the learner to get an up-to-date module state. + # TODO (simon): This is a workaround as long as learners cannot + # receive any calls from another actor. + module_state = ray.get( + self.learner_handles[0].get_state.remote( + component=COMPONENT_RL_MODULE + ) + ) + return self.data.map_batches( + # TODO (cheng su): At best the learner handle passed in here should + # be the one from the learner that is nearest, but here we cannot + # provide locality hints.
+ OfflinePreLearner, + fn_constructor_kwargs={ + "config": self.config, + "learner": self.learner_handles, + "locality_hints": self.locality_hints, + "module_spec": self.module_spec, + "module_state": module_state, + }, + concurrency=num_shards, + batch_size=num_samples, + zero_copy_batch=True, + ).streaming_split( + n=num_shards, equal=False, locality_hints=self.locality_hints + ) + + # Otherwise, we return a simple batch `DataIterator`. + else: + return self.batch_iterator + else: + # Return a single batch from the iterator. + return next(iter(self.batch_iterator))["batch"][0] + + @property + def default_read_method_kwargs(self): + return { + "override_num_blocks": max(self.config.num_learners * 2, 2), + } + + +class OfflinePreLearner: + def __init__( + self, + config, + learner: Union[Learner, list[ActorHandle]], + locality_hints: Optional[list] = None, + module_spec: Optional[MultiAgentRLModuleSpec] = None, + module_state: Optional[Dict[ModuleID, Any]] = None, + ): + + self.config = config + # We need this learner to run the learner connector pipeline. + # If it is a `Learner` instance, the `Learner` is local. + if isinstance(learner, Learner): + self._learner = learner + self.learner_is_remote = False + self._module = self._learner._module + # Otherwise we have remote `Learner`s. + else: + # TODO (simon): Check with the data team how to get at + # initialization the data block location. + node_id = ray.get_runtime_context().get_node_id() + # Shuffle indices such that not each data block syncs weights + # with the same learner in case there are multiple learners + # on the same node like the `PreLearner`. + indices = list(range(len(locality_hints))) + random.shuffle(indices) + locality_hints = [locality_hints[i] for i in indices] + learner = [learner[i] for i in indices] + # Choose a learner from the same node. 
+ for i, hint in enumerate(locality_hints): + if hint == node_id: + self._learner = learner[i] + # If `_learner` is still unset here, there is no learner on the same node. + if not getattr(self, "_learner", None): + # Then choose a learner randomly. + self._learner = learner[random.randint(0, len(learner) - 1)] + self.learner_is_remote = True + # Build the module from spec. Note, this will be a MARL module. + self._module = module_spec.build() + self._module.set_state(module_state) + # Build the learner connector pipeline. + self._learner_connector = self.config.build_learner_connector( + input_observation_space=None, + input_action_space=None, + ) + # Cache the policies to be trained to update weights only for these. + self._policies_to_train = self.config.policies_to_train + self._is_multi_agent = config.is_multi_agent() + # Set the counter to zero. + self.iter_since_last_module_update = 0 + # self._future = None + + def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, List[EpisodeType]]: + # Map the batch to episodes. + episodes = self._map_to_episodes(self._is_multi_agent, batch) + # TODO (simon): Make syncing work. Right now this becomes blocking or never + # receives weights. Learners appear to be not accessible from other actors. + # Increase the counter for updating the module. + # IDEA: put the module state into the object store. From there any actor has + # access. + # self.iter_since_last_module_update += 1 + + # if self._future: + # refs, _ = ray.wait([self._future], timeout=0) + # print(f"refs: {refs}") + # if refs: + # module_state = ray.get(self._future) + # + # self._module.set_state(module_state) + # self._future = None + + # # Synch the learner module, if necessary. Note, in case of a local learner + # # we have a reference to the module and therefore an up-to-date module. + # if self.learner_is_remote and self.iter_since_last_module_update + # > self.config.prelearner_module_synch_period: + # # Reset the iteration counter.
+ # self.iter_since_last_module_update = 0 + # # Request the module weights from the remote learner. + # self._future = + # self._learner.get_module_state.remote(inference_only=False) + # # module_state = + # ray.get(self._learner.get_module_state.remote(inference_only=False)) + # # self._module.set_state(module_state) + + # Run the `Learner`'s connector pipeline. + batch = self._learner_connector( + rl_module=self._module, + data={}, + episodes=episodes["episodes"], + shared_data={}, + ) + # Convert to `MultiAgentBatch`. + batch = MultiAgentBatch( + { + module_id: SampleBatch(module_data) + for module_id, module_data in batch.items() + }, + # TODO (simon): This can be run once for the batch and the + # metrics, but we run it twice: here and later in the learner. + env_steps=sum(e.env_steps() for e in episodes["episodes"]), + ) + # Remove all data from modules that should not be trained. We do + # not want to pass around more data than necessary. + for module_id in list(batch.policy_batches.keys()): + if not self._should_module_be_updated(module_id, batch): + del batch.policy_batches[module_id] + + # TODO (simon): Log steps trained for metrics (how?). At best in learner + # and not here. But we could precompute metrics here and pass it to the learner + # for logging. Like this we do not have to pass around episode lists. + + # TODO (simon): episodes are only needed for logging here. + return {"batch": [batch]} + + def _should_module_be_updated(self, module_id, multi_agent_batch=None): + """Checks which modules in a MARL module should be updated.""" + if not self._policies_to_train: + # In case of no update information, the module is updated.
+ return True + elif not callable(self._policies_to_train): + return module_id in set(self._policies_to_train) + else: + return self._policies_to_train(module_id, multi_agent_batch) + + @staticmethod + def _map_to_episodes( + is_multi_agent: bool, batch: Dict[str, np.ndarray] + ) -> Dict[str, List[EpisodeType]]: + """Maps a batch of data to episodes.""" + + episodes = [] + # TODO (simon): Give users possibility to provide a custom schema. + for i, obs in enumerate(batch["obs"]): + + # If multi-agent we need to extract the agent ID. + # TODO (simon): Check, what happens with the module ID. + if is_multi_agent: + agent_id = ( + batch[Columns.AGENT_ID][i] + if Columns.AGENT_ID in batch + # The old stack uses "agent_index" instead of "agent_id". + # TODO (simon): Remove this as soon as we are new stack only. + else (batch["agent_index"][i] if "agent_index" in batch else None) + ) + else: + agent_id = None + + if is_multi_agent: + # TODO (simon): Add support for multi-agent episodes. + pass + else: + # Build a single-agent episode with a single row of the batch. + episode = SingleAgentEpisode( + id_=batch[Columns.EPS_ID][i], + agent_id=agent_id, + observations=[ + unpack_if_needed(obs), + unpack_if_needed(batch[Columns.NEXT_OBS][i]), + ], + infos=[ + {}, + batch[Columns.INFOS][i] if Columns.INFOS in batch else {}, + ], + actions=[batch[Columns.ACTIONS][i]], + rewards=[batch[Columns.REWARDS][i]], + terminated=batch[ + Columns.TERMINATEDS if Columns.TERMINATEDS in batch else "dones" + ][i], + truncated=batch[Columns.TRUNCATEDS][i] + if Columns.TRUNCATEDS in batch + else False, + # TODO (simon): Results in zero-length episodes in connector. + # t_started=batch[Columns.T if Columns.T in batch else + # "unroll_id"][i][0], + # TODO (simon): Single-dimensional columns are not supported. 
+ extra_model_outputs={ + k: [v[i]] for k, v in batch.items() if k not in SCHEMA + }, + len_lookback_buffer=0, + ) + episodes.append(episode) + # Note, `map_batches` expects a `Dict` as return value. + return {"episodes": episodes} diff --git a/rllib/offline/tests/test_offline_data.py b/rllib/offline/tests/test_offline_data.py new file mode 100644 index 0000000000000..78bf5b190a441 --- /dev/null +++ b/rllib/offline/tests/test_offline_data.py @@ -0,0 +1,71 @@ +import functools +import ray +import unittest + +from pathlib import Path + +from ray.rllib.algorithms.algorithm_config import AlgorithmConfig +from ray.rllib.env.single_agent_episode import SingleAgentEpisode +from ray.rllib.offline.offline_data import OfflineData, OfflinePreLearner + + +class TestOfflineData(unittest.TestCase): + def setUp(self) -> None: + data_path = "tests/data/cartpole/cartpole-v1_large" + base_path = Path(__file__).parents[2] + self.data_path = "local://" + base_path.joinpath(data_path).as_posix() + ray.init() + + def tearDown(self) -> None: + ray.shutdown() + + def test_offline_data_load(self): + + config = AlgorithmConfig().offline_data(input_=[self.data_path]) + + offline_data = OfflineData(config) + + single_row = offline_data.data.take_batch(batch_size=1) + self.assertTrue("obs" in single_row) + + def test_offline_convert_to_episodes(self): + + config = AlgorithmConfig().offline_data( + input_=[self.data_path], + ) + + offline_data = OfflineData(config) + + batch = offline_data.data.take_batch(batch_size=10) + episodes = OfflinePreLearner._map_to_episodes(False, batch)["episodes"] + + self.assertTrue(len(episodes) == 10) + self.assertTrue(isinstance(episodes[0], SingleAgentEpisode)) + + def test_sample(self): + + config = AlgorithmConfig().offline_data(input_=[self.data_path]) + + offline_data = OfflineData(config) + + batch_iterator = offline_data.data.map_batches( + functools.partial( + OfflinePreLearner._map_to_episodes, offline_data.is_multi_agent + ) + ).iter_batches( + 
batch_size=10, + prefetch_batches=1, + local_shuffle_buffer_size=100, + ) + + batch = next(iter(batch_iterator)) + + self.assertTrue("episodes" in batch) + self.assertTrue(isinstance(batch["episodes"][0], SingleAgentEpisode)) + + +if __name__ == "__main__": + import sys + import pytest + + sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/tests/data/cartpole/cartpole-v1.jsonl/1_000000_000000.json b/rllib/tests/data/cartpole/cartpole-v1.jsonl/1_000000_000000.json new file mode 100644 index 0000000000000..055220439569c --- /dev/null +++ b/rllib/tests/data/cartpole/cartpole-v1.jsonl/1_000000_000000.json @@ -0,0 +1,102 @@ +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":0.0,"t":0,"agent_index":0,"action_prob":0.4979577959,"actions":0,"q_values":[-0.0056439545,0.0025248583],"rewards":1.0,"prev_actions":0,"obs":[0.040251147,-0.0094470019,0.0473547354,-0.0012375333],"new_obs":[0.0400622077,-0.2052149773,0.0473299846,0.3060023189]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":1,"agent_index":0,"action_prob":0.5745141506,"actions":1,"q_values":[-0.0472300202,0.2530632019],"rewards":1.0,"prev_actions":0,"obs":[0.0400622077,-0.2052149773,0.0473299846,0.3060023189],"new_obs":[0.0359579101,-0.0107983146,0.0534500293,0.0286137313]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":2,"agent_index":0,"action_prob":0.5042742491,"actions":1,"q_values":[-0.0041622878,0.0129351038],"rewards":1.0,"prev_actions":1,"obs":[0.0359579101,-0.0107983146,0.0534500293,0.0286137313],"new_obs":[0.0357419401,0.1835180074,0.0540223047,-0.246737659]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":3,"agent_index":0,"action_prob":0.5248998404,"actions":0,"q_values":[0.0577963144,-0.0418855101],"rewards":1.0,"prev_actions":1,"obs":[0.0357419401,0.1835180074,0.0540223047,-0.246737659],"new_obs":[0.039412301,-0.0123322094,0.0490875505,0.0624839179]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":4,"agent_index":0,"action_prob":0.5048907995,"actions":1,"q_values":[-0.0001599407,0.0194039177],"rewards":1.0,"prev_actions":0,"obs":[0.039412301,-0.0123322094,0.0490875505,0.0624839179],"new_obs":[0.039165657,0.1820528209,0.0503372289,-0.2143164724]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":5,"agent_index":0,"action_prob":0.5254997611,"actions":0,"q_values":[0.0518780947,-0.0502095222],"rewards":1.0,"prev_actions":1,"obs":[0.039165657,0.1820528209,0.0503372289,-0.2143164724],"new_obs":[0.0428067148,-0.0137512879,0.0460509025,0.0938105732]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":6,"agent_index":0,"action_prob":0.4930223525,"actions":0,"q_values":[-0.0000635125,0.027848877],"rewards":1.0,"prev_actions":0,"obs":[0.0428067148,-0.0137512879,0.0460509025,0.0938105732],"new_obs":[0.0425316878,-0.2095019966,0.0479271114,0.4006595314]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":7,"agent_index":0,"action_prob":0.5723332167,"actions":1,"q_values":[-0.0353333429,0.2560437024],"rewards":1.0,"prev_actions":0,"obs":[0.0425316878,-0.2095019966,0.0479271114,0.4006595314],"new_obs":[0.0383416489,-0.0150914835,0.055940304,0.1234637946]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":8,"agent_index":0,"action_prob":0.5071576238,"actions":1,"q_values":[0.0050234329,0.0336557478],"rewards":1.0,"prev_actions":1,"obs":[0.0383416489,-0.0150914835,0.055940304,0.1234637946],"new_obs":[0.0380398184,0.1791862696,0.0584095791,-0.151059106]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":9,"agent_index":0,"action_prob":0.5262983441,"actions":0,"q_values":[0.045250643,-0.0600399636],"rewards":1.0,"prev_actions":1,"obs":[0.0380398184,0.1791862696,0.0584095791,-0.151059106],"new_obs":[0.0416235439,-0.0167213082,0.0553883985,0.1594637781]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":10,"agent_index":0,"action_prob":0.5075111389,"actions":1,"q_values":[0.0028384719,0.0328851715],"rewards":1.0,"prev_actions":0,"obs":[0.0416235439,-0.0167213082,0.0553883985,0.1594637781],"new_obs":[0.0412891172,0.1775657237,0.0585776716,-0.1152439862]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":11,"agent_index":0,"action_prob":0.4721700847,"actions":1,"q_values":[0.0372359976,-0.0741987899],"rewards":1.0,"prev_actions":1,"obs":[0.0412891172,0.1775657237,0.0585776716,-0.1152439862],"new_obs":[0.0448404327,0.3718015552,0.0562727936,-0.3888860047]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":12,"agent_index":0,"action_prob":0.4541035295,"actions":1,"q_values":[0.0957556367,-0.0883483961],"rewards":1.0,"prev_actions":1,"obs":[0.0448404327,0.3718015552,0.0562727936,-0.3888860047],"new_obs":[0.0522764623,0.5660815239,0.0484950729,-0.6633091569]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":13,"agent_index":0,"action_prob":0.5691784024,"actions":0,"q_values":[0.1641600132,-0.1143397987],"rewards":1.0,"prev_actions":1,"obs":[0.0522764623,0.5660815239,0.0484950729,-0.6633091569],"new_obs":[0.0635980964,0.3703196049,0.0352288894,-0.3557596207]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":14,"agent_index":0,"action_prob":0.4500200748,"actions":1,"q_values":[0.0931370407,-0.1074525341],"rewards":1.0,"prev_actions":0,"obs":[0.0635980964,0.3703196049,0.0352288894,-0.3557596207],"new_obs":[0.0710044876,0.5649234653,0.0281136967,-0.6371290684]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":15,"agent_index":0,"action_prob":0.4280275404,"actions":1,"q_values":[0.1619675756,-0.1279357076],"rewards":1.0,"prev_actions":1,"obs":[0.0710044876,0.5649234653,0.0281136967,-0.6371290684],"new_obs":[0.0823029578,0.759642303,0.0153711149,-0.9208275676]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":16,"agent_index":0,"action_prob":0.5951988697,"actions":0,"q_values":[0.2391040921,-0.1463954747],"rewards":1.0,"prev_actions":1,"obs":[0.0823029578,0.759642303,0.0153711149,-0.9208275676],"new_obs":[0.0974958017,0.5643160343,-0.0030454374,-0.6233538985]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":17,"agent_index":0,"action_prob":0.574338913,"actions":0,"q_values":[0.1580524296,-0.1415237635],"rewards":1.0,"prev_actions":0,"obs":[0.0974958017,0.5643160343,-0.0030454374,-0.6233538985],"new_obs":[0.10878212,0.3692367077,-0.0155125149,-0.3316316307]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":18,"agent_index":0,"action_prob":0.4429775178,"actions":1,"q_values":[0.0966238081,-0.1324627697],"rewards":1.0,"prev_actions":0,"obs":[0.10878212,0.3692367077,-0.0155125149,-0.3316316307],"new_obs":[0.1161668524,0.5645759702,-0.0221451484,-0.6291658282]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":19,"agent_index":0,"action_prob":0.5751434565,"actions":0,"q_values":[0.1541520208,-0.1487159878],"rewards":1.0,"prev_actions":1,"obs":[0.1161668524,0.5645759702,-0.0221451484,-0.6291658282],"new_obs":[0.1274583787,0.3697699904,-0.0347284637,-0.3435385525]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":20,"agent_index":0,"action_prob":0.4427056909,"actions":1,"q_values":[0.0929112658,-0.1372770369],"rewards":1.0,"prev_actions":0,"obs":[0.1274583787,0.3697699904,-0.0347284637,-0.3435385525],"new_obs":[0.1348537803,0.5653683543,-0.0415992364,-0.6469672918]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":21,"agent_index":0,"action_prob":0.5753542781,"actions":0,"q_values":[0.1511463225,-0.1525844634],"rewards":1.0,"prev_actions":1,"obs":[0.1348537803,0.5653683543,-0.0415992364,-0.6469672918],"new_obs":[0.146161139,0.3708499074,-0.0545385815,-0.3676687479]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":22,"agent_index":0,"action_prob":0.558316946,"actions":0,"q_values":[0.0875367746,-0.146797508],"rewards":1.0,"prev_actions":0,"obs":[0.146161139,0.3708499074,-0.0545385815,-0.3676687479],"new_obs":[0.1535781324,0.1765436381,-0.0618919544,-0.0926690847]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":23,"agent_index":0,"action_prob":0.5349109769,"actions":0,"q_values":[0.0885494351,-0.05132211],"rewards":1.0,"prev_actions":0,"obs":[0.1535781324,0.1765436381,-0.0618919544,-0.0926690847],"new_obs":[0.1571090072,-0.0176391155,-0.0637453347,0.1798627228]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":24,"agent_index":0,"action_prob":0.4932322502,"actions":0,"q_values":[0.0184261184,0.0454988182],"rewards":1.0,"prev_actions":0,"obs":[0.1571090072,-0.0176391155,-0.0637453347,0.1798627228],"new_obs":[0.1567562371,-0.2117937058,-0.0601480827,0.4517746568]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":25,"agent_index":0,"action_prob":0.428198576,"actions":0,"q_values":[-0.0499614105,0.2392434478],"rewards":1.0,"prev_actions":0,"obs":[0.1567562371,-0.2117937058,-0.0601480827,0.4517746568],"new_obs":[0.1525203586,-0.4060157239,-0.0511125885,0.7249079943]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":26,"agent_index":0,"action_prob":0.6240300536,"actions":1,"q_values":[-0.0935454667,0.4131438434],"rewards":1.0,"prev_actions":0,"obs":[0.1525203586,-0.4060157239,-0.0511125885,0.7249079943],"new_obs":[0.1444000453,-0.2102256268,-0.0366144292,0.416585803]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":27,"agent_index":0,"action_prob":0.4272377491,"actions":0,"q_values":[-0.0380442739,0.2550858259],"rewards":1.0,"prev_actions":1,"obs":[0.1444000453,-0.2102256268,-0.0366144292,0.416585803],"new_obs":[0.1401955336,-0.4048100412,-0.0282827131,0.69750458]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":28,"agent_index":0,"action_prob":0.6247843504,"actions":1,"q_values":[-0.092116043,0.4177895784],"rewards":1.0,"prev_actions":0,"obs":[0.1401955336,-0.4048100412,-0.0282827131,0.69750458],"new_obs":[0.1320993304,-0.2093075663,-0.0143326214,0.3960541487]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":29,"agent_index":0,"action_prob":0.4268564284,"actions":0,"q_values":[-0.0307480171,0.2639406323],"rewards":1.0,"prev_actions":1,"obs":[0.1320993304,-0.2093075663,-0.0143326214,0.3960541487],"new_obs":[0.127913177,-0.4042232633,-0.0064115389,0.6841840148]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":30,"agent_index":0,"action_prob":0.6255699396,"actions":1,"q_values":[-0.0910449326,0.4222134054],"rewards":1.0,"prev_actions":0,"obs":[0.127913177,-0.4042232633,-0.0064115389,0.6841840148],"new_obs":[0.1198287085,-0.2090128809,0.0072721415,0.3894894719]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":31,"agent_index":0,"action_prob":0.5718400478,"actions":1,"q_values":[-0.0231937021,0.2661687136],"rewards":1.0,"prev_actions":1,"obs":[0.1198287085,-0.2090128809,0.0072721415,0.3894894719],"new_obs":[0.1156484559,-0.0139948912,0.0150619308,0.0991082117]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":32,"agent_index":0,"action_prob":0.4935725331,"actions":0,"q_values":[0.0213395655,0.0470508635],"rewards":1.0,"prev_actions":1,"obs":[0.1156484559,-0.0139948912,0.0150619308,0.0991082117],"new_obs":[0.1153685525,-0.2093294263,0.0170440953,0.3965048194]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":33,"agent_index":0,"action_prob":0.571847856,"actions":1,"q_values":[-0.0216540992,0.2677402198],"rewards":1.0,"prev_actions":0,"obs":[0.1153685525,-0.2093294263,0.0170440953,0.3965048194],"new_obs":[0.111181967,-0.0144533999,0.0249741916,0.1092439592]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":34,"agent_index":0,"action_prob":0.506999135,"actions":1,"q_values":[0.0179430563,0.0459413528],"rewards":1.0,"prev_actions":1,"obs":[0.111181967,-0.0144533999,0.0249741916,0.1092439592],"new_obs":[0.1108928993,0.1803019345,0.0271590706,-0.1754563004]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":35,"agent_index":0,"action_prob":0.4627947509,"actions":1,"q_values":[0.0568101928,-0.0922863558],"rewards":1.0,"prev_actions":1,"obs":[0.1108928993,0.1803019345,0.0271590706,-0.1754563004],"new_obs":[0.1144989356,0.3750248849,0.023649944,-0.4594492316]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":36,"agent_index":0,"action_prob":0.4436970949,"actions":1,"q_values":[0.110231474,-0.115939416],"rewards":1.0,"prev_actions":1,"obs":[0.1144989356,0.3750248849,0.023649944,-0.4594492316],"new_obs":[0.1219994351,0.5698046684,0.0144609604,-0.7445847988]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":37,"agent_index":0,"action_prob":0.4228127301,"actions":1,"q_values":[0.1665245742,-0.1447127312],"rewards":1.0,"prev_actions":1,"obs":[0.1219994351,0.5698046684,0.0144609604,-0.7445847988],"new_obs":[0.1333955228,0.7647241354,-0.0004307352,-1.032681942]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":38,"agent_index":0,"action_prob":0.4017649591,"actions":1,"q_values":[0.2356905341,-0.1624258757],"rewards":1.0,"prev_actions":1,"obs":[0.1333955228,0.7647241354,-0.0004307352,-1.032681942],"new_obs":[0.1486900151,0.9598518014,-0.0210843757,-1.3255001307]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":39,"agent_index":0,"action_prob":0.6177492142,"actions":0,"q_values":[0.3146172464,-0.1653882861],"rewards":1.0,"prev_actions":1,"obs":[0.1486900151,0.9598518014,-0.0210843757,-1.3255001307],"new_obs":[0.1678870469,0.7650023103,-0.0475943759,-1.0394892693]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":40,"agent_index":0,"action_prob":0.6000679731,"actions":0,"q_values":[0.2252316922,-0.1805165857],"rewards":1.0,"prev_actions":0,"obs":[0.1678870469,0.7650023103,-0.0475943759,-1.0394892693],"new_obs":[0.1831870973,0.5705440044,-0.0683841631,-0.762119472]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":41,"agent_index":0,"action_prob":0.4211883247,"actions":1,"q_values":[0.1449959129,-0.1729011685],"rewards":1.0,"prev_actions":0,"obs":[0.1831870973,0.5705440044,-0.0683841631,-0.762119472],"new_obs":[0.1945979744,0.7665379047,-0.0836265534,-1.0755125284]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":42,"agent_index":0,"action_prob":0.5995147228,"actions":0,"q_values":[0.2126035839,-0.1908400208],"rewards":1.0,"prev_actions":1,"obs":[0.1945979744,0.7665379047,-0.0836265534,-1.0755125284],"new_obs":[0.2099287361,0.5726144314,-0.1051368043,-0.8102014065]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":43,"agent_index":0,"action_prob":0.5784649253,"actions":0,"q_values":[0.125258714,-0.1912160814],"rewards":1.0,"prev_actions":0,"obs":[0.2099287361,0.5726144314,-0.1051368043,-0.8102014065],"new_obs":[0.2213810235,0.379077971,-0.1213408262,-0.5523530245]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":44,"agent_index":0,"action_prob":0.5586039424,"actions":0,"q_values":[0.078900367,-0.1565978825],"rewards":1.0,"prev_actions":0,"obs":[0.2213810235,0.379077971,-0.1213408262,-0.5523530245],"new_obs":[0.2289625853,0.1858503371,-0.1323878914,-0.3002294004]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":45,"agent_index":0,"action_prob":0.5260810256,"actions":0,"q_values":[0.0707091391,-0.0337096937],"rewards":1.0,"prev_actions":0,"obs":[0.2289625853,0.1858503371,-0.1323878914,-0.3002294004],"new_obs":[0.2326795906,-0.0071606194,-0.1383924782,-0.0520548299]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":46,"agent_index":0,"action_prob":0.4879906178,"actions":0,"q_values":[-0.0010413788,0.0470055342],"rewards":1.0,"prev_actions":0,"obs":[0.2326795906,-0.0071606194,-0.1383924782,-0.0520548299],"new_obs":[0.2325363755,-0.2000548244,-0.1394335777,0.1939624697]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":47,"agent_index":0,"action_prob":0.4281115532,"actions":0,"q_values":[-0.0550241098,0.2345360816],"rewards":1.0,"prev_actions":0,"obs":[0.2325363755,-0.2000548244,-0.1394335777,0.1939624697],"new_obs":[0.2285352796,-0.3929353952,-0.1355543286,0.4396146834]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":48,"agent_index":0,"action_prob":0.6308852434,"actions":1,"q_values":[-0.1573764086,0.3786399961],"rewards":1.0,"prev_actions":0,"obs":[0.2285352796,-0.3929353952,-0.1355543286,0.4396146834],"new_obs":[0.2206765711,-0.1961815357,-0.1267620325,0.1074601933]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":49,"agent_index":0,"action_prob":0.5760338306,"actions":1,"q_values":[-0.0950685218,0.2114441395],"rewards":1.0,"prev_actions":1,"obs":[0.2206765711,-0.1961815357,-0.1267620325,0.1074601933],"new_obs":[0.2167529464,0.0005075104,-0.1246128306,-0.222374469]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":50,"agent_index":0,"action_prob":0.5073276162,"actions":1,"q_values":[-0.0634048432,-0.0340922587],"rewards":1.0,"prev_actions":1,"obs":[0.2167529464,0.0005075104,-0.1246128306,-0.222374469],"new_obs":[0.2167630941,0.1971698552,-0.1290603131,-0.5516219735]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":51,"agent_index":0,"action_prob":0.4669471085,"actions":1,"q_values":[0.0167170092,-0.1156875566],"rewards":1.0,"prev_actions":1,"obs":[0.2167630941,0.1971698552,-0.1290603131,-0.5516219735],"new_obs":[0.2207064927,0.3938456476,-0.1400927603,-0.8820206523]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":52,"agent_index":0,"action_prob":0.4393852353,"actions":1,"q_values":[0.0598426796,-0.183814615],"rewards":1.0,"prev_actions":1,"obs":[0.2207064927,0.3938456476,-0.1400927603,-0.8820206523],"new_obs":[0.2285833955,0.5905638933,-0.1577331722,-1.2152597904]} +{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":53,"agent_index":0,"action_prob":0.5832104087,"actions":0,"q_values":[0.128090471,-0.2078761756],"rewards":1.0,"prev_actions":1,"obs":[0.2285833955,0.5905638933,-0.1577331722,-1.2152597904],"new_obs":[0.2403946817,0.397788465,-0.1820383668,-0.9758678675]} 
+{"weights":1.0,"eps_id":241561760,"dones":false,"infos":{},"prev_rewards":1.0,"t":54,"agent_index":0,"action_prob":0.5628215671,"actions":0,"q_values":[0.0553115979,-0.1973097622],"rewards":1.0,"prev_actions":0,"obs":[0.2403946817,0.397788465,-0.1820383668,-0.9758678675],"new_obs":[0.2483504415,0.2055128217,-0.2015557289,-0.745444119]} +{"weights":1.0,"eps_id":241561760,"dones":true,"infos":{},"prev_rewards":1.0,"t":55,"agent_index":0,"action_prob":0.5309032202,"actions":0,"q_values":[-0.0222308636,-0.1460015923],"rewards":1.0,"prev_actions":0,"obs":[0.2483504415,0.2055128217,-0.2015557289,-0.745444119],"new_obs":[0.2524607182,0.0136578996,-0.2164646089,-0.52234447]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":0.0,"t":0,"agent_index":0,"action_prob":0.5135254264,"actions":1,"q_values":[-0.0155974496,0.0385172926],"rewards":1.0,"prev_actions":0,"obs":[0.0450199731,-0.0348616093,0.0160647109,0.0116978278],"new_obs":[0.0443227403,0.1600263119,0.0162986666,-0.275873512]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":1,"agent_index":0,"action_prob":0.4770704508,"actions":1,"q_values":[0.0431629531,-0.0486194752],"rewards":1.0,"prev_actions":1,"obs":[0.0443227403,0.1600263119,0.0162986666,-0.275873512],"new_obs":[0.0475232676,0.354911983,0.0107811969,-0.5633715987]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":2,"agent_index":0,"action_prob":0.5442214012,"actions":0,"q_values":[0.0987678394,-0.0785810724],"rewards":1.0,"prev_actions":1,"obs":[0.0475232676,0.354911983,0.0107811969,-0.5633715987],"new_obs":[0.0546215065,0.1596404165,-0.0004862353,-0.2673116624]} 
+{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":3,"agent_index":0,"action_prob":0.4762794971,"actions":1,"q_values":[0.0386313275,-0.0563219115],"rewards":1.0,"prev_actions":0,"obs":[0.0546215065,0.1596404165,-0.0004862353,-0.2673116624],"new_obs":[0.057814315,0.3547693193,-0.0058324686,-0.5601479411]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":4,"agent_index":0,"action_prob":0.5454674363,"actions":0,"q_values":[0.0945036113,-0.0878700614],"rewards":1.0,"prev_actions":1,"obs":[0.057814315,0.3547693193,-0.0058324686,-0.5601479411],"new_obs":[0.0649096966,0.1597297043,-0.0170354266,-0.2693082094]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":5,"agent_index":0,"action_prob":0.5253314972,"actions":0,"q_values":[0.0331180729,-0.0682947934],"rewards":1.0,"prev_actions":0,"obs":[0.0649096966,0.1597297043,-0.0170354266,-0.2693082094],"new_obs":[0.0681042969,-0.0351450518,-0.022421591,0.0179533251]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":6,"agent_index":0,"action_prob":0.484343648,"actions":0,"q_values":[-0.0116132349,0.0510326624],"rewards":1.0,"prev_actions":0,"obs":[0.0681042969,-0.0351450518,-0.022421591,0.0179533251],"new_obs":[0.0674013942,-0.2299383879,-0.0220625252,0.3034785092]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":7,"agent_index":0,"action_prob":0.5828204155,"actions":1,"q_values":[-0.083898738,0.2504634857],"rewards":1.0,"prev_actions":0,"obs":[0.0674013942,-0.2299383879,-0.0220625252,0.3034785092],"new_obs":[0.0628026277,-0.0345090777,-0.0159929544,0.0039200126]} 
+{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":8,"agent_index":0,"action_prob":0.4853146374,"actions":0,"q_values":[-0.0213783514,0.0373799205],"rewards":1.0,"prev_actions":1,"obs":[0.0628026277,-0.0345090777,-0.0159929544,0.0039200126],"new_obs":[0.0621124431,-0.2293980569,-0.0159145538,0.2915143669]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":9,"agent_index":0,"action_prob":0.5827109814,"actions":1,"q_values":[-0.0855528563,0.2483592182],"rewards":1.0,"prev_actions":0,"obs":[0.0621124431,-0.2293980569,-0.0159145538,0.2915143669],"new_obs":[0.0575244837,-0.0340528488,-0.0100842668,-0.0061450214]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":10,"agent_index":0,"action_prob":0.5136748552,"actions":1,"q_values":[-0.0289013889,0.0258116815],"rewards":1.0,"prev_actions":1,"obs":[0.0575244837,-0.0340528488,-0.0100842668,-0.0061450214],"new_obs":[0.0568434261,0.1612122655,-0.0102071678,-0.3019925058]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":11,"agent_index":0,"action_prob":0.4766709805,"actions":1,"q_values":[0.0278598145,-0.0655241534],"rewards":1.0,"prev_actions":1,"obs":[0.0568434261,0.1612122655,-0.0102071678,-0.3019925058],"new_obs":[0.0600676723,0.3564781845,-0.0162470173,-0.5978770256]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":12,"agent_index":0,"action_prob":0.4540769458,"actions":1,"q_values":[0.0917566642,-0.092454724],"rewards":1.0,"prev_actions":1,"obs":[0.0600676723,0.3564781845,-0.0162470173,-0.5978770256],"new_obs":[0.0671972334,0.5518236756,-0.0282045584,-0.8956329823]} 
+{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":13,"agent_index":0,"action_prob":0.4279625416,"actions":1,"q_values":[0.1692613959,-0.1209073961],"rewards":1.0,"prev_actions":1,"obs":[0.0671972334,0.5518236756,-0.0282045584,-0.8956329823],"new_obs":[0.0782337114,0.7473164797,-0.0461172163,-1.1970465183]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":14,"agent_index":0,"action_prob":0.5955550075,"actions":0,"q_values":[0.2569304705,-0.1300475001],"rewards":1.0,"prev_actions":1,"obs":[0.0782337114,0.7473164797,-0.0461172163,-1.1970465183],"new_obs":[0.093180038,0.5528207421,-0.0700581521,-0.919166863]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":15,"agent_index":0,"action_prob":0.5748928189,"actions":0,"q_values":[0.1545487344,-0.1472935379],"rewards":1.0,"prev_actions":0,"obs":[0.093180038,0.5528207421,-0.0700581521,-0.919166863],"new_obs":[0.1042364538,0.3587121665,-0.0884414837,-0.6492984295]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":16,"agent_index":0,"action_prob":0.5481062531,"actions":0,"q_values":[0.0553374141,-0.1376847029],"rewards":1.0,"prev_actions":0,"obs":[0.1042364538,0.3587121665,-0.0884414837,-0.6492984295],"new_obs":[0.1114106923,0.1649262756,-0.1014274582,-0.3857236207]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":17,"agent_index":0,"action_prob":0.4735119939,"actions":1,"q_values":[0.0067196079,-0.0993316919],"rewards":1.0,"prev_actions":0,"obs":[0.1114106923,0.1649262756,-0.1014274582,-0.3857236207],"new_obs":[0.1147092208,0.361330837,-0.1091419235,-0.7085849643]} 
+{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":18,"agent_index":0,"action_prob":0.5489782095,"actions":0,"q_values":[0.0514150262,-0.1451282054],"rewards":1.0,"prev_actions":1,"obs":[0.1147092208,0.361330837,-0.1091419235,-0.7085849643],"new_obs":[0.121935837,0.1678763479,-0.1233136281,-0.4521538615]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":19,"agent_index":0,"action_prob":0.4744041562,"actions":1,"q_values":[-0.0089950934,-0.1114681214],"rewards":1.0,"prev_actions":0,"obs":[0.121935837,0.1678763479,-0.1233136281,-0.4521538615],"new_obs":[0.1252933592,0.3645067215,-0.1323567033,-0.7810221314]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":20,"agent_index":0,"action_prob":0.5505622625,"actions":0,"q_values":[0.0450827032,-0.157859996],"rewards":1.0,"prev_actions":1,"obs":[0.1252933592,0.3645067215,-0.1323567033,-0.7810221314],"new_obs":[0.132583499,0.1714283675,-0.1479771435,-0.5327370763]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":21,"agent_index":0,"action_prob":0.5247683525,"actions":0,"q_values":[-0.0248695239,-0.1240241006],"rewards":1.0,"prev_actions":0,"obs":[0.132583499,0.1714283675,-0.1479771435,-0.5327370763],"new_obs":[0.1360120624,-0.0213364325,-0.158631891,-0.2900965512]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":22,"agent_index":0,"action_prob":0.5148704052,"actions":1,"q_values":[-0.1575047523,-0.0980054587],"rewards":1.0,"prev_actions":0,"obs":[0.1360120624,-0.0213364325,-0.158631891,-0.2900965512],"new_obs":[0.1355853379,0.1756499112,-0.164433822,-0.6283085346]} 
+{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":23,"agent_index":0,"action_prob":0.4746163487,"actions":1,"q_values":[-0.0437197797,-0.145341754],"rewards":1.0,"prev_actions":1,"obs":[0.1355853379,0.1756499112,-0.164433822,-0.6283085346],"new_obs":[0.1390983313,0.3726385236,-0.1769999862,-0.9679317474]} +{"weights":1.0,"eps_id":1238833020,"dones":false,"infos":{},"prev_rewards":1.0,"t":24,"agent_index":0,"action_prob":0.4442490339,"actions":1,"q_values":[0.0348966867,-0.1890382469],"rewards":1.0,"prev_actions":1,"obs":[0.1390983313,0.3726385236,-0.1769999862,-0.9679317474],"new_obs":[0.1465511024,0.5696383715,-0.1963586211,-1.3105814457]} +{"weights":1.0,"eps_id":1238833020,"dones":true,"infos":{},"prev_rewards":1.0,"t":25,"agent_index":0,"action_prob":0.4205590785,"actions":1,"q_values":[0.1171964705,-0.2032823265],"rewards":1.0,"prev_actions":1,"obs":[0.1465511024,0.5696383715,-0.1963586211,-1.3105814457],"new_obs":[0.1579438746,0.7666289806,-0.2225702554,-1.6577483416]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":0.0,"t":0,"agent_index":0,"action_prob":0.4981169999,"actions":0,"q_values":[0.0343732461,0.0419052467],"rewards":1.0,"prev_actions":0,"obs":[-0.0354358181,0.0323112085,0.042508129,-0.0454557873],"new_obs":[-0.0347895958,-0.1633936912,0.0415990129,0.2603300214]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":1,"agent_index":0,"action_prob":0.5603018999,"actions":1,"q_values":[-0.0403244868,0.2020627856],"rewards":1.0,"prev_actions":0,"obs":[-0.0347895958,-0.1633936912,0.0415990129,0.2603300214],"new_obs":[-0.0380574688,0.0311104916,0.0468056127,-0.0189472828]} 
+{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":2,"agent_index":0,"action_prob":0.4948766828,"actions":0,"q_values":[0.0310895666,0.0515836626],"rewards":1.0,"prev_actions":1,"obs":[-0.0380574688,0.0311104916,0.0468056127,-0.0189472828],"new_obs":[-0.0374352597,-0.1646503657,0.0464266688,0.288128227]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":3,"agent_index":0,"action_prob":0.5607614517,"actions":1,"q_values":[-0.0381223857,0.2061306834],"rewards":1.0,"prev_actions":0,"obs":[-0.0374352597,-0.1646503657,0.0464266688,0.288128227],"new_obs":[-0.0407282673,0.0297798309,0.0521892346,0.0104417372]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":4,"agent_index":0,"action_prob":0.4922669232,"actions":0,"q_values":[0.0162204485,0.0471551418],"rewards":1.0,"prev_actions":1,"obs":[-0.0407282673,0.0297798309,0.0521892346,0.0104417372],"new_obs":[-0.0401326679,-0.1660502404,0.052398067,0.3191235662]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":5,"agent_index":0,"action_prob":0.4393489063,"actions":0,"q_values":[-0.0348389372,0.2089660615],"rewards":1.0,"prev_actions":0,"obs":[-0.0401326679,-0.1660502404,0.052398067,0.3191235662],"new_obs":[-0.0434536748,-0.3618777692,0.0587805398,0.6278597116]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":6,"agent_index":0,"action_prob":0.6127749681,"actions":1,"q_values":[-0.1047379225,0.3542538285],"rewards":1.0,"prev_actions":0,"obs":[-0.0434536748,-0.3618777692,0.0587805398,0.6278597116],"new_obs":[-0.0506912284,-0.1676233411,0.0713377297,0.3542528152]} 
+{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":7,"agent_index":0,"action_prob":0.4384131432,"actions":0,"q_values":[-0.0259464532,0.2216582298],"rewards":1.0,"prev_actions":1,"obs":[-0.0506912284,-0.1676233411,0.0713377297,0.3542528152],"new_obs":[-0.0540436953,-0.3636832833,0.0784227923,0.6685502529]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":8,"agent_index":0,"action_prob":0.388571918,"actions":0,"q_values":[-0.1003192365,0.3529998958],"rewards":1.0,"prev_actions":0,"obs":[-0.0540436953,-0.3636832833,0.0784227923,0.6685502529],"new_obs":[-0.0613173619,-0.5598031282,0.0917937979,0.9848584533]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":9,"agent_index":0,"action_prob":0.6461699605,"actions":1,"q_values":[-0.1714298129,0.4308161139],"rewards":1.0,"prev_actions":0,"obs":[-0.0613173619,-0.5598031282,0.0917937979,0.9848584533],"new_obs":[-0.0725134239,-0.3660226166,0.1114909649,0.7223610282]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":10,"agent_index":0,"action_prob":0.6107516289,"actions":1,"q_values":[-0.0950586647,0.3554142118],"rewards":1.0,"prev_actions":1,"obs":[-0.0725134239,-0.3660226166,0.1114909649,0.7223610282],"new_obs":[-0.0798338801,-0.1726049483,0.1259381771,0.4667462707]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":11,"agent_index":0,"action_prob":0.4383061528,"actions":0,"q_values":[0.000685975,0.2487252802],"rewards":1.0,"prev_actions":1,"obs":[-0.0798338801,-0.1726049483,0.1259381771,0.4667462707],"new_obs":[-0.0832859725,-0.3692602813,0.1352731138,0.7963211536]} 
+{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":12,"agent_index":0,"action_prob":0.6084119678,"actions":1,"q_values":[-0.0878745764,0.3527673483],"rewards":1.0,"prev_actions":0,"obs":[-0.0832859725,-0.3692602813,0.1352731138,0.7963211536],"new_obs":[-0.0906711817,-0.1762282252,0.1511995345,0.5490673184]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":13,"agent_index":0,"action_prob":0.5631444454,"actions":1,"q_values":[0.0041227341,0.2580562532],"rewards":1.0,"prev_actions":1,"obs":[-0.0906711817,-0.1762282252,0.1511995345,0.5490673184],"new_obs":[-0.0941957459,0.0164825916,0.1621808857,0.3075824678]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":14,"agent_index":0,"action_prob":0.5186504722,"actions":1,"q_values":[0.0387043171,0.1133406907],"rewards":1.0,"prev_actions":1,"obs":[-0.0941957459,0.0164825916,0.1621808857,0.3075824678],"new_obs":[-0.0938660949,0.2089668959,0.168332532,0.0701172799]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":15,"agent_index":0,"action_prob":0.5026047826,"actions":0,"q_values":[-0.0185318962,-0.0289510973],"rewards":1.0,"prev_actions":1,"obs":[-0.0938660949,0.2089668959,0.168332532,0.0701172799],"new_obs":[-0.0896867588,0.011881738,0.1697348803,0.4108228385]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":16,"agent_index":0,"action_prob":0.4808782339,"actions":0,"q_values":[0.0252887234,0.1018131152],"rewards":1.0,"prev_actions":0,"obs":[-0.0896867588,0.011881738,0.1697348803,0.4108228385],"new_obs":[-0.0894491225,-0.1851886958,0.1779513359,0.7518432736]} 
+{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":17,"agent_index":0,"action_prob":0.5650154948,"actions":1,"q_values":[-0.0206841528,0.2408585846],"rewards":1.0,"prev_actions":0,"obs":[-0.0894491225,-0.1851886958,0.1779513359,0.7518432736],"new_obs":[-0.0931528956,0.0070914798,0.192988202,0.5200196505]} +{"weights":1.0,"eps_id":464626363,"dones":false,"infos":{},"prev_rewards":1.0,"t":18,"agent_index":0,"action_prob":0.4770132005,"actions":0,"q_values":[0.013561476,0.1055735499],"rewards":1.0,"prev_actions":1,"obs":[-0.0931528956,0.0070914798,0.192988202,0.5200196505],"new_obs":[-0.0930110663,-0.1901485175,0.2033885866,0.8667741418]} +{"weights":1.0,"eps_id":464626363,"dones":true,"infos":{},"prev_rewards":1.0,"t":19,"agent_index":0,"action_prob":0.566983223,"actions":1,"q_values":[-0.0356550738,0.2338979244],"rewards":1.0,"prev_actions":0,"obs":[-0.0930110663,-0.1901485175,0.2033885866,0.8667741418],"new_obs":[-0.0968140364,0.0017116106,0.220724076,0.6443008184]} diff --git a/rllib/tests/data/cartpole/cartpole-v1_large.jsonl/1_000000_000000.json b/rllib/tests/data/cartpole/cartpole-v1_large.jsonl/1_000000_000000.json new file mode 100644 index 0000000000000..13749d2581aa3 --- /dev/null +++ b/rllib/tests/data/cartpole/cartpole-v1_large.jsonl/1_000000_000000.json @@ -0,0 +1,4137 @@ +{"eps_id":1484177277,"obs":[-0.0006970267,-0.0461515822,-0.0192976426,-0.0453409776],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0016200584,-0.2409915775,-0.0202044621,0.2411914468],"action_prob":0.6172010899,"action_logp":-0.482560426,"action_dist_inputs":[0.2385958284,-0.2390891165],"value_targets":86.6020355225} 
+{"eps_id":1484177277,"obs":[-0.0016200584,-0.2409915775,-0.0202044621,0.2411914468],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0064398898,-0.045586925,-0.015380634,-0.0577954203],"action_prob":0.7644305825,"action_logp":-0.2686240673,"action_dist_inputs":[-0.5871837735,0.58994174],"value_targets":86.4666976929} +{"eps_id":1484177277,"obs":[-0.0064398898,-0.045586925,-0.015380634,-0.0577954203],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0073516285,0.14975214,-0.0165365413,-0.3552910984],"action_prob":0.3716135323,"action_logp":-0.9899008274,"action_dist_inputs":[0.262332052,-0.2629687488],"value_targets":86.3300018311} +{"eps_id":1484177277,"obs":[-0.0073516285,0.14975214,-0.0165365413,-0.3552910984],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0043565854,-0.045130834,-0.023642363,-0.067868121],"action_prob":0.871964395,"action_logp":-0.1370066702,"action_dist_inputs":[0.9572312832,-0.9612089396],"value_targets":86.1919174194} +{"eps_id":1484177277,"obs":[-0.0043565854,-0.045130834,-0.023642363,-0.067868121],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0052592023,0.1503219604,-0.0249997266,-0.3679155707],"action_prob":0.3389349878,"action_logp":-1.081946969,"action_dist_inputs":[0.3336254358,-0.334418416],"value_targets":86.052444458} +{"eps_id":1484177277,"obs":[-0.0052592023,0.1503219604,-0.0249997266,-0.3679155707],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0022527631,-0.044436004,-0.0323580392,-0.0832190737],"action_prob":0.8810922503,"action_logp":-0.1265929341,"action_dist_inputs":[0.9993316531,-1.0034831762],"value_targets":85.9115600586} 
+{"eps_id":1484177277,"obs":[-0.0022527631,-0.044436004,-0.0323580392,-0.0832190737],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0031414833,-0.239079535,-0.0340224206,0.1990817785],"action_prob":0.6990028024,"action_logp":-0.3581005633,"action_dist_inputs":[0.4207706153,-0.4217832386],"value_targets":85.7692489624} +{"eps_id":1484177277,"obs":[-0.0031414833,-0.239079535,-0.0340224206,0.1990817785],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0079230741,-0.043487899,-0.0300407838,-0.1041365489],"action_prob":0.685587883,"action_logp":-0.3774785697,"action_dist_inputs":[-0.3886187375,0.3909534812],"value_targets":85.62550354} +{"eps_id":1484177277,"obs":[-0.0079230741,-0.043487899,-0.0300407838,-0.1041365489],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0087928316,-0.2381667495,-0.0321235135,0.1789193004],"action_prob":0.7212018371,"action_logp":-0.3268362284,"action_dist_inputs":[0.4745840132,-0.4758468866],"value_targets":85.4803085327} +{"eps_id":1484177277,"obs":[-0.0087928316,-0.2381667495,-0.0321235135,0.1789193004],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0135561666,-0.0426001772,-0.0285451282,-0.1237218976],"action_prob":0.659406364,"action_logp":-0.4164153337,"action_dist_inputs":[-0.3292568326,0.3313929737],"value_targets":85.3336486816} +{"eps_id":1484177277,"obs":[-0.0135561666,-0.0426001772,-0.0285451282,-0.1237218976],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0144081702,0.1529188454,-0.0310195666,-0.4252720773],"action_prob":0.258682251,"action_logp":-1.352154851,"action_dist_inputs":[0.5256599784,-0.5271689296],"value_targets":85.1855010986} 
+{"eps_id":1484177277,"obs":[-0.0144081702,0.1529188454,-0.0310195666,-0.4252720773],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0113497935,0.3484661579,-0.0395250097,-0.7275702357],"action_prob":0.1003921777,"action_logp":-2.2986710072,"action_dist_inputs":[1.0940442085,-1.0988304615],"value_targets":85.0358581543} +{"eps_id":1484177277,"obs":[-0.0113497935,0.3484661579,-0.0395250097,-0.7275702357],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0043804706,0.153912276,-0.0540764146,-0.4475846589],"action_prob":0.9378321171,"action_logp":-0.0641843453,"action_dist_inputs":[1.3531697989,-1.3605616093],"value_targets":84.8847045898} +{"eps_id":1484177277,"obs":[-0.0043804706,0.153912276,-0.0540764146,-0.4475846589],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.001302225,-0.0404046848,-0.0630281046,-0.1724260151],"action_prob":0.9101071954,"action_logp":-0.0941929147,"action_dist_inputs":[1.1549112797,-1.160033226],"value_targets":84.7320251465} +{"eps_id":1484177277,"obs":[-0.001302225,-0.0404046848,-0.0630281046,-0.1724260151],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0021103187,-0.2345705777,-0.0664766282,0.0997265205],"action_prob":0.8225395083,"action_logp":-0.1953587979,"action_dist_inputs":[0.7656822205,-0.767966032],"value_targets":84.5778045654} +{"eps_id":1484177277,"obs":[-0.0021103187,-0.2345705777,-0.0664766282,0.0997265205],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.00680173,-0.0385619886,-0.0644820929,-0.2131666988],"action_prob":0.4463277757,"action_logp":-0.8067016602,"action_dist_inputs":[0.1083428413,-0.1071764976],"value_targets":84.4220275879} 
+{"eps_id":1484177277,"obs":[-0.00680173,-0.0385619886,-0.0644820929,-0.2131666988],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0075729699,-0.232705608,-0.0687454268,0.0584989116],"action_prob":0.8459404707,"action_logp":-0.1673062742,"action_dist_inputs":[0.8501612544,-0.8529485464],"value_targets":84.2646713257} +{"eps_id":1484177277,"obs":[-0.0075729699,-0.232705608,-0.0687454268,0.0584989116],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0122270817,-0.4267779887,-0.0675754547,0.3287246227],"action_prob":0.6230086088,"action_logp":-0.473194927,"action_dist_inputs":[0.251481086,-0.2508570552],"value_targets":84.1057281494} +{"eps_id":1484177277,"obs":[-0.0122270817,-0.4267779887,-0.0675754547,0.3287246227],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.020762641,-0.6208761334,-0.0610009581,0.5993552208],"action_prob":0.2742998898,"action_logp":-1.2935333252,"action_dist_inputs":[-0.4845516384,0.4883631468],"value_targets":83.9451828003} +{"eps_id":1484177277,"obs":[-0.020762641,-0.6208761334,-0.0610009581,0.5993552208],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0331801623,-0.8150939345,-0.0490138531,0.8722169399],"action_prob":0.1079043299,"action_logp":-2.2265102863,"action_dist_inputs":[-1.0531679392,1.0591603518],"value_targets":83.7830123901} +{"eps_id":1484177277,"obs":[-0.0331801623,-0.8150939345,-0.0490138531,0.8722169399],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0494820438,-0.6193408966,-0.0315695144,0.5645357966],"action_prob":0.9394758344,"action_logp":-0.062433172,"action_dist_inputs":[-1.3672943115,1.3749845028],"value_targets":83.6192016602} 
+{"eps_id":1484177277,"obs":[-0.0494820438,-0.6193408966,-0.0315695144,0.5645357966],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0618688613,-0.4237905741,-0.0202788003,0.2620765269],"action_prob":0.8953350186,"action_logp":-0.1105572805,"action_dist_inputs":[-1.0703155994,1.0761181116],"value_targets":83.453742981} +{"eps_id":1484177277,"obs":[-0.0618688613,-0.4237905741,-0.0202788003,0.2620765269],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0703446716,-0.2283851206,-0.0150372684,-0.036932949],"action_prob":0.7267664075,"action_logp":-0.3191501796,"action_dist_inputs":[-0.4875309467,0.4907469451],"value_targets":83.286605835} +{"eps_id":1484177277,"obs":[-0.0703446716,-0.2283851206,-0.0150372684,-0.036932949],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0749123767,-0.4232882559,-0.0157759283,0.25096789],"action_prob":0.6520414352,"action_logp":-0.4276472032,"action_dist_inputs":[0.3137631118,-0.3142613769],"value_targets":83.1177825928} +{"eps_id":1484177277,"obs":[-0.0749123767,-0.4232882559,-0.0157759283,0.25096789],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0833781436,-0.2279446125,-0.0107565699,-0.0466490313],"action_prob":0.7211108804,"action_logp":-0.3269623518,"action_dist_inputs":[-0.4734698534,0.4765090048],"value_targets":82.9472579956} +{"eps_id":1484177277,"obs":[-0.0833781436,-0.2279446125,-0.0107565699,-0.0466490313],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0879370347,-0.0326700844,-0.0116895502,-0.3427062631],"action_prob":0.3433688879,"action_logp":-1.0689499378,"action_dist_inputs":[0.323813051,-0.3245041072],"value_targets":82.7750091553} 
+{"eps_id":1484177277,"obs":[-0.0879370347,-0.0326700844,-0.0116895502,-0.3427062631],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0885904357,0.1626162082,-0.0185436755,-0.6390523314],"action_prob":0.1300332397,"action_logp":-2.0399651527,"action_dist_inputs":[0.9482049942,-0.9524601102],"value_targets":82.601020813} +{"eps_id":1484177277,"obs":[-0.0885904357,0.1626162082,-0.0185436755,-0.6390523314],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0853381082,-0.0322423503,-0.0313247219,-0.3522662818],"action_prob":0.9270824194,"action_logp":-0.0757128,"action_dist_inputs":[1.2679207325,-1.2747924328],"value_targets":82.4252700806} +{"eps_id":1484177277,"obs":[-0.0853381082,-0.0322423503,-0.0313247219,-0.3522662818],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.085982956,-0.2269051671,-0.0383700468,-0.0696232468],"action_prob":0.8818970919,"action_logp":-0.1256798804,"action_dist_inputs":[1.0029989481,-1.0075199604],"value_targets":82.2477493286} +{"eps_id":1484177277,"obs":[-0.085982956,-0.2269051671,-0.0383700468,-0.0696232468],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0905210599,-0.4214566052,-0.0397625118,0.2107110918],"action_prob":0.7298334241,"action_logp":-0.3149389923,"action_dist_inputs":[0.4962214828,-0.4975560308],"value_targets":82.0684280396} +{"eps_id":1484177277,"obs":[-0.0905210599,-0.4214566052,-0.0397625118,0.2107110918],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0989501923,-0.6159881353,-0.0355482921,0.490590632],"action_prob":0.3838699162,"action_logp":-0.9574515224,"action_dist_inputs":[-0.2354124188,0.2377420068],"value_targets":81.8873062134} 
+{"eps_id":1484177277,"obs":[-0.0989501923,-0.6159881353,-0.0355482921,0.490590632],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1112699583,-0.810591042,-0.0257364772,0.7718616128],"action_prob":0.1427830905,"action_logp":-1.9464286566,"action_dist_inputs":[-0.8936587572,0.8987056613],"value_targets":81.7043457031} +{"eps_id":1484177277,"obs":[-0.1112699583,-0.810591042,-0.0257364772,0.7718616128],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1274817735,-0.6151245832,-0.0102992449,0.4711933434],"action_prob":0.9295526743,"action_logp":-0.073051773,"action_dist_inputs":[-1.2864240408,1.2934144735],"value_targets":81.5195465088} +{"eps_id":1484177277,"obs":[-0.1274817735,-0.6151245832,-0.0102992449,0.4711933434],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1397842616,-0.4198586941,-0.0008753783,0.1752821058],"action_prob":0.866422832,"action_logp":-0.1433822215,"action_dist_inputs":[-0.9324130416,0.9372808337],"value_targets":81.3328704834} +{"eps_id":1484177277,"obs":[-0.1397842616,-0.4198586941,-0.0008753783,0.1752821058],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1481814384,-0.2247242332,0.0026302636,-0.1176768541],"action_prob":0.6476490498,"action_logp":-0.4344063401,"action_dist_inputs":[-0.3034178019,0.3053032458],"value_targets":81.144317627} +{"eps_id":1484177277,"obs":[-0.1481814384,-0.2247242332,0.0026302636,-0.1176768541],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1526759267,-0.4198837876,0.0002767266,0.1758347452],"action_prob":0.7178376913,"action_logp":-0.3315118253,"action_dist_inputs":[0.4658871889,-0.4678738117],"value_targets":80.9538574219} 
+{"eps_id":1484177277,"obs":[-0.1526759267,-0.4198837876,0.0002767266,0.1758347452],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1610736102,-0.2247657925,0.0037934214,-0.1167608723],"action_prob":0.6517562866,"action_logp":-0.4280845523,"action_dist_inputs":[-0.3124961257,0.3142721355],"value_targets":80.76146698} +{"eps_id":1484177277,"obs":[-0.1610736102,-0.2247657925,0.0037934214,-0.1167608723],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.165568918,-0.4199418724,0.0014582039,0.1771164238],"action_prob":0.7142260075,"action_logp":-0.3365558386,"action_dist_inputs":[0.4569619298,-0.459036231],"value_targets":80.5671386719} +{"eps_id":1484177277,"obs":[-0.165568918,-0.4199418724,0.0014582039,0.1771164238],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1739677638,-0.6150846481,0.0050005326,0.4702590108],"action_prob":0.3430972695,"action_logp":-1.0697412491,"action_dist_inputs":[-0.3239239156,0.3255979121],"value_targets":80.3708496094} +{"eps_id":1484177277,"obs":[-0.1739677638,-0.6150846481,0.0050005326,0.4702590108],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1862694472,-0.4200337231,0.0144057125,0.1791563779],"action_prob":0.8753039241,"action_logp":-0.133184135,"action_dist_inputs":[-0.9720594287,0.976631701],"value_targets":80.1725769043} +{"eps_id":1484177277,"obs":[-0.1862694472,-0.4200337231,0.0144057125,0.1791563779],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1946701258,-0.2251208425,0.0179888401,-0.1089474559],"action_prob":0.6861123443,"action_logp":-0.376713872,"action_dist_inputs":[-0.3901996315,0.3918067515],"value_targets":79.9722976685} 
+{"eps_id":1484177277,"obs":[-0.1946701258,-0.2251208425,0.0179888401,-0.1089474559],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1991725415,-0.0302612279,0.0158098917,-0.3959011436],"action_prob":0.3218936026,"action_logp":-1.133534193,"action_dist_inputs":[0.3714877963,-0.3735952377],"value_targets":79.7699966431} +{"eps_id":1484177277,"obs":[-0.1991725415,-0.0302612279,0.0158098917,-0.3959011436],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1997777671,-0.2256038934,0.0078918682,-0.0982758701],"action_prob":0.8768818974,"action_logp":-0.1313829869,"action_dist_inputs":[0.9789344072,-0.9842936993],"value_targets":79.5656509399} +{"eps_id":1484177277,"obs":[-0.1997777671,-0.2256038934,0.0078918682,-0.0982758701],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2042898387,-0.420838058,0.0059263506,0.1968864352],"action_prob":0.6818606853,"action_logp":-0.3829299212,"action_dist_inputs":[0.3800988197,-0.382237196],"value_targets":79.3592453003} +{"eps_id":1484177277,"obs":[-0.2042898387,-0.420838058,0.0059263506,0.1968864352],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2127066106,-0.6160442829,0.0098640798,0.4914329648],"action_prob":0.3051803708,"action_logp":-1.1868523359,"action_dist_inputs":[-0.4106171131,0.4121322036],"value_targets":79.1507568359} +{"eps_id":1484177277,"obs":[-0.2127066106,-0.6160442829,0.0098640798,0.4914329648],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2250274867,-0.4210628271,0.0196927395,0.201875031],"action_prob":0.8849087954,"action_logp":-0.1222706884,"action_dist_inputs":[-1.0176773071,1.0220820904],"value_targets":78.9401550293} 
+{"eps_id":1484177277,"obs":[-0.2250274867,-0.4210628271,0.0196927395,0.201875031],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2334487438,-0.2262279838,0.0237302389,-0.0845312923],"action_prob":0.7253122926,"action_logp":-0.3211529851,"action_dist_inputs":[-0.4847492874,0.4862180948],"value_targets":78.727432251} +{"eps_id":1484177277,"obs":[-0.2334487438,-0.2262279838,0.0237302389,-0.0845312923],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2379733026,-0.0314540863,0.0220396128,-0.3696338534],"action_prob":0.3709959984,"action_logp":-0.9915640354,"action_dist_inputs":[0.2629261017,-0.2650202513],"value_targets":78.5125579834} +{"eps_id":1484177277,"obs":[-0.2379733026,-0.0314540863,0.0220396128,-0.3696338534],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2386023849,-0.2268821299,0.0146469362,-0.0700838417],"action_prob":0.8634963632,"action_logp":-0.1467656046,"action_dist_inputs":[0.9196363688,-0.9250019789],"value_targets":78.2955093384} +{"eps_id":1484177277,"obs":[-0.2386023849,-0.2268821299,0.0146469362,-0.0700838417],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2431400269,-0.4222109914,0.0132452594,0.2271840423],"action_prob":0.6254748106,"action_logp":-0.4692442119,"action_dist_inputs":[0.2553905547,-0.2574613988],"value_targets":78.0762710571} +{"eps_id":1484177277,"obs":[-0.2431400269,-0.4222109914,0.0132452594,0.2271840423],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2515842617,-0.2272807956,0.017788941,-0.0612915903],"action_prob":0.7436659336,"action_logp":-0.2961633801,"action_dist_inputs":[-0.5318325162,0.5332775712],"value_targets":77.8548202515} 
+{"eps_id":1484177277,"obs":[-0.2515842617,-0.2272807956,0.017788941,-0.0612915903],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2561298609,-0.422653228,0.0165631082,0.2369503528],"action_prob":0.6041091681,"action_logp":-0.5040003657,"action_dist_inputs":[0.2102820277,-0.2123344988],"value_targets":77.6311340332} +{"eps_id":1484177277,"obs":[-0.2561298609,-0.422653228,0.0165631082,0.2369503528],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.264582932,-0.2277717739,0.0213021152,-0.0504623912],"action_prob":0.7584005594,"action_logp":-0.2765435874,"action_dist_inputs":[-0.5712552071,0.5726755261],"value_targets":77.4051818848} +{"eps_id":1484177277,"obs":[-0.264582932,-0.2277717739,0.0213021152,-0.0504623912],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.269138366,-0.032961648,0.0202928688,-0.3363489211],"action_prob":0.4218431413,"action_logp":-0.863121748,"action_dist_inputs":[0.1565989107,-0.1586128473],"value_targets":77.1769561768} +{"eps_id":1484177277,"obs":[-0.269138366,-0.032961648,0.0202928688,-0.3363489211],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2697975934,-0.2283664197,0.0135658896,-0.0373364277],"action_prob":0.8478961587,"action_logp":-0.1649971157,"action_dist_inputs":[0.8564422131,-0.8617525101],"value_targets":76.9464187622} +{"eps_id":1484177277,"obs":[-0.2697975934,-0.2283664197,0.0135658896,-0.0373364277],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2743649185,-0.4236802459,0.0128191607,0.2595956028],"action_prob":0.5734989047,"action_logp":-0.5559992194,"action_dist_inputs":[0.1470732242,-0.1490679085],"value_targets":76.7135543823} 
+{"eps_id":1484177277,"obs":[-0.2743649185,-0.4236802459,0.0128191607,0.2595956028],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2828385234,-0.2287436277,0.0180110727,-0.0290165693],"action_prob":0.7742031217,"action_logp":-0.2559209764,"action_dist_inputs":[-0.6154056191,0.6167928576],"value_targets":76.4783401489} +{"eps_id":1484177277,"obs":[-0.2828385234,-0.2287436277,0.0180110727,-0.0290165693],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2874134183,-0.4241191745,0.0174307413,0.2692942023],"action_prob":0.5489335656,"action_logp":-0.5997778773,"action_dist_inputs":[0.0971907526,-0.099171944],"value_targets":76.2407455444} +{"eps_id":1484177277,"obs":[-0.2874134183,-0.4241191745,0.0174307413,0.2692942023],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2958957851,-0.2292502522,0.0228166245,-0.0178404078],"action_prob":0.7875524163,"action_logp":-0.2388253808,"action_dist_inputs":[-0.6544331908,0.655801177],"value_targets":76.0007553101} +{"eps_id":1484177277,"obs":[-0.2958957851,-0.2292502522,0.0228166245,-0.0178404078],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.300480783,-0.4246918559,0.0224598181,0.2819532156],"action_prob":0.5188910961,"action_logp":-0.6560612917,"action_dist_inputs":[0.0368283913,-0.0387718901],"value_targets":75.7583389282} +{"eps_id":1484177277,"obs":[-0.300480783,-0.4246918559,0.0224598181,0.2819532156],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3089746237,-0.229897365,0.0280988812,-0.0035622271],"action_prob":0.8022010326,"action_logp":-0.2203960568,"action_dist_inputs":[-0.6993672252,0.700740695],"value_targets":75.5134735107} 
+{"eps_id":1484177277,"obs":[-0.3089746237,-0.229897365,0.0280988812,-0.0035622271],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3135725856,-0.4254107773,0.0280276369,0.2978521287],"action_prob":0.4831171334,"action_logp":-0.7274961472,"action_dist_inputs":[-0.0347197875,0.0328373834],"value_targets":75.26612854} +{"eps_id":1484177277,"obs":[-0.3135725856,-0.4254107773,0.0280276369,0.2978521287],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.322080791,-0.6209208369,0.0339846797,0.5992410779],"action_prob":0.1822840273,"action_logp":-1.7021892071,"action_dist_inputs":[-0.7497705221,0.751178503],"value_targets":75.0162963867} +{"eps_id":1484177277,"obs":[-0.322080791,-0.6209208369,0.0339846797,0.5992410779],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3344992101,-0.4262904227,0.0459695011,0.317453593],"action_prob":0.9128580093,"action_logp":-0.0911749601,"action_dist_inputs":[-1.1722831726,1.1767579317],"value_targets":74.7639312744} +{"eps_id":1484177277,"obs":[-0.3344992101,-0.4262904227,0.0459695011,0.317453593],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3430250287,-0.2318523377,0.052318573,0.03961486],"action_prob":0.8419694304,"action_logp":-0.1720115691,"action_dist_inputs":[-0.8357272148,0.8372282982],"value_targets":74.5090255737} +{"eps_id":1484177277,"obs":[-0.3430250287,-0.2318523377,0.052318573,0.03961486],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3476620615,-0.0375181548,0.0531108715,-0.2361129224],"action_prob":0.6324882507,"action_logp":-0.4580936432,"action_dist_inputs":[-0.2722782195,0.2706281841],"value_targets":74.2515411377} 
+{"eps_id":1484177277,"obs":[-0.3476620615,-0.0375181548,0.0531108715,-0.2361129224],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3484124243,-0.2333570868,0.0483886115,0.0728385299],"action_prob":0.7134703398,"action_logp":-0.3376144469,"action_dist_inputs":[0.4537993073,-0.4584994316],"value_targets":73.9914550781} +{"eps_id":1484177277,"obs":[-0.3484124243,-0.2333570868,0.0483886115,0.0728385299],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3530795574,-0.0389610529,0.0498453826,-0.2041935176],"action_prob":0.6684358716,"action_logp":-0.4028148353,"action_dist_inputs":[-0.3512963653,0.3498229086],"value_targets":73.7287445068} +{"eps_id":1484177277,"obs":[-0.3530795574,-0.0389610529,0.0498453826,-0.2041935176],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3538587987,-0.2347590774,0.0457615107,0.103787221],"action_prob":0.6777079701,"action_logp":-0.389038831,"action_dist_inputs":[0.3693773746,-0.3738808036],"value_targets":73.4633712769} +{"eps_id":1484177277,"obs":[-0.3538587987,-0.2347590774,0.0457615107,0.103787221],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3585539758,-0.0403217934,0.0478372574,-0.1741141975],"action_prob":0.70014745,"action_logp":-0.3564643264,"action_dist_inputs":[-0.4246560633,0.4233441055],"value_targets":73.1953277588} +{"eps_id":1484177277,"obs":[-0.3585539758,-0.0403217934,0.0478372574,-0.1741141975],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3593603969,-0.2360945791,0.0443549715,0.1332678497],"action_prob":0.6390702128,"action_logp":-0.447740972,"action_dist_inputs":[0.2835048735,-0.2878258526],"value_targets":72.9245758057} 
+{"eps_id":1484177277,"obs":[-0.3593603969,-0.2360945791,0.0443549715,0.1332678497],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3640823066,-0.0416351222,0.0470203273,-0.1450983882],"action_prob":0.7284498811,"action_logp":-0.3168364763,"action_dist_inputs":[-0.493963778,0.4928082228],"value_targets":72.6510848999} +{"eps_id":1484177277,"obs":[-0.3640823066,-0.0416351222,0.0470203273,-0.1450983882],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3649150133,-0.2373978049,0.0441183597,0.1620399803],"action_prob":0.5973775983,"action_logp":-0.5152058601,"action_dist_inputs":[0.1952010244,-0.199349314],"value_targets":72.3748321533} +{"eps_id":1484177277,"obs":[-0.3649150133,-0.2373978049,0.0441183597,0.1620399803],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3696629703,-0.042934306,0.0473591611,-0.1164048463],"action_prob":0.754009366,"action_logp":-0.2823505104,"action_dist_inputs":[-0.5605527759,0.559558332],"value_targets":72.0957946777} +{"eps_id":1484177277,"obs":[-0.3696629703,-0.042934306,0.0473591611,-0.1164048463],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3705216348,0.1514782012,0.0450310633,-0.3937782645],"action_prob":0.4474759102,"action_logp":-0.8041325808,"action_dist_inputs":[0.103446871,-0.1074273735],"value_targets":71.8139266968} +{"eps_id":1484177277,"obs":[-0.3705216348,0.1514782012,0.0450310633,-0.3937782645],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3674920797,-0.0442528538,0.0371554978,-0.0872446299],"action_prob":0.8347035646,"action_logp":-0.1806786358,"action_dist_inputs":[0.8063669205,-0.8129692078],"value_targets":71.5292205811} 
+{"eps_id":1484177277,"obs":[-0.3674920797,-0.0442528538,0.0371554978,-0.0872446299],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3683771491,-0.2398871481,0.0354106054,0.2169254869],"action_prob":0.5292801261,"action_logp":-0.6362374425,"action_dist_inputs":[0.0567182153,-0.0605365075],"value_targets":71.2416381836} +{"eps_id":1484177277,"obs":[-0.3683771491,-0.2398871481,0.0354106054,0.2169254869],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.373174876,-0.0452888347,0.0397491157,-0.0643804595],"action_prob":0.7863667011,"action_logp":-0.2403320223,"action_dist_inputs":[-0.6519159079,0.6512467265],"value_targets":70.9511489868} +{"eps_id":1484177277,"obs":[-0.373174876,-0.0452888347,0.0397491157,-0.0643804595],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.374080658,-0.2409574836,0.0384615064,0.2405738235],"action_prob":0.4887954593,"action_logp":-0.7158111334,"action_dist_inputs":[-0.0242578778,0.0205677077],"value_targets":70.6577301025} +{"eps_id":1484177277,"obs":[-0.374080658,-0.2409574836,0.0384615064,0.2405738235],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3788998127,-0.4366071522,0.0432729833,0.5451361537],"action_prob":0.1963794976,"action_logp":-1.6277062893,"action_dist_inputs":[-0.7047916055,0.7042865157],"value_targets":70.3613433838} +{"eps_id":1484177277,"obs":[-0.3788998127,-0.4366071522,0.0432729833,0.5451361537],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3876319528,-0.2421191037,0.0541757084,0.2663955092],"action_prob":0.9009644389,"action_logp":-0.1042894647,"action_dist_inputs":[-1.1024717093,1.1055151224],"value_targets":70.061958313} 
+{"eps_id":1484177277,"obs":[-0.3876319528,-0.2421191037,0.0541757084,0.2663955092],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3924743235,-0.0478105284,0.0595036186,-0.0087198643],"action_prob":0.8275336623,"action_logp":-0.1893054694,"action_dist_inputs":[-0.7842561007,0.7839918733],"value_targets":69.7595596313} +{"eps_id":1484177277,"obs":[-0.3924743235,-0.0478105284,0.0595036186,-0.0087198643],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.393430531,-0.2437331378,0.0593292192,0.3021276593],"action_prob":0.3742617369,"action_logp":-0.9827998877,"action_dist_inputs":[-0.2586652935,0.2553116679],"value_targets":69.4540939331} +{"eps_id":1484177277,"obs":[-0.393430531,-0.2437331378,0.0593292192,0.3021276593],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3983052075,-0.049504742,0.0653717741,0.0287302397],"action_prob":0.8447798491,"action_logp":-0.1686792374,"action_dist_inputs":[-0.8470824361,0.8471493721],"value_targets":69.1455535889} +{"eps_id":1484177277,"obs":[-0.3983052075,-0.049504742,0.0653717741,0.0287302397],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3992953002,0.1446217895,0.0659463778,-0.2426319718],"action_prob":0.677726388,"action_logp":-0.3890116215,"action_dist_inputs":[-0.3732197881,0.3701229692],"value_targets":68.8338928223} +{"eps_id":1484177277,"obs":[-0.3992953002,0.1446217895,0.0659463778,-0.2426319718],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3964028656,-0.0513771251,0.0610937402,0.0701015368],"action_prob":0.6397463679,"action_logp":-0.446683526,"action_dist_inputs":[0.2842237055,-0.2900396287],"value_targets":68.5190811157} 
+{"eps_id":1484177277,"obs":[-0.3964028656,-0.0513771251,0.0610937402,0.0701015368],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3974304199,0.1428181529,0.0624957681,-0.2026975453],"action_prob":0.7129714489,"action_logp":-0.3383139372,"action_dist_inputs":[-0.4563258886,0.4535336792],"value_targets":68.2010955811} +{"eps_id":1484177277,"obs":[-0.3974304199,0.1428181529,0.0624957681,-0.2026975453],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3945740461,0.3369932473,0.0584418178,-0.4750297666],"action_prob":0.4099323153,"action_logp":-0.8917632103,"action_dist_inputs":[0.1793282032,-0.1849170625],"value_targets":67.8798904419} +{"eps_id":1484177277,"obs":[-0.3945740461,0.3369932473,0.0584418178,-0.4750297666],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3878341913,0.1410968155,0.0489412211,-0.1645146757],"action_prob":0.8426219821,"action_logp":-0.1712368131,"action_dist_inputs":[0.8350804448,-0.8427875638],"value_targets":67.5554504395} +{"eps_id":1484177277,"obs":[-0.3878341913,0.1410968155,0.0489412211,-0.1645146757],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3850122392,0.3354852498,0.0456509292,-0.4413652122],"action_prob":0.4381092787,"action_logp":-0.8252869248,"action_dist_inputs":[0.1217401177,-0.1270989031],"value_targets":67.227722168} +{"eps_id":1484177277,"obs":[-0.3850122392,0.3354852498,0.0456509292,-0.4413652122],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3783025444,0.1397480071,0.0368236266,-0.1346485168],"action_prob":0.8328902125,"action_logp":-0.1828534305,"action_dist_inputs":[0.7993330956,-0.8069176674],"value_targets":66.8966903687} 
+{"eps_id":1484177277,"obs":[-0.3783025444,0.1397480071,0.0368236266,-0.1346485168],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3755075932,0.3343236744,0.0341306552,-0.4154904783],"action_prob":0.457461834,"action_logp":-0.7820618153,"action_dist_inputs":[0.0827003717,-0.0878645927],"value_targets":66.5623168945} +{"eps_id":1484177277,"obs":[-0.3755075932,0.3343236744,0.0341306552,-0.4154904783],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3688211143,0.138735041,0.0258208457,-0.1122459173],"action_prob":0.8267974257,"action_logp":-0.1901955754,"action_dist_inputs":[0.7778095007,-0.7852884531],"value_targets":66.2245635986} +{"eps_id":1484177277,"obs":[-0.3688211143,0.138735041,0.0258208457,-0.1122459173],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3660463989,-0.0567472056,0.0235759262,0.1884701848],"action_prob":0.5317175388,"action_logp":-0.6316428185,"action_dist_inputs":[0.0610170476,-0.0660238266],"value_targets":65.883392334} +{"eps_id":1484177277,"obs":[-0.3660463989,-0.0567472056,0.0235759262,0.1884701848],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3671813607,0.1380296499,0.0273453295,-0.0966832265],"action_prob":0.7692079544,"action_logp":-0.2623939514,"action_dist_inputs":[-0.6028319001,0.6010121703],"value_targets":65.5387802124} +{"eps_id":1484177277,"obs":[-0.3671813607,0.1380296499,0.0273453295,-0.0966832265],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3644207418,-0.057473328,0.0254116654,0.2045001388],"action_prob":0.5058699846,"action_logp":-0.6814756393,"action_dist_inputs":[0.0093012983,-0.0141797168],"value_targets":65.1906890869} 
+{"eps_id":1484177277,"obs":[-0.3644207418,-0.057473328,0.0254116654,0.2045001388],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3655702174,0.1372761726,0.0295016691,-0.0800595805],"action_prob":0.7814024091,"action_logp":-0.2466650158,"action_dist_inputs":[-0.6377478242,0.636110127],"value_targets":64.8390808105} +{"eps_id":1484177277,"obs":[-0.3655702174,0.1372761726,0.0295016691,-0.0800595805],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3628247082,-0.0582559966,0.027900476,0.2217832804],"action_prob":0.4777122438,"action_logp":-0.7387467027,"action_dist_inputs":[-0.0469718128,0.0422382802],"value_targets":64.4839172363} +{"eps_id":1484177277,"obs":[-0.3628247082,-0.0582559966,0.027900476,0.2217832804],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.36398983,-0.2537654042,0.0323361419,0.5231349468],"action_prob":0.2062528133,"action_logp":-1.5786526203,"action_dist_inputs":[-0.6745468974,0.6731155515],"value_targets":64.1251678467} +{"eps_id":1484177277,"obs":[-0.36398983,-0.2537654042,0.0323361419,0.5231349468],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3690651357,-0.05911313,0.0427988395,0.2408143282],"action_prob":0.889680624,"action_logp":-0.116892755,"action_dist_inputs":[-1.0426273346,1.0448558331],"value_targets":63.7627983093} +{"eps_id":1484177277,"obs":[-0.3690651357,-0.05911313,0.0427988395,0.2408143282],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3702473938,0.135372147,0.0476151258,-0.0380674191],"action_prob":0.8130830526,"action_logp":-0.2069220096,"action_dist_inputs":[-0.7356538773,0.7345151901],"value_targets":63.3967666626} 
+{"eps_id":1484177277,"obs":[-0.3702473938,0.135372147,0.0476151258,-0.0380674191],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3675399423,-0.0603991263,0.0468537807,0.2692502141],"action_prob":0.3899389207,"action_logp":-0.9417651892,"action_dist_inputs":[-0.2259405851,0.2216283828],"value_targets":63.0270347595} +{"eps_id":1484177277,"obs":[-0.3675399423,-0.0603991263,0.0468537807,0.2692502141],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3687479198,0.1340239644,0.0522387847,-0.0082943896],"action_prob":0.827469945,"action_logp":-0.1893825084,"action_dist_inputs":[-0.7842913866,0.7835098505],"value_targets":62.6535720825} +{"eps_id":1484177277,"obs":[-0.3687479198,0.1340239644,0.0522387847,-0.0082943896],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3660674393,-0.0618067384,0.0520728976,0.3004021645],"action_prob":0.3486230969,"action_logp":-1.0537638664,"action_dist_inputs":[-0.3145485222,0.3105483949],"value_targets":62.2763366699} +{"eps_id":1484177277,"obs":[-0.3660674393,-0.0618067384,0.0520728976,0.3004021645],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.36730358,0.1325358152,0.0580809377,0.0245859884],"action_prob":0.840959847,"action_logp":-0.1732113808,"action_dist_inputs":[-0.8328797221,0.8325076103],"value_targets":61.8952865601} +{"eps_id":1484177277,"obs":[-0.36730358,0.1325358152,0.0580809377,0.0245859884],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3646528721,0.3267788291,0.0585726574,-0.249221012],"action_prob":0.692103982,"action_logp":-0.368019104,"action_dist_inputs":[-0.4067978859,0.4031760693],"value_targets":61.5103912354} 
+{"eps_id":1484177277,"obs":[-0.3646528721,0.3267788291,0.0585726574,-0.249221012],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3581172824,0.1308714449,0.0535882376,0.0613466389],"action_prob":0.6021102071,"action_logp":-0.5073147416,"action_dist_inputs":[0.2038385272,-0.2104268372],"value_targets":61.1216087341} +{"eps_id":1484177277,"obs":[-0.3581172824,0.1308714449,0.0535882376,0.0613466389],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3554998636,0.325185746,0.0548151731,-0.2139589936],"action_prob":0.7195526958,"action_logp":-0.3291254938,"action_dist_inputs":[-0.4727285504,0.4695153832],"value_targets":60.7288970947} +{"eps_id":1484177277,"obs":[-0.3554998636,0.325185746,0.0548151731,-0.2139589936],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3489961624,0.1293246895,0.0505359918,0.0954988971],"action_prob":0.5601374507,"action_logp":-0.5795730948,"action_dist_inputs":[0.11770612,-0.1240136623],"value_targets":60.3322181702} +{"eps_id":1484177277,"obs":[-0.3489961624,0.1293246895,0.0505359918,0.0954988971],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3464096487,0.3236872852,0.0524459705,-0.1808215231],"action_prob":0.7432558537,"action_logp":-0.2967149317,"action_dist_inputs":[-0.5328803658,0.5300799608],"value_targets":59.9315338135} +{"eps_id":1484177277,"obs":[-0.3464096487,0.3236872852,0.0524459705,-0.1808215231],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3399358988,0.5180210471,0.0488295406,-0.4565093219],"action_prob":0.4816551805,"action_logp":-0.7305268049,"action_dist_inputs":[0.0337072462,-0.0397050492],"value_targets":59.526802063} 
+{"eps_id":1484177277,"obs":[-0.3399358988,0.5180210471,0.0488295406,-0.4565093219],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.329575479,0.3222439885,0.0396993533,-0.1488435566],"action_prob":0.7973558903,"action_logp":-0.2264541835,"action_dist_inputs":[0.6808146834,-0.6890352368],"value_targets":59.117980957} +{"eps_id":1484177277,"obs":[-0.329575479,0.3222439885,0.0396993533,-0.1488435566],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3231306076,0.1265766919,0.0367224813,0.1560946703],"action_prob":0.4975872636,"action_logp":-0.6979843378,"action_dist_inputs":[-0.0076730456,0.0019780165],"value_targets":58.7050323486} +{"eps_id":1484177277,"obs":[-0.3231306076,0.1265766919,0.0367224813,0.1560946703],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3205990791,0.3211541474,0.0398443751,-0.1247807592],"action_prob":0.7730737329,"action_logp":-0.2573808134,"action_dist_inputs":[-0.6138975024,0.6118517518],"value_targets":58.2879104614} +{"eps_id":1484177277,"obs":[-0.3205990791,0.3211541474,0.0398443751,-0.1247807592],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3141759932,0.1254846901,0.0373487584,0.1802016199],"action_prob":0.4644634426,"action_logp":-0.766872406,"action_dist_inputs":[-0.0738890916,0.0684971362],"value_targets":57.8665771484} +{"eps_id":1484177277,"obs":[-0.3141759932,0.1254846901,0.0373487584,0.1802016199],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3116663098,0.3200528324,0.0409527905,-0.1004690826],"action_prob":0.7877385616,"action_logp":-0.2385890484,"action_dist_inputs":[-0.6565136909,0.654833734],"value_targets":57.4409866333} 
+{"eps_id":1484177277,"obs":[-0.3116663098,0.3200528324,0.0409527905,-0.1004690826],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3052652478,0.1243686229,0.0389434099,0.2048478872],"action_prob":0.4306240678,"action_logp":-0.8425198197,"action_dist_inputs":[-0.1421802342,0.1371252984],"value_targets":57.0110969543} +{"eps_id":1484177277,"obs":[-0.3052652478,0.1243686229,0.0389434099,0.2048478872],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3027778566,0.3189126551,0.0430403687,-0.0753004327],"action_prob":0.8016247749,"action_logp":-0.2211146504,"action_dist_inputs":[-0.698887229,0.6975932121],"value_targets":56.5768661499} +{"eps_id":1484177277,"obs":[-0.3027778566,0.3189126551,0.0430403687,-0.0753004327],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2963996232,0.5133919716,0.0415343605,-0.3540993333],"action_prob":0.6039153934,"action_logp":-0.5043212175,"action_dist_inputs":[-0.2132411003,0.208565101],"value_targets":56.1382484436} +{"eps_id":1484177277,"obs":[-0.2963996232,0.5133919716,0.0415343605,-0.3540993333],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2861317694,0.3177048266,0.0344523713,-0.0486141406],"action_prob":0.7029700875,"action_logp":-0.3524409533,"action_dist_inputs":[0.426938951,-0.4345426261],"value_targets":55.6952018738} +{"eps_id":1484177277,"obs":[-0.2861317694,0.3177048266,0.0344523713,-0.0486141406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2797776759,0.512316227,0.0334800892,-0.3302310705],"action_prob":0.6226302981,"action_logp":-0.4738023877,"action_dist_inputs":[-0.2525216043,0.2482057512],"value_targets":55.2476768494} 
+{"eps_id":1484177277,"obs":[-0.2797776759,0.512316227,0.0334800892,-0.3302310705],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2695313692,0.3167341053,0.0268754698,-0.0271809343],"action_prob":0.6881753802,"action_logp":-0.3737115562,"action_dist_inputs":[0.3921034336,-0.3994993567],"value_targets":54.7956352234} +{"eps_id":1484177277,"obs":[-0.2695313692,0.3167341053,0.0268754698,-0.0271809343],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.263196677,0.1212372556,0.0263318494,0.273858875],"action_prob":0.3634429276,"action_logp":-1.0121330023,"action_dist_inputs":[-0.2822234333,0.2782282829],"value_targets":54.3390235901} +{"eps_id":1484177277,"obs":[-0.263196677,0.1212372556,0.0263318494,0.273858875],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2607719302,-0.0742503181,0.0318090282,0.5747292638],"action_prob":0.1733584702,"action_logp":-1.7523937225,"action_dist_inputs":[-0.7811343074,0.7808753252],"value_targets":53.8778038025} +{"eps_id":1484177277,"obs":[-0.2607719302,-0.0742503181,0.0318090282,0.5747292638],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2622569203,0.1204115972,0.0433036126,0.2922343612],"action_prob":0.8935658932,"action_logp":-0.1125352085,"action_dist_inputs":[-1.0622917414,1.0654020309],"value_targets":53.4119224548} +{"eps_id":1484177277,"obs":[-0.2622569203,0.1204115972,0.0433036126,0.2922343612],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2598487139,0.3148902357,0.0491482988,0.0135171833],"action_prob":0.8398793936,"action_logp":-0.1744969636,"action_dist_inputs":[-0.8285838366,0.8287472725],"value_targets":52.9413375854} 
+{"eps_id":1484177277,"obs":[-0.2598487139,0.3148902357,0.0491482988,0.0135171833],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2535508871,0.5092741251,0.0494186431,-0.2632628083],"action_prob":0.7047068477,"action_logp":-0.3499733508,"action_dist_inputs":[-0.4364556968,0.4333578348],"value_targets":52.4659957886} +{"eps_id":1484177277,"obs":[-0.2535508871,0.5092741251,0.0494186431,-0.2632628083],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2433654219,0.7036571503,0.0441533886,-0.539958179],"action_prob":0.4300523996,"action_logp":-0.8438482285,"action_dist_inputs":[0.1375420243,-0.1440953165],"value_targets":51.9858551025} +{"eps_id":1484177277,"obs":[-0.2433654219,0.7036571503,0.0441533886,-0.539958179],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2292922735,0.507943213,0.0333542228,-0.233696565],"action_prob":0.8212373853,"action_logp":-0.1969430894,"action_dist_inputs":[0.7580773234,-0.7666761875],"value_targets":51.5008621216} +{"eps_id":1484177277,"obs":[-0.2292922735,0.507943213,0.0333542228,-0.233696565],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2191334069,0.7025731206,0.0286802929,-0.5156747103],"action_prob":0.4405892789,"action_logp":-0.8196421862,"action_dist_inputs":[0.1162749082,-0.1224959046],"value_targets":51.0109710693} +{"eps_id":1484177277,"obs":[-0.2191334069,0.7025731206,0.0286802929,-0.5156747103],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2050819397,0.5070592761,0.0183667988,-0.2140937001],"action_prob":0.8212295175,"action_logp":-0.1969526708,"action_dist_inputs":[0.7581376433,-0.7665620446],"value_targets":50.5161323547} 
+{"eps_id":1484177277,"obs":[-0.2050819397,0.5070592761,0.0183667988,-0.2140937001],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1949407607,0.3116796315,0.0140849249,0.0843258873],"action_prob":0.5606842637,"action_logp":-0.5785973072,"action_dist_inputs":[0.1189882457,-0.1249514669],"value_targets":50.0162963867} +{"eps_id":1484177277,"obs":[-0.1949407607,0.3116796315,0.0140849249,0.0843258873],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1887071729,0.5065968633,0.015771443,-0.2038801461],"action_prob":0.7301122546,"action_logp":-0.3145569861,"action_dist_inputs":[-0.4985941947,0.4965980351],"value_targets":49.5114097595} +{"eps_id":1484177277,"obs":[-0.1887071729,0.5065968633,0.015771443,-0.2038801461],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1785752326,0.7014898062,0.0116938399,-0.4915465415],"action_prob":0.4486399293,"action_logp":-0.8015346527,"action_dist_inputs":[0.1002192348,-0.1059483215],"value_targets":49.0014266968} +{"eps_id":1484177277,"obs":[-0.1785752326,0.7014898062,0.0116938399,-0.4915465415],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1645454317,0.5062048435,0.0018629094,-0.1952012479],"action_prob":0.8241958618,"action_logp":-0.1933470517,"action_dist_inputs":[0.7684604526,-0.7765771151],"value_targets":48.486289978} +{"eps_id":1484177277,"obs":[-0.1645454317,0.5062048435,0.0018629094,-0.1952012479],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1544213295,0.3110563159,-0.0020411154,0.0980687663],"action_prob":0.5664407015,"action_logp":-0.5683828592,"action_dist_inputs":[0.1308902055,-0.1364536434],"value_targets":47.9659461975} 
+{"eps_id":1484177277,"obs":[-0.1544213295,0.3110563159,-0.0020411154,0.0980687663],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1482002139,0.1159636602,-0.0000797401,0.3901070356],"action_prob":0.2714017928,"action_logp":-1.3041548729,"action_dist_inputs":[-0.494564712,0.492957294],"value_targets":47.4403495789} +{"eps_id":1484177277,"obs":[-0.1482002139,0.1159636602,-0.0000797401,0.3901070356],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1458809376,0.3110867441,0.0077224006,0.0973989591],"action_prob":0.8608215451,"action_logp":-0.1498680413,"action_dist_inputs":[-0.9101679921,0.9119621515],"value_targets":46.9094467163} +{"eps_id":1484177277,"obs":[-0.1458809376,0.3110867441,0.0077224006,0.0973989591],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1396591961,0.5060971975,0.0096703796,-0.1928375959],"action_prob":0.7394403219,"action_logp":-0.3018617034,"action_dist_inputs":[-0.5222236514,0.520837903],"value_targets":46.3731765747} +{"eps_id":1484177277,"obs":[-0.1396591961,0.5060971975,0.0096703796,-0.1928375959],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1295372546,0.3108382225,0.0058136275,0.1028801724],"action_prob":0.5480663776,"action_logp":-0.6013588309,"action_dist_inputs":[0.0938318968,-0.0990293771],"value_targets":45.8314933777} +{"eps_id":1484177277,"obs":[-0.1295372546,0.3108382225,0.0058136275,0.1028801724],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1233204976,0.5058763623,0.0078712311,-0.1879629195],"action_prob":0.7434103489,"action_logp":-0.2965071201,"action_dist_inputs":[-0.5324671865,0.5313026309],"value_targets":45.2843360901} 
+{"eps_id":1484177277,"obs":[-0.1233204976,0.5058763623,0.0078712311,-0.1879629195],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1132029667,0.3106427193,0.0041119726,0.107192643],"action_prob":0.5448539853,"action_logp":-0.6072373986,"action_dist_inputs":[0.0874491334,-0.092450507],"value_targets":44.7316513062} +{"eps_id":1484177277,"obs":[-0.1132029667,0.3106427193,0.0041119726,0.107192643],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1069901139,0.1154620722,0.0062558255,0.4011700451],"action_prob":0.2533278167,"action_logp":-1.3730709553,"action_dist_inputs":[-0.540948689,0.5399932265],"value_targets":44.1733856201} +{"eps_id":1484177277,"obs":[-0.1069901139,0.1154620722,0.0062558255,0.4011700451],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1046808735,0.3104947209,0.0142792258,0.1104659662],"action_prob":0.8699093461,"action_logp":-0.1393662691,"action_dist_inputs":[-0.9488766193,0.9512808323],"value_targets":43.6094818115} +{"eps_id":1484177277,"obs":[-0.1046808735,0.3104947209,0.0142792258,0.1104659662],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0984709784,0.5054091811,0.0164885465,-0.1776779592],"action_prob":0.7597888112,"action_logp":-0.2747147679,"action_dist_inputs":[-0.5761010647,0.5754211545],"value_targets":43.0398788452} +{"eps_id":1484177277,"obs":[-0.0984709784,0.5054091811,0.0164885465,-0.1776779592],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0883627906,0.7002913356,0.0129349865,-0.4651140571],"action_prob":0.4869118333,"action_logp":-0.7196722031,"action_dist_inputs":[0.0239226148,-0.0284419097],"value_targets":42.4645233154} 
+{"eps_id":1484177277,"obs":[-0.0883627906,0.7002913356,0.0129349865,-0.4651140571],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0743569657,0.504989028,0.0036327057,-0.1683823019],"action_prob":0.8066810966,"action_logp":-0.2148268819,"action_dist_inputs":[0.7106487155,-0.7179381847],"value_targets":41.8833580017} +{"eps_id":1484177277,"obs":[-0.0743569657,0.504989028,0.0036327057,-0.1683823019],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0642571822,0.7000588179,0.0002650597,-0.4599170089],"action_prob":0.4743136466,"action_logp":-0.7458865047,"action_dist_inputs":[0.0492518358,-0.053584259],"value_targets":41.2963218689} +{"eps_id":1484177277,"obs":[-0.0642571822,0.7000588179,0.0002650597,-0.4599170089],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0502560101,0.8951770067,-0.0089332806,-0.7525163889],"action_prob":0.1808138788,"action_logp":-1.7102870941,"action_dist_inputs":[0.7518558502,-0.7589871883],"value_targets":40.7033538818} +{"eps_id":1484177277,"obs":[-0.0502560101,0.8951770067,-0.0089332806,-0.7525163889],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0323524699,0.7001793385,-0.0239836071,-0.4626579285],"action_prob":0.9250127673,"action_logp":-0.0779477134,"action_dist_inputs":[1.2519942522,-1.2604961395],"value_targets":40.1044006348} +{"eps_id":1484177277,"obs":[-0.0323524699,0.7001793385,-0.0239836071,-0.4626579285],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.018348882,0.5054044127,-0.0332367681,-0.177630052],"action_prob":0.8491207361,"action_logp":-0.1635539085,"action_dist_inputs":[0.8603653908,-0.8673563004],"value_targets":39.4993934631} 
+{"eps_id":1484177277,"obs":[-0.018348882,0.5054044127,-0.0332367681,-0.177630052],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0082407929,0.7009858489,-0.0367893688,-0.4806099832],"action_prob":0.3807168007,"action_logp":-0.9656994939,"action_dist_inputs":[0.2410966456,-0.2454102933],"value_targets":38.8882751465} +{"eps_id":1484177277,"obs":[-0.0082407929,0.7009858489,-0.0367893688,-0.4806099832],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0057789241,0.5064020157,-0.0464015678,-0.1997453719],"action_prob":0.8723171949,"action_logp":-0.1366021335,"action_dist_inputs":[0.9573457837,-0.9642583728],"value_targets":38.2709846497} +{"eps_id":1484177277,"obs":[0.0057789241,0.5064020157,-0.0464015678,-0.1997453719],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0159069654,0.311973393,-0.0503964759,0.0779465064],"action_prob":0.6806305647,"action_logp":-0.3847355843,"action_dist_inputs":[0.3761266768,-0.3805444837],"value_targets":37.6474609375} +{"eps_id":1484177277,"obs":[0.0159069654,0.311973393,-0.0503964759,0.0779465064],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0221464317,0.5077801943,-0.0488375463,-0.2302011251],"action_prob":0.6549351215,"action_logp":-0.4232191145,"action_dist_inputs":[-0.3207544386,0.3200491667],"value_targets":37.0176353455} +{"eps_id":1484177277,"obs":[0.0221464317,0.5077801943,-0.0488375463,-0.2302011251],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0323020369,0.3133889139,-0.0534415655,0.04668561],"action_prob":0.7278136015,"action_logp":-0.3177102804,"action_dist_inputs":[0.4895141721,-0.4940439463],"value_targets":36.3814506531} 
+{"eps_id":1484177277,"obs":[0.0323020369,0.3133889139,-0.0534415655,0.04668561],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0385698155,0.5092348456,-0.052507855,-0.2623682022],"action_prob":0.6076278687,"action_logp":-0.4981926084,"action_dist_inputs":[-0.219161287,0.21819067],"value_targets":35.7388381958} +{"eps_id":1484177277,"obs":[0.0385698155,0.5092348456,-0.052507855,-0.2623682022],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0487545133,0.3149001896,-0.0577552207,0.0133017926],"action_prob":0.7731434703,"action_logp":-0.2572906613,"action_dist_inputs":[0.6107583642,-0.6153885126],"value_targets":35.0897369385} +{"eps_id":1484177277,"obs":[0.0487545133,0.3149001896,-0.0577552207,0.0133017926],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0550525151,0.5108008385,-0.0574891828,-0.2970297933],"action_prob":0.5473138094,"action_logp":-0.6027328968,"action_dist_inputs":[-0.0955425799,0.0942806154],"value_targets":34.4340782166} +{"eps_id":1484177277,"obs":[0.0550525151,0.5108008385,-0.0574891828,-0.2970297933],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0652685314,0.3165435195,-0.0634297803,-0.0230177287],"action_prob":0.8146541119,"action_logp":-0.2049916834,"action_dist_inputs":[0.7379063368,-0.7426334023],"value_targets":33.7717971802} +{"eps_id":1484177277,"obs":[0.0652685314,0.3165435195,-0.0634297803,-0.0230177287],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0715994015,0.5125150681,-0.0638901368,-0.3350197375],"action_prob":0.4730240107,"action_logp":-0.7486091256,"action_dist_inputs":[0.0532190315,-0.0547897741],"value_targets":33.1028251648} 
+{"eps_id":1484177277,"obs":[0.0715994015,0.5125150681,-0.0638901368,-0.3350197375],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0818497017,0.3183578253,-0.0705905259,-0.0631490052],"action_prob":0.8506231308,"action_logp":-0.1617861092,"action_dist_inputs":[0.8673306108,-0.8721662164],"value_targets":32.4270935059} +{"eps_id":1484177277,"obs":[0.0818497017,0.3183578253,-0.0705905259,-0.0631490052],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0882168636,0.5144171715,-0.071853511,-0.3772419393],"action_prob":0.3875558376,"action_logp":-0.9478953481,"action_dist_inputs":[0.2278568,-0.2297410071],"value_targets":31.7445411682} +{"eps_id":1484177277,"obs":[0.0882168636,0.5144171715,-0.071853511,-0.3772419393],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0985052064,0.3203853369,-0.0793983489,-0.1080520153],"action_prob":0.8800198436,"action_logp":-0.127810806,"action_dist_inputs":[0.9938194156,-0.9987992644],"value_targets":31.0550918579} +{"eps_id":1484177277,"obs":[0.0985052064,0.3203853369,-0.0793983489,-0.1080520153],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1049129143,0.1264856458,-0.0815593898,0.1585626453],"action_prob":0.7007612586,"action_logp":-0.3555879891,"action_dist_inputs":[0.4243659377,-0.4265596569],"value_targets":30.3586788177} +{"eps_id":1484177277,"obs":[0.1049129143,0.1264856458,-0.0815593898,0.1585626453],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1074426249,-0.0673796535,-0.0783881322,0.4244420528],"action_prob":0.3456892371,"action_logp":-1.0622150898,"action_dist_inputs":[-0.3184404969,0.3196018636],"value_targets":29.6552295685} 
+{"eps_id":1484177277,"obs":[0.1074426249,-0.0673796535,-0.0783881322,0.4244420528],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1060950309,0.1287601292,-0.0698992908,0.1081133187],"action_prob":0.8637705445,"action_logp":-0.1464481056,"action_dist_inputs":[-0.9214345813,0.9255316257],"value_targets":28.9446773529} +{"eps_id":1484177277,"obs":[0.1060950309,0.1287601292,-0.0698992908,0.1081133187],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1086702347,0.3248105049,-0.067737028,-0.2057777941],"action_prob":0.6011140943,"action_logp":-0.5089705586,"action_dist_inputs":[-0.2046647519,0.2054444551],"value_targets":28.2269458771} +{"eps_id":1484177277,"obs":[0.1086702347,0.3248105049,-0.067737028,-0.2057777941],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1151664406,0.1307193041,-0.0718525872,0.0647919029],"action_prob":0.7959572673,"action_logp":-0.2282097787,"action_dist_inputs":[0.679230392,-0.6819857359],"value_targets":27.5019664764} +{"eps_id":1484177277,"obs":[0.1151664406,0.1307193041,-0.0718525872,0.0647919029],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.117780827,0.3267939389,-0.0705567449,-0.2496676594],"action_prob":0.5218994617,"action_logp":-0.6502802968,"action_dist_inputs":[-0.0436115675,0.0440423451],"value_targets":26.7696628571} +{"eps_id":1484177277,"obs":[0.117780827,0.3267939389,-0.0705567449,-0.2496676594],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1243167073,0.1327468008,-0.0755501017,0.0199523643],"action_prob":0.8340724111,"action_logp":-0.1814350784,"action_dist_inputs":[0.8058798313,-0.8088884354],"value_targets":26.0299625397} 
+{"eps_id":1484177277,"obs":[0.1243167073,0.1327468008,-0.0755501017,0.0199523643],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1269716471,0.3288663626,-0.0751510486,-0.2955783904],"action_prob":0.4332584441,"action_logp":-0.8364208937,"action_dist_inputs":[0.1343251616,-0.1342438906],"value_targets":25.2827911377} +{"eps_id":1484177277,"obs":[0.1269716471,0.3288663626,-0.0751510486,-0.2955783904],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.133548975,0.1348917335,-0.0810626224,-0.027511334],"action_prob":0.8641083837,"action_logp":-0.1460570544,"action_dist_inputs":[0.9232691526,-0.9265716076],"value_targets":24.5280704498} +{"eps_id":1484177277,"obs":[0.133548975,0.1348917335,-0.0810626224,-0.027511334],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1362468004,0.3310769796,-0.0816128477,-0.3446291685],"action_prob":0.3431759477,"action_logp":-1.0695120096,"action_dist_inputs":[0.3244459927,-0.3247268796],"value_targets":23.7657279968} +{"eps_id":1484177277,"obs":[0.1362468004,0.3310769796,-0.0816128477,-0.3446291685],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.14286834,0.137205109,-0.088505432,-0.0787568688],"action_prob":0.8870835304,"action_logp":-0.1198161095,"action_dist_inputs":[1.0288145542,-1.0324764252],"value_targets":22.9956855774} +{"eps_id":1484177277,"obs":[0.14286834,0.137205109,-0.088505432,-0.0787568688],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1456124485,-0.0565439351,-0.0900805667,0.1847425401],"action_prob":0.7383106351,"action_logp":-0.3033906221,"action_dist_inputs":[0.5182617903,-0.5189445615],"value_targets":22.2178649902} 
+{"eps_id":1484177277,"obs":[0.1456124485,-0.0565439351,-0.0900805667,0.1847425401],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1444815695,-0.250269264,-0.0863857195,0.4477044642],"action_prob":0.3924922049,"action_logp":-0.9352385998,"action_dist_inputs":[-0.2172060311,0.2196422815],"value_targets":21.4321861267} +{"eps_id":1484177277,"obs":[0.1444815695,-0.250269264,-0.0863857195,0.4477044642],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1394761801,-0.0540383197,-0.0774316266,0.1290898174],"action_prob":0.8546698093,"action_logp":-0.1570400894,"action_dist_inputs":[-0.8833425641,0.888364017],"value_targets":20.6385707855} +{"eps_id":1484177277,"obs":[0.1394761801,-0.0540383197,-0.0774316266,0.1290898174],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1383954138,0.1421025544,-0.0748498291,-0.1869820803],"action_prob":0.5418452621,"action_logp":-0.6127747893,"action_dist_inputs":[-0.0828743279,0.0848992318],"value_targets":19.8369407654} +{"eps_id":1484177277,"obs":[0.1383954138,0.1421025544,-0.0748498291,-0.1869820803],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1412374675,0.338211149,-0.0785894692,-0.5023066401],"action_prob":0.1812522262,"action_logp":-1.707865715,"action_dist_inputs":[0.7531628013,-0.7547237277],"value_targets":19.0272140503} +{"eps_id":1484177277,"obs":[0.1412374675,0.338211149,-0.0785894692,-0.5023066401],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1480016857,0.1442797929,-0.0886356086,-0.235389024],"action_prob":0.9181436896,"action_logp":-0.0854013637,"action_dist_inputs":[1.2062340975,-1.2111543417],"value_targets":18.2093067169} 
+{"eps_id":1484177277,"obs":[0.1480016857,0.1442797929,-0.0886356086,-0.235389024],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1508872807,-0.0494713373,-0.0933433846,0.0280720387],"action_prob":0.8553342819,"action_logp":-0.1562629193,"action_dist_inputs":[0.8875055909,-0.8895613551],"value_targets":17.3831367493} +{"eps_id":1484177277,"obs":[0.1508872807,-0.0494713373,-0.0933433846,0.0280720387],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1498978585,0.1468566507,-0.0927819461,-0.2925410569],"action_prob":0.3412962854,"action_logp":-1.0750043392,"action_dist_inputs":[0.3293158412,-0.3282071352],"value_targets":16.5486240387} +{"eps_id":1484177277,"obs":[0.1498978585,0.1468566507,-0.0927819461,-0.2925410569],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1528349966,0.3431705832,-0.0986327678,-0.6129841208],"action_prob":0.1220736355,"action_logp":-2.1031308174,"action_dist_inputs":[0.9851501584,-0.98778826],"value_targets":15.7056808472} +{"eps_id":1484177277,"obs":[0.1528349966,0.3431705832,-0.0986327678,-0.6129841208],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1596984118,0.1495552808,-0.1108924523,-0.3529237807],"action_prob":0.9293205142,"action_logp":-0.0733016208,"action_dist_inputs":[1.2851643562,-1.2911326885],"value_targets":14.8542232513} +{"eps_id":1484177277,"obs":[0.1596984118,0.1495552808,-0.1108924523,-0.3529237807],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.162689507,-0.0438294671,-0.1179509237,-0.0971639827],"action_prob":0.8971737623,"action_logp":-0.1085057035,"action_dist_inputs":[1.0814348459,-1.0847746134],"value_targets":13.9941644669} 
+{"eps_id":1484177277,"obs":[0.162689507,-0.0438294671,-0.1179509237,-0.0971639827],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1618129164,-0.2370807379,-0.1198942065,0.1561015993],"action_prob":0.8100069165,"action_logp":-0.2107125223,"action_dist_inputs":[0.7248540521,-0.7252007127],"value_targets":13.125418663} +{"eps_id":1484177277,"obs":[0.1618129164,-0.2370807379,-0.1198942065,0.1561015993],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1570713073,-0.0404644199,-0.1167721748,-0.1718702167],"action_prob":0.4192480743,"action_logp":-0.8692924976,"action_dist_inputs":[0.1643728018,-0.1614881754],"value_targets":12.2478981018} +{"eps_id":1484177277,"obs":[0.1570713073,-0.0404644199,-0.1167721748,-0.1718702167],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1562620252,-0.2337382585,-0.1202095747,0.081813179],"action_prob":0.8455225229,"action_logp":-0.167800501,"action_dist_inputs":[0.8493415713,-0.850564599],"value_targets":11.3615131378} +{"eps_id":1484177277,"obs":[0.1562620252,-0.2337382585,-0.1202095747,0.081813179],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1515872478,-0.4269502759,-0.1185733154,0.3342831433],"action_prob":0.6787323356,"action_logp":-0.3875283897,"action_dist_inputs":[0.3749722838,-0.372979939],"value_targets":10.4661741257} +{"eps_id":1484177277,"obs":[0.1515872478,-0.4269502759,-0.1185733154,0.3342831433],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1430482417,-0.620202601,-0.1118876487,0.5873473287],"action_prob":0.376635313,"action_logp":-0.976477921,"action_dist_inputs":[-0.2494006604,0.2544535995],"value_targets":9.5617923737} 
+{"eps_id":1484177277,"obs":[0.1430482417,-0.620202601,-0.1118876487,0.5873473287],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1306442022,-0.4237062037,-0.1001407057,0.2616193891],"action_prob":0.8431016207,"action_logp":-0.1706678122,"action_dist_inputs":[-0.8372427821,0.8442460299],"value_targets":8.6482753754} +{"eps_id":1484177277,"obs":[0.1306442022,-0.4237062037,-0.1001407057,0.2616193891],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1221700683,-0.2273079902,-0.0949083194,-0.0608942509],"action_prob":0.5517107844,"action_logp":-0.5947313309,"action_dist_inputs":[-0.1015858501,0.1059995741],"value_targets":7.7255306244} +{"eps_id":1484177277,"obs":[0.1221700683,-0.2273079902,-0.0949083194,-0.0608942509],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1176239103,-0.4209500849,-0.0961261988,0.2004005015],"action_prob":0.7761185765,"action_logp":-0.2534499466,"action_dist_inputs":[0.6216843128,-0.6215047836],"value_targets":6.7934651375} +{"eps_id":1484177277,"obs":[0.1176239103,-0.4209500849,-0.0961261988,0.2004005015],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1092049107,-0.6145751476,-0.0921181887,0.4612796903],"action_prob":0.5319216847,"action_logp":-0.6312590241,"action_dist_inputs":[0.0657681674,-0.0620924048],"value_targets":5.8519849777} +{"eps_id":1484177277,"obs":[0.1092049107,-0.6145751476,-0.0921181887,0.4612796903],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0969134048,-0.8082826138,-0.0828925967,0.7235645056],"action_prob":0.2421999574,"action_logp":-1.4179916382,"action_dist_inputs":[-0.5671284199,0.5735276341],"value_targets":4.9009947777} 
+{"eps_id":1484177277,"obs":[0.0969134048,-0.8082826138,-0.0828925967,0.7235645056],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0807477534,-0.6121180058,-0.0684213042,0.4059853554],"action_prob":0.8967491984,"action_logp":-0.1089790687,"action_dist_inputs":[-1.0768656731,1.0847489834],"value_targets":3.9403989315} +{"eps_id":1484177277,"obs":[0.0807477534,-0.6121180058,-0.0684213042,0.4059853554],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0685053915,-0.8062063456,-0.0603015982,0.6763365269],"action_prob":0.2656386197,"action_logp":-1.3256185055,"action_dist_inputs":[-0.5054264069,0.5114382505],"value_targets":2.970099926} +{"eps_id":1484177277,"obs":[0.0685053915,-0.8062063456,-0.0603015982,0.6763365269],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0523812659,-1.000440836,-0.0467748679,0.9494408369],"action_prob":0.1078928784,"action_logp":-2.2266163826,"action_dist_inputs":[-1.0523751974,1.0600721836],"value_targets":1.9900000095} +{"eps_id":1484177277,"obs":[0.0523812659,-1.000440836,-0.0467748679,0.9494408369],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[0.0323724486,-0.8047215343,-0.0277860519,0.6424360871],"action_prob":0.9413533807,"action_logp":-0.0604366735,"action_dist_inputs":[-1.3835037947,1.3922843933],"value_targets":1.0} +{"eps_id":1999147951,"obs":[-0.0469013527,-0.0493556708,-0.0069620335,0.0398436524],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0478884652,0.1458654255,-0.0061651603,-0.2550277114],"action_prob":0.565487504,"action_logp":-0.5700670481,"action_dist_inputs":[-0.1317813098,0.1316821575],"value_targets":86.6020355225} 
+{"eps_id":1999147951,"obs":[-0.0478884652,0.1458654255,-0.0061651603,-0.2550277114],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0449711569,-0.0491679683,-0.0112657147,0.0357042663],"action_prob":0.8040135503,"action_logp":-0.2181391418,"action_dist_inputs":[0.7040669918,-0.707503736],"value_targets":86.4666976929} +{"eps_id":1999147951,"obs":[-0.0449711569,-0.0491679683,-0.0112657147,0.0357042663],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.045954518,-0.2441265732,-0.0105516287,0.324811548],"action_prob":0.4522318542,"action_logp":-0.7935602665,"action_dist_inputs":[-0.0959037095,0.0957534239],"value_targets":86.3300018311} +{"eps_id":1999147951,"obs":[-0.045954518,-0.2441265732,-0.0105516287,0.324811548],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0508370474,-0.4390966892,-0.0040553981,0.6141483784],"action_prob":0.1549399346,"action_logp":-1.8647177219,"action_dist_inputs":[-0.8466938734,0.849676311],"value_targets":86.1919174194} +{"eps_id":1999147951,"obs":[-0.0508370474,-0.4390966892,-0.0040553981,0.6141483784],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0596189834,-0.2439183146,0.0082275691,0.3201909065],"action_prob":0.9259833097,"action_logp":-0.0768990442,"action_dist_inputs":[-1.2604373693,1.266127944],"value_targets":86.052444458} +{"eps_id":1999147951,"obs":[-0.0596189834,-0.2439183146,0.0082275691,0.3201909065],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0644973516,-0.0489144996,0.0146313878,0.0301139578],"action_prob":0.8570348024,"action_logp":-0.1542767286,"action_dist_inputs":[-0.8939500451,0.8969273567],"value_targets":85.9115600586} 
+{"eps_id":1999147951,"obs":[-0.0644973516,-0.0489144996,0.0146313878,0.0301139578],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0654756352,-0.2442431897,0.0152336666,0.3273771107],"action_prob":0.4001305699,"action_logp":-0.915964365,"action_dist_inputs":[-0.2025551647,0.2023659199],"value_targets":85.7692489624} +{"eps_id":1999147951,"obs":[-0.0654756352,-0.2442431897,0.0152336666,0.3273771107],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0703605041,-0.049341388,0.021781208,0.0395368524],"action_prob":0.8652527332,"action_logp":-0.1447336376,"action_dist_inputs":[-0.9282901287,0.9313303828],"value_targets":85.62550354} +{"eps_id":1999147951,"obs":[-0.0703605041,-0.049341388,0.021781208,0.0395368524],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0713473335,-0.2447687984,0.0225719456,0.3390116096],"action_prob":0.3686001003,"action_logp":-0.9980429411,"action_dist_inputs":[-0.2691682279,0.2690588534],"value_targets":85.4803085327} +{"eps_id":1999147951,"obs":[-0.0713473335,-0.2447687984,0.0225719456,0.3390116096],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0762427077,-0.0499751829,0.0293521769,0.0535311997],"action_prob":0.874571383,"action_logp":-0.1340213567,"action_dist_inputs":[-0.9694257379,0.9725714922],"value_targets":85.3336486816} +{"eps_id":1999147951,"obs":[-0.0762427077,-0.0499751829,0.0293521769,0.0535311997],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0772422105,-0.2455054373,0.0304228012,0.3553285599],"action_prob":0.3314045668,"action_logp":-1.1044154167,"action_dist_inputs":[-0.3509121239,0.3509271741],"value_targets":85.1855010986} 
+{"eps_id":1999147951,"obs":[-0.0772422105,-0.2455054373,0.0304228012,0.3553285599],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0821523219,-0.0508289635,0.0375293717,0.0723918825],"action_prob":0.8844476938,"action_logp":-0.1227918863,"action_dist_inputs":[-1.0159699917,1.0192704201],"value_targets":85.0358581543} +{"eps_id":1999147951,"obs":[-0.0821523219,-0.0508289635,0.0375293717,0.0723918825],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0831689015,0.1437354237,0.0389772095,-0.2082181424],"action_prob":0.7096751928,"action_logp":-0.3429478705,"action_dist_inputs":[-0.4468082488,0.4469990432],"value_targets":84.8847045898} +{"eps_id":1999147951,"obs":[-0.0831689015,0.1437354237,0.0389772095,-0.2082181424],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0802941918,-0.0519215688,0.034812849,0.0965008363],"action_prob":0.6736576557,"action_logp":-0.3950332403,"action_dist_inputs":[0.3608627617,-0.3639122844],"value_targets":84.7320251465} +{"eps_id":1999147951,"obs":[-0.0802941918,-0.0519215688,0.034812849,0.0965008363],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.081332624,0.1426845789,0.036742866,-0.1849986166],"action_prob":0.7345453501,"action_logp":-0.3085035086,"action_dist_inputs":[-0.5087029934,0.5091047287],"value_targets":84.5778045654} +{"eps_id":1999147951,"obs":[-0.081332624,0.1426845789,0.036742866,-0.1849986166],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0784789324,0.3372620642,0.0330428928,-0.465867877],"action_prob":0.3574319482,"action_logp":-1.0288102627,"action_dist_inputs":[0.2918307185,-0.2946970463],"value_targets":84.4220275879} 
+{"eps_id":1999147951,"obs":[-0.0784789324,0.3372620642,0.0330428928,-0.465867877],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0717336908,0.1416891962,0.0237255339,-0.1629558653],"action_prob":0.8757528663,"action_logp":-0.1326713562,"action_dist_inputs":[0.9736546874,-0.9791567326],"value_targets":84.2646713257} +{"eps_id":1999147951,"obs":[-0.0717336908,0.1416891962,0.0237255339,-0.1629558653],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0688999072,-0.0537642203,0.0204664171,0.1371164024],"action_prob":0.636290729,"action_logp":-0.4520997107,"action_dist_inputs":[0.2783022225,-0.2809984684],"value_targets":84.1057281494} +{"eps_id":1999147951,"obs":[-0.0688999072,-0.0537642203,0.0204664171,0.1371164024],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0699751899,0.1410586983,0.0232087448,-0.1490400732],"action_prob":0.7577415705,"action_logp":-0.2774129212,"action_dist_inputs":[-0.5697901845,0.5705468655],"value_targets":83.9451828003} +{"eps_id":1999147951,"obs":[-0.0699751899,0.1410586983,0.0232087448,-0.1490400732],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0671540126,-0.0543877743,0.0202279426,0.1508734375],"action_prob":0.6146493554,"action_logp":-0.486703366,"action_dist_inputs":[0.2321641147,-0.2347340584],"value_targets":83.7830123901} +{"eps_id":1999147951,"obs":[-0.0671540126,-0.0543877743,0.0202279426,0.1508734375],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0682417676,-0.2497934401,0.0232454129,0.4498685896],"action_prob":0.2288542539,"action_logp":-1.4746699333,"action_dist_inputs":[-0.6069467068,0.6078453064],"value_targets":83.6192016602} 
+{"eps_id":1999147951,"obs":[-0.0682417676,-0.2497934401,0.0232454129,0.4498685896],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0732376352,-0.0550078563,0.0322427824,0.164602682],"action_prob":0.9061077833,"action_logp":-0.0985970274,"action_dist_inputs":[-1.1314508915,1.1355596781],"value_targets":83.453742981} +{"eps_id":1999147951,"obs":[-0.0732376352,-0.0550078563,0.0322427824,0.164602682],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0743377954,0.1396380514,0.0355348364,-0.1177366972],"action_prob":0.7974872589,"action_logp":-0.2262894362,"action_dist_inputs":[-0.6847912669,0.6858716011],"value_targets":83.286605835} +{"eps_id":1999147951,"obs":[-0.0743377954,0.1396380514,0.0355348364,-0.1177366972],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0715450346,-0.0559745431,0.0331801027,0.1859420091],"action_prob":0.5312200189,"action_logp":-0.6325790286,"action_dist_inputs":[0.0613956638,-0.0636469573],"value_targets":83.1177825928} +{"eps_id":1999147951,"obs":[-0.0715450346,-0.0559745431,0.0331801027,0.1859420091],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0726645291,0.1386573613,0.0368989445,-0.0960919783],"action_prob":0.814442575,"action_logp":-0.2052513361,"action_dist_inputs":[-0.7389146686,0.7402250171],"value_targets":82.9472579956} +{"eps_id":1999147951,"obs":[-0.0726645291,0.1386573613,0.0368989445,-0.0960919783],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0698913783,-0.0569734871,0.0349771045,0.2080003619],"action_prob":0.4899393916,"action_logp":-0.7134735584,"action_dist_inputs":[-0.0211396515,0.0191081651],"value_targets":82.7750091553} 
+{"eps_id":1999147951,"obs":[-0.0698913783,-0.0569734871,0.0349771045,0.2080003619],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0710308477,0.137631312,0.0391371138,-0.0734471232],"action_prob":0.8303335905,"action_logp":-0.1859277189,"action_dist_inputs":[-0.7932172418,0.7947758436],"value_targets":82.601020813} +{"eps_id":1999147951,"obs":[-0.0710308477,0.137631312,0.0391371138,-0.0734471232],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0682782233,-0.058029227,0.0376681685,0.2313223183],"action_prob":0.4457215965,"action_logp":-0.8080607653,"action_dist_inputs":[-0.1098791808,0.108093597],"value_targets":82.4252700806} +{"eps_id":1999147951,"obs":[-0.0682782233,-0.058029227,0.0376681685,0.2313223183],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0694388077,-0.2536686063,0.0422946177,0.5356450677],"action_prob":0.1547675729,"action_logp":-1.8658307791,"action_dist_inputs":[-0.8479281664,0.8497589231],"value_targets":82.2477493286} +{"eps_id":1999147951,"obs":[-0.0694388077,-0.2536686063,0.0422946177,0.5356450677],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0745121762,-0.0591661148,0.053007517,0.2565831244],"action_prob":0.9221041203,"action_logp":-0.0810971335,"action_dist_inputs":[-1.2331289053,1.2381563187],"value_targets":82.0684280396} +{"eps_id":1999147951,"obs":[-0.0745121762,-0.0591661148,0.053007517,0.2565831244],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0756954998,0.1351605356,0.0581391789,-0.0189205278],"action_prob":0.8653063178,"action_logp":-0.1446717381,"action_dist_inputs":[-0.92894876,0.9311317801],"value_targets":81.8873062134} 
+{"eps_id":1999147951,"obs":[-0.0756954998,0.1351605356,0.0581391789,-0.0189205278],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0729922876,0.3294025958,0.0577607676,-0.2927080393],"action_prob":0.6759410501,"action_logp":-0.391649425,"action_dist_inputs":[-0.368135184,0.3670452237],"value_targets":81.7043457031} +{"eps_id":1999147951,"obs":[-0.0729922876,0.3294025958,0.0577607676,-0.2927080393],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0664042383,0.1335067153,0.051906608,0.01761798],"action_prob":0.6863719821,"action_logp":-0.3763355315,"action_dist_inputs":[0.389475584,-0.3937365413],"value_targets":81.5195465088} +{"eps_id":1999147951,"obs":[-0.0664042383,0.1335067153,0.051906608,0.01761798],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0637341067,0.3278473318,0.0522589684,-0.2582466304],"action_prob":0.7107152939,"action_logp":-0.3414833844,"action_dist_inputs":[-0.4497624338,0.4490979612],"value_targets":81.3328704834} +{"eps_id":1999147951,"obs":[-0.0637341067,0.3278473318,0.0522589684,-0.2582466304],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0571771562,0.1320197582,0.0470940359,0.050450854],"action_prob":0.6466722488,"action_logp":-0.4359156787,"action_dist_inputs":[0.3002376556,-0.3042060137],"value_targets":81.144317627} +{"eps_id":1999147951,"obs":[-0.0571771562,0.1320197582,0.0470940359,0.050450854],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0545367636,0.3264358938,0.0481030531,-0.227009505],"action_prob":0.7393904924,"action_logp":-0.3019290566,"action_dist_inputs":[-0.5215293765,0.5212737918],"value_targets":80.9538574219} 
+{"eps_id":1999147951,"obs":[-0.0545367636,0.3264358938,0.0481030531,-0.227009505],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0480080433,0.1306606829,0.043562863,0.0804504827],"action_prob":0.6065167189,"action_logp":-0.5000229478,"action_dist_inputs":[0.2145134956,-0.2181803286],"value_targets":80.76146698} +{"eps_id":1999147951,"obs":[-0.0480080433,0.1306606829,0.043562863,0.0804504827],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0453948304,0.3251319528,0.0451718718,-0.1981762499],"action_prob":0.7635070682,"action_logp":-0.2698328793,"action_dist_inputs":[-0.5859320164,0.5860720873],"value_targets":80.5671386719} +{"eps_id":1999147951,"obs":[-0.0453948304,0.3251319528,0.0451718718,-0.1981762499],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0388921909,0.1293939948,0.0412083454,0.1084072068],"action_prob":0.5659942031,"action_logp":-0.5691714883,"action_dist_inputs":[0.1310866773,-0.1344391108],"value_targets":80.3708496094} +{"eps_id":1999147951,"obs":[-0.0388921909,0.1293939948,0.0412083454,0.1084072068],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0363043137,-0.0662935078,0.0433764905,0.4138011038],"action_prob":0.2157578766,"action_logp":-1.533598423,"action_dist_inputs":[-0.6450169683,0.6455440521],"value_targets":80.1725769043} +{"eps_id":1999147951,"obs":[-0.0363043137,-0.0662935078,0.0433764905,0.4138011038],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0376301818,0.1281876266,0.0516525134,0.1351024806],"action_prob":0.9024361372,"action_logp":-0.1026573256,"action_dist_inputs":[-1.1103241444,1.1142671108],"value_targets":79.9722976685} 
+{"eps_id":1999147951,"obs":[-0.0376301818,0.1281876266,0.0516525134,0.1351024806],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0350664295,0.3225331306,0.0543545634,-0.1408479065],"action_prob":0.8123241067,"action_logp":-0.2078558505,"action_dist_inputs":[-0.7321059704,0.7330769897],"value_targets":79.7699966431} +{"eps_id":1999147951,"obs":[-0.0350664295,0.3225331306,0.0543545634,-0.1408479065],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0286157671,0.1266765445,0.0515376031,0.1684754789],"action_prob":0.4514608383,"action_logp":-0.7952666283,"action_dist_inputs":[-0.0986693203,0.0961007625],"value_targets":79.5656509399} +{"eps_id":1999147951,"obs":[-0.0286157671,0.1266765445,0.0515376031,0.1684754789],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0260822363,0.3210243583,0.0549071133,-0.1075138301],"action_prob":0.8309265971,"action_logp":-0.1852138042,"action_dist_inputs":[-0.7953857183,0.796823144],"value_targets":79.3592453003} +{"eps_id":1999147951,"obs":[-0.0260822363,0.3210243583,0.0549071133,-0.1075138301],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0196617488,0.1251603365,0.0527568385,0.2019740492],"action_prob":0.3999043405,"action_logp":-0.9165298939,"action_dist_inputs":[-0.2039730698,0.2018904835],"value_targets":79.1507568359} +{"eps_id":1999147951,"obs":[-0.0196617488,0.1251603365,0.0527568385,0.2019740492],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0171585418,0.3194896281,0.0567963198,-0.0736110285],"action_prob":0.8470900059,"action_logp":-0.1659483016,"action_dist_inputs":[-0.855024159,0.8569334149],"value_targets":78.9401550293} 
+{"eps_id":1999147951,"obs":[-0.0171585418,0.3194896281,0.0567963198,-0.0736110285],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0107687488,0.1236013845,0.0553240962,0.2364366949],"action_prob":0.350086689,"action_logp":-1.0495744944,"action_dist_inputs":[-0.3101021051,0.3085560501],"value_targets":78.727432251} +{"eps_id":1999147951,"obs":[-0.0107687488,0.1236013845,0.0553240962,0.2364366949],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0082967216,0.3178910613,0.0600528307,-0.0382949561],"action_prob":0.8612415195,"action_logp":-0.1493802816,"action_dist_inputs":[-0.9116246104,0.91401577],"value_targets":78.5125579834} +{"eps_id":1999147951,"obs":[-0.0082967216,0.3178910613,0.0600528307,-0.0382949561],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0019389,0.1219616681,0.0592869334,0.272714287],"action_prob":0.3030350208,"action_logp":-1.1939069033,"action_dist_inputs":[-0.4169204533,0.4159662724],"value_targets":78.2955093384} +{"eps_id":1999147951,"obs":[-0.0019389,0.1219616681,0.0592869334,0.272714287],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0005003333,0.3161897659,0.0647412166,-0.0006958151],"action_prob":0.8736553192,"action_logp":-0.1350693256,"action_dist_inputs":[-0.9653926492,0.9682798982],"value_targets":78.0762710571} +{"eps_id":1999147951,"obs":[0.0005003333,0.3161897659,0.0647412166,-0.0006958151],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0068241283,0.5103263259,0.0647272989,-0.2722700238],"action_prob":0.7401784658,"action_logp":-0.3008639812,"action_dist_inputs":[-0.5236008763,0.5232951045],"value_targets":77.8548202515} 
+{"eps_id":1999147951,"obs":[0.0068241283,0.5103263259,0.0647272989,-0.2722700238],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0170306563,0.704467833,0.0592819005,-0.5438559055],"action_prob":0.440112412,"action_logp":-0.8207250834,"action_dist_inputs":[0.1183896065,-0.1223161817],"value_targets":77.6311340332} +{"eps_id":1999147951,"obs":[0.0170306563,0.704467833,0.0592819005,-0.5438559055],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0311200134,0.5085651278,0.048404783,-0.2330993414],"action_prob":0.8226910233,"action_logp":-0.195174545,"action_dist_inputs":[0.7641167641,-0.7705703378],"value_targets":77.4051818848} +{"eps_id":1999147951,"obs":[0.0311200134,0.5085651278,0.048404783,-0.2330993414],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0412913151,0.3127861619,0.0437427945,0.0744505748],"action_prob":0.5353505611,"action_logp":-0.6248335242,"action_dist_inputs":[0.069089815,-0.0725487769],"value_targets":77.1769561768} +{"eps_id":1999147951,"obs":[0.0412913151,0.3127861619,0.0437427945,0.0744505748],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0475470386,0.5072546005,0.045231808,-0.2041167766],"action_prob":0.7776198387,"action_logp":-0.2515175343,"action_dist_inputs":[-0.625533402,0.6263161302],"value_targets":76.9464187622} +{"eps_id":1999147951,"obs":[0.0475470386,0.5072546005,0.045231808,-0.2041167766],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0576921292,0.7017015219,0.041149471,-0.4821951687],"action_prob":0.5014922619,"action_logp":-0.6901670694,"action_dist_inputs":[-0.0044824621,0.0014866935],"value_targets":76.7135543823} 
+{"eps_id":1999147951,"obs":[0.0576921292,0.7017015219,0.041149471,-0.4821951687],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0717261583,0.5060236454,0.0315055698,-0.1768323183],"action_prob":0.7936672568,"action_logp":-0.2310909927,"action_dist_inputs":[0.6706132293,-0.6765607595],"value_targets":76.4783401489} +{"eps_id":1999147951,"obs":[0.0717261583,0.5060236454,0.0315055698,-0.1768323183],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0818466321,0.310465306,0.0279689226,0.1256207079],"action_prob":0.4872415364,"action_logp":-0.7189953327,"action_dist_inputs":[-0.0268264879,0.0242185034],"value_targets":76.2407455444} +{"eps_id":1999147951,"obs":[0.0818466321,0.310465306,0.0279689226,0.1256207079],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0880559385,0.5051756501,0.0304813366,-0.1581087708],"action_prob":0.7978599072,"action_logp":-0.2258222699,"action_dist_inputs":[-0.6856899261,0.687282145],"value_targets":76.0007553101} +{"eps_id":1999147951,"obs":[0.0880559385,0.5051756501,0.0304813366,-0.1581087708],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0981594548,0.309630841,0.0273191612,0.1440322995],"action_prob":0.4624759257,"action_logp":-0.7711607814,"action_dist_inputs":[-0.076302126,0.0740770698],"value_targets":75.7583389282} +{"eps_id":1999147951,"obs":[0.0981594548,0.309630841,0.0273191612,0.1440322995],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1043520719,0.5043511391,0.0301998071,-0.1399082839],"action_prob":0.8085149527,"action_logp":-0.212556079,"action_dist_inputs":[-0.719219923,0.7211697102],"value_targets":75.5134735107} 
+{"eps_id":1999147951,"obs":[0.1043520719,0.5043511391,0.0301998071,-0.1399082839],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1144390926,0.6990278363,0.027401641,-0.4229127765],"action_prob":0.562269032,"action_logp":-0.5757747889,"action_dist_inputs":[-0.1261052042,0.1242708638],"value_targets":75.26612854} +{"eps_id":1999147951,"obs":[0.1144390926,0.6990278363,0.027401641,-0.4229127765],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1284196526,0.503528595,0.0189433862,-0.1217189729],"action_prob":0.7564334869,"action_logp":-0.2791407108,"action_dist_inputs":[0.5640115738,-0.5692127347],"value_targets":75.0162963867} +{"eps_id":1999147951,"obs":[0.1284196526,0.503528595,0.0189433862,-0.1217189729],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1384902149,0.3081404269,0.0165090058,0.176879704],"action_prob":0.4356200397,"action_logp":-0.8309848905,"action_dist_inputs":[-0.1302427948,0.1287145764],"value_targets":74.7639312744} +{"eps_id":1999147951,"obs":[0.1384902149,0.3081404269,0.0165090058,0.176879704],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1446530223,0.5030222535,0.0200466011,-0.1105498075],"action_prob":0.8190778494,"action_logp":-0.1995761395,"action_dist_inputs":[-0.7537830472,0.756329298],"value_targets":74.5090255737} +{"eps_id":1999147951,"obs":[0.1446530223,0.5030222535,0.0200466011,-0.1105498075],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1547134668,0.6978513002,0.017835604,-0.3968413472],"action_prob":0.5810030103,"action_logp":-0.5429993868,"action_dist_inputs":[-0.1640533358,0.1628387272],"value_targets":74.2515411377} 
+{"eps_id":1999147951,"obs":[0.1547134668,0.6978513002,0.017835604,-0.3968413472],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1686705053,0.5024809241,0.0098987771,-0.0985889658],"action_prob":0.7448584437,"action_logp":-0.2945610583,"action_dist_inputs":[0.533318162,-0.5380575657],"value_targets":73.9914550781} +{"eps_id":1999147951,"obs":[0.1686705053,0.5024809241,0.0098987771,-0.0985889658],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1787201166,0.6974596381,0.0079269977,-0.3881324828],"action_prob":0.5761049986,"action_logp":-0.5514653325,"action_dist_inputs":[-0.153893739,0.152910471],"value_targets":73.7287445068} +{"eps_id":1999147951,"obs":[0.1787201166,0.6974596381,0.0079269977,-0.3881324828],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1926693171,0.5022260547,0.0001643483,-0.0929608196],"action_prob":0.7519031167,"action_logp":-0.2851477861,"action_dist_inputs":[0.5521201491,-0.5566680431],"value_targets":73.4633712769} +{"eps_id":1999147951,"obs":[0.1926693171,0.5022260547,0.0001643483,-0.0929608196],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2027138323,0.697345674,-0.0016948682,-0.3855918944],"action_prob":0.5626222491,"action_logp":-0.5751467943,"action_dist_inputs":[-0.1263226271,0.1254885793],"value_targets":73.1953277588} +{"eps_id":1999147951,"obs":[0.2027138323,0.697345674,-0.0016948682,-0.3855918944],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.216660738,0.5022478104,-0.0094067063,-0.0934438333],"action_prob":0.7645523548,"action_logp":-0.2684647739,"action_dist_inputs":[0.5867012143,-0.5911006927],"value_targets":72.9245758057} 
+{"eps_id":1999147951,"obs":[0.216660738,0.5022478104,-0.0094067063,-0.0934438333],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2267057002,0.307261914,-0.0112755829,0.1962565035],"action_prob":0.4601248205,"action_logp":-0.7762574553,"action_dist_inputs":[-0.0803017467,0.0795382857],"value_targets":72.6510848999} +{"eps_id":1999147951,"obs":[0.2267057002,0.307261914,-0.0112755829,0.1962565035],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.232850939,0.5025433302,-0.0073504527,-0.0999619365],"action_prob":0.8091349602,"action_logp":-0.2117895484,"action_dist_inputs":[-0.7206283808,0.7237709165],"value_targets":72.3748321533} +{"eps_id":1999147951,"obs":[0.232850939,0.5025433302,-0.0073504527,-0.0999619365],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2429018021,0.6977698803,-0.0093496908,-0.3949548304],"action_prob":0.5324739814,"action_logp":-0.6302212477,"action_dist_inputs":[-0.0653818101,0.0646971762],"value_targets":72.0957946777} +{"eps_id":1999147951,"obs":[0.2429018021,0.6977698803,-0.0093496908,-0.3949548304],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2568572164,0.5027818084,-0.017248787,-0.1052343026],"action_prob":0.785040319,"action_logp":-0.2420202047,"action_dist_inputs":[0.6455537677,-0.6497306228],"value_targets":71.8139266968} +{"eps_id":1999147951,"obs":[0.2568572164,0.5027818084,-0.017248787,-0.1052343026],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2669128478,0.307911247,-0.0193534736,0.1819571853],"action_prob":0.4992908537,"action_logp":-0.6945664883,"action_dist_inputs":[-0.0017547561,0.0010818066],"value_targets":71.5292205811} 
+{"eps_id":1999147951,"obs":[0.2669128478,0.307911247,-0.0193534736,0.1819571853],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2730710506,0.1130714864,-0.0157143306,0.4684725106],"action_prob":0.2084829211,"action_logp":-1.5678981543,"action_dist_inputs":[-0.6654560566,0.6686381698],"value_targets":71.2416381836} +{"eps_id":1999147951,"obs":[0.2730710506,0.1130714864,-0.0157143306,0.4684725106],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2753324807,0.3084118664,-0.0063448795,0.1708782911],"action_prob":0.9036378264,"action_logp":-0.1013266593,"action_dist_inputs":[-1.1161496639,1.1221643686],"value_targets":70.9511489868} +{"eps_id":1999147951,"obs":[0.2753324807,0.3084118664,-0.0063448795,0.1708782911],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2815007269,0.5036240816,-0.002927314,-0.1237994879],"action_prob":0.7965869904,"action_logp":-0.227418974,"action_dist_inputs":[-0.6809207201,0.6841771007],"value_targets":70.6577301025} +{"eps_id":1999147951,"obs":[0.2815007269,0.5036240816,-0.002927314,-0.1237994879],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2915732265,0.3085441589,-0.0054033035,0.1679584682],"action_prob":0.5005064607,"action_logp":-0.6921347976,"action_dist_inputs":[0.0007505948,-0.0012750719],"value_targets":70.3613433838} +{"eps_id":1999147951,"obs":[0.2915732265,0.3085441589,-0.0054033035,0.1679584682],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2977440953,0.5037430525,-0.0020441343,-0.1264241338],"action_prob":0.7947861552,"action_logp":-0.2296821624,"action_dist_inputs":[-0.6753396988,0.678681016],"value_targets":70.061958313} 
+{"eps_id":1999147951,"obs":[0.2977440953,0.5037430525,-0.0020441343,-0.1264241338],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3078189492,0.6988942027,-0.0045726169,-0.4197512567],"action_prob":0.4952167571,"action_logp":-0.7027597427,"action_dist_inputs":[0.0093604214,-0.0097731212],"value_targets":69.7595596313} +{"eps_id":1999147951,"obs":[0.3078189492,0.6988942027,-0.0045726169,-0.4197512567],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3217968345,0.8940806389,-0.0129676424,-0.7138721943],"action_prob":0.1999964416,"action_logp":-1.6094557047,"action_dist_inputs":[0.6912831068,-0.6950334311],"value_targets":69.4540939331} +{"eps_id":1999147951,"obs":[0.3217968345,0.8940806389,-0.0129676424,-0.7138721943],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3396784663,0.6991406083,-0.0272450857,-0.425299108],"action_prob":0.9080366492,"action_logp":-0.0964705199,"action_dist_inputs":[1.1419014931,-1.1479928493],"value_targets":69.1455535889} +{"eps_id":1999147951,"obs":[0.3396784663,0.6991406083,-0.0272450857,-0.425299108],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3536612689,0.5044149756,-0.0357510671,-0.1413281113],"action_prob":0.826543808,"action_logp":-0.1905023605,"action_dist_inputs":[0.7788270116,-0.7825006843],"value_targets":68.8338928223} +{"eps_id":1999147951,"obs":[0.3536612689,0.5044149756,-0.0357510671,-0.1413281113],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3637495637,0.7000302076,-0.0385776311,-0.4450720549],"action_prob":0.3968445659,"action_logp":-0.924210608,"action_dist_inputs":[0.2090830207,-0.2095471621],"value_targets":68.5190811157} 
+{"eps_id":1999147951,"obs":[0.3637495637,0.7000302076,-0.0385776311,-0.4450720549],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3777501583,0.5054746866,-0.0474790707,-0.1647949368],"action_prob":0.846432209,"action_logp":-0.1667251438,"action_dist_inputs":[0.8516075015,-0.8552806377],"value_targets":68.2010955811} +{"eps_id":1999147951,"obs":[0.3777501583,0.5054746866,-0.0474790707,-0.1647949368],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3878596723,0.3110634387,-0.0507749692,0.1125397384],"action_prob":0.6586679816,"action_logp":-0.4175357223,"action_dist_inputs":[0.3283866346,-0.3289770484],"value_targets":67.8798904419} +{"eps_id":1999147951,"obs":[0.3878596723,0.3110634387,-0.0507749692,0.1125397384],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3940809369,0.5068747997,-0.0485241748,-0.1957201809],"action_prob":0.666680038,"action_logp":-0.4054450691,"action_dist_inputs":[-0.3451598883,0.3480472267],"value_targets":67.5554504395} +{"eps_id":1999147951,"obs":[0.3940809369,0.5068747997,-0.0485241748,-0.1957201809],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4042184353,0.3124793172,-0.0524385795,0.0812693015],"action_prob":0.6981601119,"action_logp":-0.3593068123,"action_dist_inputs":[0.4189113379,-0.4196403325],"value_targets":67.227722168} +{"eps_id":1999147951,"obs":[0.4042184353,0.3124793172,-0.0524385795,0.0812693015],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4104680121,0.5083122253,-0.0508131944,-0.2274861634],"action_prob":0.6194940209,"action_logp":-0.4788521826,"action_dist_inputs":[-0.2423725128,0.2450287938],"value_targets":66.8966903687} 
+{"eps_id":1999147951,"obs":[0.4104680121,0.5083122253,-0.0508131944,-0.2274861634],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4206342697,0.3139518797,-0.0553629175,0.0487454124],"action_prob":0.7349698544,"action_logp":-0.3079257607,"action_dist_inputs":[0.5095636249,-0.510422349],"value_targets":66.5623168945} +{"eps_id":1999147951,"obs":[0.4206342697,0.3139518797,-0.0553629175,0.0487454124],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4269132912,0.1196656898,-0.054388009,0.3234598041],"action_prob":0.435285598,"action_logp":-0.8317528963,"action_dist_inputs":[-0.1289460659,0.1313715577],"value_targets":66.2245635986} +{"eps_id":1999147951,"obs":[0.4269132912,0.1196656898,-0.054388009,0.3234598041],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4293065965,0.3155182004,-0.0479188114,0.014133228],"action_prob":0.8293425441,"action_logp":-0.187122032,"action_dist_inputs":[-0.7877587676,0.7932159305],"value_targets":65.883392334} +{"eps_id":1999147951,"obs":[0.4293065965,0.3155182004,-0.0479188114,0.014133228],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4356169701,0.5112934709,-0.0476361476,-0.2932752967],"action_prob":0.527733922,"action_logp":-0.6391630769,"action_dist_inputs":[-0.0544121377,0.0566374101],"value_targets":65.5387802124} +{"eps_id":1999147951,"obs":[0.4356169701,0.5112934709,-0.0476361476,-0.2932752967],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4458428323,0.3168818951,-0.0535016544,-0.0159885027],"action_prob":0.7856842875,"action_logp":-0.2412002534,"action_dist_inputs":[0.6490011811,-0.6501035094],"value_targets":65.1906890869} 
+{"eps_id":1999147951,"obs":[0.4458428323,0.3168818951,-0.0535016544,-0.0159885027],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.452180475,0.1225664318,-0.0538214222,0.2593456805],"action_prob":0.5295373797,"action_logp":-0.6357515454,"action_dist_inputs":[0.0601677708,-0.0581193604],"value_targets":64.8390808105} +{"eps_id":1999147951,"obs":[0.452180475,0.1225664318,-0.0538214222,0.2593456805],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4546318054,0.3184137642,-0.0486345105,-0.0498158112],"action_prob":0.777933836,"action_logp":-0.251113832,"action_dist_inputs":[-0.6243018508,0.6293641925],"value_targets":64.4839172363} +{"eps_id":1999147951,"obs":[0.4546318054,0.3184137642,-0.0486345105,-0.0498158112],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4610000849,0.124021709,-0.0496308282,0.2271345854],"action_prob":0.5684635639,"action_logp":-0.5648180842,"action_dist_inputs":[0.1387305409,-0.1368547082],"value_targets":64.1251678467} +{"eps_id":1999147951,"obs":[0.4610000849,0.124021709,-0.0496308282,0.2271345854],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4634805322,0.3198164999,-0.0450881347,-0.0807814524],"action_prob":0.7514411211,"action_logp":-0.2857623994,"action_dist_inputs":[-0.5507183671,0.5555948019],"value_targets":63.7627983093} +{"eps_id":1999147951,"obs":[0.4634805322,0.3198164999,-0.0450881347,-0.0807814524],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4698768556,0.5155548453,-0.0467037633,-0.3873419762],"action_prob":0.3962513804,"action_logp":-0.9257065058,"action_dist_inputs":[0.2114209533,-0.2096882313],"value_targets":63.3967666626} 
+{"eps_id":1999147951,"obs":[0.4698768556,0.5155548453,-0.0467037633,-0.3873419762],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4801879525,0.7113075852,-0.0544506013,-0.6943767667],"action_prob":0.1637691408,"action_logp":-1.8092975616,"action_dist_inputs":[0.8144868612,-0.8159601688],"value_targets":63.0270347595} +{"eps_id":1999147951,"obs":[0.4801879525,0.7113075852,-0.0544506013,-0.6943767667],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4944140911,0.5169814825,-0.0683381408,-0.4193202555],"action_prob":0.9107570648,"action_logp":-0.0934790969,"action_dist_inputs":[1.1591341496,-1.1637791395],"value_targets":62.6535720825} +{"eps_id":1999147951,"obs":[0.4944140911,0.5169814825,-0.0683381408,-0.4193202555],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5047537088,0.3228910565,-0.0767245442,-0.1489399374],"action_prob":0.8587309718,"action_logp":-0.152299583,"action_dist_inputs":[0.9015487432,-0.9032406807],"value_targets":62.2763366699} +{"eps_id":1999147951,"obs":[0.5047537088,0.3228910565,-0.0767245442,-0.1489399374],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5112115741,0.1289468557,-0.0797033459,0.1185856387],"action_prob":0.7246977091,"action_logp":-0.3220006526,"action_dist_inputs":[0.484649539,-0.4832353592],"value_targets":61.8952865601} +{"eps_id":1999147951,"obs":[0.5112115741,0.1289468557,-0.0797033459,0.1185856387],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5137904882,0.3251149356,-0.0773316324,-0.1981403381],"action_prob":0.5548981428,"action_logp":-0.5889707208,"action_dist_inputs":[-0.1081331968,0.1123481914],"value_targets":61.5103912354} 
+{"eps_id":1999147951,"obs":[0.5137904882,0.3251149356,-0.0773316324,-0.1981403381],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5202927589,0.1311793625,-0.0812944397,0.0691807941],"action_prob":0.7632200718,"action_logp":-0.2702088952,"action_dist_inputs":[0.5857762098,-0.5846387744],"value_targets":61.1216087341} +{"eps_id":1999147951,"obs":[0.5202927589,0.1311793625,-0.0812944397,0.0691807941],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5229163766,-0.0626886785,-0.0799108222,0.3351482153],"action_prob":0.516802609,"action_logp":-0.6600942612,"action_dist_inputs":[0.0355978869,-0.0316379778],"value_targets":60.7288970947} +{"eps_id":1999147951,"obs":[0.5229163766,-0.0626886785,-0.0799108222,0.3351482153],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5216625929,0.1334742308,-0.0732078552,0.0183740482],"action_prob":0.7729392052,"action_logp":-0.2575548887,"action_dist_inputs":[-0.6092840433,0.615698576],"value_targets":60.3322181702} +{"eps_id":1999147951,"obs":[0.5216625929,0.1334742308,-0.0732078552,0.0183740482],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5243321061,0.3295655549,-0.0728403777,-0.2964789867],"action_prob":0.4311650991,"action_logp":-0.8412641883,"action_dist_inputs":[0.1403994262,-0.1366997063],"value_targets":59.9315338135} +{"eps_id":1999147951,"obs":[0.5243321061,0.3295655549,-0.0728403777,-0.2964789867],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5309233665,0.1355534941,-0.0787699521,-0.0276287273],"action_prob":0.8146058917,"action_logp":-0.2050508261,"action_dist_inputs":[0.7403525114,-0.7398683429],"value_targets":59.526802063} 
+{"eps_id":1999147951,"obs":[0.5309233665,0.1355534941,-0.0787699521,-0.0276287273],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5336344838,0.3317115307,-0.0793225318,-0.3440878391],"action_prob":0.3680339754,"action_logp":-0.9995800257,"action_dist_inputs":[0.2720632851,-0.2685970962],"value_targets":59.117980957} +{"eps_id":1999147951,"obs":[0.5336344838,0.3317115307,-0.0793225318,-0.3440878391],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5402687192,0.1378023475,-0.0862042829,-0.0774350315],"action_prob":0.8364120722,"action_logp":-0.1786339134,"action_dist_inputs":[0.815923512,-0.8158468008],"value_targets":58.7050323486} +{"eps_id":1999147951,"obs":[0.5402687192,0.1378023475,-0.0862042829,-0.0774350315],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5430247188,0.3340475261,-0.0877529904,-0.3960224092],"action_prob":0.3091025054,"action_logp":-1.1740822792,"action_dist_inputs":[0.4037486911,-0.4005697966],"value_targets":58.2879104614} +{"eps_id":1999147951,"obs":[0.5430247188,0.3340475261,-0.0877529904,-0.3960224092],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5497056842,0.140273124,-0.0956734344,-0.1322455108],"action_prob":0.8548368216,"action_logp":-0.1568446457,"action_dist_inputs":[0.8863060474,-0.8867464662],"value_targets":57.8665771484} +{"eps_id":1999147951,"obs":[0.5497056842,0.140273124,-0.0956734344,-0.1322455108],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5525111556,0.3366260827,-0.0983183458,-0.4535130858],"action_prob":0.2569610775,"action_logp":-1.3588306904,"action_dist_inputs":[0.5323063135,-0.5295174718],"value_targets":57.4409866333} 
+{"eps_id":1999147951,"obs":[0.5525111556,0.3366260827,-0.0983183458,-0.4535130858],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.559243679,0.1430218965,-0.1073886082,-0.1933683306],"action_prob":0.8700482845,"action_logp":-0.1392065585,"action_dist_inputs":[0.9501520395,-0.9512338638],"value_targets":57.0110969543} +{"eps_id":1999147951,"obs":[0.559243679,0.1430218965,-0.1073886082,-0.1933683306],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5621041059,0.3395030797,-0.1112559736,-0.517903924],"action_prob":0.2132588178,"action_logp":-1.5452487469,"action_dist_inputs":[0.6538149714,-0.6515778899],"value_targets":56.5768661499} +{"eps_id":1999147951,"obs":[0.5621041059,0.3395030797,-0.1112559736,-0.517903924],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5688941479,0.1461089402,-0.1216140538,-0.2622480392],"action_prob":0.8822277784,"action_logp":-0.1253050268,"action_dist_inputs":[1.0059238672,-1.0077741146],"value_targets":56.1382484436} +{"eps_id":1999147951,"obs":[0.5688941479,0.1461089402,-0.1216140538,-0.2622480392],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5718163848,0.3427379727,-0.1268590093,-0.5906796455],"action_prob":0.1785104275,"action_logp":-1.7231082916,"action_dist_inputs":[0.7639721632,-0.7625000477],"value_targets":55.6952018738} +{"eps_id":1999147951,"obs":[0.5718163848,0.3427379727,-0.1268590093,-0.5906796455],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5786710978,0.1495990604,-0.1386726052,-0.3404959142],"action_prob":0.8915993571,"action_logp":-0.1147384122,"action_dist_inputs":[1.0522265434,-1.0549565554],"value_targets":55.2476768494} 
+{"eps_id":1999147951,"obs":[0.5786710978,0.1495990604,-0.1386726052,-0.3404959142],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5816630721,-0.0433054529,-0.1454825252,-0.0945560709],"action_prob":0.8477355838,"action_logp":-0.1651865095,"action_dist_inputs":[0.858712852,-0.8582375646],"value_targets":54.7956352234} +{"eps_id":1999147951,"obs":[0.5816630721,-0.0433054529,-0.1454825252,-0.0945560709],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5807970166,-0.2360751182,-0.1473736465,0.1489226669],"action_prob":0.760078311,"action_logp":-0.2743338048,"action_dist_inputs":[0.578350246,-0.5747587085],"value_targets":54.3390235901} +{"eps_id":1999147951,"obs":[0.5807970166,-0.2360751182,-0.1473736465,0.1489226669],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5760754943,-0.4288130701,-0.1443951875,0.3917245567],"action_prob":0.5940164924,"action_logp":-0.520848155,"action_dist_inputs":[0.1934536994,-0.1871409863],"value_targets":53.8778038025} +{"eps_id":1999147951,"obs":[0.5760754943,-0.4288130701,-0.1443951875,0.3917245567],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5674992204,-0.2319687158,-0.1365607083,0.0572259985],"action_prob":0.6353214979,"action_logp":-0.4536241591,"action_dist_inputs":[-0.2732343376,0.2818806469],"value_targets":53.4119224548} +{"eps_id":1999147951,"obs":[0.5674992204,-0.2319687158,-0.1365607083,0.0572259985],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5628598332,-0.4248952568,-0.1354161799,0.3038974404],"action_prob":0.6627923846,"action_logp":-0.4112934768,"action_dist_inputs":[0.3406176865,-0.3351454139],"value_targets":52.9413375854} 
+{"eps_id":1999147951,"obs":[0.5628598332,-0.4248952568,-0.1354161799,0.3038974404],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5543619394,-0.2281294614,-0.1293382347,-0.0282400306],"action_prob":0.5495230556,"action_logp":-0.5987045169,"action_dist_inputs":[-0.0952932686,0.1034505442],"value_targets":52.4659957886} +{"eps_id":1999147951,"obs":[0.5543619394,-0.2281294614,-0.1293382347,-0.0282400306],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.5497993827,-0.421182245,-0.1299030334,0.2210009098],"action_prob":0.7158020139,"action_logp":-0.334351629,"action_dist_inputs":[0.4641332626,-0.4595994353],"value_targets":51.9858551025} +{"eps_id":1999147951,"obs":[0.5497993827,-0.421182245,-0.1299030334,0.2210009098],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5413756967,-0.2244658768,-0.1254830211,-0.1096716374],"action_prob":0.4670157731,"action_logp":-0.7613922358,"action_dist_inputs":[0.0697877258,-0.0623409711],"value_targets":51.5008621216} +{"eps_id":1999147951,"obs":[0.5413756967,-0.2244658768,-0.1254830211,-0.1096716374],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.536886394,-0.4175871909,-0.1276764423,0.140937373],"action_prob":0.7560772896,"action_logp":-0.2796116769,"action_dist_inputs":[0.5674126148,-0.5638797879],"value_targets":51.0109710693} +{"eps_id":1999147951,"obs":[0.536886394,-0.4175871909,-0.1276764423,0.140937373],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5285346508,-0.6106713414,-0.1248577014,0.3907711506],"action_prob":0.6069375277,"action_logp":-0.4993294179,"action_dist_inputs":[0.2204955518,-0.2139617652],"value_targets":50.5161323547} 
+{"eps_id":1999147951,"obs":[0.5285346508,-0.6106713414,-0.1248577014,0.3907711506],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5163212419,-0.8038205504,-0.1170422807,0.6416267753],"action_prob":0.3947839737,"action_logp":-0.9294165373,"action_dist_inputs":[-0.2089904994,0.2182562202],"value_targets":50.0162963867} +{"eps_id":1999147951,"obs":[0.5163212419,-0.8038205504,-0.1170422807,0.6416267753],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.5002447963,-0.607278347,-0.1042097434,0.3144985437],"action_prob":0.7919875979,"action_logp":-0.2332095802,"action_dist_inputs":[-0.6631413102,0.6738066077],"value_targets":49.5114097595} +{"eps_id":1999147951,"obs":[0.5002447963,-0.607278347,-0.1042097434,0.3144985437],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4880992472,-0.4108382165,-0.0979197696,-0.0091453614],"action_prob":0.552498579,"action_logp":-0.5933043957,"action_dist_inputs":[-0.1010503322,0.1097207218],"value_targets":49.0014266968} +{"eps_id":1999147951,"obs":[0.4880992472,-0.4108382165,-0.0979197696,-0.0091453614],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4798824787,-0.2144582719,-0.0981026813,-0.3310466409],"action_prob":0.3075117767,"action_logp":-1.1792418957,"action_dist_inputs":[0.4082190096,-0.4035588205],"value_targets":48.486289978} +{"eps_id":1999147951,"obs":[0.4798824787,-0.2144582719,-0.0981026813,-0.3310466409],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4755933285,-0.4080568552,-0.10472361,-0.0708409846],"action_prob":0.8177686334,"action_logp":-0.201175794,"action_dist_inputs":[0.751061976,-0.7502408028],"value_targets":47.9659461975} 
+{"eps_id":1999147951,"obs":[0.4755933285,-0.4080568552,-0.10472361,-0.0708409846],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4674321711,-0.601533711,-0.1061404347,0.1870518327],"action_prob":0.7312847376,"action_logp":-0.3129523993,"action_dist_inputs":[0.5024112463,-0.4987390637],"value_targets":47.4403495789} +{"eps_id":1999147951,"obs":[0.4674321711,-0.601533711,-0.1061404347,0.1870518327],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.45540151,-0.4050659239,-0.1023993939,-0.1371381283],"action_prob":0.4222203493,"action_logp":-0.8622279763,"action_dist_inputs":[0.160301879,-0.1533633471],"value_targets":46.9094467163} +{"eps_id":1999147951,"obs":[0.45540151,-0.4050659239,-0.1023993939,-0.1371381283],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4473001957,-0.2086377144,-0.1051421538,-0.4602898657],"action_prob":0.2408685386,"action_logp":-1.4235039949,"action_dist_inputs":[0.575316906,-0.572606802],"value_targets":46.3731765747} +{"eps_id":1999147951,"obs":[0.4473001957,-0.2086377144,-0.1051421538,-0.4602898657],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.4431274235,-0.402128607,-0.1143479571,-0.2025111616],"action_prob":0.8421581984,"action_logp":-0.171787411,"action_dist_inputs":[0.8367912769,-0.8375834823],"value_targets":45.8314933777} +{"eps_id":1999147951,"obs":[0.4431274235,-0.402128607,-0.1143479571,-0.2025111616],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4350848496,-0.5954453349,-0.1183981746,0.0520250984],"action_prob":0.7854773998,"action_logp":-0.2414635867,"action_dist_inputs":[0.6497952342,-0.648081243],"value_targets":45.2843360901} 
+{"eps_id":1999147951,"obs":[0.4350848496,-0.5954453349,-0.1183981746,0.0520250984],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4231759608,-0.7886881232,-0.1173576787,0.3051321805],"action_prob":0.6865177751,"action_logp":-0.3761231601,"action_dist_inputs":[0.3942422867,-0.3896470666],"value_targets":44.7316513062} +{"eps_id":1999147951,"obs":[0.4231759608,-0.7886881232,-0.1173576787,0.3051321805],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.4074021876,-0.5921063423,-0.1112550348,-0.0221362058],"action_prob":0.4708357155,"action_logp":-0.753246069,"action_dist_inputs":[0.0623229854,-0.0544666834],"value_targets":44.1733856201} +{"eps_id":1999147951,"obs":[0.4074021876,-0.5921063423,-0.1112550348,-0.0221362058],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.395560056,-0.785471499,-0.1116977558,0.2334768027],"action_prob":0.7204129696,"action_logp":-0.3279306889,"action_dist_inputs":[0.4749789834,-0.4715320766],"value_targets":43.6094818115} +{"eps_id":1999147951,"obs":[0.395560056,-0.785471499,-0.1116977558,0.2334768027],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.379850626,-0.9788349867,-0.1070282236,0.4889436364],"action_prob":0.5840632915,"action_logp":-0.537745893,"action_dist_inputs":[0.1730895489,-0.1663868129],"value_targets":43.0398788452} +{"eps_id":1999147951,"obs":[0.379850626,-0.9788349867,-0.1070282236,0.4889436364],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3602739275,-0.7823787332,-0.097249344,0.164539203],"action_prob":0.5992926359,"action_logp":-0.5120052695,"action_dist_inputs":[-0.1964444369,0.2060740888],"value_targets":42.4645233154} 
+{"eps_id":1999147951,"obs":[0.3602739275,-0.7823787332,-0.097249344,0.164539203],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3446263671,-0.9759837389,-0.0939585641,0.4250283241],"action_prob":0.6225019097,"action_logp":-0.4740085602,"action_dist_inputs":[0.2528919578,-0.2472891212],"value_targets":41.8833580017} +{"eps_id":1999147951,"obs":[0.3446263671,-0.9759837389,-0.0939585641,0.4250283241],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3251066804,-0.7796652317,-0.0854579955,0.1042661965],"action_prob":0.5544939637,"action_logp":-0.5896993279,"action_dist_inputs":[-0.1050123274,0.1138329953],"value_targets":41.2963218689} +{"eps_id":1999147951,"obs":[0.3251066804,-0.7796652317,-0.0854579955,0.1042661965],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3095133901,-0.9734651446,-0.0833726749,0.3688111305],"action_prob":0.6535488367,"action_logp":-0.4253380001,"action_dist_inputs":[0.3196263313,-0.3150490224],"value_targets":40.7033538818} +{"eps_id":1999147951,"obs":[0.3095133901,-0.9734651446,-0.0833726749,0.3688111305],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2900440693,-1.1673096418,-0.0759964511,0.6340847015],"action_prob":0.485483855,"action_logp":-0.7226092219,"action_dist_inputs":[-0.0250590313,0.0330219194],"value_targets":40.1044006348} +{"eps_id":1999147951,"obs":[0.2900440693,-1.1673096418,-0.0759964511,0.6340847015],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2666978836,-1.3612937927,-0.0633147582,0.9018996954],"action_prob":0.2945903242,"action_logp":-1.2221696377,"action_dist_inputs":[-0.4313842654,0.4418088198],"value_targets":39.4993934631} 
+{"eps_id":1999147951,"obs":[0.2666978836,-1.3612937927,-0.0633147582,0.9018996954],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2394720167,-1.1653739214,-0.0452767611,0.5900065303],"action_prob":0.8453689814,"action_logp":-0.1679821163,"action_dist_inputs":[-0.8436812758,0.8550500274],"value_targets":38.8882751465} +{"eps_id":1999147951,"obs":[0.2394720167,-1.1653739214,-0.0452767611,0.5900065303],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2161645293,-1.3598335981,-0.0334766321,0.8680903912],"action_prob":0.2931329012,"action_logp":-1.227129221,"action_dist_inputs":[-0.4350825548,0.4451341331],"value_targets":38.2709846497} +{"eps_id":1999147951,"obs":[0.2161645293,-1.3598335981,-0.0334766321,0.8680903912],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1889678538,-1.1642725468,-0.0161148235,0.5650728345],"action_prob":0.8535504937,"action_logp":-0.1583505869,"action_dist_inputs":[-0.875784874,0.8869389892],"value_targets":37.6474609375} +{"eps_id":1999147951,"obs":[0.1889678538,-1.1642725468,-0.0161148235,0.5650728345],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1656824052,-1.3591647148,-0.0048133675,0.852635622],"action_prob":0.2757158577,"action_logp":-1.2883844376,"action_dist_inputs":[-0.4780139625,0.4877988994],"value_targets":37.0176353455} +{"eps_id":1999147951,"obs":[0.1656824052,-1.3591647148,-0.0048133675,0.852635622],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1384991109,-1.1639775038,0.0122393453,0.5584430099],"action_prob":0.8697211146,"action_logp":-0.1395826787,"action_dist_inputs":[-0.943787992,0.9547070861],"value_targets":36.3814506531} 
+{"eps_id":1999147951,"obs":[0.1384991109,-1.1639775038,0.0122393453,0.5584430099],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1152195632,-0.9690294862,0.0234082062,0.2696411908],"action_prob":0.7574661374,"action_logp":-0.2777764499,"action_dist_inputs":[-0.5646049976,0.5742324591],"value_targets":35.7388381958} +{"eps_id":1999147951,"obs":[0.1152195632,-0.9690294862,0.0234082062,0.2696411908],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0958389789,-0.7742492557,0.0288010295,-0.015567719],"action_prob":0.5593320131,"action_logp":-0.5810120106,"action_dist_inputs":[-0.1159395278,0.1225121841],"value_targets":35.0897369385} +{"eps_id":1999147951,"obs":[0.0958389789,-0.7742492557,0.0288010295,-0.015567719],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0803539902,-0.5795519352,0.0284896754,-0.2990262508],"action_prob":0.3497561514,"action_logp":-1.0505191088,"action_dist_inputs":[0.3112530708,-0.3088583052],"value_targets":34.4340782166} +{"eps_id":1999147951,"obs":[0.0803539902,-0.5795519352,0.0284896754,-0.2990262508],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0687629506,-0.3848474026,0.0225091502,-0.5825896859],"action_prob":0.2145502567,"action_logp":-1.5392112732,"action_dist_inputs":[0.6482685208,-0.6494439244],"value_targets":33.7717971802} +{"eps_id":1999147951,"obs":[0.0687629506,-0.3848474026,0.0225091502,-0.5825896859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0610660054,-0.5802773833,0.0108573567,-0.2829018533],"action_prob":0.8550212383,"action_logp":-0.1566289961,"action_dist_inputs":[0.8854566216,-0.8890820146],"value_targets":33.1028251648} 
+{"eps_id":1999147951,"obs":[0.0610660054,-0.5802773833,0.0108573567,-0.2829018533],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0494604558,-0.7755525112,0.0051993192,0.0131855467],"action_prob":0.7910482883,"action_logp":-0.2343962938,"action_dist_inputs":[0.6649193168,-0.6663360596],"value_targets":32.4270935059} +{"eps_id":1999147951,"obs":[0.0494604558,-0.7755525112,0.0051993192,0.0131855467],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0339494087,-0.5805054903,0.0054630302,-0.277852416],"action_prob":0.3426247239,"action_logp":-1.0711195469,"action_dist_inputs":[0.3267913461,-0.3248279095],"value_targets":31.7445411682} +{"eps_id":1999147951,"obs":[0.0339494087,-0.5805054903,0.0054630302,-0.277852416],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0223392975,-0.7757049203,-0.0000940181,0.0165485349],"action_prob":0.7950897813,"action_logp":-0.2293002754,"action_dist_inputs":[0.6770935655,-0.6787893176],"value_targets":31.0550918579} +{"eps_id":1999147951,"obs":[0.0223392975,-0.7757049203,-0.0000940181,0.0165485349],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0068251984,-0.9708255529,0.0002369526,0.3092018068],"action_prob":0.6621887088,"action_logp":-0.4122046828,"action_dist_inputs":[0.3373622298,-0.335701108],"value_targets":30.3586788177} +{"eps_id":1999147951,"obs":[0.0068251984,-0.9708255529,0.0002369526,0.3092018068],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0125913126,-0.7757069468,0.0064209886,0.0165936071],"action_prob":0.5699663758,"action_logp":-0.5621778965,"action_dist_inputs":[-0.1380006969,0.1437132806],"value_targets":29.6552295685} 
+{"eps_id":1999147951,"obs":[-0.0125913126,-0.7757069468,0.0064209886,0.0165936071],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0281054527,-0.9709204435,0.0067528607,0.3112955093],"action_prob":0.6574991941,"action_logp":-0.4193117619,"action_dist_inputs":[0.3267990351,-0.3253704309],"value_targets":28.9446773529} +{"eps_id":1999147951,"obs":[-0.0281054527,-0.9709204435,0.0067528607,0.3112955093],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0475238599,-0.7758952975,0.0129787708,0.0207498465],"action_prob":0.5847275257,"action_logp":-0.536609292,"action_dist_inputs":[-0.1683389693,0.1738720834],"value_targets":28.2269458771} +{"eps_id":1999147951,"obs":[-0.0475238599,-0.7758952975,0.0129787708,0.0207498465],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.063041769,-0.9712010026,0.0133937672,0.3174993098],"action_prob":0.6487139463,"action_logp":-0.4327634573,"action_dist_inputs":[0.3073269129,-0.3060638607],"value_targets":27.5019664764} +{"eps_id":1999147951,"obs":[-0.063041769,-0.9712010026,0.0133937672,0.3174993098],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0824657828,-1.1665110588,0.0197437536,0.6143758893],"action_prob":0.3946632743,"action_logp":-0.9297223687,"action_dist_inputs":[-0.2111688256,0.2165831476],"value_targets":26.7696628571} +{"eps_id":1999147951,"obs":[-0.0824657828,-1.1665110588,0.0197437536,0.6143758893],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1057960093,-0.9716705084,0.0320312716,0.3279762566],"action_prob":0.8260341287,"action_logp":-0.1911191791,"action_dist_inputs":[-0.7747462988,0.7830305696],"value_targets":26.0299625397} 
+{"eps_id":1999147951,"obs":[-0.1057960093,-0.9716705084,0.0320312716,0.3279762566],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1252294183,-0.7770189047,0.0385907963,0.0455641411],"action_prob":0.6525400281,"action_logp":-0.4268828332,"action_dist_inputs":[-0.312377274,0.3178457618],"value_targets":25.2827911377} +{"eps_id":1999147951,"obs":[-0.1252294183,-0.7770189047,0.0385907963,0.0455641411],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1407697946,-0.9726723433,0.0395020805,0.350168705],"action_prob":0.5941833258,"action_logp":-0.5205674171,"action_dist_inputs":[0.1913128495,-0.1899734437],"value_targets":24.5280704498} +{"eps_id":1999147951,"obs":[-0.1407697946,-0.9726723433,0.0395020805,0.350168705],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1602232456,-0.7781338096,0.0465054512,0.0701991171],"action_prob":0.6938971281,"action_logp":-0.3654315174,"action_dist_inputs":[-0.406429708,0.411972791],"value_targets":23.7657279968} +{"eps_id":1999147951,"obs":[-0.1602232456,-0.7781338096,0.0465054512,0.0701991171],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1757859141,-0.5837083459,0.0479094349,-0.2074560374],"action_prob":0.4459526539,"action_logp":-0.8075425029,"action_dist_inputs":[0.1092868596,-0.1077505797],"value_targets":22.9956855774} +{"eps_id":1999147951,"obs":[-0.1757859141,-0.5837083459,0.0479094349,-0.2074560374],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1874600798,-0.7794814706,0.0437603146,0.0999466777],"action_prob":0.7599968314,"action_logp":-0.2744410038,"action_dist_inputs":[0.5751335621,-0.5775284171],"value_targets":22.2178649902} 
+{"eps_id":1999147951,"obs":[-0.1874600798,-0.7794814706,0.0437603146,0.0999466777],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2030497193,-0.9752023816,0.0457592495,0.4061084092],"action_prob":0.5213564634,"action_logp":-0.6513212323,"action_dist_inputs":[0.0435930863,-0.041884806],"value_targets":21.4321861267} +{"eps_id":1999147951,"obs":[-0.2030497193,-0.9752023816,0.0457592495,0.4061084092],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2225537598,-1.1709423065,0.053881418,0.7128595114],"action_prob":0.2325862199,"action_logp":-1.4584943056,"action_dist_inputs":[-0.5940433741,0.5997217894],"value_targets":20.6385707855} +{"eps_id":1999147951,"obs":[-0.2225537598,-1.1709423065,0.053881418,0.7128595114],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2459726185,-0.9766061902,0.0681386068,0.4376117885],"action_prob":0.9032697082,"action_logp":-0.1017340869,"action_dist_inputs":[-1.1131151915,1.1209796667],"value_targets":19.8369407654} +{"eps_id":1999147951,"obs":[-0.2459726185,-0.9766061902,0.0681386068,0.4376117885],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2655047476,-0.7825115323,0.0768908411,0.1671626866],"action_prob":0.8228471279,"action_logp":-0.1949848533,"action_dist_inputs":[-0.7649771571,0.7707802057],"value_targets":19.0272140503} +{"eps_id":1999147951,"obs":[-0.2655047476,-0.7825115323,0.0768908411,0.1671626866],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2811549604,-0.5885696411,0.0802340955,-0.100307025],"action_prob":0.6363660693,"action_logp":-0.4519813061,"action_dist_inputs":[-0.278552711,0.2810736895],"value_targets":18.2093067169} 
+{"eps_id":1999147951,"obs":[-0.2811549604,-0.5885696411,0.0802340955,-0.100307025],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2929263711,-0.3946837485,0.0782279521,-0.3666366935],"action_prob":0.3660157919,"action_logp":-1.0050787926,"action_dist_inputs":[0.2739399076,-0.2754077315],"value_targets":17.3831367493} +{"eps_id":1999147951,"obs":[-0.2929263711,-0.3946837485,0.0782279521,-0.3666366935],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3008200228,-0.5908251405,0.0708952248,-0.0503488146],"action_prob":0.8204308748,"action_logp":-0.1979256123,"action_dist_inputs":[0.7572821975,-0.7619871497],"value_targets":16.5486240387} +{"eps_id":1999147951,"obs":[-0.3008200228,-0.5908251405,0.0708952248,-0.0503488146],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3126365244,-0.3967875838,0.069888249,-0.3198482692],"action_prob":0.4115976989,"action_logp":-0.8877088428,"action_dist_inputs":[0.1781524718,-0.1792120039],"value_targets":15.7056808472} +{"eps_id":1999147951,"obs":[-0.3126365244,-0.3967875838,0.069888249,-0.3198482692],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3205722868,-0.2027269453,0.0634912774,-0.5896981955],"action_prob":0.1956786811,"action_logp":-1.6312813759,"action_dist_inputs":[0.7045150399,-0.7090099454],"value_targets":14.8542232513} +{"eps_id":1999147951,"obs":[-0.3205722868,-0.2027269453,0.0634912774,-0.5896981955],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3246268332,-0.3986777663,0.0516973175,-0.2777102888],"action_prob":0.8955844045,"action_logp":-0.1102787778,"action_dist_inputs":[1.0711656809,-1.0779315233],"value_targets":13.9941644669} 
+{"eps_id":1999147951,"obs":[-0.3246268332,-0.3986777663,0.0516973175,-0.2777102888],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.332600385,-0.5944976807,0.0461431108,0.0308193695],"action_prob":0.7958370447,"action_logp":-0.2283608168,"action_dist_inputs":[0.6780674458,-0.6824088097],"value_targets":13.125418663} +{"eps_id":1999147951,"obs":[-0.332600385,-0.5944976807,0.0461431108,0.0308193695],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3444903493,-0.4000667632,0.0467594974,-0.2469552904],"action_prob":0.4775746167,"action_logp":-0.7390348911,"action_dist_inputs":[0.0446222126,-0.045139607],"value_targets":12.2478981018} +{"eps_id":1999147951,"obs":[-0.3444903493,-0.4000667632,0.0467594974,-0.2469552904],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3524916768,-0.5958242416,0.041820392,0.060102284],"action_prob":0.78020823,"action_logp":-0.2481943965,"action_dist_inputs":[0.631341517,-0.6355390549],"value_targets":11.3615131378} +{"eps_id":1999147951,"obs":[-0.3524916768,-0.5958242416,0.041820392,0.060102284],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3644081652,-0.4013260901,0.0430224389,-0.219098255],"action_prob":0.5131816268,"action_logp":-0.6671254039,"action_dist_inputs":[-0.02653604,0.0262027271],"value_targets":10.4661741257} +{"eps_id":1999147951,"obs":[-0.3644081652,-0.4013260901,0.0430224389,-0.219098255],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3724346757,-0.5970358253,0.038640473,0.0868393406],"action_prob":0.7626022696,"action_logp":-0.2710186839,"action_dist_inputs":[0.581471324,-0.5855281353],"value_targets":9.5617923737} 
+{"eps_id":1999147951,"obs":[-0.3724346757,-0.5970358253,0.038640473,0.0868393406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3843753934,-0.4024884105,0.0403772593,-0.1934064329],"action_prob":0.5475956798,"action_logp":-0.6022180915,"action_dist_inputs":[-0.095571205,0.0953896865],"value_targets":8.6482753754} +{"eps_id":1999147951,"obs":[-0.3843753934,-0.4024884105,0.0403772593,-0.1934064329],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3924251497,-0.598164022,0.0365091302,0.111735411],"action_prob":0.7426940203,"action_logp":-0.2974711359,"action_dist_inputs":[0.5280501246,-0.5319681764],"value_targets":7.7255306244} +{"eps_id":1999147951,"obs":[-0.3924251497,-0.598164022,0.0365091302,0.111735411],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4043884575,-0.7937895656,0.0387438387,0.4157095253],"action_prob":0.4192222357,"action_logp":-0.8693541288,"action_dist_inputs":[-0.1630114019,0.1629556417],"value_targets":6.7934651375} +{"eps_id":1999147951,"obs":[-0.4043884575,-0.7937895656,0.0387438387,0.4157095253],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4202642441,-0.9894385934,0.047058031,0.7203506827],"action_prob":0.1704556048,"action_logp":-1.7692804337,"action_dist_inputs":[-0.7894871831,0.7929145694],"value_targets":5.8519849777} +{"eps_id":1999147951,"obs":[-0.4202642441,-0.9894385934,0.047058031,0.7203506827],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4400530159,-0.7949981689,0.0614650436,0.4428429008],"action_prob":0.915397048,"action_logp":-0.0883973688,"action_dist_inputs":[-1.187734127,1.1936546564],"value_targets":4.9009947777} 
+{"eps_id":1999147951,"obs":[-0.4400530159,-0.7949981689,0.0614650436,0.4428429008],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4559529722,-0.6007974148,0.0703219026,0.1701511145],"action_prob":0.8588346243,"action_logp":-0.1521788836,"action_dist_inputs":[-0.9010828137,0.9045618176],"value_targets":3.9403989315} +{"eps_id":1999147951,"obs":[-0.4559529722,-0.6007974148,0.0703219026,0.1701511145],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4679689109,-0.4067487419,0.0737249255,-0.0995448306],"action_prob":0.7142754197,"action_logp":-0.3364866078,"action_dist_inputs":[-0.4579282105,0.4583123624],"value_targets":2.970099926} +{"eps_id":1999147951,"obs":[-0.4679689109,-0.4067487419,0.0737249255,-0.0995448306],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4761039019,-0.212756604,0.0717340261,-0.3680871129],"action_prob":0.4325643778,"action_logp":-0.8380241394,"action_dist_inputs":[0.1341721714,-0.1372240037],"value_targets":1.9900000095} +{"eps_id":1999147951,"obs":[-0.4761039019,-0.212756604,0.0717340261,-0.3680871129],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.4803590178,-0.4088206291,0.0643722862,-0.0536757],"action_prob":0.815384388,"action_logp":-0.204095602,"action_dist_inputs":[0.7396941781,-0.7456898093],"value_targets":1.0} +{"eps_id":729290008,"obs":[-0.0099642361,0.0458092801,0.015888473,0.0078001763],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0090480503,0.2406998128,0.016044477,-0.279827714],"action_prob":0.6006948948,"action_logp":-0.5096681714,"action_dist_inputs":[-0.2042957991,0.2040654272],"value_targets":86.6020355225} 
+{"eps_id":729290008,"obs":[-0.0090480503,0.2406998128,0.016044477,-0.279827714],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0042340541,0.4355892539,0.0104479222,-0.5674073696],"action_prob":0.2228744179,"action_logp":-1.5011467934,"action_dist_inputs":[0.6227823496,-0.6262109876],"value_targets":86.4666976929} +{"eps_id":729290008,"obs":[-0.0042340541,0.4355892539,0.0104479222,-0.5674073696],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.004477731,0.6305630803,-0.000900225,-0.856780529],"action_prob":0.088173449,"action_logp":-2.4284493923,"action_dist_inputs":[1.1650961637,-1.1710479259],"value_targets":86.3300018311} +{"eps_id":729290008,"obs":[0.004477731,0.6305630803,-0.000900225,-0.856780529],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0170889925,0.4354534149,-0.0180358365,-0.5643808246],"action_prob":0.9449185729,"action_logp":-0.0566565432,"action_dist_inputs":[1.4171204567,-1.4251652956],"value_targets":86.1919174194} +{"eps_id":729290008,"obs":[0.0170889925,0.4354534149,-0.0180358365,-0.5643808246],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0257980619,0.240589112,-0.0293234531,-0.2774341404],"action_prob":0.9197409749,"action_logp":-0.0836631879,"action_dist_inputs":[1.216409564,-1.2224234343],"value_targets":86.052444458} +{"eps_id":729290008,"obs":[0.0257980619,0.240589112,-0.0293234531,-0.2774341404],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0306098443,0.0458975062,-0.0348721333,0.0058578681],"action_prob":0.8303406835,"action_logp":-0.1859192103,"action_dist_inputs":[0.7923110127,-0.7957326174],"value_targets":85.9115600586} 
+{"eps_id":729290008,"obs":[0.0306098443,0.0458975062,-0.0348721333,0.0058578681],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0315277949,-0.1487074196,-0.0347549766,0.2873374522],"action_prob":0.5268265605,"action_logp":-0.6408839226,"action_dist_inputs":[0.0535749346,-0.0538343936],"value_targets":85.7692489624} +{"eps_id":729290008,"obs":[0.0315277949,-0.1487074196,-0.0347549766,0.2873374522],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0285536461,-0.3433169425,-0.0290082283,0.5688595772],"action_prob":0.1890949458,"action_logp":-1.6655060053,"action_dist_inputs":[-0.7264946103,0.7294071317],"value_targets":85.62550354} +{"eps_id":729290008,"obs":[0.0285536461,-0.3433169425,-0.0290082283,0.5688595772],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0216873065,-0.1478004009,-0.0176310372,0.2671808898],"action_prob":0.917399466,"action_logp":-0.0862122849,"action_dist_inputs":[-1.2009934187,1.2065331936],"value_targets":85.4803085327} +{"eps_id":729290008,"obs":[0.0216873065,-0.1478004009,-0.0176310372,0.2671808898],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0187312979,0.0475686826,-0.0122874193,-0.0310104396],"action_prob":0.8141242266,"action_logp":-0.2056423426,"action_dist_inputs":[-0.7371159792,0.7399184704],"value_targets":85.3336486816} +{"eps_id":729290008,"obs":[0.0187312979,0.0475686826,-0.0122874193,-0.0310104396],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0196826719,0.2428646684,-0.012907628,-0.327544719],"action_prob":0.4623689055,"action_logp":-0.7713922262,"action_dist_inputs":[0.0751447976,-0.0756646916],"value_targets":85.1855010986} 
+{"eps_id":729290008,"obs":[0.0196826719,0.2428646684,-0.012907628,-0.327544719],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0245399661,0.4381679893,-0.019458523,-0.6242700219],"action_prob":0.1522885263,"action_logp":-1.8819783926,"action_dist_inputs":[0.8565196395,-0.8602439165],"value_targets":85.0358581543} +{"eps_id":729290008,"obs":[0.0245399661,0.4381679893,-0.019458523,-0.6242700219],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0333033241,0.2433230281,-0.031943921,-0.3377783597],"action_prob":0.9281859994,"action_logp":-0.0745231509,"action_dist_inputs":[1.2763878107,-1.2827646732],"value_targets":84.8847045898} +{"eps_id":729290008,"obs":[0.0333033241,0.2433230281,-0.031943921,-0.3377783597],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0381697863,0.048669856,-0.0386994891,-0.0553374738],"action_prob":0.867120266,"action_logp":-0.1425776184,"action_dist_inputs":[0.9359460473,-0.9397868514],"value_targets":84.7320251465} +{"eps_id":729290008,"obs":[0.0381697863,0.048669856,-0.0386994891,-0.0553374738],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0391431823,0.2443247139,-0.0398062393,-0.3599748313],"action_prob":0.3586478233,"action_logp":-1.0254143476,"action_dist_inputs":[0.2902419865,-0.2909958363],"value_targets":84.5778045654} +{"eps_id":729290008,"obs":[0.0391431823,0.2443247139,-0.0398062393,-0.3599748313],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0440296754,0.0497905537,-0.0470057353,-0.0801046863],"action_prob":0.8808152676,"action_logp":-0.1269073635,"action_dist_inputs":[0.9980680346,-1.0021051168],"value_targets":84.4220275879} 
+{"eps_id":729290008,"obs":[0.0440296754,0.0497905537,-0.0470057353,-0.0801046863],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0450254865,-0.144627139,-0.04860783,0.1973851323],"action_prob":0.6948127747,"action_logp":-0.364112854,"action_dist_inputs":[0.4108637273,-0.4118531048],"value_targets":84.2646713257} +{"eps_id":729290008,"obs":[0.0450254865,-0.144627139,-0.04860783,0.1973851323],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0421329439,0.0511551462,-0.0446601287,-0.110226512],"action_prob":0.6883558631,"action_logp":-0.3734493554,"action_dist_inputs":[-0.3950969875,0.3973468542],"value_targets":84.1057281494} +{"eps_id":729290008,"obs":[0.0421329439,0.0511551462,-0.0446601287,-0.110226512],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0431560464,-0.1432993263,-0.0468646586,0.1680386662],"action_prob":0.7294589877,"action_logp":-0.3154520988,"action_dist_inputs":[0.4953069091,-0.4965725541],"value_targets":83.9451828003} +{"eps_id":729290008,"obs":[0.0431560464,-0.1432993263,-0.0468646586,0.1680386662],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0402900614,0.0524610169,-0.0435038842,-0.1390526295],"action_prob":0.6473959088,"action_logp":-0.4347972572,"action_dist_inputs":[-0.302798152,0.3048141301],"value_targets":83.7830123901} +{"eps_id":729290008,"obs":[0.0402900614,0.0524610169,-0.0435038842,-0.1390526295],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0413392819,-0.142011717,-0.0462849364,0.1395943761],"action_prob":0.7599560618,"action_logp":-0.274494648,"action_dist_inputs":[0.5754469037,-0.5769917965],"value_targets":83.6192016602} 
+{"eps_id":729290008,"obs":[0.0413392819,-0.142011717,-0.0462849364,0.1395943761],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0384990461,0.053741537,-0.0434930474,-0.1673239619],"action_prob":0.6018096209,"action_logp":-0.5078141093,"action_dist_inputs":[-0.2056189328,0.2073919624],"value_targets":83.453742981} +{"eps_id":729290008,"obs":[0.0384990461,0.053741537,-0.0434930474,-0.1673239619],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.039573878,-0.140731737,-0.0468395278,0.1113271564],"action_prob":0.7869938016,"action_logp":-0.2395348847,"action_dist_inputs":[0.6525328159,-0.6543665528],"value_targets":83.286605835} +{"eps_id":729290008,"obs":[0.039573878,-0.140731737,-0.0468395278,0.1113271564],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0367592424,-0.3351522982,-0.0446129851,0.3888723552],"action_prob":0.4486474097,"action_logp":-0.8015179634,"action_dist_inputs":[-0.102312535,0.1038246602],"value_targets":83.1177825928} +{"eps_id":729290008,"obs":[0.0367592424,-0.3351522982,-0.0446129851,0.3888723552],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0300561972,-0.13942644,-0.0368355364,0.0824638382],"action_prob":0.8426946402,"action_logp":-0.1711506248,"action_dist_inputs":[-0.8370455503,0.8413703442],"value_targets":82.9472579956} +{"eps_id":729290008,"obs":[0.0300561972,-0.13942644,-0.0368355364,0.0824638382],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0272676684,-0.3340015411,-0.0351862609,0.3633012772],"action_prob":0.4753117561,"action_logp":-0.743784368,"action_dist_inputs":[-0.048794549,0.0500387177],"value_targets":82.7750091553} 
+{"eps_id":729290008,"obs":[0.0272676684,-0.3340015411,-0.0351862609,0.3633012772],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.020587638,-0.1383976191,-0.0279202349,0.0597344004],"action_prob":0.8342831135,"action_logp":-0.1811825037,"action_dist_inputs":[-0.806071341,0.8102202415],"value_targets":82.601020813} +{"eps_id":729290008,"obs":[0.020587638,-0.1383976191,-0.0279202349,0.0597344004],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0178196859,0.0571132936,-0.0267255474,-0.24162516],"action_prob":0.5062485337,"action_logp":-0.6807276011,"action_dist_inputs":[-0.0119892769,0.0130060045],"value_targets":82.4252700806} +{"eps_id":729290008,"obs":[0.0178196859,0.0571132936,-0.0267255474,-0.24162516],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0189619511,-0.1376169026,-0.0315580517,0.0425094552],"action_prob":0.8257846832,"action_logp":-0.1914212406,"action_dist_inputs":[0.776724875,-0.779317081],"value_targets":82.2477493286} +{"eps_id":729290008,"obs":[0.0189619511,-0.1376169026,-0.0315580517,0.0425094552],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0162096135,-0.3322724402,-0.0307078604,0.325070858],"action_prob":0.5327751637,"action_logp":-0.6296557784,"action_dist_inputs":[0.0660464317,-0.065242514],"value_targets":82.0684280396} +{"eps_id":729290008,"obs":[0.0162096135,-0.3322724402,-0.0307078604,0.325070858],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0095641641,-0.1367270201,-0.0242064446,0.0228643212],"action_prob":0.8086897135,"action_logp":-0.2123399675,"action_dist_inputs":[-0.7188379169,0.7226808071],"value_targets":81.8873062134} 
+{"eps_id":729290008,"obs":[0.0095641641,-0.1367270201,-0.0242064446,0.0228643212],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0068296236,-0.3314936161,-0.0237491578,0.3078126311],"action_prob":0.5490944982,"action_logp":-0.5994846821,"action_dist_inputs":[0.0987983495,-0.0982145071],"value_targets":81.7043457031} +{"eps_id":729290008,"obs":[0.0068296236,-0.3314936161,-0.0237491578,0.3078126311],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0001997513,-0.1360414624,-0.0175929051,0.0077354633],"action_prob":0.8020724058,"action_logp":-0.2205564231,"action_dist_inputs":[-0.6978026032,0.7014947534],"value_targets":81.5195465088} +{"eps_id":729290008,"obs":[0.0001997513,-0.1360414624,-0.0175929051,0.0077354633],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0025210779,0.0593283214,-0.0174381956,-0.290445894],"action_prob":0.440831095,"action_logp":-0.8190934658,"action_dist_inputs":[0.1190947443,-0.1186950803],"value_targets":81.3328704834} +{"eps_id":729290008,"obs":[-0.0025210779,0.0593283214,-0.0174381956,-0.290445894],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0013345113,-0.135540694,-0.0232471135,-0.0033134001],"action_prob":0.8468027115,"action_logp":-0.1662875414,"action_dist_inputs":[0.8532877564,-0.8564535975],"value_targets":81.144317627} +{"eps_id":729290008,"obs":[-0.0013345113,-0.135540694,-0.0232471135,-0.0033134001],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0040453253,0.0599068105,-0.0233133826,-0.3032394648],"action_prob":0.4095884264,"action_logp":-0.8926024437,"action_dist_inputs":[0.1829412282,-0.1827258021],"value_targets":80.9538574219} 
+{"eps_id":729290008,"obs":[-0.0040453253,0.0599068105,-0.0233133826,-0.3032394648],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0028471889,-0.1348752528,-0.029378172,-0.0179992765],"action_prob":0.8570227623,"action_logp":-0.1542908251,"action_dist_inputs":[0.893707633,-0.8970713615],"value_targets":80.76146698} +{"eps_id":729290008,"obs":[-0.0028471889,-0.1348752528,-0.029378172,-0.0179992765],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0055446941,-0.3295638561,-0.0297381561,0.2652716339],"action_prob":0.6268348098,"action_logp":-0.4670722485,"action_dist_inputs":[0.2593220174,-0.259339869],"value_targets":80.5671386719} +{"eps_id":729290008,"obs":[-0.0055446941,-0.3295638561,-0.0297381561,0.2652716339],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0121359713,-0.1340303421,-0.0244327243,-0.0366406478],"action_prob":0.7483295202,"action_logp":-0.2899118364,"action_dist_inputs":[-0.5432342887,0.5464886427],"value_targets":80.3708496094} +{"eps_id":729290008,"obs":[-0.0121359713,-0.1340303421,-0.0244327243,-0.0366406478],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0148165775,0.0614333004,-0.0251655374,-0.3369311094],"action_prob":0.3556132317,"action_logp":-1.0339115858,"action_dist_inputs":[0.2970966399,-0.2973588407],"value_targets":80.1725769043} +{"eps_id":729290008,"obs":[-0.0148165775,0.0614333004,-0.0251655374,-0.3369311094],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0135879116,-0.1333216578,-0.031904161,-0.052289065],"action_prob":0.8719281554,"action_logp":-0.1370482594,"action_dist_inputs":[0.9571565986,-0.9609586596],"value_targets":79.9722976685} 
+{"eps_id":729290008,"obs":[-0.0135879116,-0.1333216578,-0.031904161,-0.052289065],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.016254345,-0.3279719651,-0.0329499394,0.2301595658],"action_prob":0.6804933548,"action_logp":-0.3849372566,"action_dist_inputs":[0.377753973,-0.3782858253],"value_targets":79.7699966431} +{"eps_id":729290008,"obs":[-0.016254345,-0.3279719651,-0.0329499394,0.2301595658],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.022813784,-0.5226079226,-0.028346749,0.5122695565],"action_prob":0.3029740453,"action_logp":-1.1941081285,"action_dist_inputs":[-0.4151500762,0.4180254936],"value_targets":79.5656509399} +{"eps_id":729290008,"obs":[-0.022813784,-0.5226079226,-0.028346749,0.5122695565],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0332659446,-0.7173193693,-0.0181013588,0.7958865762],"action_prob":0.1098239273,"action_logp":-2.2088768482,"action_dist_inputs":[-1.043582201,1.04895854],"value_targets":79.3592453003} +{"eps_id":729290008,"obs":[-0.0332659446,-0.7173193693,-0.0181013588,0.7958865762],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0476123318,-0.5219537616,-0.0021836266,0.4975646138],"action_prob":0.9402947426,"action_logp":-0.0615619235,"action_dist_inputs":[-1.3747000694,1.3820725679],"value_targets":79.1507568359} +{"eps_id":729290008,"obs":[-0.0476123318,-0.5219537616,-0.0021836266,0.4975646138],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0580514073,-0.3268010914,0.0077676657,0.2041943222],"action_prob":0.8985615969,"action_logp":-0.1069600284,"action_dist_inputs":[-1.0880334377,1.0933095217],"value_targets":78.9401550293} 
+{"eps_id":729290008,"obs":[-0.0580514073,-0.3268010914,0.0077676657,0.2041943222],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0645874292,-0.131791085,0.0118515519,-0.0860281959],"action_prob":0.7386944294,"action_logp":-0.3028709292,"action_dist_inputs":[-0.5183138847,0.5208798647],"value_targets":78.727432251} +{"eps_id":729290008,"obs":[-0.0645874292,-0.131791085,0.0118515519,-0.0860281959],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0672232509,-0.3270809054,0.0101309884,0.2103702277],"action_prob":0.6417557597,"action_logp":-0.4435474575,"action_dist_inputs":[0.2910240889,-0.2919688821],"value_targets":78.5125579834} +{"eps_id":729290008,"obs":[-0.0672232509,-0.3270809054,0.0101309884,0.2103702277],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0737648681,-0.5223462582,0.0143383928,0.5062316656],"action_prob":0.2496716082,"action_logp":-1.3876087666,"action_dist_inputs":[-0.5489144325,0.5514499545],"value_targets":78.2955093384} +{"eps_id":729290008,"obs":[-0.0737648681,-0.5223462582,0.0143383928,0.5062316656],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0842117965,-0.327429235,0.0244630259,0.2181015462],"action_prob":0.9078150988,"action_logp":-0.096714586,"action_dist_inputs":[-1.1410044432,1.1462401152],"value_targets":78.0762710571} +{"eps_id":729290008,"obs":[-0.0842117965,-0.327429235,0.0244630259,0.2181015462],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.09076038,-0.1326653659,0.0288250577,-0.0667654276],"action_prob":0.7807427049,"action_logp":-0.2475096285,"action_dist_inputs":[-0.6337369084,0.6362627149],"value_targets":77.8548202515} 
+{"eps_id":729290008,"obs":[-0.09076038,-0.1326653659,0.0288250577,-0.0667654276],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0934136882,0.0620317124,0.0274897479,-0.3502163291],"action_prob":0.4277498424,"action_logp":-0.8492167592,"action_dist_inputs":[0.1451021582,-0.1459355354],"value_targets":77.6311340332} +{"eps_id":729290008,"obs":[-0.0934136882,0.0620317124,0.0274897479,-0.3502163291],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0921730474,-0.1334701926,0.0204854216,-0.0489933081],"action_prob":0.8465148807,"action_logp":-0.1666274667,"action_dist_inputs":[0.8516976237,-0.8558269143],"value_targets":77.4051818848} +{"eps_id":729290008,"obs":[-0.0921730474,-0.1334701926,0.0204854216,-0.0489933081],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0948424563,0.0613521226,0.0195055548,-0.3351431489],"action_prob":0.4388379157,"action_logp":-0.8236251473,"action_dist_inputs":[0.122565046,-0.1233145297],"value_targets":77.1769561768} +{"eps_id":729290008,"obs":[-0.0948424563,0.0613521226,0.0195055548,-0.3351431489],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0936154127,-0.1340419203,0.0128026921,-0.0363735855],"action_prob":0.8448029757,"action_logp":-0.168651849,"action_dist_inputs":[0.8451586962,-0.8492496014],"value_targets":76.9464187622} +{"eps_id":729290008,"obs":[-0.0936154127,-0.1340419203,0.0128026921,-0.0363735855],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0962962508,0.0608941205,0.0120752202,-0.3249897957],"action_prob":0.4427470267,"action_logp":-0.8147567511,"action_dist_inputs":[0.1146547347,-0.1153661013],"value_targets":76.7135543823} 
+{"eps_id":729290008,"obs":[-0.0962962508,0.0608941205,0.0120752202,-0.3249897957],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0950783715,-0.1343976557,0.005575425,-0.0285234451],"action_prob":0.8452848792,"action_logp":-0.1680815667,"action_dist_inputs":[0.8469999433,-0.8510884047],"value_targets":76.4783401489} +{"eps_id":729290008,"obs":[-0.0950783715,-0.1343976557,0.005575425,-0.0285234451],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0977663174,-0.3295991123,0.0050049559,0.2659133673],"action_prob":0.5604555011,"action_logp":-0.5790054798,"action_dist_inputs":[0.1211472452,-0.1218636185],"value_targets":76.2407455444} +{"eps_id":729290008,"obs":[-0.0977663174,-0.3295991123,0.0050049559,0.2659133673],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1043583006,-0.1345489621,0.0103232227,-0.0251867659],"action_prob":0.8001528978,"action_logp":-0.2229524106,"action_dist_inputs":[-0.69233495,0.6949152946],"value_targets":76.0007553101} +{"eps_id":729290008,"obs":[-0.1043583006,-0.1345489621,0.0103232227,-0.0251867659],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1070492864,-0.3298174143,0.009819488,0.2707352936],"action_prob":0.5427635908,"action_logp":-0.611081481,"action_dist_inputs":[0.0853707343,-0.086102441],"value_targets":75.7583389282} +{"eps_id":729290008,"obs":[-0.1070492864,-0.3298174143,0.009819488,0.2707352936],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1136456281,-0.1348369569,0.0152341938,-0.0188344158],"action_prob":0.8098294139,"action_logp":-0.2109316885,"action_dist_inputs":[-0.723172307,0.7257294655],"value_targets":75.5134735107} 
+{"eps_id":729290008,"obs":[-0.1136456281,-0.1348369569,0.0152341938,-0.0188344158],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1163423732,-0.3301740289,0.0148575054,0.2786158919],"action_prob":0.5191994309,"action_logp":-0.6554671526,"action_dist_inputs":[0.0380585715,-0.0387769565],"value_targets":75.26612854} +{"eps_id":729290008,"obs":[-0.1163423732,-0.3301740289,0.0148575054,0.2786158919],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1229458526,-0.5255047679,0.0204298235,0.5759475827],"action_prob":0.178688094,"action_logp":-1.7221134901,"action_dist_inputs":[-0.761351645,0.7639095187],"value_targets":75.0162963867} +{"eps_id":729290008,"obs":[-0.1229458526,-0.5255047679,0.0204298235,0.5759475827],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.133455947,-0.3306750655,0.0319487751,0.2897699475],"action_prob":0.9217243791,"action_logp":-0.0815090463,"action_dist_inputs":[-1.230329752,1.2356802225],"value_targets":74.7639312744} +{"eps_id":729290008,"obs":[-0.133455947,-0.3306750655,0.0319487751,0.2897699475],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1400694549,-0.1360228956,0.0377441719,0.00733206],"action_prob":0.8444324732,"action_logp":-0.1690905392,"action_dist_inputs":[-0.8444899917,0.8470945358],"value_targets":74.5090255737} +{"eps_id":729290008,"obs":[-0.1400694549,-0.1360228956,0.0377441719,0.00733206],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1427899003,0.0585379861,0.0378908142,-0.2732072771],"action_prob":0.581394136,"action_logp":-0.5423263311,"action_dist_inputs":[-0.16452609,0.1639731228],"value_targets":74.2515411377} 
+{"eps_id":729290008,"obs":[-0.1427899003,0.0585379861,0.0378908142,-0.2732072771],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1416191459,-0.1371035576,0.0324266702,0.0311816409],"action_prob":0.7778615355,"action_logp":-0.2512067556,"action_dist_inputs":[0.6247133613,-0.628534317],"value_targets":73.9914550781} +{"eps_id":729290008,"obs":[-0.1416191459,-0.1371035576,0.0324266702,0.0311816409],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1443612128,0.0575387403,0.0330503024,-0.2510965466],"action_prob":0.6090558767,"action_logp":-0.495845288,"action_dist_inputs":[-0.2218763977,0.2214689106],"value_targets":73.7287445068} +{"eps_id":729290008,"obs":[-0.1443612128,0.0575387403,0.0330503024,-0.2510965466],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1432104409,-0.1380392015,0.0280283708,0.0518251732],"action_prob":0.7620982528,"action_logp":-0.2716798186,"action_dist_inputs":[0.580264926,-0.5839528441],"value_targets":73.4633712769} +{"eps_id":729290008,"obs":[-0.1432104409,-0.1380392015,0.0280283708,0.0518251732],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1459712237,-0.3335515857,0.0290648751,0.3532178402],"action_prob":0.3675045967,"action_logp":-1.0010194778,"action_dist_inputs":[-0.2716115117,0.2713257372],"value_targets":73.1953277588} +{"eps_id":729290008,"obs":[-0.1459712237,-0.3335515857,0.0290648751,0.3532178402],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.152642265,-0.1388547421,0.0361292325,0.0698397756],"action_prob":0.8730247021,"action_logp":-0.1357914358,"action_dist_inputs":[-0.9625176787,0.9654531479],"value_targets":72.9245758057} 
+{"eps_id":729290008,"obs":[-0.152642265,-0.1388547421,0.0361292325,0.0698397756],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1554193497,0.055731114,0.0375260264,-0.2112288475],"action_prob":0.675650239,"action_logp":-0.392079711,"action_dist_inputs":[-0.3670115471,0.3668415248],"value_targets":72.6510848999} +{"eps_id":729290008,"obs":[-0.1554193497,0.055731114,0.0375260264,-0.2112288475],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1543047279,-0.1399067342,0.0333014503,0.0930513889],"action_prob":0.705665946,"action_logp":-0.3486133516,"action_dist_inputs":[0.4355074763,-0.4389190972],"value_targets":72.3748321533} +{"eps_id":729290008,"obs":[-0.1543047279,-0.1399067342,0.0333014503,0.0930513889],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.157102868,-0.3354897797,0.0351624787,0.3960521221],"action_prob":0.298279494,"action_logp":-1.2097243071,"action_dist_inputs":[-0.4277624786,0.4277416468],"value_targets":72.0957946777} +{"eps_id":729290008,"obs":[-0.157102868,-0.3354897797,0.0351624787,0.3960521221],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1638126671,-0.5310925245,0.0430835187,0.6996105313],"action_prob":0.10990154,"action_logp":-2.208170414,"action_dist_inputs":[-1.0442581177,1.0474890471],"value_targets":71.8139266968} +{"eps_id":729290008,"obs":[-0.1638126671,-0.5310925245,0.0430835187,0.6996105313],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1744345129,-0.3365935087,0.0570757315,0.4207953811],"action_prob":0.9358240366,"action_logp":-0.0663277954,"action_dist_inputs":[-1.3368109465,1.3429875374],"value_targets":71.5292205811} 
+{"eps_id":729290008,"obs":[-0.1744345129,-0.3365935087,0.0570757315,0.4207953811],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1811663806,-0.1423247457,0.0654916391,0.1466378719],"action_prob":0.9032681584,"action_logp":-0.101735808,"action_dist_inputs":[-1.1152846813,1.1187919378],"value_targets":71.2416381836} +{"eps_id":729290008,"obs":[-0.1811663806,-0.1423247457,0.0654916391,0.1466378719],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.184012875,0.0518011786,0.0684243962,-0.1246859804],"action_prob":0.7997140884,"action_logp":-0.2235010266,"action_dist_inputs":[-0.692042172,0.6924660802],"value_targets":70.9511489868} +{"eps_id":729290008,"obs":[-0.184012875,0.0518011786,0.0684243962,-0.1246859804],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1829768568,0.2458795458,0.065930672,-0.3950212002],"action_prob":0.5026191473,"action_logp":-0.6879225373,"action_dist_inputs":[-0.0066004619,0.0038763038],"value_targets":70.6577301025} +{"eps_id":729290008,"obs":[-0.1829768568,0.2458795458,0.065930672,-0.3950212002],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.178059265,0.0498870611,0.0580302514,-0.0823016465],"action_prob":0.8116219044,"action_logp":-0.2087207139,"action_dist_inputs":[0.7275664806,-0.7330169678],"value_targets":70.3613433838} +{"eps_id":729290008,"obs":[-0.178059265,0.0498870611,0.0580302514,-0.0823016465],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.177061528,0.2441312075,0.056384217,-0.3561260998],"action_prob":0.5501976609,"action_logp":-0.5974777341,"action_dist_inputs":[-0.1019345373,0.0995347574],"value_targets":70.061958313} 
+{"eps_id":729290008,"obs":[-0.177061528,0.2441312075,0.056384217,-0.3561260998],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1721788943,0.0482548326,0.0492616966,-0.0462102778],"action_prob":0.7888884544,"action_logp":-0.2371303737,"action_dist_inputs":[0.6565032601,-0.6617350578],"value_targets":69.7595596313} +{"eps_id":729290008,"obs":[-0.1721788943,0.0482548326,0.0492616966,-0.0462102778],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1712138057,0.2426370531,0.0483374894,-0.3229528368],"action_prob":0.5894253254,"action_logp":-0.5286072493,"action_dist_inputs":[-0.1818487644,0.1797414273],"value_targets":69.4540939331} +{"eps_id":729290008,"obs":[-0.1712138057,0.2426370531,0.0483374894,-0.3229528368],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1663610637,0.046861317,0.0418784358,-0.0154265296],"action_prob":0.7665637136,"action_logp":-0.2658374608,"action_dist_inputs":[0.5919883847,-0.5970203876],"value_targets":69.1455535889} +{"eps_id":729290008,"obs":[-0.1663610637,0.046861317,0.0418784358,-0.0154265296],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1654238254,0.2413584441,0.0415699035,-0.2946077585],"action_prob":0.6214162707,"action_logp":-0.4757540524,"action_dist_inputs":[-0.2487033904,0.2468606085],"value_targets":68.8338928223} +{"eps_id":729290008,"obs":[-0.1654238254,0.2413584441,0.0415699035,-0.2946077585],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1605966687,0.0456692465,0.0356777497,0.0108904773],"action_prob":0.7451785803,"action_logp":-0.2941313684,"action_dist_inputs":[0.5341060758,-0.5389549732],"value_targets":68.5190811157} 
+{"eps_id":729290008,"obs":[-0.1605966687,0.0456692465,0.0356777497,0.0108904773],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1596832722,-0.149945721,0.0358955562,0.3146132529],"action_prob":0.3525192738,"action_logp":-1.0426499844,"action_dist_inputs":[-0.304793179,0.3031906188],"value_targets":68.2010955811} +{"eps_id":729290008,"obs":[-0.1596832722,-0.149945721,0.0358955562,0.3146132529],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1626821905,0.0446470007,0.0421878211,0.0334632061],"action_prob":0.8676265478,"action_logp":-0.141993925,"action_dist_inputs":[-0.9391227961,0.9410116076],"value_targets":67.8798904419} +{"eps_id":729290008,"obs":[-0.1626821905,0.0446470007,0.0421878211,0.0334632061],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1617892534,-0.1510537565,0.0428570881,0.3391526937],"action_prob":0.311250627,"action_logp":-1.1671568155,"action_dist_inputs":[-0.3978163004,0.3964627385],"value_targets":67.5554504395} +{"eps_id":729290008,"obs":[-0.1617892534,-0.1510537565,0.0428570881,0.3391526937],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1648103297,0.0434330069,0.0496401414,0.060286548],"action_prob":0.8782598972,"action_logp":-0.1298127323,"action_dist_inputs":[-0.9869360328,0.989117682],"value_targets":67.227722168} +{"eps_id":729290008,"obs":[-0.1648103297,0.0434330069,0.0496401414,0.060286548],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1639416665,0.2378093749,0.0508458726,-0.2163306326],"action_prob":0.7303496599,"action_logp":-0.3142318428,"action_dist_inputs":[-0.4987196028,0.497677803],"value_targets":66.8966903687} 
+{"eps_id":729290008,"obs":[-0.1639416665,0.2378093749,0.0508458726,-0.2163306326],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1591854841,0.4321689606,0.0465192571,-0.4925509989],"action_prob":0.3845143914,"action_logp":-0.9557740688,"action_dist_inputs":[0.2330916077,-0.2373387218],"value_targets":66.5623168945} +{"eps_id":729290008,"obs":[-0.1591854841,0.4321689606,0.0465192571,-0.4925509989],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1505420953,0.2364227921,0.0366682373,-0.1855776608],"action_prob":0.8635457158,"action_logp":-0.1467084736,"action_dist_inputs":[0.9192508459,-0.9258064032],"value_targets":66.2245635986} +{"eps_id":729290008,"obs":[-0.1505420953,0.2364227921,0.0366682373,-0.1855776608],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1458136439,0.0407959111,0.0329566859,0.1184434593],"action_prob":0.5977417231,"action_logp":-0.5145965219,"action_dist_inputs":[0.1960345507,-0.2000299394],"value_targets":65.883392334} +{"eps_id":729290008,"obs":[-0.1458136439,0.0407959111,0.0329566859,0.1184434593],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1449977309,0.2354305387,0.0353255533,-0.1636624485],"action_prob":0.7649198771,"action_logp":-0.2679841518,"action_dist_inputs":[-0.5901372433,0.589707613],"value_targets":65.5387802124} +{"eps_id":729290008,"obs":[-0.1449977309,0.2354305387,0.0353255533,-0.1636624485],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1402891129,0.0398211703,0.0320523046,0.1399521083],"action_prob":0.5648267865,"action_logp":-0.5712361336,"action_dist_inputs":[0.1284990013,-0.1322762221],"value_targets":65.1906890869} 
+{"eps_id":729290008,"obs":[-0.1402891129,0.0398211703,0.0320523046,0.1399521083],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1394926906,0.2344697416,0.0348513462,-0.1424490213],"action_prob":0.7817189097,"action_logp":-0.2462600172,"action_dist_inputs":[-0.6379350424,0.6377767324],"value_targets":64.8390808105} +{"eps_id":729290008,"obs":[-0.1394926906,0.2344697416,0.0348513462,-0.1424490213],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1348032951,0.0388664454,0.0320023671,0.1610219777],"action_prob":0.5307027102,"action_logp":-0.6335532665,"action_dist_inputs":[0.0597093292,-0.0632561669],"value_targets":64.4839172363} +{"eps_id":729290008,"obs":[-0.1348032951,0.0388664454,0.0320023671,0.1610219777],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1340259761,0.2335159779,0.035222806,-0.121395722],"action_prob":0.797254622,"action_logp":-0.2265811563,"action_dist_inputs":[-0.6845515966,0.6846718788],"value_targets":64.1251678467} +{"eps_id":729290008,"obs":[-0.1340259761,0.2335159779,0.035222806,-0.121395722],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1293556541,0.037907552,0.0327948928,0.1821882874],"action_prob":0.4950544834,"action_logp":-0.7030874491,"action_dist_inputs":[-0.0115405107,0.0082421331],"value_targets":63.7627983093} +{"eps_id":729290008,"obs":[-0.1293556541,0.037907552,0.0327948928,0.1821882874],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1285974979,0.2325452715,0.0364386588,-0.0999713242],"action_prob":0.8118222356,"action_logp":-0.2084738761,"action_dist_inputs":[-0.7307419181,0.7311527133],"value_targets":63.3967666626} 
+{"eps_id":729290008,"obs":[-0.1285974979,0.2325452715,0.0364386588,-0.0999713242],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1239465922,0.4271265566,0.0344392322,-0.3809389174],"action_prob":0.542307198,"action_logp":-0.6119226813,"action_dist_inputs":[-0.0863297582,0.0833046287],"value_targets":63.0270347595} +{"eps_id":729290008,"obs":[-0.1239465922,0.4271265566,0.0344392322,-0.3809389174],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.115404062,0.2315329313,0.0268204529,-0.0775993168],"action_prob":0.7928469777,"action_logp":-0.2321250439,"action_dist_inputs":[0.6681653261,-0.6740071177],"value_targets":62.6535720825} +{"eps_id":729290008,"obs":[-0.115404062,0.2315329313,0.0268204529,-0.0775993168],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1107734069,0.4262603223,0.0252684671,-0.3617011011],"action_prob":0.5570992827,"action_logp":-0.5850118399,"action_dist_inputs":[-0.1160806865,0.1133170798],"value_targets":62.2763366699} +{"eps_id":729290008,"obs":[-0.1107734069,0.4262603223,0.0252684671,-0.3617011011],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1022481993,0.6210141778,0.0180344451,-0.6463104486],"action_prob":0.2127073109,"action_logp":-1.5478382111,"action_dist_inputs":[0.6515004039,-0.6571824551],"value_targets":61.8952865601} +{"eps_id":729290008,"obs":[-0.1022481993,0.6210141778,0.0180344451,-0.6463104486],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0898279101,0.4256456494,0.0051082354,-0.3480034769],"action_prob":0.9171537161,"action_logp":-0.0864801854,"action_dist_inputs":[1.1983855963,-1.2059031725],"value_targets":61.5103912354} 
+{"eps_id":729290008,"obs":[-0.0898279101,0.4256456494,0.0051082354,-0.3480034769],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0813150033,0.230451405,-0.0018518341,-0.0537141412],"action_prob":0.8028511405,"action_logp":-0.2195859402,"action_dist_inputs":[0.69933635,-0.7048737407],"value_targets":61.1216087341} +{"eps_id":729290008,"obs":[-0.0813150033,0.230451405,-0.0018518341,-0.0537141412],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0767059699,0.0353560559,-0.0029261168,0.2383839488],"action_prob":0.4673415124,"action_logp":-0.7606949806,"action_dist_inputs":[-0.0666422397,0.0641780347],"value_targets":60.7288970947} +{"eps_id":729290008,"obs":[-0.0767059699,0.0353560559,-0.0029261168,0.2383839488],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0759988502,-0.1597239673,0.001841562,0.5301424265],"action_prob":0.1803165674,"action_logp":-1.7130413055,"action_dist_inputs":[-0.7565276027,0.7576767206],"value_targets":60.3322181702} +{"eps_id":729290008,"obs":[-0.0759988502,-0.1597239673,0.001841562,0.5301424265],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0791933313,0.0353720263,0.0124444114,0.2380403727],"action_prob":0.908413291,"action_logp":-0.0960558504,"action_dist_inputs":[-1.1450725794,1.1493405104],"value_targets":59.9315338135} +{"eps_id":729290008,"obs":[-0.0791933313,0.0353720263,0.0124444114,0.2380403727],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0784858912,0.2303140014,0.0172052179,-0.0506913885],"action_prob":0.8312113881,"action_logp":-0.1848711073,"action_dist_inputs":[-0.7964693904,0.7977679968],"value_targets":59.526802063} 
+{"eps_id":729290008,"obs":[-0.0784858912,0.2303140014,0.0172052179,-0.0506913885],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.073879607,0.4251850843,0.0161913913,-0.3378966451],"action_prob":0.578749001,"action_logp":-0.5468863845,"action_dist_inputs":[-0.1599199474,0.1577201188],"value_targets":59.117980957} +{"eps_id":729290008,"obs":[-0.073879607,0.4251850843,0.0161913913,-0.3378966451],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0653759092,0.2298365086,0.0094334576,-0.0401521474],"action_prob":0.7780124545,"action_logp":-0.2510127425,"action_dist_inputs":[0.6244246364,-0.6296967268],"value_targets":58.7050323486} +{"eps_id":729290008,"obs":[-0.0653759092,0.2298365086,0.0094334576,-0.0401521474],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0607791804,0.0345805623,0.0086304145,0.255492121],"action_prob":0.4213181138,"action_logp":-0.8643671274,"action_dist_inputs":[-0.159699738,0.1576651037],"value_targets":58.2879104614} +{"eps_id":729290008,"obs":[-0.0607791804,0.0345805623,0.0086304145,0.255492121],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0600875691,0.2295782268,0.0137402574,-0.0344561674],"action_prob":0.8386337161,"action_logp":-0.1759812683,"action_dist_inputs":[-0.8232334256,0.8248639107],"value_targets":57.8665771484} +{"eps_id":729290008,"obs":[-0.0600875691,0.2295782268,0.0137402574,-0.0344561674],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0554960035,0.034261968,0.0130511336,0.2625300884],"action_prob":0.4036249816,"action_logp":-0.9072691202,"action_dist_inputs":[-0.1961260289,0.1942574829],"value_targets":57.4409866333} 
+{"eps_id":729290008,"obs":[-0.0554960035,0.034261968,0.0130511336,0.2625300884],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0548107624,0.2291952074,0.0183017366,-0.0260079596],"action_prob":0.8451824188,"action_logp":-0.1682027876,"action_dist_inputs":[-0.8477473855,0.8495575786],"value_targets":57.0110969543} +{"eps_id":729290008,"obs":[-0.0548107624,0.2291952074,0.0183017366,-0.0260079596],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0502268597,0.4240499735,0.0177815761,-0.3128607571],"action_prob":0.6180712581,"action_logp":-0.4811514914,"action_dist_inputs":[-0.2415178716,0.2398519516],"value_targets":56.5768661499} +{"eps_id":729290008,"obs":[-0.0502268597,0.4240499735,0.0177815761,-0.3128607571],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0417458601,0.2286792994,0.0115243616,-0.0146235395],"action_prob":0.7484966516,"action_logp":-0.2896885276,"action_dist_inputs":[0.5428574681,-0.5477529168],"value_targets":56.1382484436} +{"eps_id":729290008,"obs":[-0.0417458601,0.2286792994,0.0115243616,-0.0146235395],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0371722728,0.0333939828,0.0112318909,0.2816730738],"action_prob":0.3787301183,"action_logp":-0.9709314108,"action_dist_inputs":[-0.2482100874,0.2467315495],"value_targets":55.6952018738} +{"eps_id":729290008,"obs":[-0.0371722728,0.0333939828,0.0112318909,0.2816730738],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0365043953,0.2283539325,0.0168653522,-0.0074463119],"action_prob":0.8534913659,"action_logp":-0.1584198773,"action_dist_inputs":[-0.8800379634,0.8822128773],"value_targets":55.2476768494} 
+{"eps_id":729290008,"obs":[-0.0365043953,0.2283539325,0.0168653522,-0.0074463119],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0319373161,0.4232299924,0.0167164262,-0.2947606742],"action_prob":0.6416886449,"action_logp":-0.4436520338,"action_dist_inputs":[-0.2919890583,0.2907119393],"value_targets":54.7956352234} +{"eps_id":729290008,"obs":[-0.0319373161,0.4232299924,0.0167164262,-0.2947606742],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0234727152,0.6181097031,0.0108212121,-0.5821249485],"action_prob":0.271037966,"action_logp":-1.305496335,"action_dist_inputs":[0.4923749864,-0.4969877601],"value_targets":54.3390235901} +{"eps_id":729290008,"obs":[-0.0234727152,0.6181097031,0.0108212121,-0.5821249485],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0111105219,0.4228378236,-0.0008212868,-0.2860529125],"action_prob":0.9013690352,"action_logp":-0.1038405299,"action_dist_inputs":[1.1028859615,-1.1096436977],"value_targets":53.8778038025} +{"eps_id":729290008,"obs":[-0.0111105219,0.4228378236,-0.0008212868,-0.2860529125],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0026537653,0.6179714799,-0.006542345,-0.578994751],"action_prob":0.2504369318,"action_logp":-1.3845481873,"action_dist_inputs":[0.5459031463,-0.5503802299],"value_targets":53.4119224548} +{"eps_id":729290008,"obs":[-0.0026537653,0.6179714799,-0.006542345,-0.578994751],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0097056637,0.8131844997,-0.0181222409,-0.8737314343],"action_prob":0.091304481,"action_logp":-2.3935554028,"action_dist_inputs":[1.1455594301,-1.1522506475],"value_targets":52.9413375854} 
+{"eps_id":729290008,"obs":[0.0097056637,0.8131844997,-0.0181222409,-0.8737314343],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0259693544,0.6183135509,-0.0355968699,-0.5868006945],"action_prob":0.9472228885,"action_logp":-0.0542208254,"action_dist_inputs":[1.4395128489,-1.4479448795],"value_targets":52.4659957886} +{"eps_id":729290008,"obs":[0.0259693544,0.6183135509,-0.0355968699,-0.5868006945],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0383356251,0.4237077832,-0.0473328829,-0.3055402339],"action_prob":0.9215310812,"action_logp":-0.0817187801,"action_dist_inputs":[1.22831285,-1.2350206375],"value_targets":51.9858551025} +{"eps_id":729290008,"obs":[0.0383356251,0.4237077832,-0.0473328829,-0.3055402339],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0468097813,0.2292911708,-0.0534436889,-0.0281526111],"action_prob":0.8333895206,"action_logp":-0.1822541356,"action_dist_inputs":[0.8027051091,-0.8071375489],"value_targets":51.5008621216} +{"eps_id":729290008,"obs":[0.0468097813,0.2292911708,-0.0534436889,-0.0281526111],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0513956025,0.425137192,-0.0540067405,-0.3372071683],"action_prob":0.4491909146,"action_logp":-0.8003072739,"action_dist_inputs":[0.1012786925,-0.1026616618],"value_targets":51.0109710693} +{"eps_id":729290008,"obs":[0.0513956025,0.425137192,-0.0540067405,-0.3372071683],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0598983467,0.2308237106,-0.0607508831,-0.0620321818],"action_prob":0.8606048226,"action_logp":-0.1501198262,"action_dist_inputs":[0.9078864455,-0.9124360681],"value_targets":50.5161323547} 
+{"eps_id":729290008,"obs":[0.0598983467,0.2308237106,-0.0607508831,-0.0620321818],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0645148233,0.0366230309,-0.0619915277,0.2108816803],"action_prob":0.6266179681,"action_logp":-0.4674182236,"action_dist_inputs":[0.2580554187,-0.2596794665],"value_targets":50.0162963867} +{"eps_id":729290008,"obs":[0.0645148233,0.0366230309,-0.0619915277,0.2108816803],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0652472824,0.2325740308,-0.0577738956,-0.1006940678],"action_prob":0.7353271246,"action_logp":-0.3074398339,"action_dist_inputs":[-0.5100435615,0.5117771626],"value_targets":49.5114097595} +{"eps_id":729290008,"obs":[0.0652472824,0.2325740308,-0.0577738956,-0.1006940678],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0698987618,0.4284743369,-0.0597877763,-0.4110303223],"action_prob":0.3187496364,"action_logp":-1.1433492899,"action_dist_inputs":[0.3788253367,-0.3806985021],"value_targets":49.0014266968} +{"eps_id":729290008,"obs":[0.0698987618,0.4284743369,-0.0597877763,-0.4110303223],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0784682482,0.2342486829,-0.0680083856,-0.1377795339],"action_prob":0.8963563442,"action_logp":-0.1094172671,"action_dist_inputs":[1.0762387514,-1.0811401606],"value_targets":48.486289978} +{"eps_id":729290008,"obs":[0.0784682482,0.2342486829,-0.0680083856,-0.1377795339],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0831532255,0.0401633941,-0.0707639754,0.1326963603],"action_prob":0.7494955659,"action_logp":-0.2883548439,"action_dist_inputs":[0.5469031334,-0.549020648],"value_targets":47.9659461975} 
+{"eps_id":729290008,"obs":[0.0831532255,0.0401633941,-0.0707639754,0.1326963603],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0839564949,-0.1538773179,-0.0681100488,0.4022417367],"action_prob":0.3923081756,"action_logp":-0.9357075691,"action_dist_inputs":[-0.2182547599,0.2193653882],"value_targets":47.4403495789} +{"eps_id":729290008,"obs":[0.0839564949,-0.1538773179,-0.0681100488,0.4022417367],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0808789432,0.0421412364,-0.0600652099,0.0888867527],"action_prob":0.8551053405,"action_logp":-0.1565306336,"action_dist_inputs":[-0.8856019378,0.8896154165],"value_targets":46.9094467163} +{"eps_id":729290008,"obs":[0.0808789432,0.0421412364,-0.0600652099,0.0888867527],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0817217678,0.2380704135,-0.0582874753,-0.2221255749],"action_prob":0.5576927066,"action_logp":-0.5839471221,"action_dist_inputs":[-0.1155082583,0.1162950397],"value_targets":46.3731765747} +{"eps_id":729290008,"obs":[0.0817217678,0.2380704135,-0.0582874753,-0.2221255749],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0864831805,0.4339749515,-0.0627299845,-0.532610178],"action_prob":0.1820837706,"action_logp":-1.7032884359,"action_dist_inputs":[0.7498115897,-0.7524815202],"value_targets":45.8314933777} +{"eps_id":729290008,"obs":[0.0864831805,0.4339749515,-0.0627299845,-0.532610178],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0951626748,0.2397887707,-0.0733821914,-0.2603343129],"action_prob":0.9232218862,"action_logp":-0.0798857063,"action_dist_inputs":[1.2406442165,-1.2463052273],"value_targets":45.2843360901} 
+{"eps_id":729290008,"obs":[0.0951626748,0.2397887707,-0.0733821914,-0.2603343129],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0999584571,0.0457869135,-0.0785888806,0.0083302958],"action_prob":0.8554369211,"action_logp":-0.1561428905,"action_dist_inputs":[0.8874592185,-0.8904376626],"value_targets":44.7316513062} +{"eps_id":729290008,"obs":[0.0999584571,0.0457869135,-0.0785888806,0.0083302958],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.100874193,0.2419428378,-0.0784222707,-0.3080767989],"action_prob":0.3723412454,"action_logp":-0.9879444838,"action_dist_inputs":[0.2611508965,-0.2610348165],"value_targets":44.1733856201} +{"eps_id":729290008,"obs":[0.100874193,0.2419428378,-0.0784222707,-0.3080767989],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1057130471,0.0480208062,-0.084583804,-0.0411202759],"action_prob":0.8791412115,"action_logp":-0.1288097501,"action_dist_inputs":[0.9904723763,-0.9938501716],"value_targets":43.6094818115} +{"eps_id":729290008,"obs":[0.1057130471,0.0480208062,-0.084583804,-0.0411202759],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1066734642,-0.1457927823,-0.085406214,0.2237227708],"action_prob":0.7103279829,"action_logp":-0.3420284986,"action_dist_inputs":[0.4483267963,-0.4486505687],"value_targets":43.0398788452} +{"eps_id":729290008,"obs":[0.1066734642,-0.1457927823,-0.085406214,0.2237227708],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1037576124,-0.339596808,-0.0809317604,0.4882903099],"action_prob":0.3519662917,"action_logp":-1.0442198515,"action_dist_inputs":[-0.3038053811,0.306601882],"value_targets":42.4645233154} 
+{"eps_id":729290008,"obs":[0.1037576124,-0.339596808,-0.0809317604,0.4882903099],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0969656706,-0.1434318572,-0.0711659491,0.171238035],"action_prob":0.869491756,"action_logp":-0.1398463994,"action_dist_inputs":[-0.945615232,0.9508576989],"value_targets":41.8833580017} +{"eps_id":729290008,"obs":[0.0969656706,-0.1434318572,-0.0711659491,0.171238035],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0940970331,0.0526327007,-0.0677411929,-0.1430195272],"action_prob":0.5942882299,"action_logp":-0.5203908086,"action_dist_inputs":[-0.189658016,0.192063421],"value_targets":41.2963218689} +{"eps_id":729290008,"obs":[0.0940970331,0.0526327007,-0.0677411929,-0.1430195272],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0951496884,-0.1414569914,-0.0706015825,0.1275466084],"action_prob":0.7921116352,"action_logp":-0.2330529392,"action_dist_inputs":[0.6682235599,-0.6694773436],"value_targets":40.7033538818} +{"eps_id":729290008,"obs":[0.0951496884,-0.1414569914,-0.0706015825,0.1275466084],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0923205465,0.0546016209,-0.0680506453,-0.1865482926],"action_prob":0.5197191834,"action_logp":-0.6544666886,"action_dist_inputs":[-0.0384445302,0.0404730961],"value_targets":40.1044006348} +{"eps_id":729290008,"obs":[0.0923205465,0.0546016209,-0.0680506453,-0.1865482926],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0934125856,-0.1394840479,-0.0717816129,0.0839149132],"action_prob":0.8235014081,"action_logp":-0.1941899955,"action_dist_inputs":[0.7692726851,-0.7709800005],"value_targets":39.4993934631} 
+{"eps_id":729290008,"obs":[0.0934125856,-0.1394840479,-0.0717816129,0.0839149132],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0906229019,-0.3335075378,-0.0701033175,0.3531150222],"action_prob":0.5581100583,"action_logp":-0.5831990838,"action_dist_inputs":[0.1175484359,-0.1159468889],"value_targets":38.8882751465} +{"eps_id":729290008,"obs":[0.0906229019,-0.3335075378,-0.0701033175,0.3531150222],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0839527473,-0.5275662541,-0.0630410165,0.6228945851],"action_prob":0.2202277333,"action_logp":-1.5130931139,"action_dist_inputs":[-0.6299344897,0.6344051957],"value_targets":38.2709846497} +{"eps_id":729290008,"obs":[0.0839527473,-0.5275662541,-0.0630410165,0.6228945851],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0734014288,-0.3316233456,-0.0505831242,0.3110423088],"action_prob":0.9106083512,"action_logp":-0.0936423764,"action_dist_inputs":[-1.1572892666,1.1637969017],"value_targets":37.6474609375} +{"eps_id":729290008,"obs":[0.0734014288,-0.3316233456,-0.0505831242,0.3110423088],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0667689592,-0.1358185858,-0.0443622768,0.0028457399],"action_prob":0.7644540668,"action_logp":-0.2685933709,"action_dist_inputs":[-0.5865336061,0.5907223821],"value_targets":37.0176353455} +{"eps_id":729290008,"obs":[0.0667689592,-0.1358185858,-0.0443622768,0.0028457399],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0640525892,-0.3302771747,-0.0443053618,0.281208396],"action_prob":0.6274245977,"action_logp":-0.4661317766,"action_dist_inputs":[0.2609817088,-0.260202229],"value_targets":36.3814506531} 
+{"eps_id":729290008,"obs":[0.0640525892,-0.3302771747,-0.0443053618,0.281208396],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0574470423,-0.1345521659,-0.0386811942,-0.0251125246],"action_prob":0.7397283316,"action_logp":-0.3014722764,"action_dist_inputs":[-0.5203004479,0.5242563486],"value_targets":35.7388381958} +{"eps_id":729290008,"obs":[0.0574470423,-0.1345521659,-0.0386811942,-0.0251125246],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0547560006,-0.3290986419,-0.0391834453,0.2551195025],"action_prob":0.6564594507,"action_logp":-0.4208943248,"action_dist_inputs":[0.323998332,-0.3235573471],"value_targets":35.0897369385} +{"eps_id":729290008,"obs":[0.0547560006,-0.3290986419,-0.0391834453,0.2551195025],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0481740274,-0.1334397942,-0.0340810567,-0.0496605188],"action_prob":0.7154658437,"action_logp":-0.3348214328,"action_dist_inputs":[-0.4591761827,0.4629043341],"value_targets":34.4340782166} +{"eps_id":729290008,"obs":[0.0481740274,-0.1334397942,-0.0340810567,-0.0496605188],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0455052294,0.0621538572,-0.0350742675,-0.3528985083],"action_prob":0.3191425204,"action_logp":-1.1421175003,"action_dist_inputs":[0.3789187372,-0.378796488],"value_targets":33.7717971802} +{"eps_id":729290008,"obs":[0.0455052294,0.0621538572,-0.0350742675,-0.3528985083],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0467483066,-0.1324522346,-0.0421322361,-0.0714786127],"action_prob":0.8771927953,"action_logp":-0.1310284585,"action_dist_inputs":[0.9813277721,-0.9847836494],"value_targets":33.1028251648} 
+{"eps_id":729290008,"obs":[0.0467483066,-0.1324522346,-0.0421322361,-0.0714786127],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0440992638,-0.3269456327,-0.0435618088,0.2076193541],"action_prob":0.7198730707,"action_logp":-0.3286803961,"action_dist_inputs":[0.4717989266,-0.472033143],"value_targets":32.4270935059} +{"eps_id":729290008,"obs":[0.0440992638,-0.3269456327,-0.0435618088,0.2076193541],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0375603512,-0.1312287152,-0.0394094214,-0.0984806269],"action_prob":0.6370908618,"action_logp":-0.4508429766,"action_dist_inputs":[-0.2797630429,0.2829968631],"value_targets":31.7445411682} +{"eps_id":729290008,"obs":[0.0375603512,-0.1312287152,-0.0394094214,-0.0984806269],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0349357761,-0.3257643282,-0.0413790345,0.1815128624],"action_prob":0.744156301,"action_logp":-0.2955042124,"action_dist_inputs":[0.5335365534,-0.5341477394],"value_targets":31.0550918579} +{"eps_id":729290008,"obs":[0.0349357761,-0.3257643282,-0.0413790345,0.1815128624],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0284204893,-0.1300754696,-0.0377487764,-0.1239311472],"action_prob":0.600320816,"action_logp":-0.5102910995,"action_dist_inputs":[-0.2019327283,0.2048692852],"value_targets":30.3586788177} +{"eps_id":729290008,"obs":[0.0284204893,-0.1300754696,-0.0377487764,-0.1239311472],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0258189812,-0.3246368468,-0.0402273983,0.1566075087],"action_prob":0.7656164169,"action_logp":-0.2670739889,"action_dist_inputs":[0.5913698077,-0.5923526883],"value_targets":29.6552295685} 
+{"eps_id":729290008,"obs":[0.0258189812,-0.3246368468,-0.0402273983,0.1566075087],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0193262435,-0.5191604495,-0.0370952487,0.4363331795],"action_prob":0.4378557205,"action_logp":-0.8258658051,"action_dist_inputs":[-0.1236236393,0.1262453943],"value_targets":28.9446773529} +{"eps_id":729290008,"obs":[0.0193262435,-0.5191604495,-0.0370952487,0.4363331795],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0089430343,-0.3235335648,-0.0283685867,0.132191211],"action_prob":0.8428407907,"action_logp":-0.1709772199,"action_dist_inputs":[-0.8370915651,0.8424271941],"value_targets":28.2269458771} +{"eps_id":729290008,"obs":[0.0089430343,-0.3235335648,-0.0283685867,0.132191211],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0024723636,-0.1280169636,-0.0257247612,-0.1693049073],"action_prob":0.5493653417,"action_logp":-0.5989916325,"action_dist_inputs":[-0.0978904814,0.1002161801],"value_targets":27.5019664764} +{"eps_id":729290008,"obs":[0.0024723636,-0.1280169636,-0.0257247612,-0.1693049073],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0000879759,-0.3227614462,-0.0291108601,0.1151529104],"action_prob":0.7892793417,"action_logp":-0.2366349399,"action_dist_inputs":[0.659478724,-0.6611085534],"value_targets":26.7696628571} +{"eps_id":729290008,"obs":[-0.0000879759,-0.3227614462,-0.0291108601,0.1151529104],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0065432047,-0.5174544454,-0.0268078018,0.3985112906],"action_prob":0.4803184271,"action_logp":-0.7333059907,"action_dist_inputs":[-0.038359914,0.0404071026],"value_targets":26.0299625397} 
+{"eps_id":729290008,"obs":[-0.0065432047,-0.5174544454,-0.0268078018,0.3985112906],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0168922935,-0.3219626546,-0.0188375749,0.0974984095],"action_prob":0.8259503841,"action_logp":-0.1912205964,"action_dist_inputs":[-0.7761052847,0.781088829],"value_targets":25.2827911377} +{"eps_id":729290008,"obs":[-0.0168922935,-0.3219626546,-0.0188375749,0.0974984095],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0233315472,-0.1265758425,-0.0168876071,-0.2010677159],"action_prob":0.5146704316,"action_logp":-0.6642285585,"action_dist_inputs":[-0.0284512658,0.0302472748],"value_targets":24.5280704498} +{"eps_id":729290008,"obs":[-0.0233315472,-0.1265758425,-0.0168876071,-0.2010677159],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0258630645,-0.32145226,-0.0209089611,0.0862404928],"action_prob":0.8037062883,"action_logp":-0.2185213864,"action_dist_inputs":[0.703738451,-0.7058832645],"value_targets":23.7657279968} +{"eps_id":729290008,"obs":[-0.0258630645,-0.32145226,-0.0209089611,0.0862404928],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.032292109,-0.5162683725,-0.0191841517,0.3722539842],"action_prob":0.5081301928,"action_logp":-0.6770176291,"action_dist_inputs":[0.0170396771,-0.015483805],"value_targets":22.9956855774} +{"eps_id":729290008,"obs":[-0.032292109,-0.5162683725,-0.0191841517,0.3722539842],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0426174738,-0.7111125588,-0.0117390724,0.6588267684],"action_prob":0.1863583922,"action_logp":-1.6800836325,"action_dist_inputs":[-0.7345958352,0.7392524481],"value_targets":22.2178649902} 
+{"eps_id":729290008,"obs":[-0.0426174738,-0.7111125588,-0.0117390724,0.6588267684],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0568397269,-0.5158292055,0.0014374629,0.3624707162],"action_prob":0.9217783809,"action_logp":-0.0814504772,"action_dist_inputs":[-1.2300155163,1.2367432117],"value_targets":21.4321861267} +{"eps_id":729290008,"obs":[-0.0568397269,-0.5158292055,0.0014374629,0.3624707162],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0671563148,-0.3207277358,0.0086868769,0.0702413842],"action_prob":0.8299075961,"action_logp":-0.1864409149,"action_dist_inputs":[-0.7902329564,0.7947397828],"value_targets":20.6385707855} +{"eps_id":729290008,"obs":[-0.0671563148,-0.3207277358,0.0086868769,0.0702413842],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0735708699,-0.1257314086,0.0100917043,-0.2196881771],"action_prob":0.5358824134,"action_logp":-0.6238405108,"action_dist_inputs":[-0.0712457448,0.0725310147],"value_targets":19.8369407654} +{"eps_id":729290008,"obs":[-0.0735708699,-0.1257314086,0.0100917043,-0.2196881771],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.076085493,-0.3209961355,0.0056979409,0.0761609226],"action_prob":0.7914299369,"action_logp":-0.2339139283,"action_dist_inputs":[0.665518105,-0.668048203],"value_targets":19.0272140503} +{"eps_id":729290008,"obs":[-0.076085493,-0.3209961355,0.0056979409,0.0761609226],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0825054199,-0.1259563416,0.0072211595,-0.2147188485],"action_prob":0.5398208499,"action_logp":-0.6165179014,"action_dist_inputs":[-0.0791975707,0.0804240406],"value_targets":18.2093067169} 
+{"eps_id":729290008,"obs":[-0.0825054199,-0.1259563416,0.0072211595,-0.2147188485],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0850245431,0.0690616369,0.0029267825,-0.5051151514],"action_prob":0.2090349197,"action_logp":-1.565253973,"action_dist_inputs":[0.6640786529,-0.6666738391],"value_targets":17.3831367493} +{"eps_id":729290008,"obs":[-0.0850245431,0.0690616369,0.0029267825,-0.5051151514],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0836433098,-0.1261014342,-0.007175521,-0.2115113437],"action_prob":0.9048010707,"action_logp":-0.1000401676,"action_dist_inputs":[1.1230973005,-1.1286486387],"value_targets":16.5486240387} +{"eps_id":729290008,"obs":[-0.0836433098,-0.1261014342,-0.007175521,-0.2115113437],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0861653388,-0.3211200535,-0.0114057483,0.078899473],"action_prob":0.8035809994,"action_logp":-0.2186772674,"action_dist_inputs":[0.7030459046,-0.7057823539],"value_targets":15.7056808472} +{"eps_id":729290008,"obs":[-0.0861653388,-0.3211200535,-0.0114057483,0.078899473],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0925877392,-0.516076684,-0.0098277591,0.3679621518],"action_prob":0.493873179,"action_logp":-0.7054765224,"action_dist_inputs":[-0.0117685068,0.0127399601],"value_targets":14.8542232513} +{"eps_id":729290008,"obs":[-0.0925877392,-0.516076684,-0.0098277591,0.3679621518],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1029092744,-0.7110576034,-0.0024685159,0.657530129],"action_prob":0.1782754362,"action_logp":-1.7244255543,"action_dist_inputs":[-0.7619708776,0.7661046982],"value_targets":13.9941644669} 
+{"eps_id":729290008,"obs":[-0.1029092744,-0.7110576034,-0.0024685159,0.657530129],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1171304286,-0.5159013867,0.0106820865,0.3640709221],"action_prob":0.9226017594,"action_logp":-0.0805575922,"action_dist_inputs":[-1.2359275818,1.2423056364],"value_targets":13.125418663} +{"eps_id":729290008,"obs":[-0.1171304286,-0.5159013867,0.0106820865,0.3640709221],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1274484545,-0.3209328651,0.0179635044,0.0747753009],"action_prob":0.8402147889,"action_logp":-0.1740977019,"action_dist_inputs":[-0.8279025555,0.8319245577],"value_targets":12.2478981018} +{"eps_id":729290008,"obs":[-0.1274484545,-0.3209328651,0.0179635044,0.0747753009],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1338671148,-0.516307652,0.019459011,0.3730712235],"action_prob":0.4296994805,"action_logp":-0.8446691632,"action_dist_inputs":[-0.1411266625,0.1419507116],"value_targets":11.3615131378} +{"eps_id":729290008,"obs":[-0.1338671148,-0.516307652,0.019459011,0.3730712235],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1441932619,-0.3214674592,0.0269204341,0.0865868405],"action_prob":0.8535526395,"action_logp":-0.1583480388,"action_dist_inputs":[-0.8793795705,0.8833619356],"value_targets":10.4661741257} +{"eps_id":729290008,"obs":[-0.1441932619,-0.3214674592,0.0269204341,0.0865868405],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1506226212,-0.5169647336,0.0286521725,0.3876401484],"action_prob":0.3889436722,"action_logp":-0.9443207383,"action_dist_inputs":[-0.2254469544,0.2263076454],"value_targets":9.5617923737} 
+{"eps_id":729290008,"obs":[-0.1506226212,-0.5169647336,0.0286521725,0.3876401484],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1609619111,-0.3222609758,0.0364049748,0.1041269675],"action_prob":0.8680859804,"action_logp":-0.1414645463,"action_dist_inputs":[-0.9400782585,0.9440619349],"value_targets":8.6482753754} +{"eps_id":729290008,"obs":[-0.1609619111,-0.3222609758,0.0364049748,0.1041269675],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1674071252,-0.1276791543,0.0384875126,-0.1768516898],"action_prob":0.6592230797,"action_logp":-0.4166933298,"action_dist_inputs":[-0.329446733,0.3303871155],"value_targets":7.7255306244} +{"eps_id":729290008,"obs":[-0.1674071252,-0.1276791543,0.0384875126,-0.1768516898],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1699607074,-0.323330164,0.0349504799,0.1277200133],"action_prob":0.7079021335,"action_logp":-0.345449388,"action_dist_inputs":[0.4413000345,-0.4439169168],"value_targets":6.7934651375} +{"eps_id":729290008,"obs":[-0.1699607074,-0.323330164,0.0349504799,0.1277200133],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1764273196,-0.5189349055,0.0375048816,0.4312213063],"action_prob":0.3096650839,"action_logp":-1.1722639799,"action_dist_inputs":[-0.4003217518,0.4013638794],"value_targets":5.8519849777} +{"eps_id":729290008,"obs":[-0.1764273196,-0.5189349055,0.0375048816,0.4312213063],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1868060082,-0.3243635297,0.0461293049,0.1505933404],"action_prob":0.8902327418,"action_logp":-0.1162723377,"action_dist_inputs":[-1.0445110798,1.0486098528],"value_targets":4.9009947777} 
+{"eps_id":729290008,"obs":[-0.1868060082,-0.3243635297,0.0461293049,0.1505933404],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1932932884,-0.129931435,0.0491411723,-0.1271873415],"action_prob":0.7387530804,"action_logp":-0.3027915061,"action_dist_inputs":[-0.5191749334,0.5203230381],"value_targets":3.9403989315} +{"eps_id":729290008,"obs":[-0.1932932884,-0.129931435,0.0491411723,-0.1271873415],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1958919168,-0.3257216811,0.0465974249,0.1805851609],"action_prob":0.6151322722,"action_logp":-0.4859179556,"action_dist_inputs":[0.2333568186,-0.2355806679],"value_targets":2.970099926} +{"eps_id":729290008,"obs":[-0.1958919168,-0.3257216811,0.0465974249,0.1805851609],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2024063468,-0.1312964112,0.0502091311,-0.0970414802],"action_prob":0.7712938786,"action_logp":-0.2596857846,"action_dist_inputs":[-0.607169807,0.6084621549],"value_targets":1.9900000095} +{"eps_id":729290008,"obs":[-0.2024063468,-0.1312964112,0.0502091311,-0.0970414802],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.2050322741,0.0630713105,0.0482682995,-0.373470366],"action_prob":0.4369588494,"action_logp":-0.8279162645,"action_dist_inputs":[0.1257564127,-0.1277573705],"value_targets":1.0} +{"eps_id":623467292,"obs":[-0.0083120698,-0.005944496,0.0452225134,0.0328982733],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0084309597,0.1885007471,0.0458804779,-0.2451805174],"action_prob":0.6833451986,"action_logp":-0.3807550967,"action_dist_inputs":[-0.3843438625,0.3848441839],"value_targets":86.6020355225} 
+{"eps_id":623467292,"obs":[-0.0084309597,0.1885007471,0.0458804779,-0.2451805174],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0046609449,-0.0072454498,0.0409768671,0.0616139285],"action_prob":0.7015690207,"action_logp":-0.3544360101,"action_dist_inputs":[0.4260335863,-0.4287469983],"value_targets":86.4666976929} +{"eps_id":623467292,"obs":[-0.0046609449,-0.0072454498,0.0409768671,0.0616139285],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0048058536,-0.2029302269,0.0422091484,0.3669385314],"action_prob":0.2854523063,"action_logp":-1.2536803484,"action_dist_inputs":[-0.4584037066,0.4591710865],"value_targets":86.3300018311} +{"eps_id":623467292,"obs":[-0.0048058536,-0.2029302269,0.0422091484,0.3669385314],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0088644587,-0.0084326994,0.0495479181,0.0878578946],"action_prob":0.89904356,"action_logp":-0.1064237654,"action_dist_inputs":[-1.0912767649,1.0953652859],"value_targets":86.1919174194} +{"eps_id":623467292,"obs":[-0.0088644587,-0.0084326994,0.0495479181,0.0878578946],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0090331119,0.1859453022,0.0513050742,-0.1887901723],"action_prob":0.7590383887,"action_logp":-0.2757028937,"action_dist_inputs":[-0.5731781721,0.5742368102],"value_targets":86.052444458} +{"eps_id":623467292,"obs":[-0.0090331119,0.1859453022,0.0513050742,-0.1887901723],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0053142062,0.3802971542,0.0475292727,-0.4648572505],"action_prob":0.3971951604,"action_logp":-0.9233275652,"action_dist_inputs":[0.2074621618,-0.2097036242],"value_targets":85.9115600586} 
+{"eps_id":623467292,"obs":[-0.0053142062,0.3802971542,0.0475292727,-0.4648572505],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0022917369,0.5747163892,0.0382321291,-0.7421884537],"action_prob":0.1419529915,"action_logp":-1.9522593021,"action_dist_inputs":[0.8971378207,-0.9020249844],"value_targets":85.7692489624} +{"eps_id":623467292,"obs":[0.0022917369,0.5747163892,0.0382321291,-0.7421884537],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0137860645,0.3790881336,0.0233883578,-0.4377229512],"action_prob":0.9292680025,"action_logp":-0.0733581185,"action_dist_inputs":[1.2842512131,-1.2912476063],"value_targets":85.62550354} +{"eps_id":623467292,"obs":[0.0137860645,0.3790881336,0.0233883578,-0.4377229512],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0213678274,0.1836430579,0.0146338996,-0.1377599984],"action_prob":0.8623815775,"action_logp":-0.1480574459,"action_dist_inputs":[0.9152405858,-0.9199723601],"value_targets":85.4803085327} +{"eps_id":623467292,"obs":[0.0213678274,0.1836430579,0.0146338996,-0.1377599984],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.025040688,-0.0116854012,0.0118787,0.1595035344],"action_prob":0.602632761,"action_logp":-0.5064473152,"action_dist_inputs":[0.2073198259,-0.2091271728],"value_targets":85.3336486816} +{"eps_id":623467292,"obs":[0.025040688,-0.0116854012,0.0118787,0.1595035344],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.02480698,0.1832644939,0.0150687704,-0.1294084191],"action_prob":0.779047966,"action_logp":-0.2496826351,"action_dist_inputs":[-0.6292002797,0.6309270263],"value_targets":85.1855010986} 
+{"eps_id":623467292,"obs":[0.02480698,0.1832644939,0.0150687704,-0.1294084191],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0284722708,-0.0120700449,0.0124806017,0.1679901779],"action_prob":0.5875546932,"action_logp":-0.5317859054,"action_dist_inputs":[0.1760840863,-0.1777818203],"value_targets":85.0358581543} +{"eps_id":623467292,"obs":[0.0284722708,-0.0120700449,0.0124806017,0.1679901779],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0282308683,0.1828710586,0.0158404056,-0.120729506],"action_prob":0.7876576781,"action_logp":-0.2386916876,"action_dist_inputs":[-0.6545069814,0.6563568115],"value_targets":84.8847045898} +{"eps_id":623467292,"obs":[0.0282308683,0.1828710586,0.0158404056,-0.120729506],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0318882912,0.3777625263,0.0134258159,-0.4083731472],"action_prob":0.4291032553,"action_logp":-0.846057713,"action_dist_inputs":[0.1419646591,-0.1435461342],"value_targets":84.7320251465} +{"eps_id":623467292,"obs":[0.0318882912,0.3777625263,0.0134258159,-0.4083731472],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0394435413,0.1824527979,0.005258353,-0.1114878953],"action_prob":0.8538107872,"action_logp":-0.1580456942,"action_dist_inputs":[0.8801736832,-0.8846338391],"value_targets":84.5778045654} +{"eps_id":623467292,"obs":[0.0394435413,0.1824527979,0.005258353,-0.1114878953],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0430925973,-0.0127440989,0.0030285949,0.1828493625],"action_prob":0.5805189013,"action_logp":-0.5438329577,"action_dist_inputs":[0.1617147028,-0.1631890237],"value_targets":84.4220275879} 
+{"eps_id":623467292,"obs":[0.0430925973,-0.0127440989,0.0030285949,0.1828493625],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0428377129,-0.2079092562,0.006685582,0.4764861465],"action_prob":0.2106830329,"action_logp":-1.557400465,"action_dist_inputs":[-0.6593833566,0.661429882],"value_targets":84.2646713257} +{"eps_id":623467292,"obs":[0.0428377129,-0.2079092562,0.006685582,0.4764861465],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.038679529,-0.0128823388,0.0162153058,0.1859178692],"action_prob":0.9125261307,"action_logp":-0.0915385783,"action_dist_inputs":[-1.1699124575,1.1749643087],"value_targets":84.1057281494} +{"eps_id":623467292,"obs":[0.038679529,-0.0128823388,0.0162153058,0.1859178692],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0384218842,0.1820039004,0.0199336633,-0.1016059667],"action_prob":0.8072220683,"action_logp":-0.2141564935,"action_dist_inputs":[-0.7149472237,0.7171126008],"value_targets":83.9451828003} +{"eps_id":623467292,"obs":[0.0384218842,0.1820039004,0.0199336633,-0.1016059667],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0420619622,0.3768345714,0.0179015435,-0.3879338205],"action_prob":0.4719575346,"action_logp":-0.7508662939,"action_dist_inputs":[0.0555080064,-0.0567798167],"value_targets":83.7830123901} +{"eps_id":623467292,"obs":[0.0420619622,0.3768345714,0.0179015435,-0.3879338205],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0495986529,0.1814631671,0.0101428665,-0.0896608829],"action_prob":0.8364913464,"action_logp":-0.1785390973,"action_dist_inputs":[0.8140717745,-0.8182787299],"value_targets":83.6192016602} 
+{"eps_id":623467292,"obs":[0.0495986529,0.1814631671,0.0101428665,-0.0896608829],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0532279164,0.3764382601,0.0083496487,-0.3791265488],"action_prob":0.4688549638,"action_logp":-0.7574617863,"action_dist_inputs":[0.061803516,-0.0629381761],"value_targets":83.453742981} +{"eps_id":623467292,"obs":[0.0532279164,0.3764382601,0.0083496487,-0.3791265488],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0607566796,0.5714406371,0.0007671179,-0.669165194],"action_prob":0.1603341699,"action_logp":-1.8304951191,"action_dist_inputs":[0.8258139491,-0.829929769],"value_targets":83.286605835} +{"eps_id":623467292,"obs":[0.0607566796,0.5714406371,0.0007671179,-0.669165194],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.072185494,0.3763080537,-0.0126161855,-0.3762407899],"action_prob":0.9261525273,"action_logp":-0.0767163411,"action_dist_inputs":[1.2612695694,-1.2677674294],"value_targets":83.1177825928} +{"eps_id":623467292,"obs":[0.072185494,0.3763080537,-0.0126161855,-0.3762407899],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0797116533,0.1813675463,-0.0201410018,-0.0875623748],"action_prob":0.8560117483,"action_logp":-0.155471161,"action_dist_inputs":[0.8892387152,-0.8933137059],"value_targets":82.9472579956} +{"eps_id":623467292,"obs":[0.0797116533,0.1813675463,-0.0201410018,-0.0875623748],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0833390057,-0.0134600028,-0.0218922496,0.198698625],"action_prob":0.6010593176,"action_logp":-0.5090616345,"action_dist_inputs":[0.2044096291,-0.2054712772],"value_targets":82.7750091553} 
+{"eps_id":623467292,"obs":[0.0833390057,-0.0134600028,-0.0218922496,0.198698625],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0830698088,0.1819681227,-0.0179182757,-0.1008091271],"action_prob":0.7712731957,"action_logp":-0.2597126067,"action_dist_inputs":[-0.6065796614,0.6089347601],"value_targets":82.601020813} +{"eps_id":623467292,"obs":[0.0830698088,0.1819681227,-0.0179182757,-0.1008091271],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0867091715,-0.0128925135,-0.0199344587,0.1861672103],"action_prob":0.6183586717,"action_logp":-0.480686605,"action_dist_inputs":[0.2407331765,-0.2418544143],"value_targets":82.4252700806} +{"eps_id":623467292,"obs":[0.0867091715,-0.0128925135,-0.0199344587,0.1861672103],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0864513218,0.1825089008,-0.0162111148,-0.1127370149],"action_prob":0.761089325,"action_logp":-0.2730045319,"action_dist_inputs":[-0.5781849027,0.5804760456],"value_targets":82.2477493286} +{"eps_id":623467292,"obs":[0.0864513218,0.1825089008,-0.0162111148,-0.1127370149],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0901014954,-0.0123770582,-0.0184658561,0.1747876406],"action_prob":0.6341750622,"action_logp":-0.4554302096,"action_dist_inputs":[0.2744991183,-0.2756710947],"value_targets":82.0684280396} +{"eps_id":623467292,"obs":[0.0901014954,-0.0123770582,-0.0184658561,0.1747876406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0898539573,0.1830042303,-0.0149701023,-0.1236629784],"action_prob":0.7507858872,"action_logp":-0.286634773,"action_dist_inputs":[-0.5502873063,0.552520752],"value_targets":81.8873062134} 
+{"eps_id":623467292,"obs":[0.0898539573,0.1830042303,-0.0149701023,-0.1236629784],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0935140401,0.3783374131,-0.0174433626,-0.4210309684],"action_prob":0.3510684669,"action_logp":-1.0467740297,"action_dist_inputs":[0.3065646589,-0.3077812791],"value_targets":81.7043457031} +{"eps_id":623467292,"obs":[0.0935140401,0.3783374131,-0.0174433626,-0.4210309684],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1010807902,0.1834669113,-0.0258639809,-0.1338977516],"action_prob":0.8795179129,"action_logp":-0.1283813417,"action_dist_inputs":[0.9918276668,-0.9960456491],"value_targets":81.5195465088} +{"eps_id":623467292,"obs":[0.1010807902,0.1834669113,-0.0258639809,-0.1338977516],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1047501266,-0.0112752067,-0.0285419375,0.1505145431],"action_prob":0.6865583062,"action_logp":-0.3760641515,"action_dist_inputs":[0.3914044797,-0.392673254],"value_targets":81.3328704834} +{"eps_id":623467292,"obs":[0.1047501266,-0.0112752067,-0.0285419375,0.1505145431],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1045246199,-0.2059770823,-0.0255316459,0.4340582192],"action_prob":0.2973350585,"action_logp":-1.2128956318,"action_dist_inputs":[-0.428976804,0.4310437441],"value_targets":81.144317627} +{"eps_id":623467292,"obs":[0.1045246199,-0.2059770823,-0.0255316459,0.4340582192],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1004050821,-0.0105031356,-0.0168504808,0.1334373802],"action_prob":0.8925469518,"action_logp":-0.1136761457,"action_dist_inputs":[-1.056034565,1.0609906912],"value_targets":80.9538574219} 
+{"eps_id":623467292,"obs":[0.1004050821,-0.0105031356,-0.0168504808,0.1334373802],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1001950204,0.1848560721,-0.0141817341,-0.1645136476],"action_prob":0.7014696598,"action_logp":-0.354577601,"action_dist_inputs":[-0.4261663258,0.4281398356],"value_targets":80.76146698} +{"eps_id":623467292,"obs":[0.1001950204,0.1848560721,-0.0141817341,-0.1645136476],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1038921401,-0.0100600189,-0.0174720064,0.1236618012],"action_prob":0.706597507,"action_logp":-0.3472940326,"action_dist_inputs":[0.4387429953,-0.4401730597],"value_targets":80.5671386719} +{"eps_id":623467292,"obs":[0.1038921401,-0.0100600189,-0.0174720064,0.1236618012],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1036909372,0.1853078306,-0.0149987713,-0.1744817495],"action_prob":0.6863119006,"action_logp":-0.3764230609,"action_dist_inputs":[-0.3905090392,0.3924241364],"value_targets":80.3708496094} +{"eps_id":623467292,"obs":[0.1036909372,0.1853078306,-0.0149987713,-0.1744817495],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1073970944,-0.0095962854,-0.0184884053,0.1134320647],"action_prob":0.7208981514,"action_logp":-0.3272574246,"action_dist_inputs":[0.4737212062,-0.4751998782],"value_targets":80.1725769043} +{"eps_id":623467292,"obs":[0.1073970944,-0.0095962854,-0.0184884053,0.1134320647],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1072051674,-0.2044485062,-0.0162197649,0.4002251625],"action_prob":0.3310352862,"action_logp":-1.105530262,"action_dist_inputs":[-0.3508280218,0.3526783288],"value_targets":79.9722976685} 
+{"eps_id":623467292,"obs":[0.1072051674,-0.2044485062,-0.0162197649,0.4002251625],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1031161994,-0.0091002742,-0.0082152616,0.1024729609],"action_prob":0.8858644366,"action_logp":-0.121191375,"action_dist_inputs":[-1.0221810341,1.0269955397],"value_targets":79.7699966431} +{"eps_id":623467292,"obs":[0.1031161994,-0.0091002742,-0.0082152616,0.1024729609],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1029341966,-0.2041035295,-0.006165802,0.3925527036],"action_prob":0.3262044489,"action_logp":-1.1202309132,"action_dist_inputs":[-0.3618021309,0.3636001945],"value_targets":79.5656509399} +{"eps_id":623467292,"obs":[0.1029341966,-0.2041035295,-0.006165802,0.3925527036],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0988521203,-0.0088946279,0.0016852522,0.0979321897],"action_prob":0.8885861635,"action_logp":-0.1181236506,"action_dist_inputs":[-1.0357854366,1.0405945778],"value_targets":79.3592453003} +{"eps_id":623467292,"obs":[0.0988521203,-0.0088946279,0.0016852522,0.0979321897],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0986742303,0.186203137,0.003643896,-0.1942185611],"action_prob":0.6871897578,"action_logp":-0.3751447797,"action_dist_inputs":[-0.3926067352,0.3944070339],"value_targets":79.1507568359} +{"eps_id":623467292,"obs":[0.0986742303,0.186203137,0.003643896,-0.1942185611],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1023982912,0.381272763,-0.0002404754,-0.4857497811],"action_prob":0.2866360545,"action_logp":-1.2495419979,"action_dist_inputs":[0.4551203251,-0.4566581249],"value_targets":78.9401550293} 
+{"eps_id":623467292,"obs":[0.1023982912,0.381272763,-0.0002404754,-0.4857497811],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1100237519,0.5763981342,-0.0099554714,-0.7785084844],"action_prob":0.1080254912,"action_logp":-2.2253880501,"action_dist_inputs":[1.053314209,-1.0577561855],"value_targets":78.727432251} +{"eps_id":623467292,"obs":[0.1100237519,0.5763981342,-0.0099554714,-0.7785084844],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1215517148,0.3814144433,-0.0255256407,-0.4889743626],"action_prob":0.9363190532,"action_logp":-0.0657989755,"action_dist_inputs":[1.3405542374,-1.3475167751],"value_targets":78.5125579834} +{"eps_id":623467292,"obs":[0.1215517148,0.3814144433,-0.0255256407,-0.4889743626],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1291799992,0.1866617501,-0.0353051275,-0.2044441849],"action_prob":0.9021504521,"action_logp":-0.1029739827,"action_dist_inputs":[1.108404994,-1.1129454374],"value_targets":78.2955093384} +{"eps_id":623467292,"obs":[0.1291799992,0.1866617501,-0.0353051275,-0.2044441849],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1329132318,-0.0079379948,-0.0393940099,0.076895982],"action_prob":0.7827887535,"action_logp":-0.2448923737,"action_dist_inputs":[0.6401870251,-0.6418057084],"value_targets":78.0762710571} +{"eps_id":623467292,"obs":[0.1329132318,-0.0079379948,-0.0393940099,0.076895982],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1327544749,0.1877259016,-0.0378560908,-0.2279510051],"action_prob":0.5597494841,"action_logp":-0.5802659392,"action_dist_inputs":[-0.1192634553,0.1208820343],"value_targets":77.8548202515} 
+{"eps_id":623467292,"obs":[0.1327544749,0.1877259016,-0.0378560908,-0.2279510051],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1365090013,-0.0068351789,-0.0424151123,0.0525544956],"action_prob":0.8053160906,"action_logp":-0.2165204287,"action_dist_inputs":[0.7090297341,-0.7108276486],"value_targets":77.6311340332} +{"eps_id":623467292,"obs":[0.1365090013,-0.0068351789,-0.0424151123,0.0525544956],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1363722831,-0.2013240904,-0.0413640216,0.3315591812],"action_prob":0.490806073,"action_logp":-0.7117062211,"action_dist_inputs":[-0.0176776275,0.0191021934],"value_targets":77.4051818848} +{"eps_id":623467292,"obs":[0.1363722831,-0.2013240904,-0.0413640216,0.3315591812],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1323458105,-0.0056385165,-0.0347328372,0.0261243358],"action_prob":0.8279896379,"action_logp":-0.188754648,"action_dist_inputs":[-0.7835294008,0.7879163623],"value_targets":77.1769561768} +{"eps_id":623467292,"obs":[0.1323458105,-0.0056385165,-0.0347328372,0.0261243358],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1322330385,0.1899638772,-0.0342103504,-0.2773116827],"action_prob":0.4809978008,"action_logp":-0.7318925858,"action_dist_inputs":[0.0386322401,-0.0374131277],"value_targets":76.9464187622} +{"eps_id":623467292,"obs":[0.1322330385,0.1899638772,-0.0342103504,-0.2773116827],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1360323131,-0.0046537425,-0.0397565849,0.0043880395],"action_prob":0.8350207806,"action_logp":-0.1802986562,"action_dist_inputs":[0.8097243905,-0.8119126558],"value_targets":76.7135543823} 
+{"eps_id":623467292,"obs":[0.1360323131,-0.0046537425,-0.0397565849,0.0043880395],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1359392405,-0.1991836429,-0.0396688245,0.2842669487],"action_prob":0.5684405565,"action_logp":-0.5648585558,"action_dist_inputs":[0.1382602006,-0.1372312605],"value_targets":76.4783401489} +{"eps_id":623467292,"obs":[0.1359392405,-0.1991836429,-0.0396688245,0.2842669487],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.131955564,-0.393718034,-0.0339834839,0.5641793609],"action_prob":0.2113501728,"action_logp":-1.5542389154,"action_dist_inputs":[-0.6563643217,0.6604418159],"value_targets":76.2407455444} +{"eps_id":623467292,"obs":[0.131955564,-0.393718034,-0.0339834839,0.5641793609],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1240812093,-0.5883470774,-0.0226998981,0.8459652066],"action_prob":0.0832499638,"action_logp":-2.4859075546,"action_dist_inputs":[-1.1962947845,1.2026925087],"value_targets":76.0007553101} +{"eps_id":623467292,"obs":[0.1240812093,-0.5883470774,-0.0226998981,0.8459652066],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1123142689,-0.3929229081,-0.005780594,0.5462312698],"action_prob":0.9488994479,"action_logp":-0.0524524376,"action_dist_inputs":[-1.4566174746,1.4648897648],"value_targets":75.7583389282} +{"eps_id":623467292,"obs":[0.1123142689,-0.3929229081,-0.005780594,0.5462312698],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1044558063,-0.1977202147,0.0051440313,0.2517326176],"action_prob":0.9222957492,"action_logp":-0.0808893591,"action_dist_inputs":[-1.2337934971,1.2401616573],"value_targets":75.5134735107} 
+{"eps_id":623467292,"obs":[0.1044558063,-0.1977202147,0.0051440313,0.2517326176],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1005014032,-0.0026720972,0.0101786843,-0.0393233411],"action_prob":0.8177610636,"action_logp":-0.2011850476,"action_dist_inputs":[-0.7486858964,0.75256598],"value_targets":75.26612854} +{"eps_id":623467292,"obs":[0.1005014032,-0.0026720972,0.0101786843,-0.0393233411],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1004479602,-0.1979385167,0.0093922168,0.2565535903],"action_prob":0.5259039402,"action_logp":-0.6426366568,"action_dist_inputs":[0.0522005036,-0.0515082665],"value_targets":75.0162963867} +{"eps_id":623467292,"obs":[0.1004479602,-0.1979385167,0.0093922168,0.2565535903],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0964891911,-0.0029519144,0.0145232892,-0.0331521332],"action_prob":0.8266416192,"action_logp":-0.1903840154,"action_dist_inputs":[-0.7790504098,0.782959938],"value_targets":74.7639312744} +{"eps_id":623467292,"obs":[0.0964891911,-0.0029519144,0.0145232892,-0.0331521332],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0964301527,-0.1982790977,0.0138602462,0.264077425],"action_prob":0.5042446852,"action_logp":-0.6846936941,"action_dist_inputs":[0.0088589154,-0.0081200153],"value_targets":74.5090255737} +{"eps_id":623467292,"obs":[0.0964301527,-0.1982790977,0.0138602462,0.264077425],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0924645737,-0.003357688,0.0191417951,-0.0242018104],"action_prob":0.8371124268,"action_logp":-0.1777969152,"action_dist_inputs":[-0.8164682984,0.8204300404],"value_targets":74.2515411377} 
+{"eps_id":623467292,"obs":[0.0924645737,-0.003357688,0.0191417951,-0.0242018104],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0923974141,0.1914846003,0.0186577588,-0.3107843399],"action_prob":0.5232594013,"action_logp":-0.647677958,"action_dist_inputs":[-0.0461466908,0.0469582081],"value_targets":73.9914550781} +{"eps_id":623467292,"obs":[0.0923974141,0.1914846003,0.0186577588,-0.3107843399],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0962271094,-0.0038981377,0.0124420719,-0.0122761969],"action_prob":0.8094624877,"action_logp":-0.2113848329,"action_dist_inputs":[0.7220694423,-0.7244518399],"value_targets":73.7287445068} +{"eps_id":623467292,"obs":[0.0962271094,-0.0038981377,0.0124420719,-0.0122761969],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0961491466,-0.199196294,0.0121965483,0.2843062282],"action_prob":0.4717912078,"action_logp":-0.7512187362,"action_dist_inputs":[-0.0560282692,0.0569268912],"value_targets":73.4633712769} +{"eps_id":623467292,"obs":[0.0961491466,-0.199196294,0.0121965483,0.2843062282],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0921652168,-0.3944900632,0.0178826731,0.5808107853],"action_prob":0.1508016139,"action_logp":-1.8917901516,"action_dist_inputs":[-0.8621160984,0.8662115932],"value_targets":73.1953277588} +{"eps_id":623467292,"obs":[0.0921652168,-0.3944900632,0.0178826731,0.5808107853],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.084275417,-0.1996231824,0.0294988882,0.2938144505],"action_prob":0.933270216,"action_logp":-0.0690604821,"action_dist_inputs":[-1.3157037497,1.3223406076],"value_targets":72.9245758057} 
+{"eps_id":623467292,"obs":[0.084275417,-0.1996231824,0.0294988882,0.2938144505],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0802829564,-0.0049339584,0.0353751779,0.0105790719],"action_prob":0.8687687516,"action_logp":-0.1406782717,"action_dist_inputs":[-0.942956984,0.9471589327],"value_targets":72.6510848999} +{"eps_id":623467292,"obs":[0.0802829564,-0.0049339584,0.0353751779,0.0105790719],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0801842734,-0.2005449086,0.0355867594,0.3142100573],"action_prob":0.3766385019,"action_logp":-0.9764693975,"action_dist_inputs":[-0.2513593733,0.2524813712],"value_targets":72.3748321533} +{"eps_id":623467292,"obs":[0.0801842734,-0.2005449086,0.0355867594,0.3142100573],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.07617338,-0.0059474921,0.0418709591,0.032959044],"action_prob":0.8821364045,"action_logp":-0.1254086047,"action_dist_inputs":[-1.0042212009,1.0085976124],"value_targets":72.0957946777} +{"eps_id":623467292,"obs":[0.07617338,-0.0059474921,0.0418709591,0.032959044],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0760544315,-0.2016440928,0.0425301418,0.3385531306],"action_prob":0.3268224001,"action_logp":-1.1183383465,"action_dist_inputs":[-0.3606279194,0.3619643748],"value_targets":71.8139266968} +{"eps_id":623467292,"obs":[0.0760544315,-0.2016440928,0.0425301418,0.3385531306],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.072021544,-0.0071523283,0.0493012033,0.0595793873],"action_prob":0.8948099613,"action_logp":-0.1111439243,"action_dist_inputs":[-1.0681239367,1.0727185011],"value_targets":71.5292205811} 
+{"eps_id":623467292,"obs":[0.072021544,-0.0071523283,0.0493012033,0.0595793873],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0718785003,-0.2029452175,0.0504927933,0.3674005568],"action_prob":0.2752795815,"action_logp":-1.2899680138,"action_dist_inputs":[-0.4831955433,0.4848031104],"value_targets":71.2416381836} +{"eps_id":623467292,"obs":[0.0718785003,-0.2029452175,0.0504927933,0.3674005568],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0678195953,-0.0085757626,0.0578408018,0.0910565257],"action_prob":0.9063589573,"action_logp":-0.0983198211,"action_dist_inputs":[-1.1325519085,1.1374149323],"value_targets":70.9511489868} +{"eps_id":623467292,"obs":[0.0678195953,-0.0085757626,0.0578408018,0.0910565257],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.067648083,0.1856714785,0.0596619323,-0.1828313619],"action_prob":0.7745018005,"action_logp":-0.2555353045,"action_dist_inputs":[-0.6159796715,0.6179281473],"value_targets":70.6577301025} +{"eps_id":623467292,"obs":[0.067648083,0.1856714785,0.0596619323,-0.1828313619],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0713615119,0.3798912466,0.0560053065,-0.4561124444],"action_prob":0.4217404723,"action_logp":-0.8633651137,"action_dist_inputs":[0.1571505815,-0.1584821343],"value_targets":70.3613433838} +{"eps_id":623467292,"obs":[0.0713615119,0.3798912466,0.0560053065,-0.4561124444],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0789593309,0.5741785169,0.0468830578,-0.7306294441],"action_prob":0.1562165469,"action_logp":-1.8565120697,"action_dist_inputs":[0.8412998319,-0.8453527689],"value_targets":70.061958313} 
+{"eps_id":623467292,"obs":[0.0789593309,0.5741785169,0.0468830578,-0.7306294441],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0904429033,0.3784410357,0.0322704688,-0.4235672951],"action_prob":0.9222294092,"action_logp":-0.0809612572,"action_dist_inputs":[1.2333526611,-1.23967731],"value_targets":69.7595596313} +{"eps_id":623467292,"obs":[0.0904429033,0.3784410357,0.0322704688,-0.4235672951],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0980117247,0.1828771234,0.0237991232,-0.1208883002],"action_prob":0.8444953561,"action_logp":-0.1690160334,"action_dist_inputs":[0.8441036344,-0.8479600549],"value_targets":69.4540939331} +{"eps_id":623467292,"obs":[0.0980117247,0.1828771234,0.0237991232,-0.1208883002],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1016692668,-0.0125775691,0.0213813558,0.1792069972],"action_prob":0.5571205616,"action_logp":-0.5849735737,"action_dist_inputs":[0.1143434048,-0.1151408181],"value_targets":69.1455535889} +{"eps_id":623467292,"obs":[0.1016692668,-0.0125775691,0.0213813558,0.1792069972],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1014177129,0.1822319925,0.0249654967,-0.1066549048],"action_prob":0.8083190918,"action_logp":-0.212798357,"action_dist_inputs":[-0.7181783319,0.7209464312],"value_targets":68.8338928223} +{"eps_id":623467292,"obs":[0.1014177129,0.1822319925,0.0249654967,-0.1066549048],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1050623581,0.3769874573,0.0228323992,-0.3913579583],"action_prob":0.470431447,"action_logp":-0.7541050315,"action_dist_inputs":[0.058891546,-0.059520755],"value_targets":68.5190811157} 
+{"eps_id":623467292,"obs":[0.1050623581,0.3769874573,0.0228323992,-0.3913579583],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1126021072,0.1815490276,0.0150052393,-0.0915645957],"action_prob":0.8321146965,"action_logp":-0.1837850064,"action_dist_inputs":[0.7985511422,-0.8021377325],"value_targets":68.2010955811} +{"eps_id":623467292,"obs":[0.1126021072,0.1815490276,0.0150052393,-0.0915645957],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1162330881,-0.0137847522,0.0131739471,0.2058144659],"action_prob":0.5275577903,"action_logp":-0.6394968629,"action_dist_inputs":[0.0549380891,-0.0554049462],"value_targets":67.8798904419} +{"eps_id":623467292,"obs":[0.1162330881,-0.0137847522,0.0131739471,0.2058144659],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1159573868,0.1811463535,0.0172902364,-0.0826838017],"action_prob":0.8198288083,"action_logp":-0.1986597329,"action_dist_inputs":[-0.7560557127,0.7591325641],"value_targets":67.5554504395} +{"eps_id":623467292,"obs":[0.1159573868,0.1811463535,0.0172902364,-0.0826838017],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.119580321,0.3760162294,0.0156365596,-0.369861871],"action_prob":0.4933464527,"action_logp":-0.7065436244,"action_dist_inputs":[0.0131410696,-0.0134747932],"value_targets":67.227722168} +{"eps_id":623467292,"obs":[0.119580321,0.3760162294,0.0156365596,-0.369861871],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1271006465,0.1806756556,0.0082393233,-0.0722898468],"action_prob":0.8237546086,"action_logp":-0.1938825846,"action_dist_inputs":[0.7693158984,-0.7726798058],"value_targets":66.8966903687} 
+{"eps_id":623467292,"obs":[0.1271006465,0.1806756556,0.0082393233,-0.0722898468],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1307141483,-0.0145634403,0.006793526,0.2229811996],"action_prob":0.5108183622,"action_logp":-0.6717411876,"action_dist_inputs":[0.0215371139,-0.021743007],"value_targets":66.5623168945} +{"eps_id":623467292,"obs":[0.1307141483,-0.0145634403,0.006793526,0.2229811996],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1304228902,0.1804607511,0.0112531502,-0.067551069],"action_prob":0.8255517483,"action_logp":-0.1917033345,"action_dist_inputs":[-0.775557816,0.7788660526],"value_targets":66.2245635986} +{"eps_id":623467292,"obs":[0.1304228902,0.1804607511,0.0112531502,-0.067551069],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1340321004,-0.0148207089,0.0099021289,0.2286609411],"action_prob":0.4955345988,"action_logp":-0.7021180987,"action_dist_inputs":[-0.0089827785,0.0088792201],"value_targets":65.883392334} +{"eps_id":623467292,"obs":[0.1340321004,-0.0148207089,0.0099021289,0.2286609411],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1337356865,0.1801583469,0.0144753475,-0.0608821288],"action_prob":0.8324477077,"action_logp":-0.1833848804,"action_dist_inputs":[-0.7998268008,0.8032483459],"value_targets":65.5387802124} +{"eps_id":623467292,"obs":[0.1337356865,0.1801583469,0.0144753475,-0.0608821288],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1373388618,0.375069797,0.0132577047,-0.348963052],"action_prob":0.5235744119,"action_logp":-0.6470760703,"action_dist_inputs":[-0.0471742116,0.0471935421],"value_targets":65.1906890869} 
+{"eps_id":623467292,"obs":[0.1373388618,0.375069797,0.0132577047,-0.348963052],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1448402554,0.1797618121,0.0062784436,-0.0521291941],"action_prob":0.8097207546,"action_logp":-0.2110658586,"action_dist_inputs":[0.7225610614,-0.725635469],"value_targets":64.8390808105} +{"eps_id":623467292,"obs":[0.1448402554,0.1797618121,0.0062784436,-0.0521291941],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1484354883,0.3747931719,0.0052358597,-0.3428246379],"action_prob":0.5185021162,"action_logp":-0.6568111181,"action_dist_inputs":[-0.0369522162,0.0370901488],"value_targets":64.4839172363} +{"eps_id":623467292,"obs":[0.1484354883,0.3747931719,0.0052358597,-0.3428246379],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1559313536,0.1795971394,-0.0016206328,-0.0484952331],"action_prob":0.813444376,"action_logp":-0.2064777017,"action_dist_inputs":[0.7347814441,-0.7377669215],"value_targets":64.1251678467} +{"eps_id":623467292,"obs":[0.1559313536,0.1795971394,-0.0016206328,-0.0484952331],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1595232934,0.3747422993,-0.0025905375,-0.3416890502],"action_prob":0.5051685572,"action_logp":-0.6828631759,"action_dist_inputs":[-0.0102315601,0.0104433401],"value_targets":63.7627983093} +{"eps_id":623467292,"obs":[0.1595232934,0.3747422993,-0.0025905375,-0.3416890502],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1670181453,0.1796572804,-0.0094243186,-0.0498241447],"action_prob":0.8204520941,"action_logp":-0.1978997886,"action_dist_inputs":[0.7582421303,-0.7611710429],"value_targets":63.3967666626} 
+{"eps_id":623467292,"obs":[0.1670181453,0.1796572804,-0.0094243186,-0.0498241447],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1706112772,0.3749130964,-0.0104208011,-0.3454655707],"action_prob":0.4834413826,"action_logp":-0.7268252373,"action_dist_inputs":[0.0332507491,-0.0330080949],"value_targets":63.0270347595} +{"eps_id":623467292,"obs":[0.1706112772,0.3749130964,-0.0104208011,-0.3454655707],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1781095415,0.179940924,-0.0173301119,-0.0560868345],"action_prob":0.8302463889,"action_logp":-0.1860327423,"action_dist_inputs":[0.7922336459,-0.7951409221],"value_targets":62.6535720825} +{"eps_id":623467292,"obs":[0.1781095415,0.179940924,-0.0173301119,-0.0560868345],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1817083657,-0.014928312,-0.018451849,0.2310783267],"action_prob":0.5467741489,"action_logp":-0.6037194133,"action_dist_inputs":[0.0939388126,-0.0937064514],"value_targets":62.2763366699} +{"eps_id":623467292,"obs":[0.1817083657,-0.014928312,-0.018451849,0.2310783267],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1814097911,0.1804523766,-0.0138302827,-0.06736736],"action_prob":0.8025554419,"action_logp":-0.2199543417,"action_dist_inputs":[-0.6993734837,0.7029696107],"value_targets":61.8952865601} +{"eps_id":623467292,"obs":[0.1814097911,0.1804523766,-0.0138302827,-0.06736736],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1850188524,-0.0144685851,-0.0151776299,0.2209201455],"action_prob":0.5581349134,"action_logp":-0.5831545591,"action_dist_inputs":[0.1168961227,-0.1166999266],"value_targets":61.5103912354} 
+{"eps_id":623467292,"obs":[0.1850188524,-0.0144685851,-0.0151776299,0.2209201455],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1847294718,0.1808669865,-0.010759227,-0.0765115097],"action_prob":0.7975594401,"action_logp":-0.2261989266,"action_dist_inputs":[-0.683773458,0.6873365045],"value_targets":61.1216087341} +{"eps_id":623467292,"obs":[0.1847294718,0.1808669865,-0.010759227,-0.0765115097],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1883468181,0.3761415184,-0.012289457,-0.3725695014],"action_prob":0.4332637489,"action_logp":-0.8364086151,"action_dist_inputs":[0.1343616545,-0.1341856122],"value_targets":60.7288970947} +{"eps_id":623467292,"obs":[0.1883468181,0.3761415184,-0.012289457,-0.3725695014],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1958696395,0.1811962873,-0.019740846,-0.0837867483],"action_prob":0.8469002247,"action_logp":-0.1661723554,"action_dist_inputs":[0.8537765145,-0.8567166328],"value_targets":60.3322181702} +{"eps_id":623467292,"obs":[0.1958696395,0.1811962873,-0.019740846,-0.0837867483],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1994935721,-0.0136372093,-0.0214165822,0.2026031166],"action_prob":0.5993605256,"action_logp":-0.5118919611,"action_dist_inputs":[0.2014797926,-0.2013216913],"value_targets":59.9315338135} +{"eps_id":623467292,"obs":[0.1994935721,-0.0136372093,-0.0214165822,0.2026031166],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1992208213,0.1817843765,-0.0173645187,-0.0967580304],"action_prob":0.7705518603,"action_logp":-0.26064834,"action_dist_inputs":[-0.603979528,0.6074501872],"value_targets":59.526802063} 
+{"eps_id":623467292,"obs":[0.1992208213,0.1817843765,-0.0173645187,-0.0967580304],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2028565109,0.3771508336,-0.0192996804,-0.3948684633],"action_prob":0.38506338,"action_logp":-0.9543473721,"action_dist_inputs":[0.2341081798,-0.2340031564],"value_targets":59.117980957} +{"eps_id":623467292,"obs":[0.2028565109,0.3771508336,-0.0192996804,-0.3948684633],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2103995234,0.1823079884,-0.0271970499,-0.1083323583],"action_prob":0.861790657,"action_logp":-0.1487429142,"action_dist_inputs":[0.9136090279,-0.9166335464],"value_targets":58.7050323486} +{"eps_id":623467292,"obs":[0.2103995234,0.1823079884,-0.0271970499,-0.1083323583],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2140456885,0.3778088987,-0.0293636974,-0.4094702303],"action_prob":0.3472870588,"action_logp":-1.0576035976,"action_dist_inputs":[0.3155192733,-0.3154664636],"value_targets":58.2879104614} +{"eps_id":623467292,"obs":[0.2140456885,0.3778088987,-0.0293636974,-0.4094702303],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2216018736,0.1831153035,-0.0375531018,-0.1261874139],"action_prob":0.8728835583,"action_logp":-0.1359531432,"action_dist_inputs":[0.961787343,-0.9649109244],"value_targets":57.8665771484} +{"eps_id":623467292,"obs":[0.2216018736,0.1831153035,-0.0375531018,-0.1261874139],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2252641767,-0.011449107,-0.0400768481,0.1544154286],"action_prob":0.6959097981,"action_logp":-0.3625352383,"action_dist_inputs":[0.4139226079,-0.4139730036],"value_targets":57.4409866333} 
+{"eps_id":623467292,"obs":[0.2252641767,-0.011449107,-0.0400768481,0.1544154286],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2250351906,0.1842230707,-0.0369885415,-0.1506366134],"action_prob":0.6753585339,"action_logp":-0.3925115764,"action_dist_inputs":[-0.3646926284,0.3678297997],"value_targets":57.0110969543} +{"eps_id":623467292,"obs":[0.2250351906,0.1842230707,-0.0369885415,-0.1506366134],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2287196517,-0.0103502404,-0.0400012732,0.1301515847],"action_prob":0.7241145372,"action_logp":-0.3228057325,"action_dist_inputs":[0.482381016,-0.4825826585],"value_targets":56.5768661499} +{"eps_id":623467292,"obs":[0.2287196517,-0.0103502404,-0.0400012732,0.1301515847],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2285126448,0.1853212118,-0.0373982415,-0.1748780608],"action_prob":0.6384331584,"action_logp":-0.4487383068,"action_dist_inputs":[-0.2827988863,0.2857712805],"value_targets":56.1382484436} +{"eps_id":623467292,"obs":[0.2285126448,0.1853212118,-0.0373982415,-0.1748780608],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2322190702,0.3809579015,-0.040895801,-0.4791203737],"action_prob":0.249742195,"action_logp":-1.3873261213,"action_dist_inputs":[0.5498136878,-0.5501739979],"value_targets":55.6952018738} +{"eps_id":623467292,"obs":[0.2322190702,0.3809579015,-0.040895801,-0.4791203737],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2398382276,0.1864364296,-0.0504782088,-0.1996021718],"action_prob":0.8962205052,"action_logp":-0.1095687747,"action_dist_inputs":[1.0761373043,-1.0797814131],"value_targets":55.2476768494} 
+{"eps_id":623467292,"obs":[0.2398382276,0.1864364296,-0.0504782088,-0.1996021718],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2435669601,-0.00792858,-0.0544702522,0.0767396837],"action_prob":0.7874931693,"action_logp":-0.2389005721,"action_dist_inputs":[0.6546621323,-0.6552184224],"value_targets":54.7956352234} +{"eps_id":623467292,"obs":[0.2435669601,-0.00792858,-0.0544702522,0.0767396837],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2434083819,-0.2022290677,-0.0529354587,0.3517518044],"action_prob":0.4835340381,"action_logp":-0.7266335487,"action_dist_inputs":[-0.0316509828,0.0342365988],"value_targets":54.3390235901} +{"eps_id":623467292,"obs":[0.2434083819,-0.2022290677,-0.0529354587,0.3517518044],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2393638045,-0.006395861,-0.0459004231,0.0428577177],"action_prob":0.8250712156,"action_logp":-0.1922855675,"action_dist_inputs":[-0.7728664875,0.7782244682],"value_targets":53.8778038025} +{"eps_id":623467292,"obs":[0.2393638045,-0.006395861,-0.0459004231,0.0428577177],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2392358929,-0.2008305937,-0.045043271,0.320712626],"action_prob":0.5207251906,"action_logp":-0.6525328159,"action_dist_inputs":[0.0426455028,-0.0403029174],"value_targets":53.4119224548} +{"eps_id":623467292,"obs":[0.2392358929,-0.2008305937,-0.045043271,0.320712626],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.23521927,-0.0050970786,-0.0386290178,0.0141719272],"action_prob":0.808732748,"action_logp":-0.2122867554,"action_dist_inputs":[-0.7183048129,0.7234922647],"value_targets":52.9413375854} 
+{"eps_id":623467292,"obs":[0.23521927,-0.0050970786,-0.0386290178,0.0141719272],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2351173311,-0.199644357,-0.0383455791,0.294421047],"action_prob":0.551586628,"action_logp":-0.5949563384,"action_dist_inputs":[0.1046059579,-0.1024775133],"value_targets":52.4659957886} +{"eps_id":623467292,"obs":[0.2351173311,-0.199644357,-0.0383455791,0.294421047],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2311244458,-0.003997298,-0.032457158,-0.0101046581],"action_prob":0.7933126092,"action_logp":-0.2315379381,"action_dist_inputs":[-0.6699867249,0.6750230193],"value_targets":51.9858551025} +{"eps_id":623467292,"obs":[0.2311244458,-0.003997298,-0.032457158,-0.0101046581],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2310445011,0.1915747374,-0.0326592512,-0.3128489256],"action_prob":0.4230489433,"action_logp":-0.8602674007,"action_dist_inputs":[0.1561042517,-0.1541651934],"value_targets":51.5008621216} +{"eps_id":623467292,"obs":[0.2310445011,0.1915747374,-0.0326592512,-0.3128489256],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2348759919,-0.0030670988,-0.0389162302,-0.0306419842],"action_prob":0.8417572975,"action_logp":-0.1722635627,"action_dist_inputs":[0.8349482417,-0.8364135027],"value_targets":51.0109710693} +{"eps_id":623467292,"obs":[0.2348759919,-0.0030670988,-0.0389162302,-0.0306419842],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2348146588,-0.1976099908,-0.0395290703,0.2495129257],"action_prob":0.6209499836,"action_logp":-0.4765047729,"action_dist_inputs":[0.247669071,-0.2459133267],"value_targets":50.5161323547} 
+{"eps_id":623467292,"obs":[0.2348146588,-0.1976099908,-0.0395290703,0.2495129257],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2308624536,-0.3921457827,-0.0345388092,0.5294701457],"action_prob":0.2579189837,"action_logp":-1.3551098108,"action_dist_inputs":[-0.5260289311,0.5307839513],"value_targets":50.0162963867} +{"eps_id":623467292,"obs":[0.2308624536,-0.3921457827,-0.0345388092,0.5294701457],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2230195403,-0.1965554059,-0.023949407,0.2261071652],"action_prob":0.9068453908,"action_logp":-0.0977833346,"action_dist_inputs":[-1.1343929768,1.1413174868],"value_targets":49.5114097595} +{"eps_id":623467292,"obs":[0.2230195403,-0.1965554059,-0.023949407,0.2261071652],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2190884352,-0.0010995152,-0.0194272641,-0.0740330443],"action_prob":0.7407488227,"action_logp":-0.3000936508,"action_dist_inputs":[-0.5226322412,0.5272321105],"value_targets":49.0014266968} +{"eps_id":623467292,"obs":[0.2190884352,-0.0010995152,-0.0194272641,-0.0740330443],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2190664411,0.1942954808,-0.0209079254,-0.3727814853],"action_prob":0.3547281623,"action_logp":-1.0364035368,"action_dist_inputs":[0.2998535633,-0.298466444],"value_targets":48.486289978} +{"eps_id":623467292,"obs":[0.2190664411,0.1942954808,-0.0209079254,-0.3727814853],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.222952351,-0.0005233102,-0.0283635538,-0.0867636576],"action_prob":0.859996438,"action_logp":-0.1508270502,"action_dist_inputs":[0.9066256881,-0.908634603],"value_targets":47.9659461975} 
+{"eps_id":623467292,"obs":[0.222952351,-0.0005233102,-0.0283635538,-0.0867636576],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2229418904,0.194993481,-0.0300988276,-0.3882586658],"action_prob":0.3226138949,"action_logp":-1.1312990189,"action_dist_inputs":[0.3715109825,-0.3702741265],"value_targets":47.4403495789} +{"eps_id":623467292,"obs":[0.2229418904,0.194993481,-0.0300988276,-0.3882586658],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2268417478,0.0003114081,-0.0378639996,-0.1052155271],"action_prob":0.8691294789,"action_logp":-0.1402631998,"action_dist_inputs":[0.9455315471,-0.9477515817],"value_targets":46.9094467163} +{"eps_id":623467292,"obs":[0.2268417478,0.0003114081,-0.0378639996,-0.1052155271],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2268479764,-0.1942480505,-0.0399683118,0.1752851158],"action_prob":0.7131863236,"action_logp":-0.3380125761,"action_dist_inputs":[0.4559649825,-0.4549447894],"value_targets":46.3731765747} +{"eps_id":623467292,"obs":[0.2268479764,-0.1942480505,-0.0399683118,0.1752851158],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2229630202,0.0014224426,-0.0364626087,-0.1297337711],"action_prob":0.6378081441,"action_logp":-0.4497177899,"action_dist_inputs":[-0.2808297575,0.2850335538],"value_targets":45.8314933777} +{"eps_id":623467292,"obs":[0.2229630202,0.0014224426,-0.0364626087,-0.1297337711],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2229914665,0.1970472336,-0.0390572846,-0.433693558],"action_prob":0.2637252808,"action_logp":-1.3328473568,"action_dist_inputs":[0.5137279034,-0.5129675865],"value_targets":45.2843360901} 
+{"eps_id":623467292,"obs":[0.2229914665,0.1970472336,-0.0390572846,-0.433693558],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2269324213,0.3926997781,-0.0477311537,-0.7384284139],"action_prob":0.1160281897,"action_logp":-2.153922081,"action_dist_inputs":[1.0139172077,-1.0166748762],"value_targets":44.7316513062} +{"eps_id":623467292,"obs":[0.2269324213,0.3926997781,-0.0477311537,-0.7384284139],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2347864062,0.1982683092,-0.0624997243,-0.4611410499],"action_prob":0.9250237942,"action_logp":-0.0779358074,"action_dist_inputs":[1.2533626556,-1.2592869997],"value_targets":44.1733856201} +{"eps_id":623467292,"obs":[0.2347864062,0.1982683092,-0.0624997243,-0.4611410499],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.238751784,0.0040828395,-0.0717225447,-0.1887945086],"action_prob":0.8946123123,"action_logp":-0.1113648117,"action_dist_inputs":[1.0677762032,-1.0709685087],"value_targets":43.6094818115} +{"eps_id":623467292,"obs":[0.238751784,0.0040828395,-0.0717225447,-0.1887945086],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2388334274,-0.1899436116,-0.0754984319,0.0804285631],"action_prob":0.8125636578,"action_logp":-0.2075610459,"action_dist_inputs":[0.7333480716,-0.7334067225],"value_targets":43.0398788452} +{"eps_id":623467292,"obs":[0.2388334274,-0.1899436116,-0.0754984319,0.0804285631],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2350345552,0.0061748675,-0.0738898665,-0.2350866199],"action_prob":0.413539052,"action_logp":-0.8830033541,"action_dist_inputs":[0.1762984693,-0.1730558127],"value_targets":42.4645233154} 
+{"eps_id":623467292,"obs":[0.2350345552,0.0061748675,-0.0738898665,-0.2350866199],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.235158056,0.202270478,-0.0785915926,-0.5501312613],"action_prob":0.1645357162,"action_logp":-1.8046276569,"action_dist_inputs":[0.8121201992,-0.8127399087],"value_targets":41.8833580017} +{"eps_id":623467292,"obs":[0.235158056,0.202270478,-0.0785915926,-0.5501312613],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.239203468,0.0083352979,-0.0895942226,-0.2832086086],"action_prob":0.9074441791,"action_logp":-0.0971232504,"action_dist_inputs":[1.1393038034,-1.1435160637],"value_targets":41.2963218689} +{"eps_id":623467292,"obs":[0.239203468,0.0083352979,-0.0895942226,-0.2832086086],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2393701673,-0.1854021251,-0.0952583924,-0.0200735349],"action_prob":0.8582008481,"action_logp":-0.1529170871,"action_dist_inputs":[0.8995713592,-0.90085572],"value_targets":40.7033538818} +{"eps_id":623467292,"obs":[0.2393701673,-0.1854021251,-0.0952583924,-0.0200735349],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2356621325,-0.3790380359,-0.0956598669,0.241099894],"action_prob":0.7291559577,"action_logp":-0.3158676624,"action_dist_inputs":[0.4961287975,-0.4942154288],"value_targets":40.1044006348} +{"eps_id":623467292,"obs":[0.2356621325,-0.3790380359,-0.0956598669,0.241099894],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2280813754,-0.182689026,-0.0908378661,-0.0801582187],"action_prob":0.5461918712,"action_logp":-0.6047849655,"action_dist_inputs":[-0.0900558084,0.0952400118],"value_targets":39.4993934631} 
+{"eps_id":623467292,"obs":[0.2280813754,-0.182689026,-0.0908378661,-0.0801582187],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2244275957,-0.3763993084,-0.0924410298,0.1825396121],"action_prob":0.769792676,"action_logp":-0.261634022,"action_dist_inputs":[0.604133606,-0.6030074954],"value_targets":38.8882751465} +{"eps_id":623467292,"obs":[0.2244275957,-0.3763993084,-0.0924410298,0.1825396121],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2168996036,-0.1800845563,-0.0887902379,-0.137814194],"action_prob":0.4675737023,"action_logp":-0.7601982951,"action_dist_inputs":[0.0672104657,-0.0626771301],"value_targets":38.2709846497} +{"eps_id":623467292,"obs":[0.2168996036,-0.1800845563,-0.0887902379,-0.137814194],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2132979184,-0.3738299906,-0.0915465206,0.1255890727],"action_prob":0.8010248542,"action_logp":-0.2218632847,"action_dist_inputs":[0.6965261102,-0.6961861849],"value_targets":37.6474609375} +{"eps_id":623467292,"obs":[0.2132979184,-0.3738299906,-0.0915465206,0.1255890727],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2058213204,-0.5675293207,-0.0890347436,0.3880442977],"action_prob":0.6054160595,"action_logp":-0.5018393993,"action_dist_inputs":[0.2159231901,-0.2121606171],"value_targets":37.0176353455} +{"eps_id":623467292,"obs":[0.2058213204,-0.5675293207,-0.0890347436,0.3880442977],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1944707334,-0.7612820864,-0.0812738538,0.6513806581],"action_prob":0.3124593794,"action_logp":-1.1632808447,"action_dist_inputs":[-0.3909544349,0.3976920247],"value_targets":36.3814506531} 
+{"eps_id":623467292,"obs":[0.1944707334,-0.7612820864,-0.0812738538,0.6513806581],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1792450845,-0.5651278496,-0.0682462454,0.3342523277],"action_prob":0.8702932,"action_logp":-0.1389251351,"action_dist_inputs":[-0.9476340413,0.9559192657],"value_targets":35.7388381958} +{"eps_id":623467292,"obs":[0.1792450845,-0.5651278496,-0.0682462454,0.3342523277],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.167942524,-0.3691043556,-0.061561197,0.0208526962],"action_prob":0.6557988524,"action_logp":-0.4219011366,"action_dist_inputs":[-0.3191636801,0.3254644275],"value_targets":35.0897369385} +{"eps_id":623467292,"obs":[0.167942524,-0.3691043556,-0.061561197,0.0208526962],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.160560444,-0.5632919073,-0.0611441433,0.2934946716],"action_prob":0.6775663495,"action_logp":-0.3892478049,"action_dist_inputs":[0.3724666536,-0.3701434731],"value_targets":34.4340782166} +{"eps_id":623467292,"obs":[0.160560444,-0.5632919073,-0.0611441433,0.2934946716],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1492945999,-0.3673538864,-0.0552742481,-0.0178282559],"action_prob":0.6137977242,"action_logp":-0.4880898297,"action_dist_inputs":[-0.2287279367,0.23457627],"value_targets":33.7717971802} +{"eps_id":623467292,"obs":[0.1492945999,-0.3673538864,-0.0552742481,-0.0178282559],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1419475228,-0.1714846045,-0.0556308143,-0.3274258375],"action_prob":0.2940212488,"action_logp":-1.2241032124,"action_dist_inputs":[0.4388089478,-0.4371240437],"value_targets":33.1028251648} 
+{"eps_id":623467292,"obs":[0.1419475228,-0.1714846045,-0.0556308143,-0.3274258375],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1385178268,-0.3657722473,-0.0621793307,-0.0527922586],"action_prob":0.8546273708,"action_logp":-0.1570896953,"action_dist_inputs":[0.8846258521,-0.8867397308],"value_targets":32.4270935059} +{"eps_id":623467292,"obs":[0.1385178268,-0.3657722473,-0.0621793307,-0.0527922586],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1312023848,-0.5599500537,-0.0632351786,0.2196426392],"action_prob":0.7406100035,"action_logp":-0.3002811074,"action_dist_inputs":[0.5250682831,-0.5240731835],"value_targets":31.7445411682} +{"eps_id":623467292,"obs":[0.1312023848,-0.5599500537,-0.0632351786,0.2196426392],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1200033873,-0.3639838696,-0.0588423237,-0.0922978818],"action_prob":0.507099092,"action_logp":-0.6790488958,"action_dist_inputs":[-0.0118402261,0.016557917],"value_targets":31.0550918579} +{"eps_id":623467292,"obs":[0.1200033873,-0.3639838696,-0.0588423237,-0.0922978818],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1127237082,-0.5582152009,-0.0606882796,0.1812552363],"action_prob":0.7641223669,"action_logp":-0.2690273225,"action_dist_inputs":[0.5878697634,-0.5875450373],"value_targets":30.3586788177} +{"eps_id":623467292,"obs":[0.1127237082,-0.5582152009,-0.0606882796,0.1812552363],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.101559408,-0.3622797132,-0.0570631735,-0.1299383342],"action_prob":0.4609065354,"action_logp":-0.7745599747,"action_dist_inputs":[0.0803780556,-0.0763155669],"value_targets":29.6552295685} 
+{"eps_id":623467292,"obs":[0.101559408,-0.3622797132,-0.0570631735,-0.1299383342],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0943138078,-0.1663887352,-0.0596619435,-0.440064013],"action_prob":0.2156355083,"action_logp":-1.53416574,"action_dist_inputs":[0.6454823017,-0.6458020806],"value_targets":28.9446773529} +{"eps_id":623467292,"obs":[0.0943138078,-0.1663887352,-0.0596619435,-0.440064013],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0909860358,-0.3606178164,-0.0684632212,-0.1667689681],"action_prob":0.8784228563,"action_logp":-0.1296271831,"action_dist_inputs":[0.9869667888,-0.9906123281],"value_targets":28.2269458771} +{"eps_id":623467292,"obs":[0.0909860358,-0.3606178164,-0.0684632212,-0.1667689681],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.08377368,-0.5546963811,-0.0717986003,0.1035543904],"action_prob":0.8079989552,"action_logp":-0.2131945044,"action_dist_inputs":[0.7180247903,-0.719035089],"value_targets":27.5019664764} +{"eps_id":623467292,"obs":[0.08377368,-0.5546963811,-0.0717986003,0.1035543904],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0726797506,-0.358622849,-0.0697275102,-0.2108888179],"action_prob":0.3551630974,"action_logp":-1.0351781845,"action_dist_inputs":[0.2994470596,-0.2969733477],"value_targets":26.7696628571} +{"eps_id":623467292,"obs":[0.0726797506,-0.358622849,-0.0697275102,-0.2108888179],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0655072927,-0.1625767797,-0.0739452913,-0.5247265697],"action_prob":0.1750142574,"action_logp":-1.7428878546,"action_dist_inputs":[0.7744029164,-0.7760958672],"value_targets":26.0299625397} 
+{"eps_id":623467292,"obs":[0.0655072927,-0.1625767797,-0.0739452913,-0.5247265697],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0622557588,-0.3565844893,-0.0844398215,-0.2562304139],"action_prob":0.8912913799,"action_logp":-0.1150838882,"action_dist_inputs":[1.0496397018,-1.0543601513],"value_targets":25.2827911377} +{"eps_id":623467292,"obs":[0.0622557588,-0.3565844893,-0.0844398215,-0.2562304139],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0551240705,-0.5504056811,-0.0895644277,0.0086697629],"action_prob":0.8431787491,"action_logp":-0.170576334,"action_dist_inputs":[0.8398319483,-0.8422404528],"value_targets":24.5280704498} +{"eps_id":623467292,"obs":[0.0551240705,-0.5504056811,-0.0895644277,0.0086697629],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0441159569,-0.3541210294,-0.0893910378,-0.3108738661],"action_prob":0.2614606619,"action_logp":-1.3414714336,"action_dist_inputs":[0.5194755793,-0.518914938],"value_targets":23.7657279968} +{"eps_id":623467292,"obs":[0.0441159569,-0.3541210294,-0.0893910378,-0.3108738661],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0370335355,-0.547863245,-0.0956085101,-0.0476668626],"action_prob":0.8565312028,"action_logp":-0.1548645198,"action_dist_inputs":[0.8918253779,-0.8949475288],"value_targets":22.9956855774} +{"eps_id":623467292,"obs":[0.0370335355,-0.547863245,-0.0956085101,-0.0476668626],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0260762703,-0.7414934635,-0.0965618491,0.2133859098],"action_prob":0.7732492089,"action_logp":-0.2571538985,"action_dist_inputs":[0.6131637096,-0.6135860085],"value_targets":22.2178649902} 
+{"eps_id":623467292,"obs":[0.0260762703,-0.7414934635,-0.0965618491,0.2133859098],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0112464009,-0.9351117611,-0.0922941342,0.4741153121],"action_prob":0.6086108685,"action_logp":-0.4965761602,"action_dist_inputs":[0.2222517431,-0.2192250192],"value_targets":21.4321861267} +{"eps_id":623467292,"obs":[0.0112464009,-0.9351117611,-0.0922941342,0.4741153121],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0074558342,-0.7388159037,-0.082811825,0.1538287252],"action_prob":0.6284353733,"action_logp":-0.4645220935,"action_dist_inputs":[-0.259565413,0.2659449577],"value_targets":20.6385707855} +{"eps_id":623467292,"obs":[-0.0074558342,-0.7388159037,-0.082811825,0.1538287252],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0222321525,-0.542611897,-0.0797352493,-0.1637876183],"action_prob":0.3543245792,"action_logp":-1.0375418663,"action_dist_inputs":[0.3010689616,-0.2990146875],"value_targets":19.8369407654} +{"eps_id":623467292,"obs":[-0.0222321525,-0.542611897,-0.0797352493,-0.1637876183],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0330843888,-0.736507237,-0.0830110013,0.102714017],"action_prob":0.8115412593,"action_logp":-0.2088200748,"action_dist_inputs":[0.7289867997,-0.731069088],"value_targets":19.0272140503} +{"eps_id":623467292,"obs":[-0.0330843888,-0.736507237,-0.0830110013,0.102714017],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0478145331,-0.9303475022,-0.0809567198,0.3680959642],"action_prob":0.6862262487,"action_logp":-0.3765479028,"action_dist_inputs":[0.3918068707,-0.3907281756],"value_targets":18.2093067169} 
+{"eps_id":623467292,"obs":[-0.0478145331,-0.9303475022,-0.0809567198,0.3680959642],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0664214864,-1.1242313385,-0.0735948011,0.6341940165],"action_prob":0.4743695557,"action_logp":-0.7457686067,"action_dist_inputs":[-0.0489566922,0.0536549576],"value_targets":17.3831367493} +{"eps_id":623467292,"obs":[-0.0664214864,-1.1242313385,-0.0735948011,0.6341940165],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0889061093,-1.3182537556,-0.0609109215,0.9028224945],"action_prob":0.251740396,"action_logp":-1.3793568611,"action_dist_inputs":[-0.5408849716,0.5484665632],"value_targets":16.5486240387} +{"eps_id":623467292,"obs":[-0.0889061093,-1.3182537556,-0.0609109215,0.9028224945],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1152711883,-1.1223618984,-0.042854473,0.5916329026],"action_prob":0.8773613572,"action_logp":-0.1308363527,"action_dist_inputs":[-0.9793025255,0.9883742332],"value_targets":15.7056808472} +{"eps_id":623467292,"obs":[-0.1152711883,-1.1223618984,-0.042854473,0.5916329026],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1377184242,-1.31685853,-0.0310218148,0.8705146313],"action_prob":0.2461725771,"action_logp":-1.4017224312,"action_dist_inputs":[-0.5559570193,0.5631736517],"value_targets":14.8542232513} +{"eps_id":623467292,"obs":[-0.1377184242,-1.31685853,-0.0310218148,0.8705146313],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1640556008,-1.1213287115,-0.0136115225,0.5682420135],"action_prob":0.8842411041,"action_logp":-0.1230254993,"action_dist_inputs":[-1.0121809244,1.0210391283],"value_targets":13.9941644669} 
+{"eps_id":623467292,"obs":[-0.1640556008,-1.1213287115,-0.0136115225,0.5682420135],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1864821762,-0.9260184765,-0.0022466825,0.2713021636],"action_prob":0.7747859955,"action_logp":-0.2551684082,"action_dist_inputs":[-0.6142805219,0.6212553978],"value_targets":13.125418663} +{"eps_id":623467292,"obs":[-0.1864821762,-0.9260184765,-0.0022466825,0.2713021636],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2050025463,-0.7308645844,0.0031793609,-0.0220885277],"action_prob":0.553894341,"action_logp":-0.5907813311,"action_dist_inputs":[-0.1064732969,0.1099448204],"value_targets":12.2478981018} +{"eps_id":623467292,"obs":[-0.2050025463,-0.7308645844,0.0031793609,-0.0220885277],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2196198255,-0.9260319471,0.0027375903,0.2715958357],"action_prob":0.6894031763,"action_logp":-0.3719290495,"action_dist_inputs":[0.3982427418,-0.3990878463],"value_targets":11.3615131378} +{"eps_id":623467292,"obs":[-0.2196198255,-0.9260319471,0.0027375903,0.2715958357],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2381404638,-0.7309491634,0.0081695067,-0.0202223975],"action_prob":0.5666710138,"action_logp":-0.5679763556,"action_dist_inputs":[-0.1324962676,0.1357854903],"value_targets":10.4661741257} +{"eps_id":623467292,"obs":[-0.2381404638,-0.7309491634,0.0081695067,-0.0202223975],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2527594566,-0.5359453559,0.0077650589,-0.3103165925],"action_prob":0.3184634149,"action_logp":-1.1442476511,"action_dist_inputs":[0.3799185753,-0.3809236586],"value_targets":9.5617923737} 
+{"eps_id":623467292,"obs":[-0.2527594566,-0.5359453559,0.0077650589,-0.3103165925],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2634783685,-0.3409348726,0.001558727,-0.6005405784],"action_prob":0.1699109226,"action_logp":-1.7724809647,"action_dist_inputs":[0.790923059,-0.7953355908],"value_targets":8.6482753754} +{"eps_id":623467292,"obs":[-0.2634783685,-0.3409348726,0.001558727,-0.6005405784],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2702970505,-0.1458347589,-0.0104520842,-0.8927320838],"action_prob":0.1053971872,"action_logp":-2.2500193119,"action_dist_inputs":[1.0659874678,-1.0726561546],"value_targets":7.7255306244} +{"eps_id":623467292,"obs":[-0.2702970505,-0.1458347589,-0.0104520842,-0.8927320838],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2732137442,-0.3408133984,-0.0283067264,-0.6033530235],"action_prob":0.9224487543,"action_logp":-0.0807234347,"action_dist_inputs":[1.2337889671,-1.2423046827],"value_targets":6.7934651375} +{"eps_id":623467292,"obs":[-0.2732137442,-0.3408133984,-0.0283067264,-0.6033530235],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2800300121,-0.5355282426,-0.0403737873,-0.3197188079],"action_prob":0.9001438618,"action_logp":-0.105200693,"action_dist_inputs":[1.0959368944,-1.1028864384],"value_targets":5.8519849777} +{"eps_id":623467292,"obs":[-0.2800300121,-0.5355282426,-0.0403737873,-0.3197188079],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2907405794,-0.73005265,-0.0467681624,-0.0400366485],"action_prob":0.8528547883,"action_logp":-0.1591659933,"action_dist_inputs":[0.8760673404,-0.8811022639],"value_targets":4.9009947777} 
+{"eps_id":623467292,"obs":[-0.2907405794,-0.73005265,-0.0467681624,-0.0400366485],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3053416312,-0.5342923403,-0.0475688949,-0.3471008837],"action_prob":0.2517757118,"action_logp":-1.379216671,"action_dist_inputs":[0.5434217453,-0.5457425714],"value_targets":3.9403989315} +{"eps_id":623467292,"obs":[-0.3053416312,-0.5342923403,-0.0475688949,-0.3471008837],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3160274923,-0.7287065387,-0.0545109138,-0.0697894394],"action_prob":0.8615980148,"action_logp":-0.1489664465,"action_dist_inputs":[0.9115990996,-0.9170272946],"value_targets":2.970099926} +{"eps_id":623467292,"obs":[-0.3160274923,-0.7287065387,-0.0545109138,-0.0697894394],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3306016028,-0.9230062962,-0.0559067018,0.2052090615],"action_prob":0.7695608139,"action_logp":-0.2619352639,"action_dist_inputs":[0.6014669538,-0.6043663025],"value_targets":1.9900000095} +{"eps_id":623467292,"obs":[-0.3306016028,-0.9230062962,-0.0559067018,0.2052090615],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.3490617275,-1.1172860861,-0.0518025197,0.4797453582],"action_prob":0.5902343988,"action_logp":-0.5272355676,"action_dist_inputs":[0.1827456951,-0.1821885407],"value_targets":1.0} +{"eps_id":1628128689,"obs":[-0.0457435101,0.0292170346,0.0443500988,0.0220946781],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0451591685,0.2236758322,0.0447919928,-0.2562720478],"action_prob":0.6776459813,"action_logp":-0.3891302943,"action_dist_inputs":[-0.3715580702,0.3714165688],"value_targets":86.6020355225} 
+{"eps_id":1628128689,"obs":[-0.0451591685,0.2236758322,0.0447919928,-0.2562720478],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0406856537,0.0279439352,0.0396665521,0.0501957648],"action_prob":0.7042381763,"action_logp":-0.350638628,"action_dist_inputs":[0.4321121871,-0.4354502261],"value_targets":86.4666976929} +{"eps_id":1628128689,"obs":[-0.0406856537,0.0279439352,0.0396665521,0.0501957648],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0401267745,0.2224753201,0.0406704657,-0.2297128737],"action_prob":0.7068279982,"action_logp":-0.3469679058,"action_dist_inputs":[-0.4399451613,0.4400826395],"value_targets":86.3300018311} +{"eps_id":1628128689,"obs":[-0.0401267745,0.2224753201,0.0406704657,-0.2297128737],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0356772654,0.4169932008,0.0360762104,-0.5092945695],"action_prob":0.3256018162,"action_logp":-1.1220800877,"action_dist_inputs":[0.3625103235,-0.3656351864],"value_targets":86.1919174194} +{"eps_id":1628128689,"obs":[-0.0356772654,0.4169932008,0.0360762104,-0.5092945695],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0273374021,0.6115888357,0.0258903168,-0.7903940082],"action_prob":0.116502203,"action_logp":-2.1498451233,"action_dist_inputs":[1.0101794004,-1.0157991648],"value_targets":86.052444458} +{"eps_id":1628128689,"obs":[-0.0273374021,0.6115888357,0.0258903168,-0.7903940082],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0151056265,0.4161210954,0.0100824377,-0.4896797538],"action_prob":0.9376012683,"action_logp":-0.0644304976,"action_dist_inputs":[1.3510843515,-1.3586962223],"value_targets":85.9115600586} 
+{"eps_id":1628128689,"obs":[-0.0151056265,0.4161210954,0.0100824377,-0.4896797538],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0067832042,0.2208583653,0.0002888425,-0.1938363761],"action_prob":0.8909184337,"action_logp":-0.1155024245,"action_dist_inputs":[1.0473155975,-1.0528414249],"value_targets":85.7692489624} +{"eps_id":1628128689,"obs":[-0.0067832042,0.2208583653,0.0002888425,-0.1938363761],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0023660371,0.0257322863,-0.0035878848,0.0989376605],"action_prob":0.7043957114,"action_logp":-0.3504149616,"action_dist_inputs":[0.4327479005,-0.4355707467],"value_targets":85.62550354} +{"eps_id":1628128689,"obs":[-0.0023660371,0.0257322863,-0.0035878848,0.0989376605],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0018513913,-0.1693380624,-0.0016091317,0.390486449],"action_prob":0.3066785634,"action_logp":-1.1819550991,"action_dist_inputs":[-0.4075514674,0.4081420898],"value_targets":85.4803085327} +{"eps_id":1628128689,"obs":[-0.0018513913,-0.1693380624,-0.0016091317,0.390486449],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0052381526,0.0258066915,0.0062005976,0.0972966254],"action_prob":0.8867581487,"action_logp":-0.120182991,"action_dist_inputs":[-1.0271314383,1.0309151411],"value_targets":85.3336486816} +{"eps_id":1628128689,"obs":[-0.0052381526,0.0258066915,0.0062005976,0.0972966254],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0047220187,-0.1694035828,0.00814653,0.3919293582],"action_prob":0.2912259698,"action_logp":-1.2336558104,"action_dist_inputs":[-0.4443987608,0.445038408],"value_targets":85.1855010986} 
+{"eps_id":1628128689,"obs":[-0.0047220187,-0.1694035828,0.00814653,0.3919293582],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0081100902,0.0256018192,0.0159851182,0.101826027],"action_prob":0.8914640546,"action_logp":-0.1148901731,"action_dist_inputs":[-1.0509567261,1.054826498],"value_targets":85.0358581543} +{"eps_id":1628128689,"obs":[-0.0081100902,0.0256018192,0.0159851182,0.101826027],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0075980537,0.2204910815,0.0180216376,-0.1857710928],"action_prob":0.7307191491,"action_logp":-0.3137261271,"action_dist_inputs":[-0.4987615943,0.4995124638],"value_targets":84.8847045898} +{"eps_id":1628128689,"obs":[-0.0075980537,0.2204910815,0.0180216376,-0.1857710928],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0031882322,0.0251159761,0.0143062165,0.1125420555],"action_prob":0.6565135717,"action_logp":-0.4208119214,"action_dist_inputs":[0.3225815296,-0.3252142668],"value_targets":84.7320251465} +{"eps_id":1628128689,"obs":[-0.0031882322,0.0251159761,0.0143062165,0.1125420555],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0026859127,0.2200300395,0.0165570583,-0.175593242],"action_prob":0.7403929234,"action_logp":-0.3005742729,"action_dist_inputs":[-0.5235601068,0.524451673],"value_targets":84.5778045654} +{"eps_id":1628128689,"obs":[-0.0026859127,0.2200300395,0.0165570583,-0.175593242],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.001714688,0.0246750973,0.0130451927,0.1222665608],"action_prob":0.643881619,"action_logp":-0.440240413,"action_dist_inputs":[0.2948690355,-0.2973825336],"value_targets":84.4220275879} 
+{"eps_id":1628128689,"obs":[0.001714688,0.0246750973,0.0130451927,0.1222665608],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.00220819,0.2196077406,0.0154905235,-0.1662723571],"action_prob":0.7492189407,"action_logp":-0.288724035,"action_dist_inputs":[-0.5467122197,0.5477387905],"value_targets":84.2646713257} +{"eps_id":1628128689,"obs":[0.00220819,0.2196077406,0.0154905235,-0.1662723571],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.006600345,0.4145045578,0.012165077,-0.4540284872],"action_prob":0.3685211241,"action_logp":-0.9982572794,"action_dist_inputs":[0.2680848539,-0.2704816759],"value_targets":84.1057281494} +{"eps_id":1628128689,"obs":[0.006600345,0.4145045578,0.012165077,-0.4540284872],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0148904361,0.6094524264,0.0030845073,-0.7428521514],"action_prob":0.1256969869,"action_logp":-2.0738811493,"action_dist_inputs":[0.96721977,-0.972333014],"value_targets":83.9451828003} +{"eps_id":1628128689,"obs":[0.0148904361,0.6094524264,0.0030845073,-0.7428521514],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0270794854,0.4142880142,-0.0117725357,-0.4492001235],"action_prob":0.9357882738,"action_logp":-0.0663660541,"action_dist_inputs":[1.335960269,-1.3432420492],"value_targets":83.7830123901} +{"eps_id":1628128689,"obs":[0.0270794854,0.4142880142,-0.0117725357,-0.4492001235],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0353652462,0.6095744967,-0.020756539,-0.7455706],"action_prob":0.1129634455,"action_logp":-2.1806910038,"action_dist_inputs":[1.0278689861,-1.0329529047],"value_targets":83.6192016602} 
+{"eps_id":1628128689,"obs":[0.0353652462,0.6095744967,-0.020756539,-0.7455706],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0475567356,0.4147450328,-0.0356679484,-0.4594913125],"action_prob":0.9394389987,"action_logp":-0.0624723695,"action_dist_inputs":[1.3671290874,-1.3745038509],"value_targets":83.453742981} +{"eps_id":1628128689,"obs":[0.0475567356,0.4147450328,-0.0356679484,-0.4594913125],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0558516346,0.2201449275,-0.0448577777,-0.178260982],"action_prob":0.9018676877,"action_logp":-0.1032874286,"action_dist_inputs":[1.1064919233,-1.1116598845],"value_targets":83.286605835} +{"eps_id":1628128689,"obs":[0.0558516346,0.2201449275,-0.0448577777,-0.178260982],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0602545328,0.0256926604,-0.048422996,0.0999402404],"action_prob":0.7639594078,"action_logp":-0.2692406476,"action_dist_inputs":[0.5860506892,-0.5884600282],"value_targets":83.1177825928} +{"eps_id":1628128689,"obs":[0.0602545328,0.0256926604,-0.048422996,0.0999402404],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0607683882,0.2214739472,-0.0464241914,-0.2076182514],"action_prob":0.5988256931,"action_logp":-0.5127847195,"action_dist_inputs":[-0.199865073,0.2007095516],"value_targets":82.9472579956} +{"eps_id":1628128689,"obs":[0.0607683882,0.2214739472,-0.0464241914,-0.2076182514],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0651978627,0.0270455182,-0.0505765565,0.0700663626],"action_prob":0.7955265641,"action_logp":-0.2287510484,"action_dist_inputs":[0.6779822707,-0.6805841327],"value_targets":82.7750091553} 
+{"eps_id":1628128689,"obs":[0.0651978627,0.0270455182,-0.0505765565,0.0700663626],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0657387748,-0.1673162282,-0.0491752289,0.3463729918],"action_prob":0.4587254822,"action_logp":-0.7793033123,"action_dist_inputs":[-0.0824317783,0.0830428004],"value_targets":82.601020813} +{"eps_id":1628128689,"obs":[0.0657387748,-0.1673162282,-0.0491752289,0.3463729918],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0623924509,0.0284694694,-0.0422477685,0.0385977887],"action_prob":0.8367539644,"action_logp":-0.1782252342,"action_dist_inputs":[-0.8153170347,0.8189541101],"value_targets":82.4252700806} +{"eps_id":1628128689,"obs":[0.0623924509,0.0284694694,-0.0422477685,0.0385977887],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0629618391,-0.1660219878,-0.0414758138,0.3176575005],"action_prob":0.4949764907,"action_logp":-0.7032449841,"action_dist_inputs":[-0.0098601067,0.0102347229],"value_targets":82.2477493286} +{"eps_id":1628128689,"obs":[0.0629618391,-0.1660219878,-0.0414758138,0.3176575005],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0596414022,0.0296654105,-0.0351226628,0.0121884523],"action_prob":0.8245001435,"action_logp":-0.19297795,"action_dist_inputs":[-0.7718393207,0.7752997279],"value_targets":82.0684280396} +{"eps_id":1628128689,"obs":[0.0596414022,0.0296654105,-0.0351226628,0.0121884523],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0602347106,-0.1649356931,-0.0348788947,0.2935861349],"action_prob":0.5251702666,"action_logp":-0.6440327168,"action_dist_inputs":[0.0504697897,-0.0502964929],"value_targets":81.8873062134} 
+{"eps_id":1628128689,"obs":[0.0602347106,-0.1649356931,-0.0348788947,0.2935861349],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0569359958,0.0306657329,-0.0290071722,-0.0098898634],"action_prob":0.8131427765,"action_logp":-0.206848532,"action_dist_inputs":[-0.7336272597,0.7369349003],"value_targets":81.7043457031} +{"eps_id":1628128689,"obs":[0.0569359958,0.0306657329,-0.0290071722,-0.0098898634],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.057549309,0.2261914164,-0.0292049684,-0.3115819097],"action_prob":0.4502158761,"action_logp":-0.7980281115,"action_dist_inputs":[0.0999002606,-0.0998982713],"value_targets":81.5195465088} +{"eps_id":1628128689,"obs":[0.057549309,0.2261914164,-0.0292049684,-0.3115819097],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0620731376,0.4217170179,-0.0354366079,-0.6133303046],"action_prob":0.1478369683,"action_logp":-1.911645174,"action_dist_inputs":[0.8741773367,-0.8774904609],"value_targets":81.3328704834} +{"eps_id":1628128689,"obs":[0.0620731376,0.4217170179,-0.0354366079,-0.6133303046],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0705074817,0.2271077186,-0.047703214,-0.3320157826],"action_prob":0.9281150103,"action_logp":-0.0745996088,"action_dist_inputs":[1.2759716511,-1.2821166515],"value_targets":81.144317627} +{"eps_id":1628128689,"obs":[0.0705074817,0.2271077186,-0.047703214,-0.3320157826],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0750496313,0.0326960795,-0.054343529,-0.0547495],"action_prob":0.8735136986,"action_logp":-0.1352314651,"action_dist_inputs":[0.9644296765,-0.9679601789],"value_targets":80.9538574219} 
+{"eps_id":1628128689,"obs":[0.0750496313,0.0326960795,-0.054343529,-0.0547495],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0757035539,-0.1616062522,-0.0554385185,0.2203046083],"action_prob":0.6785609126,"action_logp":-0.3877810538,"action_dist_inputs":[0.3733687699,-0.3737974167],"value_targets":80.76146698} +{"eps_id":1628128689,"obs":[0.0757035539,-0.1616062522,-0.0554385185,0.2203046083],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0724714324,-0.3558937311,-0.0510324277,0.4949975312],"action_prob":0.2999658585,"action_logp":-1.2040866613,"action_dist_inputs":[-0.4223498106,0.425110817],"value_targets":80.5671386719} +{"eps_id":1628128689,"obs":[0.0724714324,-0.3558937311,-0.0510324277,0.4949975312],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0653535575,-0.1600906551,-0.0411324762,0.1866782457],"action_prob":0.8900613189,"action_logp":-0.1164649278,"action_dist_inputs":[-1.0430409908,1.0483266115],"value_targets":80.3708496094} +{"eps_id":1628128689,"obs":[0.0653535575,-0.1600906551,-0.0411324762,0.1866782457],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0621517412,0.0355949253,-0.037398912,-0.1186913922],"action_prob":0.6803021431,"action_logp":-0.3852182329,"action_dist_inputs":[-0.3763247132,0.3788360357],"value_targets":80.1725769043} +{"eps_id":1628128689,"obs":[0.0621517412,0.0355949253,-0.037398912,-0.1186913922],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0628636405,-0.1589717865,-0.0397727378,0.1619618684],"action_prob":0.7312480211,"action_logp":-0.3130026162,"action_dist_inputs":[0.4999799132,-0.5009837747],"value_targets":79.9722976685} 
+{"eps_id":1628128689,"obs":[0.0628636405,-0.1589717865,-0.0397727378,0.1619618684],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0596842058,0.0366963148,-0.036533501,-0.1429982632],"action_prob":0.6450493336,"action_logp":-0.4384284616,"action_dist_inputs":[-0.297521472,0.2998265624],"value_targets":79.7699966431} +{"eps_id":1628128689,"obs":[0.0596842058,0.0366963148,-0.036533501,-0.1429982632],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0604181327,-0.1578838974,-0.039393466,0.1379388273],"action_prob":0.7565790415,"action_logp":-0.2789482474,"action_dist_inputs":[0.5663805604,-0.5676341653],"value_targets":79.5656509399} +{"eps_id":1628128689,"obs":[0.0604181327,-0.1578838974,-0.039393466,0.1379388273],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0572604537,-0.3524201214,-0.0366346911,0.4179381132],"action_prob":0.3936288953,"action_logp":-0.9323467016,"action_dist_inputs":[-0.2149965018,0.2170870751],"value_targets":79.3592453003} +{"eps_id":1628128689,"obs":[0.0572604537,-0.3524201214,-0.0366346911,0.4179381132],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0502120517,-0.1567986906,-0.0282759275,0.113934651],"action_prob":0.8647680283,"action_logp":-0.1452939659,"action_dist_inputs":[-0.9253304601,0.9301396012],"value_targets":79.1507568359} +{"eps_id":1628128689,"obs":[0.0502120517,-0.1567986906,-0.0282759275,0.113934651],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0470760763,-0.3515042961,-0.0259972345,0.3975643218],"action_prob":0.4082270563,"action_logp":-0.8959317207,"action_dist_inputs":[-0.1847119778,0.1865874827],"value_targets":78.9401550293} 
+{"eps_id":1628128689,"obs":[0.0470760763,-0.3515042961,-0.0259972345,0.3975643218],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0400459915,-0.1560233682,-0.0180459488,0.0967996493],"action_prob":0.8620647192,"action_logp":-0.1484249085,"action_dist_inputs":[-0.9139364362,0.9186093807],"value_targets":78.727432251} +{"eps_id":1628128689,"obs":[0.0400459915,-0.1560233682,-0.0180459488,0.0967996493],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0369255245,-0.3508820832,-0.0161099564,0.3837349117],"action_prob":0.4132179022,"action_logp":-0.883780241,"action_dist_inputs":[-0.174485147,0.1761933863],"value_targets":78.5125579834} +{"eps_id":1628128689,"obs":[0.0369255245,-0.3508820832,-0.0161099564,0.3837349117],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0299078822,-0.1555351615,-0.0084352577,0.0860164315],"action_prob":0.8623383045,"action_logp":-0.1481076181,"action_dist_inputs":[-0.9151405096,0.9197080731],"value_targets":78.2955093384} +{"eps_id":1628128689,"obs":[0.0299078822,-0.1555351615,-0.0084352577,0.0860164315],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0267971791,-0.3505351841,-0.0067149289,0.3760261238],"action_prob":0.4086735249,"action_logp":-0.8948386908,"action_dist_inputs":[-0.1839311868,0.1855204403],"value_targets":78.0762710571} +{"eps_id":1628128689,"obs":[0.0267971791,-0.3505351841,-0.0067149289,0.3760261238],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0197864752,-0.1553185135,0.0008055939,0.0812335387],"action_prob":0.8655498624,"action_logp":-0.1443902999,"action_dist_inputs":[-0.9288383126,0.9333334565],"value_targets":77.8548202515} 
+{"eps_id":1628128689,"obs":[0.0197864752,-0.1553185135,0.0008055939,0.0812335387],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0166801047,0.0397918858,0.0024302646,-0.2111951113],"action_prob":0.6052923203,"action_logp":-0.5020437837,"action_dist_inputs":[-0.2130237371,0.2145421654],"value_targets":77.6311340332} +{"eps_id":1628128689,"obs":[0.0166801047,0.0397918858,0.0024302646,-0.2111951113],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0174759421,-0.155364722,-0.0017936375,0.0822534412],"action_prob":0.7714922428,"action_logp":-0.2594286501,"action_dist_inputs":[0.6073924303,-0.6093638539],"value_targets":77.4051818848} +{"eps_id":1628128689,"obs":[0.0174759421,-0.155364722,-0.0017936375,0.0822534412],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0143686486,0.0397828892,-0.0001485687,-0.2109948397],"action_prob":0.6011960506,"action_logp":-0.5088341832,"action_dist_inputs":[-0.2044848502,0.2059662938],"value_targets":77.1769561768} +{"eps_id":1628128689,"obs":[0.0143686486,0.0397828892,-0.0001485687,-0.2109948397],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0151643064,-0.1553369313,-0.0043684654,0.0816412196],"action_prob":0.7747705579,"action_logp":-0.2551883757,"action_dist_inputs":[0.6167138219,-0.6187335849],"value_targets":76.9464187622} +{"eps_id":1628128689,"obs":[0.0151643064,-0.1553369313,-0.0043684654,0.0816412196],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.012057567,0.0398473628,-0.0027356411,-0.2124167681],"action_prob":0.5943266749,"action_logp":-0.5203261971,"action_dist_inputs":[-0.1902251244,0.191655755],"value_targets":76.7135543823} 
+{"eps_id":1628128689,"obs":[0.012057567,0.0398473628,-0.0027356411,-0.2124167681],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0128545146,-0.155235365,-0.0069839763,0.0794019625],"action_prob":0.7794829011,"action_logp":-0.2491245121,"action_dist_inputs":[0.6302861571,-0.6323696375],"value_targets":76.4783401489} +{"eps_id":1628128689,"obs":[0.0128545146,-0.155235365,-0.0069839763,0.0794019625],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0097498074,0.0399859995,-0.0053959372,-0.2154762149],"action_prob":0.5844908357,"action_logp":-0.5370141268,"action_dist_inputs":[-0.1699356437,0.171300903],"value_targets":76.2407455444} +{"eps_id":1628128689,"obs":[0.0097498074,0.0399859995,-0.0053959372,-0.2154762149],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0105495276,-0.1550583988,-0.0097054616,0.0754997134],"action_prob":0.7856011391,"action_logp":-0.2413060814,"action_dist_inputs":[0.6482236981,-0.6503875852],"value_targets":76.0007553101} +{"eps_id":1628128689,"obs":[0.0105495276,-0.1550583988,-0.0097054616,0.0754997134],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0074483594,-0.3500398695,-0.0081954673,0.3651047945],"action_prob":0.4285895228,"action_logp":-0.8472556472,"action_dist_inputs":[-0.1431621015,0.1444461495],"value_targets":75.7583389282} +{"eps_id":1628128689,"obs":[0.0074483594,-0.3500398695,-0.0081954673,0.3651047945],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0004475618,-0.1548024267,-0.0008933712,0.0698490068],"action_prob":0.8579099774,"action_logp":-0.1532560736,"action_dist_inputs":[-0.8969011307,0.9011372924],"value_targets":75.5134735107} 
+{"eps_id":1628128689,"obs":[0.0004475618,-0.1548024267,-0.0008933712,0.0698490068],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0026484865,-0.3499115407,0.000503609,0.3622499406],"action_prob":0.4169254303,"action_logp":-0.8748478889,"action_dist_inputs":[-0.1671024561,0.1683052778],"value_targets":75.26612854} +{"eps_id":1628128689,"obs":[-0.0026484865,-0.3499115407,0.000503609,0.3622499406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0096467175,-0.1547967643,0.0077486075,0.0697258562],"action_prob":0.8632657528,"action_logp":-0.147032693,"action_dist_inputs":[-0.9192476273,0.92343539],"value_targets":75.0162963867} +{"eps_id":1628128689,"obs":[-0.0096467175,-0.1547967643,0.0077486075,0.0697258562],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0127426526,0.0402132459,0.0091431253,-0.220502317],"action_prob":0.6037168503,"action_logp":-0.5046499968,"action_dist_inputs":[-0.2099041641,0.2110721469],"value_targets":74.7639312744} +{"eps_id":1628128689,"obs":[-0.0127426526,0.0402132459,0.0091431253,-0.220502317],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0119383875,-0.1550381929,0.0047330786,0.075050652],"action_prob":0.7719073892,"action_logp":-0.2588906884,"action_dist_inputs":[0.6084064245,-0.6107066274],"value_targets":74.5090255737} +{"eps_id":1628128689,"obs":[-0.0119383875,-0.1550381929,0.0047330786,0.075050652],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0150391515,0.0400155857,0.0062340917,-0.2161352187],"action_prob":0.606005609,"action_logp":-0.5008660555,"action_dist_inputs":[-0.2146938741,0.2158585489],"value_targets":74.2515411377} 
+{"eps_id":1628128689,"obs":[-0.0150391515,0.0400155857,0.0062340917,-0.2161352187],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.01423884,0.2350478619,0.0019113872,-0.5068451166],"action_prob":0.2283374071,"action_logp":-1.4769308567,"action_dist_inputs":[0.6077055335,-0.6100173593],"value_targets":73.9914550781} +{"eps_id":1628128689,"obs":[-0.01423884,0.2350478619,0.0019113872,-0.5068451166],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0095378831,0.0398990288,-0.0082255155,-0.2135604769],"action_prob":0.9058454633,"action_logp":-0.0988865793,"action_dist_inputs":[1.1293435097,-1.1345880032],"value_targets":73.7287445068} +{"eps_id":1628128689,"obs":[-0.0095378831,0.0398990288,-0.0082255155,-0.2135604769],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0087399026,-0.1551043689,-0.0124967247,0.0765164346],"action_prob":0.7879093885,"action_logp":-0.2383721918,"action_dist_inputs":[0.6549972296,-0.6573719978],"value_targets":73.4633712769} +{"eps_id":1628128689,"obs":[-0.0087399026,-0.1551043689,-0.0124967247,0.0765164346],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0118419891,0.0401944891,-0.010966396,-0.2200829089],"action_prob":0.5679828525,"action_logp":-0.565664053,"action_dist_inputs":[-0.1362795234,0.137346372],"value_targets":73.1953277588} +{"eps_id":1628128689,"obs":[-0.0118419891,0.0401944891,-0.010966396,-0.2200829089],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0110380994,-0.1547690034,-0.0153680546,0.0691206902],"action_prob":0.7967565656,"action_logp":-0.2272060513,"action_dist_inputs":[0.6818282008,-0.6843166947],"value_targets":72.9245758057} 
+{"eps_id":1628128689,"obs":[-0.0110380994,-0.1547690034,-0.0153680546,0.0691206902],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0141334794,0.0405698679,-0.0139856404,-0.228371039],"action_prob":0.5482252836,"action_logp":-0.6010689735,"action_dist_inputs":[-0.0962752104,0.0972276032],"value_targets":72.6510848999} +{"eps_id":1628128689,"obs":[-0.0141334794,0.0405698679,-0.0139856404,-0.228371039],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0133220823,-0.1543494612,-0.0185530614,0.0598677061],"action_prob":0.8067517877,"action_logp":-0.2147392631,"action_dist_inputs":[0.7132091522,-0.7158313394],"value_targets":72.3748321533} +{"eps_id":1628128689,"obs":[-0.0133220823,-0.1543494612,-0.0185530614,0.0598677061],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0164090712,-0.3492005467,-0.0173557065,0.3466397524],"action_prob":0.4757308066,"action_logp":-0.7429031134,"action_dist_inputs":[-0.0481683984,0.0489848219],"value_targets":72.0957946777} +{"eps_id":1628128689,"obs":[-0.0164090712,-0.3492005467,-0.0173557065,0.3466397524],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0233930834,-0.1538360864,-0.0104229124,0.0485348664],"action_prob":0.8371956944,"action_logp":-0.1776974201,"action_dist_inputs":[-0.816840589,0.8206683993],"value_targets":71.8139266968} +{"eps_id":1628128689,"obs":[-0.0233930834,-0.1538360864,-0.0104229124,0.0485348664],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0264698043,0.0414337628,-0.0094522154,-0.2474182695],"action_prob":0.5243900418,"action_logp":-0.645519495,"action_dist_inputs":[-0.0484793968,0.0491582453],"value_targets":71.5292205811} 
+{"eps_id":1628128689,"obs":[-0.0264698043,0.0414337628,-0.0094522154,-0.2474182695],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0256411284,-0.1535519361,-0.01440058,0.0422682911],"action_prob":0.8162927628,"action_logp":-0.2029822171,"action_dist_inputs":[0.7442734838,-0.7471562028],"value_targets":71.2416381836} +{"eps_id":1628128689,"obs":[-0.0256411284,-0.1535519361,-0.01440058,0.0422682911],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0287121683,-0.3484644592,-0.0135552147,0.3303731382],"action_prob":0.4961770773,"action_logp":-0.700822413,"action_dist_inputs":[-0.0073654535,0.0079264548],"value_targets":70.9511489868} +{"eps_id":1628128689,"obs":[-0.0287121683,-0.3484644592,-0.0135552147,0.3303731382],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0356814563,-0.1531521976,-0.0069477516,0.0334465504],"action_prob":0.8291553855,"action_logp":-0.187347725,"action_dist_inputs":[-0.7880095243,0.7916436195],"value_targets":70.6577301025} +{"eps_id":1628128689,"obs":[-0.0356814563,-0.1531521976,-0.0069477516,0.0334465504],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0387445018,-0.3481738269,-0.0062788208,0.3239293098],"action_prob":0.4931947291,"action_logp":-0.7068511844,"action_dist_inputs":[-0.0133908782,0.0138318045],"value_targets":70.3613433838} +{"eps_id":1628128689,"obs":[-0.0387445018,-0.3481738269,-0.0062788208,0.3239293098],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0457079783,-0.1529630274,0.0001997655,0.02927294],"action_prob":0.8319180012,"action_logp":-0.1840213686,"action_dist_inputs":[-0.7978671193,0.8014150858],"value_targets":70.061958313} 
+{"eps_id":1628128689,"obs":[-0.0457079783,-0.1529630274,0.0001997655,0.02927294],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0487672389,-0.3480878472,0.0007852243,0.3220188916],"action_prob":0.4826502502,"action_logp":-0.7284629941,"action_dist_inputs":[-0.0345324017,0.0348944813],"value_targets":69.7595596313} +{"eps_id":1628128689,"obs":[-0.0487672389,-0.3480878472,0.0007852243,0.3220188916],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0557289943,-0.1529770941,0.0072256019,0.0295836926],"action_prob":0.8375334144,"action_logp":-0.17729415,"action_dist_inputs":[-0.8182494044,0.8217390776],"value_targets":69.4540939331} +{"eps_id":1628128689,"obs":[-0.0557289943,-0.1529770941,0.0072256019,0.0295836926],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.058788538,-0.3482019007,0.0078172758,0.3245375752],"action_prob":0.4644238055,"action_logp":-0.7669577599,"action_dist_inputs":[-0.0711110085,0.0714345723],"value_targets":69.1455535889} +{"eps_id":1628128689,"obs":[-0.058788538,-0.3482019007,0.0078172758,0.3245375752],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0657525733,-0.1531921327,0.0143080279,0.0343301184],"action_prob":0.8456351757,"action_logp":-0.1676672399,"action_dist_inputs":[-0.8486539721,0.8521155119],"value_targets":68.8338928223} +{"eps_id":1628128689,"obs":[-0.0657525733,-0.1531921327,0.0143080279,0.0343301184],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.068816416,0.0417217463,0.0149946297,-0.2538043559],"action_prob":0.5616243482,"action_logp":-0.5769220591,"action_dist_inputs":[-0.1237162352,0.1240408272],"value_targets":68.5190811157} 
+{"eps_id":1628128689,"obs":[-0.068816416,0.0417217463,0.0149946297,-0.2538043559],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0679819807,-0.153611064,0.0099185426,0.0435701758],"action_prob":0.7947039604,"action_logp":-0.2297856212,"action_dist_inputs":[0.6751990318,-0.6783176661],"value_targets":68.2010955811} +{"eps_id":1628128689,"obs":[-0.0679819807,-0.153611064,0.0099185426,0.0435701758],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0710542053,-0.3488738239,0.0107899467,0.3393659294],"action_prob":0.4324211776,"action_logp":-0.8383551836,"action_dist_inputs":[-0.13581644,0.1361632198],"value_targets":67.8798904419} +{"eps_id":1628128689,"obs":[-0.0710542053,-0.3488738239,0.0107899467,0.3393659294],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0780316815,-0.1539070606,0.0175772645,0.0501049682],"action_prob":0.8566069603,"action_logp":-0.1547760963,"action_dist_inputs":[-0.8919619918,0.8954278827],"value_targets":67.5554504395} +{"eps_id":1628128689,"obs":[-0.0780316815,-0.1539070606,0.0175772645,0.0501049682],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0811098218,-0.3492765725,0.0185793638,0.3482814431],"action_prob":0.4026652277,"action_logp":-0.9096497297,"action_dist_inputs":[-0.197003752,0.197368294],"value_targets":67.227722168} +{"eps_id":1628128689,"obs":[-0.0811098218,-0.3492765725,0.0185793638,0.3482814431],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0880953521,-0.1544237435,0.0255449936,0.0615146458],"action_prob":0.8667724133,"action_logp":-0.1429788172,"action_dist_inputs":[-0.9346117377,0.9381060004],"value_targets":66.8966903687} 
+{"eps_id":1628128689,"obs":[-0.0880953521,-0.1544237435,0.0255449936,0.0615146458],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0911838263,-0.349902451,0.0267752856,0.362146467],"action_prob":0.3655105233,"action_logp":-1.0064601898,"action_dist_inputs":[-0.2755510211,0.2759745121],"value_targets":66.5623168945} +{"eps_id":1628128689,"obs":[-0.0911838263,-0.349902451,0.0267752856,0.362146467],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0981818736,-0.1551711112,0.0340182148,0.0780251324],"action_prob":0.8779219389,"action_logp":-0.1301976293,"action_dist_inputs":[-0.984665215,0.9882318974],"value_targets":66.2245635986} +{"eps_id":1628128689,"obs":[-0.0981818736,-0.1551711112,0.0340182148,0.0780251324],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1012853011,-0.3507637978,0.0355787165,0.3812438548],"action_prob":0.3223954439,"action_logp":-1.131976366,"action_dist_inputs":[-0.371129781,0.3716551065],"value_targets":65.883392334} +{"eps_id":1628128689,"obs":[-0.1012853011,-0.3507637978,0.0355787165,0.3812438548],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1083005741,-0.1561646461,0.043203596,0.0999877825],"action_prob":0.8893361092,"action_logp":-0.1172800064,"action_dist_inputs":[-1.0401426554,1.0438354015],"value_targets":65.5387802124} +{"eps_id":1628128689,"obs":[-0.1083005741,-0.1561646461,0.043203596,0.0999877825],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.111423865,0.0383123532,0.0452033505,-0.178757593],"action_prob":0.7241312861,"action_logp":-0.3227826059,"action_dist_inputs":[-0.4821861386,0.48286134],"value_targets":65.1906890869} 
+{"eps_id":1628128689,"obs":[-0.111423865,0.0383123532,0.0452033505,-0.178757593],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1106576249,-0.1574263424,0.0416281968,0.1278357804],"action_prob":0.6529519558,"action_logp":-0.4262517691,"action_dist_inputs":[0.3147258759,-0.317314446],"value_targets":64.8390808105} +{"eps_id":1628128689,"obs":[-0.1106576249,-0.1574263424,0.0416281968,0.1278357804],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1138061509,-0.3531191647,0.0441849157,0.4333558381],"action_prob":0.24597238,"action_logp":-1.4025360346,"action_dist_inputs":[-0.5596719384,0.5605377555],"value_targets":64.4839172363} +{"eps_id":1628128689,"obs":[-0.1138061509,-0.3531191647,0.0441849157,0.4333558381],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1208685338,-0.1586497277,0.0528520308,0.154922545],"action_prob":0.9062772989,"action_logp":-0.0984099284,"action_dist_inputs":[-1.1324671507,1.1365376711],"value_targets":64.1251678467} +{"eps_id":1628128689,"obs":[-0.1208685338,-0.1586497277,0.0528520308,0.154922545],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1240415275,0.0356772207,0.0559504814,-0.1206295788],"action_prob":0.7947728634,"action_logp":-0.2296989411,"action_dist_inputs":[-0.6764286757,0.6775100231],"value_targets":63.7627983093} +{"eps_id":1628128689,"obs":[-0.1240415275,0.0356772207,0.0559504814,-0.1206295788],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.123327978,-0.1601998508,0.0535378903,0.189167127],"action_prob":0.5300070643,"action_logp":-0.6348649263,"action_dist_inputs":[0.0590229891,-0.0611497201],"value_targets":63.3967666626} 
+{"eps_id":1628128689,"obs":[-0.123327978,-0.1601998508,0.0535378903,0.189167127],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1265319735,0.0341169015,0.0573212318,-0.0861577615],"action_prob":0.8215489388,"action_logp":-0.1965637505,"action_dist_inputs":[-0.7627607584,0.7641162872],"value_targets":63.0270347595} +{"eps_id":1628128689,"obs":[-0.1265319735,0.0341169015,0.0573212318,-0.0861577615],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1258496344,-0.161777854,0.0555980764,0.2240449786],"action_prob":0.4660874009,"action_logp":-0.7633820772,"action_dist_inputs":[-0.0688525736,0.0670063347],"value_targets":62.6535720825} +{"eps_id":1628128689,"obs":[-0.1258496344,-0.161777854,0.0555980764,0.2240449786],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.129085198,0.0325071998,0.0600789785,-0.0505948812],"action_prob":0.8443165421,"action_logp":-0.1692278236,"action_dist_inputs":[-0.8445224166,0.846180141],"value_targets":62.2763366699} +{"eps_id":1628128689,"obs":[-0.129085198,0.0325071998,0.0600789785,-0.0505948812],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1284350604,-0.1634224653,0.0590670779,0.2604219615],"action_prob":0.4000967443,"action_logp":-0.9160488844,"action_dist_inputs":[-0.2033005208,0.2017615139],"value_targets":61.8952865601} +{"eps_id":1628128689,"obs":[-0.1284350604,-0.1634224653,0.0590670779,0.2604219615],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.131703496,0.0308087245,0.0642755181,-0.0130610988],"action_prob":0.8635819554,"action_logp":-0.1466664672,"action_dist_inputs":[-0.9216829538,0.923681736],"value_targets":61.5103912354} 
+{"eps_id":1628128689,"obs":[-0.131703496,0.0308087245,0.0642755181,-0.0130610988],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.131087333,0.224952817,0.0640143007,-0.2847918868],"action_prob":0.6647544503,"action_logp":-0.4083375931,"action_dist_inputs":[-0.3428717256,0.3416827619],"value_targets":61.1216087341} +{"eps_id":1628128689,"obs":[-0.131087333,0.224952817,0.0640143007,-0.2847918868],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1265882701,0.02897905,0.0583184585,0.0273740143],"action_prob":0.7010017037,"action_logp":-0.3552449346,"action_dist_inputs":[0.4239166081,-0.4281560481],"value_targets":60.7288970947} +{"eps_id":1628128689,"obs":[-0.1265882701,0.02897905,0.0583184585,0.0273740143],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1260086894,0.2232182771,0.0588659383,-0.2463536114],"action_prob":0.706875205,"action_logp":-0.3469011784,"action_dist_inputs":[-0.4405336082,0.4397221208],"value_targets":60.3322181702} +{"eps_id":1628128689,"obs":[-0.1260086894,0.2232182771,0.0588659383,-0.2463536114],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1215443239,0.0273071565,0.0539388694,0.0643006712],"action_prob":0.6554754376,"action_logp":-0.4223944545,"action_dist_inputs":[0.319616437,-0.3235790133],"value_targets":59.9315338135} +{"eps_id":1628128689,"obs":[-0.1215443239,0.0273071565,0.0539388694,0.0643006712],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1209981814,-0.1685449928,0.0552248806,0.3735019565],"action_prob":0.2583343983,"action_logp":-1.3535003662,"action_dist_inputs":[-0.5275424719,0.5271009207],"value_targets":59.526802063} 
+{"eps_id":1628128689,"obs":[-0.1209981814,-0.1685449928,0.0552248806,0.3735019565],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1243690848,0.0257507674,0.0626949221,0.098730512],"action_prob":0.8957293034,"action_logp":-0.1101170182,"action_dist_inputs":[-1.0737768412,1.0768713951],"value_targets":59.117980957} +{"eps_id":1628128689,"obs":[-0.1243690848,0.0257507674,0.0626949221,0.098730512],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1238540635,0.2199207544,0.0646695271,-0.173532024],"action_prob":0.7836122513,"action_logp":-0.2438409925,"action_dist_inputs":[-0.6434357762,0.6434064507],"value_targets":58.7050323486} +{"eps_id":1628128689,"obs":[-0.1238540635,0.2199207544,0.0646695271,-0.173532024],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1194556504,0.023935724,0.0611988902,0.1388306022],"action_prob":0.5227696896,"action_logp":-0.6486142874,"action_dist_inputs":[0.0439086482,-0.0472330377],"value_targets":58.2879104614} +{"eps_id":1628128689,"obs":[-0.1194556504,0.023935724,0.0611988902,0.1388306022],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1189769357,-0.1720069349,0.0639755055,0.4501756132],"action_prob":0.1890725642,"action_logp":-1.6656243801,"action_dist_inputs":[-0.7278112769,0.7282363772],"value_targets":57.8665771484} +{"eps_id":1628128689,"obs":[-0.1189769357,-0.1720069349,0.0639755055,0.4501756132],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1224170774,0.0221545845,0.0729790181,0.1783251166],"action_prob":0.9101923108,"action_logp":-0.0940993875,"action_dist_inputs":[-1.1560291052,1.159956336],"value_targets":57.4409866333} 
+{"eps_id":1628128689,"obs":[-0.1224170774,0.0221545845,0.0729790181,0.1783251166],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1219739839,0.2161604166,0.0765455142,-0.0904722363],"action_prob":0.8406888843,"action_logp":-0.1735336334,"action_dist_inputs":[-0.831207335,0.8321554661],"value_targets":57.0110969543} +{"eps_id":1628128689,"obs":[-0.1219739839,0.2161604166,0.0765455142,-0.0904722363],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1176507771,0.4101065099,0.0747360736,-0.3580570817],"action_prob":0.6323052049,"action_logp":-0.4583830833,"action_dist_inputs":[-0.2722434998,0.2698755264],"value_targets":56.5768661499} +{"eps_id":1628128689,"obs":[-0.1176507771,0.4101065099,0.0747360736,-0.3580570817],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.109448649,0.2140060365,0.0675749332,-0.0427755564],"action_prob":0.707670927,"action_logp":-0.3457760811,"action_dist_inputs":[0.4393784106,-0.4447205365],"value_targets":56.1382484436} +{"eps_id":1628128689,"obs":[-0.109448649,0.2140060365,0.0675749332,-0.0427755564],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1051685214,0.4080971777,0.0667194203,-0.3133958876],"action_prob":0.678370595,"action_logp":-0.3880615532,"action_dist_inputs":[-0.3740494847,0.3722442687],"value_targets":55.6952018738} +{"eps_id":1628128689,"obs":[-0.1051685214,0.4080971777,0.0667194203,-0.3133958876],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0970065817,0.6022083759,0.0604515001,-0.5843138695],"action_prob":0.3386656642,"action_logp":-1.0827418566,"action_dist_inputs":[0.3321228623,-0.337123245],"value_targets":55.2476768494} 
+{"eps_id":1628128689,"obs":[-0.0970065817,0.6022083759,0.0604515001,-0.5843138695],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0849624127,0.406294018,0.0487652235,-0.273217231],"action_prob":0.8720928431,"action_logp":-0.1368593723,"action_dist_inputs":[0.9562917948,-0.9632997513],"value_targets":54.7956352234} +{"eps_id":1628128689,"obs":[-0.0849624127,0.406294018,0.0487652235,-0.273217231],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0768365338,0.6006874442,0.0433008783,-0.5501294732],"action_prob":0.360679388,"action_logp":-1.0197658539,"action_dist_inputs":[0.2838789523,-0.2885376811],"value_targets":54.3390235901} +{"eps_id":1628128689,"obs":[-0.0768365338,0.6006874442,0.0433008783,-0.5501294732],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0648227856,0.4049849212,0.0322982892,-0.2441243082],"action_prob":0.8678561449,"action_logp":-0.1417293251,"action_dist_inputs":[0.9376545548,-0.9444800019],"value_targets":53.8778038025} +{"eps_id":1628128689,"obs":[-0.0648227856,0.4049849212,0.0322982892,-0.2441243082],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0567230843,0.2094168663,0.0274158027,0.0585687868],"action_prob":0.6305145025,"action_logp":-0.4612191617,"action_dist_inputs":[0.2650261223,-0.2693983316],"value_targets":53.4119224548} +{"eps_id":1628128689,"obs":[-0.0567230843,0.2094168663,0.0274158027,0.0585687868],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0525347479,0.0139127774,0.0285871793,0.3597739637],"action_prob":0.2687197924,"action_logp":-1.3140860796,"action_dist_inputs":[-0.5008341074,0.5002933741],"value_targets":52.9413375854} 
+{"eps_id":1628128689,"obs":[-0.0525347479,0.0139127774,0.0285871793,0.3597739637],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.052256491,0.2086169422,0.0357826576,0.0762405321],"action_prob":0.8827120066,"action_logp":-0.1247563064,"action_dist_inputs":[-1.0076858997,1.0106810331],"value_targets":52.4659957886} +{"eps_id":1628128689,"obs":[-0.052256491,0.2086169422,0.0357826576,0.0762405321],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0480841547,0.4032081366,0.0373074673,-0.2049415559],"action_prob":0.7577426434,"action_logp":-0.2774114907,"action_dist_inputs":[-0.5702569485,0.5700861812],"value_targets":51.9858551025} +{"eps_id":1628128689,"obs":[-0.0480841547,0.4032081366,0.0373074673,-0.2049415559],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0400199927,0.5977772474,0.0332086384,-0.4856263101],"action_prob":0.4417537749,"action_logp":-0.8170026541,"action_dist_inputs":[0.115123421,-0.1189240292],"value_targets":51.5008621216} +{"eps_id":1628128689,"obs":[-0.0400199927,0.5977772474,0.0332086384,-0.4856263101],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0280644465,0.4022028148,0.0234961119,-0.1826648414],"action_prob":0.8369502425,"action_logp":-0.1779906899,"action_dist_inputs":[0.8146778345,-0.8210309148],"value_targets":51.0109710693} +{"eps_id":1628128689,"obs":[-0.0280644465,0.4022028148,0.0234961119,-0.1826648414],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0200203899,0.5969808102,0.0198428147,-0.4678439796],"action_prob":0.4462661147,"action_logp":-0.8068398237,"action_dist_inputs":[0.1061226726,-0.109646067],"value_targets":50.5161323547} 
+{"eps_id":1628128689,"obs":[-0.0200203899,0.5969808102,0.0198428147,-0.4678439796],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.008080774,0.4015842378,0.010485935,-0.1689734012],"action_prob":0.8386700153,"action_logp":-0.1759379655,"action_dist_inputs":[0.8210908771,-0.8272742629],"value_targets":50.0162963867} +{"eps_id":1628128689,"obs":[-0.008080774,0.4015842378,0.010485935,-0.1689734012],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.000049089,0.5965545177,0.0071064672,-0.4583299458],"action_prob":0.4384791553,"action_logp":-0.8244429827,"action_dist_inputs":[0.122007221,-0.1253293455],"value_targets":49.5114097595} +{"eps_id":1628128689,"obs":[-0.000049089,0.5965545177,0.0071064672,-0.4583299458],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0118820015,0.4013328552,-0.0020601319,-0.1634155363],"action_prob":0.8454686403,"action_logp":-0.1678642035,"action_dist_inputs":[0.8467263579,-0.8527677059],"value_targets":49.0014266968} +{"eps_id":1628128689,"obs":[0.0118820015,0.4013328552,-0.0020601319,-0.1634155363],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0199086592,0.2062404454,-0.0053284424,0.1286167651],"action_prob":0.581754446,"action_logp":-0.5417068601,"action_dist_inputs":[0.1633912325,-0.1665883809],"value_targets":48.486289978} +{"eps_id":1628128689,"obs":[0.0199086592,0.2062404454,-0.0053284424,0.1286167651],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0240334682,0.0111952322,-0.0027561074,0.419613868],"action_prob":0.2423403114,"action_logp":-1.417412281,"action_dist_inputs":[-0.5696009994,0.5702902675],"value_targets":47.9659461975} 
+{"eps_id":1628128689,"obs":[0.0240334682,0.0111952322,-0.0027561074,0.419613868],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0242573731,0.2063561231,0.00563617,0.1260633469],"action_prob":0.890689671,"action_logp":-0.1157592013,"action_dist_inputs":[-1.0469366312,1.0508685112],"value_targets":47.4403495789} +{"eps_id":1628128689,"obs":[0.0242573731,0.2063561231,0.00563617,0.1260633469],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0283844955,0.4013968706,0.0081574367,-0.1648361087],"action_prob":0.7686856985,"action_logp":-0.2630730867,"action_dist_inputs":[-0.6000313163,0.6008735299],"value_targets":46.9094467163} +{"eps_id":1628128689,"obs":[0.0283844955,0.4013968706,0.0081574367,-0.1648361087],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0364124328,0.2061591148,0.0048607145,0.1304090619],"action_prob":0.5624927282,"action_logp":-0.5753770471,"action_dist_inputs":[0.1241741478,-0.1271107942],"value_targets":46.3731765747} +{"eps_id":1628128689,"obs":[0.0364124328,0.2061591148,0.0048607145,0.1304090619],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0405356139,0.4012111127,0.007468896,-0.1607364267],"action_prob":0.7718735933,"action_logp":-0.258934468,"action_dist_inputs":[-0.6089568734,0.6099639535],"value_targets":45.8314933777} +{"eps_id":1628128689,"obs":[0.0405356139,0.4012111127,0.007468896,-0.1607364267],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.048559837,0.2059830278,0.0042541674,0.1342933625],"action_prob":0.5582006574,"action_logp":-0.5830367804,"action_dist_inputs":[0.115541473,-0.1183210984],"value_targets":45.2843360901} 
+{"eps_id":1628128689,"obs":[0.048559837,0.2059830278,0.0042541674,0.1342933625],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0526794977,0.4010437727,0.0069400347,-0.1570444107],"action_prob":0.7747992873,"action_logp":-0.255151242,"action_dist_inputs":[-0.6172225475,0.6183894277],"value_targets":44.7316513062} +{"eps_id":1628128689,"obs":[0.0526794977,0.4010437727,0.0069400347,-0.1570444107],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0607003719,0.2058231533,0.0037991465,0.1378198266],"action_prob":0.554237783,"action_logp":-0.5901615024,"action_dist_inputs":[0.107591331,-0.110216625],"value_targets":44.1733856201} +{"eps_id":1628128689,"obs":[0.0607003719,0.2058231533,0.0037991465,0.1378198266],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0648168325,0.4008904994,0.0065555428,-0.1536621153],"action_prob":0.7775306106,"action_logp":-0.251632303,"action_dist_inputs":[-0.625005424,0.6263280511],"value_targets":43.6094818115} +{"eps_id":1628128689,"obs":[0.0648168325,0.4008904994,0.0065555428,-0.1536621153],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0728346482,0.5959179401,0.0034823008,-0.4442697167],"action_prob":0.4495188892,"action_logp":-0.799577415,"action_dist_inputs":[0.1000708193,-0.1025438458],"value_targets":43.0398788452} +{"eps_id":1628128689,"obs":[0.0728346482,0.5959179401,0.0034823008,-0.4442697167],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0847530067,0.4007469118,-0.0054030935,-0.1504911333],"action_prob":0.8404044509,"action_logp":-0.1738720089,"action_dist_inputs":[0.827935636,-0.8333049417],"value_targets":42.4645233154} 
+{"eps_id":1628128689,"obs":[0.0847530067,0.4007469118,-0.0054030935,-0.1504911333],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0927679464,0.2057027519,-0.0084129162,0.1404823512],"action_prob":0.573810935,"action_logp":-0.5554552674,"action_dist_inputs":[0.1475229412,-0.149894014],"value_targets":41.8833580017} +{"eps_id":1628128689,"obs":[0.0927679464,0.2057027519,-0.0084129162,0.1404823512],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0968820006,0.4009441733,-0.0056032692,-0.1548427939],"action_prob":0.7663121819,"action_logp":-0.2661656439,"action_dist_inputs":[-0.5930452347,0.5945582986],"value_targets":41.2963218689} +{"eps_id":1628128689,"obs":[0.0968820006,0.4009441733,-0.0056032692,-0.1548427939],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1049008816,0.2059029043,-0.0087001249,0.1360671818],"action_prob":0.5824273825,"action_logp":-0.5405507684,"action_dist_inputs":[0.1652240008,-0.1675220877],"value_targets":40.7033538818} +{"eps_id":1628128689,"obs":[0.1049008816,0.2059029043,-0.0087001249,0.1360671818],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1090189368,0.4011483788,-0.0059787817,-0.1593477577],"action_prob":0.7621994615,"action_logp":-0.2715469897,"action_dist_inputs":[-0.5816019773,0.5831740499],"value_targets":40.1044006348} +{"eps_id":1628128689,"obs":[0.1090189368,0.4011483788,-0.0059787817,-0.1593477577],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1170419082,0.2061125338,-0.0091657368,0.1314430237],"action_prob":0.5916412473,"action_logp":-0.5248548388,"action_dist_inputs":[0.1842631549,-0.1864911169],"value_targets":39.4993934631} 
+{"eps_id":1628128689,"obs":[0.1170419082,0.2061125338,-0.0091657368,0.1314430237],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.121164158,0.4013645649,-0.0065368763,-0.1641174108],"action_prob":0.7573920488,"action_logp":-0.2778742313,"action_dist_inputs":[-0.5684047341,0.5700295568],"value_targets":38.8882751465} +{"eps_id":1628128689,"obs":[0.121164158,0.4013645649,-0.0065368763,-0.1641174108],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1291914433,0.5965794921,-0.0098192245,-0.4588553607],"action_prob":0.3983937502,"action_logp":-0.9203144312,"action_dist_inputs":[0.2050003111,-0.2071620375],"value_targets":38.2709846497} +{"eps_id":1628128689,"obs":[0.1291914433,0.5965794921,-0.0098192245,-0.4588553607],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1411230415,0.4015977085,-0.0189963318,-0.1692835987],"action_prob":0.8609768152,"action_logp":-0.1496877223,"action_dist_inputs":[0.9091956615,-0.9142312407],"value_targets":37.6474609375} +{"eps_id":1628128689,"obs":[0.1411230415,0.4015977085,-0.0189963318,-0.1692835987],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1491549909,0.2067527324,-0.0223820042,0.1173465401],"action_prob":0.6381910443,"action_logp":-0.4491175711,"action_dist_inputs":[0.2826941907,-0.2848274112],"value_targets":37.0176353455} +{"eps_id":1628128689,"obs":[0.1491549909,0.2067527324,-0.0223820042,0.1173465401],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1532900482,0.0119585069,-0.0200350732,0.4028849304],"action_prob":0.2766664624,"action_logp":-1.284942627,"action_dist_inputs":[-0.4797306657,0.4813270569],"value_targets":36.3814506531} 
+{"eps_id":1628128689,"obs":[0.1532900482,0.0119585069,-0.0200350732,0.4028849304],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1535292119,0.2073588073,-0.0119773746,0.1039533988],"action_prob":0.888826251,"action_logp":-0.1178534999,"action_dist_inputs":[-1.0370163918,1.041790843],"value_targets":35.7388381958} +{"eps_id":1628128689,"obs":[0.1535292119,0.2073588073,-0.0119773746,0.1039533988],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1576763988,0.012410528,-0.0098983059,0.3928335607],"action_prob":0.2756344974,"action_logp":-1.2886795998,"action_dist_inputs":[-0.4823102057,0.4839101434],"value_targets":35.0897369385} +{"eps_id":1628128689,"obs":[0.1576763988,0.012410528,-0.0098983059,0.3928335607],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1579246074,0.207671538,-0.0020416349,0.0970463529],"action_prob":0.890663743,"action_logp":-0.1157882884,"action_dist_inputs":[-1.0463559628,1.0511834621],"value_targets":34.4340782166} +{"eps_id":1628128689,"obs":[0.1579246074,0.207671538,-0.0020416349,0.0970463529],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1620780379,0.4028227031,-0.0001007077,-0.1962800026],"action_prob":0.7317965627,"action_logp":-0.3122527301,"action_dist_inputs":[-0.501042068,0.5027147532],"value_targets":33.7717971802} +{"eps_id":1628128689,"obs":[0.1620780379,0.4028227031,-0.0001007077,-0.1962800026],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1701344848,0.2077021748,-0.0040263077,0.0963711515],"action_prob":0.6411836147,"action_logp":-0.4444393814,"action_dist_inputs":[0.2892510593,-0.2912541628],"value_targets":33.1028251648} 
+{"eps_id":1628128689,"obs":[0.1701344848,0.2077021748,-0.0040263077,0.0963711515],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1742885262,0.4028815925,-0.0020988847,-0.1975793391],"action_prob":0.7273834944,"action_logp":-0.3183014691,"action_dist_inputs":[-0.4898196161,0.4915679693],"value_targets":32.4270935059} +{"eps_id":1628128689,"obs":[0.1742885262,0.4028815925,-0.0020988847,-0.1975793391],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1823461652,0.5980334878,-0.0060504717,-0.4909236431],"action_prob":0.3519926071,"action_logp":-1.0441451073,"action_dist_inputs":[0.304189086,-0.3061029017],"value_targets":31.7445411682} +{"eps_id":1628128689,"obs":[0.1823461652,0.5980334878,-0.0060504717,-0.4909236431],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1943068355,0.4029974341,-0.015868945,-0.2001536936],"action_prob":0.8722563386,"action_logp":-0.1366719157,"action_dist_inputs":[0.9581646323,-0.9628932476],"value_targets":31.0550918579} +{"eps_id":1628128689,"obs":[0.1943068355,0.4029974341,-0.015868945,-0.2001536936],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2023667842,0.2081059963,-0.0198720191,0.0874813721],"action_prob":0.6799957156,"action_logp":-0.3856687844,"action_dist_inputs":[0.3759473562,-0.3778046668],"value_targets":30.3586788177} +{"eps_id":1628128689,"obs":[0.2023667842,0.2081059963,-0.0198720191,0.0874813721],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2065289021,0.0132744461,-0.0181223899,0.3738290071],"action_prob":0.312212199,"action_logp":-1.164072156,"action_dist_inputs":[-0.3940246999,0.3957725465],"value_targets":29.6552295685} 
+{"eps_id":1628128689,"obs":[0.2065289021,0.0132744461,-0.0181223899,0.3738290071],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.206794396,-0.181585446,-0.0106458105,0.6607431769],"action_prob":0.1170842275,"action_logp":-2.1448616982,"action_dist_inputs":[-1.0076793432,1.0126570463],"value_targets":28.9446773529} +{"eps_id":1628128689,"obs":[0.206794396,-0.181585446,-0.0106458105,0.6607431769],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.203162685,0.0136830136,0.0025690526,0.3647272587],"action_prob":0.9351775646,"action_logp":-0.0670188814,"action_dist_inputs":[-1.3308819532,1.3382031918],"value_targets":28.2269458771} +{"eps_id":1628128689,"obs":[0.203162685,0.0136830136,0.0025690526,0.3647272587],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2034363449,0.2087683529,0.0098635983,0.0728555098],"action_prob":0.8899533749,"action_logp":-0.1165862009,"action_dist_inputs":[-1.0425978899,1.0476670265],"value_targets":27.5019664764} +{"eps_id":1628128689,"obs":[0.2034363449,0.2087683529,0.0098635983,0.0728555098],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2076117098,0.0135064004,0.0113207083,0.3686340451],"action_prob":0.2796715498,"action_logp":-1.2741394043,"action_dist_inputs":[-0.4720892906,0.4740021229],"value_targets":26.7696628571} +{"eps_id":1628128689,"obs":[0.2076117098,0.0135064004,0.0113207083,0.3686340451],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2078818381,0.2084656805,0.0186933894,0.0795420855],"action_prob":0.8948693275,"action_logp":-0.1110775694,"action_dist_inputs":[-1.0681295395,1.0733437538],"value_targets":26.0299625397} 
+{"eps_id":1628128689,"obs":[0.2078818381,0.2084656805,0.0186933894,0.0795420855],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2120511532,0.4033147395,0.0202842299,-0.2071848959],"action_prob":0.7411182523,"action_logp":-0.299595058,"action_dist_inputs":[-0.5248332024,0.5269556046],"value_targets":25.2827911377} +{"eps_id":1628128689,"obs":[0.2120511532,0.4033147395,0.0202842299,-0.2071848959],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2201174498,0.5981408358,0.0161405317,-0.4934007823],"action_prob":0.3818009198,"action_logp":-0.9628559351,"action_dist_inputs":[0.2402333021,-0.241677925],"value_targets":24.5280704498} +{"eps_id":1628128689,"obs":[0.2201174498,0.5981408358,0.0161405317,-0.4934007823],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2320802659,0.7930314541,0.0062725167,-0.7809535265],"action_prob":0.1447104216,"action_logp":-1.9330205917,"action_dist_inputs":[0.8862178326,-0.8904875517],"value_targets":23.7657279968} +{"eps_id":1628128689,"obs":[0.2320802659,0.7930314541,0.0062725167,-0.7809535265],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2479408979,0.5978238583,-0.0093465541,-0.4863037765],"action_prob":0.9251331687,"action_logp":-0.077817589,"action_dist_inputs":[1.2538481951,-1.2603778839],"value_targets":22.9956855774} +{"eps_id":1628128689,"obs":[0.2479408979,0.5978238583,-0.0093465541,-0.4863037765],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2598973811,0.4028350413,-0.0190726295,-0.19658117],"action_prob":0.8686074018,"action_logp":-0.1408640444,"action_dist_inputs":[0.942263782,-0.9464377761],"value_targets":22.2178649902} 
+{"eps_id":1628128689,"obs":[0.2598973811,0.4028350413,-0.0190726295,-0.19658117],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2679540813,0.5982245207,-0.0230042543,-0.4952191114],"action_prob":0.3171515465,"action_logp":-1.1483755112,"action_dist_inputs":[0.3828343153,-0.3840587735],"value_targets":21.4321861267} +{"eps_id":1628128689,"obs":[0.2679540813,0.5982245207,-0.0230042543,-0.4952191114],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2799185514,0.4034344256,-0.0329086371,-0.2098739147],"action_prob":0.8791638613,"action_logp":-0.1287839711,"action_dist_inputs":[0.9901866913,-0.9943494201],"value_targets":20.6385707855} +{"eps_id":1628128689,"obs":[0.2799185514,0.4034344256,-0.0329086371,-0.2098739147],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.287987262,0.2087980956,-0.0371061154,0.0722491145],"action_prob":0.7229668498,"action_logp":-0.3243919313,"action_dist_inputs":[0.4789917469,-0.4802342653],"value_targets":19.8369407654} +{"eps_id":1628128689,"obs":[0.287987262,0.2087980956,-0.0371061154,0.0722491145],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.292163223,0.0142272143,-0.0356611311,0.3529978395],"action_prob":0.3769646585,"action_logp":-0.9756038785,"action_dist_inputs":[-0.2501635253,0.2522883713],"value_targets":19.0272140503} +{"eps_id":1628128689,"obs":[0.292163223,0.0142272143,-0.0356611311,0.3529978395],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2924477458,0.2098376304,-0.0286011752,0.0492866077],"action_prob":0.8635804653,"action_logp":-0.1466682255,"action_dist_inputs":[-0.9200470448,0.9253050089],"value_targets":18.2093067169} 
+{"eps_id":1628128689,"obs":[0.2924477458,0.2098376304,-0.0286011752,0.0492866077],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2966445088,0.4053577781,-0.0276154429,-0.2522812486],"action_prob":0.6052973866,"action_logp":-0.502035439,"action_dist_inputs":[-0.2127889842,0.2147982717],"value_targets":17.3831367493} +{"eps_id":1628128689,"obs":[0.2966445088,0.4053577781,-0.0276154429,-0.2522812486],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3047516644,0.2106408179,-0.0326610692,0.03156491],"action_prob":0.7591969371,"action_logp":-0.2754940987,"action_dist_inputs":[0.5734619498,-0.574819684],"value_targets":16.5486240387} +{"eps_id":1628128689,"obs":[0.3047516644,0.2106408179,-0.0326610692,0.03156491],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3089644909,0.0160021,-0.0320297703,0.3137666285],"action_prob":0.4327877164,"action_logp":-0.8375079632,"action_dist_inputs":[-0.1342893839,0.1361970454],"value_targets":15.7056808472} +{"eps_id":1628128689,"obs":[0.3089644909,0.0160021,-0.0320297703,0.3137666285],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.309284538,0.2115653455,-0.0257544369,0.0111570926],"action_prob":0.8448057175,"action_logp":-0.1686486155,"action_dist_inputs":[-0.8446766734,0.8497523069],"value_targets":14.8542232513} +{"eps_id":1628128689,"obs":[0.309284538,0.2115653455,-0.0257544369,0.0111570926],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.313515842,0.016822027,-0.0255312957,0.295604229],"action_prob":0.4508910477,"action_logp":-0.7965295315,"action_dist_inputs":[-0.0976283252,0.0994427949],"value_targets":13.9941644669} 
+{"eps_id":1628128689,"obs":[0.313515842,0.016822027,-0.0255312957,0.295604229],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3138522804,0.2122984827,-0.0196192116,-0.0050202808],"action_prob":0.839143157,"action_logp":-0.1753739417,"action_dist_inputs":[-0.8234299421,0.8284366131],"value_targets":13.125418663} +{"eps_id":1628128689,"obs":[0.3138522804,0.2122984827,-0.0196192116,-0.0050202808],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3180982471,0.0174633078,-0.0197196156,0.2814085484],"action_prob":0.4639593065,"action_logp":-0.7679584622,"action_dist_inputs":[-0.0713282228,0.0730850101],"value_targets":12.2478981018} +{"eps_id":1628128689,"obs":[0.3180982471,0.0174633078,-0.0197196156,0.2814085484],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3184475005,-0.1773718894,-0.0140914451,0.5678073168],"action_prob":0.164959684,"action_logp":-1.8020541668,"action_dist_inputs":[-0.8084074855,0.8133714795],"value_targets":11.3615131378} +{"eps_id":1628128689,"obs":[0.3184475005,-0.1773718894,-0.0140914451,0.5678073168],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3149000704,0.0179448575,-0.0027352986,0.2707185447],"action_prob":0.9284449816,"action_logp":-0.0742441714,"action_dist_inputs":[-1.2778419256,1.2852032185],"value_targets":10.4661741257} +{"eps_id":1628128689,"obs":[0.3149000704,0.0179448575,-0.0027352986,0.2707185447],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3152589798,0.2131057382,0.0026790723,-0.0228258446],"action_prob":0.8440572619,"action_logp":-0.1695349663,"action_dist_inputs":[-0.841871202,0.8468601704],"value_targets":9.5617923737} 
+{"eps_id":1628128689,"obs":[0.3152589798,0.2131057382,0.0026790723,-0.0228258446],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3195210695,0.4081891775,0.0022225555,-0.3146622777],"action_prob":0.556275785,"action_logp":-0.5864910483,"action_dist_inputs":[-0.1121398509,0.1139212176],"value_targets":8.6482753754} +{"eps_id":1628128689,"obs":[0.3195210695,0.4081891775,0.0022225555,-0.3146622777],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3276848793,0.6032794118,-0.0040706904,-0.6066434383],"action_prob":0.2244214118,"action_logp":-1.4942296743,"action_dist_inputs":[0.6193594337,-0.6207242608],"value_targets":7.7255306244} +{"eps_id":1628128689,"obs":[0.3276848793,0.6032794118,-0.0040706904,-0.6066434383],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3397504389,0.4082145989,-0.0162035599,-0.3152454793],"action_prob":0.8974328041,"action_logp":-0.108217001,"action_dist_inputs":[1.0823799372,-1.0866404772],"value_targets":6.7934651375} +{"eps_id":1628128689,"obs":[0.3397504389,0.4082145989,-0.0162035599,-0.3152454793],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3479147553,0.2133271545,-0.0225084685,-0.0277163349],"action_prob":0.795165062,"action_logp":-0.2292055786,"action_dist_inputs":[0.6775161624,-0.6788290739],"value_targets":5.8519849777} +{"eps_id":1628128689,"obs":[0.3479147553,0.2133271545,-0.0225084685,-0.0277163349],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3521812856,0.0185350981,-0.0230627954,0.2577807903],"action_prob":0.5097355247,"action_logp":-0.6738632917,"action_dist_inputs":[0.0203918014,-0.0185551718],"value_targets":4.9009947777} 
+{"eps_id":1628128689,"obs":[0.3521812856,0.0185350981,-0.0230627954,0.2577807903],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3525519967,0.2139785886,-0.0179071799,-0.0420863293],"action_prob":0.8111796975,"action_logp":-0.2092656791,"action_dist_inputs":[-0.726366818,0.7313269973],"value_targets":3.9403989315} +{"eps_id":1628128689,"obs":[0.3525519967,0.2139785886,-0.0179071799,-0.0420863293],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.3568315506,0.0191179365,-0.0187489074,0.2448933572],"action_prob":0.5227237344,"action_logp":-0.6487022042,"action_dist_inputs":[0.0463754758,-0.0445820689],"value_targets":2.970099926} +{"eps_id":1628128689,"obs":[0.3568315506,0.0191179365,-0.0187489074,0.2448933572],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.3572139144,0.2145025879,-0.0138510391,-0.0536439717],"action_prob":0.805105865,"action_logp":-0.2167815119,"action_dist_inputs":[-0.7068003416,0.7117167711],"value_targets":1.9900000095} +{"eps_id":1628128689,"obs":[0.3572139144,0.2145025879,-0.0138510391,-0.0536439717],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[0.3615039587,0.4098203778,-0.014923919,-0.3506646156],"action_prob":0.4676780105,"action_logp":-0.7599752545,"action_dist_inputs":[0.065619573,-0.0638489872],"value_targets":1.0} +{"eps_id":329428710,"obs":[-0.026324071,0.0406068377,-0.0064664264,-0.0233756844],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0255119335,-0.1544217914,-0.0069339401,0.267260015],"action_prob":0.5094026923,"action_logp":-0.6745163798,"action_dist_inputs":[0.0183854774,-0.0192297846],"value_targets":86.6020355225} 
+{"eps_id":329428710,"obs":[-0.0255119335,-0.1544217914,-0.0069339401,0.267260015],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0286003705,0.0407984369,-0.0015887399,-0.0276018362],"action_prob":0.8231526017,"action_logp":-0.1946136951,"action_dist_inputs":[-0.7677081227,0.7701463699],"value_targets":86.4666976929} +{"eps_id":329428710,"obs":[-0.0286003705,0.0407984369,-0.0015887399,-0.0276018362],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0277844016,-0.1543006897,-0.0021407767,0.2645794153],"action_prob":0.5049988031,"action_logp":-0.6831992269,"action_dist_inputs":[0.0095657203,-0.0104302345],"value_targets":86.3300018311} +{"eps_id":329428710,"obs":[-0.0277844016,-0.1543006897,-0.0021407767,0.2645794153],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0308704153,0.0408517458,0.0031508114,-0.0287779775],"action_prob":0.8259915113,"action_logp":-0.1911707669,"action_dist_inputs":[-0.7775217295,0.7799585462],"value_targets":86.1919174194} +{"eps_id":329428710,"obs":[-0.0308704153,0.0408517458,0.0031508114,-0.0287779775],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0300533809,-0.1543152481,0.0025752517,0.2648974061],"action_prob":0.4954202175,"action_logp":-0.7023489475,"action_dist_inputs":[-0.0095892679,0.0087304106],"value_targets":86.052444458} +{"eps_id":329428710,"obs":[-0.0300533809,-0.1543152481,0.0025752517,0.2648974061],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.033139687,0.0407698527,0.0078731999,-0.0269721597],"action_prob":0.8306896687,"action_logp":-0.1854990274,"action_dist_inputs":[-0.7940314412,0.7964912057],"value_targets":85.9115600586} 
+{"eps_id":329428710,"obs":[-0.033139687,0.0407698527,0.0078731999,-0.0269721597],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.032324288,-0.1544641107,0.0073337564,0.2681844234],"action_prob":0.4805352092,"action_logp":-0.7328547835,"action_dist_inputs":[-0.0393627025,0.0385358743],"value_targets":85.7692489624} +{"eps_id":329428710,"obs":[-0.032324288,-0.1544641107,0.0073337564,0.2681844234],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0354135707,-0.3496899605,0.0126974452,0.5631713867],"action_prob":0.1629130244,"action_logp":-1.8145388365,"action_dist_inputs":[-0.8171015382,0.8196099997],"value_targets":85.62550354} +{"eps_id":329428710,"obs":[-0.0354135707,-0.3496899605,0.0126974452,0.5631713867],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0424073711,-0.5449877381,0.0239608735,0.8598274589],"action_prob":0.0761409327,"action_logp":-2.5751693249,"action_dist_inputs":[-1.2452819347,1.2506917715],"value_targets":85.4803085327} +{"eps_id":329428710,"obs":[-0.0424073711,-0.5449877381,0.0239608735,0.8598274589],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0533071235,-0.350200206,0.0411574207,0.5747737885],"action_prob":0.9466135502,"action_logp":-0.0548643619,"action_dist_inputs":[-1.4337542057,1.4415802956],"value_targets":85.3336486816} +{"eps_id":329428710,"obs":[-0.0533071235,-0.350200206,0.0411574207,0.5747737885],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0603111275,-0.1556786895,0.0526528992,0.2953354716],"action_prob":0.9295711517,"action_logp":-0.0730319396,"action_dist_inputs":[-1.287234664,1.292886138],"value_targets":85.1855010986} 
+{"eps_id":329428710,"obs":[-0.0603111275,-0.1556786895,0.0526528992,0.2953354716],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0634247065,0.0386546366,0.0585596077,0.0197122749],"action_prob":0.8792669773,"action_logp":-0.128666684,"action_dist_inputs":[-0.9912945628,0.9942122102],"value_targets":85.0358581543} +{"eps_id":329428710,"obs":[-0.0634247065,0.0386546366,0.0585596077,0.0197122749],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0626516119,0.2328900099,0.0589538515,-0.2539346814],"action_prob":0.702762723,"action_logp":-0.3527359664,"action_dist_inputs":[-0.43036744,0.4301211238],"value_targets":84.8847045898} +{"eps_id":329428710,"obs":[-0.0626516119,0.2328900099,0.0589538515,-0.2539346814],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0579938106,0.0369780101,0.0538751595,0.0567451827],"action_prob":0.6678190827,"action_logp":-0.4037379324,"action_dist_inputs":[0.3474518657,-0.3508856595],"value_targets":84.7320251465} +{"eps_id":329428710,"obs":[-0.0579938106,0.0369780101,0.0538751595,0.0567451827],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0572542511,0.2312877476,0.0550100617,-0.2184648961],"action_prob":0.7392320037,"action_logp":-0.3021434844,"action_dist_inputs":[-0.5209240317,0.5210565329],"value_targets":84.5778045654} +{"eps_id":329428710,"obs":[-0.0572542511,0.2312877476,0.0550100617,-0.2184648961],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0526284948,0.035424348,0.0506407656,0.0910505876],"action_prob":0.6206772327,"action_logp":-0.4769440889,"action_dist_inputs":[0.244639501,-0.2477842569],"value_targets":84.4220275879} 
+{"eps_id":329428710,"obs":[-0.0526284948,0.035424348,0.0506407656,0.0910505876],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0519200079,0.2297852486,0.0524617769,-0.1852349937],"action_prob":0.7695670724,"action_logp":-0.2619271874,"action_dist_inputs":[-0.6026811004,0.6031872034],"value_targets":84.2646713257} +{"eps_id":329428710,"obs":[-0.0519200079,0.2297852486,0.0524617769,-0.1852349937],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0473243035,0.4241188467,0.0487570763,-0.4609176517],"action_prob":0.429045707,"action_logp":-0.8461918235,"action_dist_inputs":[0.1414511055,-0.1442946196],"value_targets":84.1057281494} +{"eps_id":329428710,"obs":[-0.0473243035,0.4241188467,0.0487570763,-0.4609176517],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0388419256,0.6185190082,0.0395387225,-0.7378424406],"action_prob":0.153340891,"action_logp":-1.8750917912,"action_dist_inputs":[0.8516065478,-0.8570281863],"value_targets":83.9451828003} +{"eps_id":329428710,"obs":[-0.0388419256,0.6185190082,0.0395387225,-0.7378424406],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0264715459,0.422873944,0.0247818753,-0.432982862],"action_prob":0.9281573296,"action_logp":-0.0745540187,"action_dist_inputs":[1.2756967545,-1.2830260992],"value_targets":83.7830123901} +{"eps_id":329428710,"obs":[-0.0264715459,0.422873944,0.0247818753,-0.432982862],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0180140678,0.2274100333,0.0161222182,-0.1325919181],"action_prob":0.8513439894,"action_logp":-0.1609390378,"action_dist_inputs":[0.8699678779,-0.8752134442],"value_targets":83.6192016602} 
+{"eps_id":329428710,"obs":[-0.0180140678,0.2274100333,0.0161222182,-0.1325919181],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0134658664,0.0320609026,0.0134703796,0.1651334316],"action_prob":0.5673029423,"action_logp":-0.5668618083,"action_dist_inputs":[0.1342483759,-0.1366072297],"value_targets":83.453742981} +{"eps_id":329428710,"obs":[-0.0134658664,0.0320609026,0.0134703796,0.1651334316],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.012824649,-0.1632512659,0.0167730488,0.4620352685],"action_prob":0.2092597336,"action_logp":-1.5641790628,"action_dist_inputs":[-0.6640571952,0.6653361917],"value_targets":83.286605835} +{"eps_id":329428710,"obs":[-0.012824649,-0.1632512659,0.0167730488,0.4620352685],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0160896741,0.0316296667,0.0260137543,0.1746860445],"action_prob":0.9084364176,"action_logp":-0.0960303992,"action_dist_inputs":[-1.1451159716,1.1495751143],"value_targets":83.1177825928} +{"eps_id":329428710,"obs":[-0.0160896741,0.0316296667,0.0260137543,0.1746860445],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0154570807,0.2263698429,0.029507475,-0.1096782088],"action_prob":0.8109901547,"action_logp":-0.209499374,"action_dist_inputs":[-0.7274841666,0.7289726138],"value_targets":82.9472579956} +{"eps_id":329428710,"obs":[-0.0154570807,0.2263698429,0.029507475,-0.1096782088],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0109296832,0.421056807,0.0273139104,-0.3929075897],"action_prob":0.5040689707,"action_logp":-0.6850421429,"action_dist_inputs":[-0.0091365185,0.0071396483],"value_targets":82.7750091553} 
+{"eps_id":329428710,"obs":[-0.0109296832,0.421056807,0.0273139104,-0.3929075897],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0025085474,0.225558117,0.0194557589,-0.0917396396],"action_prob":0.8199146986,"action_logp":-0.1985549629,"action_dist_inputs":[0.7554465532,-0.7603228688],"value_targets":82.601020813} +{"eps_id":329428710,"obs":[-0.0025085474,0.225558117,0.0194557589,-0.0917396396],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.002002615,0.4203958809,0.0176209658,-0.378221333],"action_prob":0.5108566284,"action_logp":-0.6716663241,"action_dist_inputs":[-0.0226099938,0.0208233893],"value_targets":82.4252700806} +{"eps_id":329428710,"obs":[0.002002615,0.4203958809,0.0176209658,-0.378221333],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0104105324,0.6152632236,0.0100565394,-0.6652966738],"action_prob":0.1805862486,"action_logp":-1.7115467787,"action_dist_inputs":[0.7538214326,-0.7585591674],"value_targets":82.2477493286} +{"eps_id":329428710,"obs":[0.0104105324,0.6152632236,0.0100565394,-0.6652966738],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0227157958,0.4200028181,-0.0032493935,-0.369464308],"action_prob":0.9230555296,"action_logp":-0.0800658539,"action_dist_inputs":[1.2388705015,-1.2457352877],"value_targets":82.0684280396} +{"eps_id":329428710,"obs":[0.0227157958,0.4200028181,-0.0032493935,-0.369464308],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0311158523,0.224927187,-0.0106386803,-0.0778077319],"action_prob":0.8358567953,"action_logp":-0.1792979538,"action_dist_inputs":[0.8115437031,-0.8161743283],"value_targets":81.8873062134} 
+{"eps_id":329428710,"obs":[0.0311158523,0.224927187,-0.0106386803,-0.0778077319],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0356143974,0.0299593527,-0.0121948346,0.2114997655],"action_prob":0.5382928252,"action_logp":-0.6193525791,"action_dist_inputs":[0.075939849,-0.0775320977],"value_targets":81.7043457031} +{"eps_id":329428710,"obs":[0.0356143974,0.0299593527,-0.0121948346,0.2114997655],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0362135842,0.2252535224,-0.0079648392,-0.0850049034],"action_prob":0.8006513119,"action_logp":-0.2223297358,"action_dist_inputs":[-0.6941950917,0.6961749196],"value_targets":81.5195465088} +{"eps_id":329428710,"obs":[0.0362135842,0.2252535224,-0.0079648392,-0.0850049034],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.040718656,0.420488745,-0.0096649379,-0.3801900744],"action_prob":0.4550272524,"action_logp":-0.7873979807,"action_dist_inputs":[0.0893979371,-0.0909805894],"value_targets":81.3328704834} +{"eps_id":329428710,"obs":[0.040718656,0.420488745,-0.0096649379,-0.3801900744],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0491284281,0.225505352,-0.0172687396,-0.0905701071],"action_prob":0.8492830992,"action_logp":-0.1633626819,"action_dist_inputs":[0.8622106314,-0.8667790294],"value_targets":81.144317627} +{"eps_id":329428710,"obs":[0.0491284281,0.225505352,-0.0172687396,-0.0905701071],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0536385365,0.4208705127,-0.0190801416,-0.388650924],"action_prob":0.4223608673,"action_logp":-0.8618952036,"action_dist_inputs":[0.1557529718,-0.1573363394],"value_targets":80.9538574219} 
+{"eps_id":329428710,"obs":[0.0536385365,0.4208705127,-0.0190801416,-0.388650924],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0620559454,0.2260245085,-0.0268531591,-0.102044329],"action_prob":0.8620921969,"action_logp":-0.1483930498,"action_dist_inputs":[0.9141055346,-0.9186715484],"value_targets":80.76146698} +{"eps_id":329428710,"obs":[0.0620559454,0.2260245085,-0.0268531591,-0.102044329],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0665764362,0.0312974863,-0.0288940463,0.1820470244],"action_prob":0.6200300455,"action_logp":-0.4779873192,"action_dist_inputs":[0.2440217435,-0.2456540465],"value_targets":80.5671386719} +{"eps_id":329428710,"obs":[0.0665764362,0.0312974863,-0.0288940463,0.1820470244],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0672023892,-0.1633993536,-0.0252531059,0.4654767215],"action_prob":0.2470675558,"action_logp":-1.398093462,"action_dist_inputs":[-0.5562421679,0.5580714345],"value_targets":80.3708496094} +{"eps_id":329428710,"obs":[0.0672023892,-0.1633993536,-0.0252531059,0.4654767215],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0639344007,0.0320701487,-0.0159435701,0.1649423242],"action_prob":0.900444746,"action_logp":-0.1048664823,"action_dist_inputs":[-1.0987027884,1.1034729481],"value_targets":80.1725769043} +{"eps_id":329428710,"obs":[0.0639344007,0.0320701487,-0.0159435701,0.1649423242],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0645757988,0.2274166644,-0.0126447249,-0.1327274144],"action_prob":0.7539688349,"action_logp":-0.2824042737,"action_dist_inputs":[-0.559063375,0.5608292222],"value_targets":79.9722976685} 
+{"eps_id":329428710,"obs":[0.0645757988,0.2274166644,-0.0126447249,-0.1327274144],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0691241324,0.0324780978,-0.0152992727,0.1559396088],"action_prob":0.6387799978,"action_logp":-0.448195219,"action_dist_inputs":[0.2841621339,-0.2859107256],"value_targets":79.7699966431} +{"eps_id":329428710,"obs":[0.0691241324,0.0324780978,-0.0152992727,0.1559396088],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0697736964,0.2278157175,-0.0121804811,-0.1415303499],"action_prob":0.7450796366,"action_logp":-0.2942641675,"action_dist_inputs":[-0.5354019403,0.5371379256],"value_targets":79.5656509399} +{"eps_id":329428710,"obs":[0.0697736964,0.2278157175,-0.0121804811,-0.1415303499],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0743300095,0.4231099784,-0.015011088,-0.438030988],"action_prob":0.3480065763,"action_logp":-1.055533886,"action_dist_inputs":[0.3130248487,-0.3147883415],"value_targets":79.3592453003} +{"eps_id":329428710,"obs":[0.0743300095,0.4231099784,-0.015011088,-0.438030988],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0827922076,0.2282036841,-0.023771707,-0.1501175761],"action_prob":0.8818032146,"action_logp":-0.1257863641,"action_dist_inputs":[1.0024815798,-1.0071368217],"value_targets":79.1507568359} +{"eps_id":329428710,"obs":[0.0827922076,0.2282036841,-0.023771707,-0.1501175761],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0873562843,0.4236578345,-0.0267740581,-0.4502041638],"action_prob":0.3109725714,"action_logp":-1.1680505276,"action_dist_inputs":[0.3968937099,-0.3986826837],"value_targets":78.9401550293} 
+{"eps_id":329428710,"obs":[0.0873562843,0.4236578345,-0.0267740581,-0.4502041638],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0958294421,0.2289245874,-0.0357781425,-0.1660796404],"action_prob":0.892187953,"action_logp":-0.1140784696,"action_dist_inputs":[1.0542852879,-1.0590018034],"value_targets":78.727432251} +{"eps_id":329428710,"obs":[0.0958294421,0.2289245874,-0.0357781425,-0.1660796404],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1004079357,0.4245399237,-0.0390997343,-0.4698314071],"action_prob":0.2676172256,"action_logp":-1.318197608,"action_dist_inputs":[0.5024362206,-0.5043092966],"value_targets":78.5125579834} +{"eps_id":329428710,"obs":[0.1004079357,0.4245399237,-0.0390997343,-0.4698314071],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1088987291,0.2299914956,-0.0484963618,-0.1897241771],"action_prob":0.9028580785,"action_logp":-0.102189891,"action_dist_inputs":[1.1122727394,-1.1171197891],"value_targets":78.2955093384} +{"eps_id":329428710,"obs":[0.1088987291,0.2299914956,-0.0484963618,-0.1897241771],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1134985611,0.0355956852,-0.0522908457,0.0872743577],"action_prob":0.7778362632,"action_logp":-0.2512392104,"action_dist_inputs":[0.6255390644,-0.6275624633],"value_targets":78.0762710571} +{"eps_id":329428710,"obs":[0.1134985611,0.0355956852,-0.0522908457,0.0872743577],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1142104715,-0.1587392539,-0.0505453609,0.3630118072],"action_prob":0.43216151,"action_logp":-0.8389558792,"action_dist_inputs":[-0.1359179169,0.1371196359],"value_targets":77.8548202515} 
+{"eps_id":329428710,"obs":[0.1142104715,-0.1587392539,-0.0505453609,0.3630118072],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1110356897,0.0370632969,-0.043285124,0.0548290908],"action_prob":0.8472968936,"action_logp":-0.1657041013,"action_dist_inputs":[-0.8546863794,0.8588692546],"value_targets":77.6311340332} +{"eps_id":329428710,"obs":[0.1110356897,0.0370632969,-0.043285124,0.0548290908],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1117769554,0.2327782959,-0.0421885401,-0.2511903048],"action_prob":0.5322052836,"action_logp":-0.6307259798,"action_dist_inputs":[-0.0640164912,0.0649832264],"value_targets":77.4051818848} +{"eps_id":329428710,"obs":[0.1117769554,0.2327782959,-0.0421885401,-0.2511903048],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1164325252,0.0382833965,-0.0472123474,0.0278927162],"action_prob":0.8220424056,"action_logp":-0.1959633231,"action_dist_inputs":[0.7639097571,-0.7663370371],"value_targets":77.1769561768} +{"eps_id":329428710,"obs":[0.1164325252,0.0382833965,-0.0472123474,0.0278927162],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1171981916,0.234049499,-0.0466544926,-0.2793045044],"action_prob":0.4742217362,"action_logp":-0.7460802794,"action_dist_inputs":[0.0519799627,-0.0512245595],"value_targets":76.9464187622} +{"eps_id":329428710,"obs":[0.1171981916,0.234049499,-0.0466544926,-0.2793045044],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1218791828,0.0396230631,-0.052240584,-0.001693628],"action_prob":0.8436803222,"action_logp":-0.1699816138,"action_dist_inputs":[0.8416146636,-0.8442557454],"value_targets":76.7135543823} 
+{"eps_id":329428710,"obs":[0.1218791828,0.0396230631,-0.052240584,-0.001693628],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1226716414,0.2354537845,-0.0522744544,-0.3103907406],"action_prob":0.4109478295,"action_logp":-0.8892890215,"action_dist_inputs":[0.1802814007,-0.1797670722],"value_targets":76.4783401489} +{"eps_id":329428710,"obs":[0.1226716414,0.2354537845,-0.0522744544,-0.3103907406],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1273807138,0.0411140732,-0.0584822707,-0.0346410498],"action_prob":0.8629639149,"action_logp":-0.1473824233,"action_dist_inputs":[0.918613553,-0.9215148091],"value_targets":76.2407455444} +{"eps_id":329428710,"obs":[0.1273807138,0.0411140732,-0.0584822707,-0.0346410498],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1282030046,0.2370238006,-0.0591750927,-0.345187217],"action_prob":0.3455433846,"action_logp":-1.0626370907,"action_dist_inputs":[0.3194586635,-0.3192285299],"value_targets":76.0007553101} +{"eps_id":329428710,"obs":[0.1282030046,0.2370238006,-0.0591750927,-0.345187217],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1329434812,0.0427913554,-0.0660788342,-0.0717358515],"action_prob":0.8797007203,"action_logp":-0.1281735152,"action_dist_inputs":[0.9931899905,-0.9964088798],"value_targets":75.7583389282} +{"eps_id":329428710,"obs":[0.1329434812,0.0427913554,-0.0660788342,-0.0717358515],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1337992996,-0.1513240933,-0.0675135553,0.199388817],"action_prob":0.7175215483,"action_logp":-0.3319523335,"action_dist_inputs":[0.4660425782,-0.466157943],"value_targets":75.5134735107} 
+{"eps_id":329428710,"obs":[0.1337992996,-0.1513240933,-0.0675135553,0.199388817],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1307728142,-0.3454187214,-0.0635257736,0.470033586],"action_prob":0.3568444252,"action_logp":-1.0304553509,"action_dist_inputs":[-0.2930151224,0.2960715592],"value_targets":75.26612854} +{"eps_id":329428710,"obs":[0.1307728142,-0.3454187214,-0.0635257736,0.470033586],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1238644421,-0.1494596303,-0.0541251041,0.1580238193],"action_prob":0.8721919656,"action_logp":-0.136745736,"action_dist_inputs":[-0.9574773908,0.9630028605],"value_targets":75.0162963867} +{"eps_id":329428710,"obs":[0.1238644421,-0.1494596303,-0.0541251041,0.1580238193],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1208752543,-0.3437665701,-0.0509646274,0.4331524074],"action_prob":0.3940249979,"action_logp":-0.9313409328,"action_dist_inputs":[-0.2138409317,0.2165833861],"value_targets":74.7639312744} +{"eps_id":329428710,"obs":[0.1208752543,-0.3437665701,-0.0509646274,0.4331524074],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1139999181,-0.1479614824,-0.0423015803,0.124849014],"action_prob":0.8624904752,"action_logp":-0.1479312032,"action_dist_inputs":[-0.9154015779,0.9207291603],"value_targets":74.5090255737} +{"eps_id":329428710,"obs":[0.1139999181,-0.1479614824,-0.0423015803,0.124849014],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1110406891,0.0477401577,-0.0398046002,-0.1808737963],"action_prob":0.5774397254,"action_logp":-0.5491511822,"action_dist_inputs":[-0.1549014896,0.157370612],"value_targets":74.2515411377} 
+{"eps_id":329428710,"obs":[0.1110406891,0.0477401577,-0.0398046002,-0.1808737963],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1119954959,-0.146790266,-0.0434220769,0.098991245],"action_prob":0.7903381586,"action_logp":-0.2352943867,"action_dist_inputs":[0.6629178524,-0.6640471816],"value_targets":73.9914550781} +{"eps_id":329428710,"obs":[0.1119954959,-0.146790266,-0.0434220769,0.098991245],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1090596914,0.048926238,-0.0414422527,-0.2070688754],"action_prob":0.5299703479,"action_logp":-0.6349342465,"action_dist_inputs":[-0.0589020438,0.0611231588],"value_targets":73.7287445068} +{"eps_id":329428710,"obs":[0.1090596914,0.048926238,-0.0414422527,-0.2070688754],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1100382134,-0.145579353,-0.0455836281,0.0722584799],"action_prob":0.8111330867,"action_logp":-0.2093231082,"action_dist_inputs":[0.7279824018,-0.7294074893],"value_targets":73.4633712769} +{"eps_id":329428710,"obs":[0.1100382134,-0.145579353,-0.0455836281,0.0722584799],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1071266308,0.0501654521,-0.0441384576,-0.234450683],"action_prob":0.4786960185,"action_logp":-0.736689508,"action_dist_inputs":[0.043604847,-0.0416627489],"value_targets":73.1953277588} +{"eps_id":329428710,"obs":[0.1071266308,0.0501654521,-0.0441384576,-0.234450683],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1081299335,-0.1442989707,-0.0488274731,0.0439893641],"action_prob":0.8300930262,"action_logp":-0.1862175018,"action_dist_inputs":[0.7922698855,-0.7940168381],"value_targets":72.9245758057} 
+{"eps_id":329428710,"obs":[0.1081299335,-0.1442989707,-0.0488274731,0.0439893641],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1052439585,-0.3386879861,-0.0479476862,0.3208757937],"action_prob":0.575543642,"action_logp":-0.5524402261,"action_dist_inputs":[0.1530630141,-0.1514429152],"value_targets":72.6510848999} +{"eps_id":329428710,"obs":[0.1052439585,-0.3386879861,-0.0479476862,0.3208757937],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0984701961,-0.1429171711,-0.0415301695,0.0134658441],"action_prob":0.7753540277,"action_logp":-0.2544355392,"action_dist_inputs":[-0.6170943379,0.6216995716],"value_targets":72.3748321533} +{"eps_id":329428710,"obs":[0.0984701961,-0.1429171711,-0.0415301695,0.0134658441],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0956118554,-0.3374196887,-0.0412608534,0.2927616835],"action_prob":0.6077995896,"action_logp":-0.4979100823,"action_dist_inputs":[0.2196774185,-0.2183948606],"value_targets":72.0957946777} +{"eps_id":329428710,"obs":[0.0956118554,-0.3374196887,-0.0412608534,0.2927616835],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0888634622,-0.141734466,-0.0354056172,-0.0126435589],"action_prob":0.7545077205,"action_logp":-0.2816897333,"action_dist_inputs":[-0.5592039227,0.5635963082],"value_targets":71.8139266968} +{"eps_id":329428710,"obs":[0.0888634622,-0.141734466,-0.0354056172,-0.0126435589],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0860287696,-0.3363312483,-0.0356584899,0.2686615884],"action_prob":0.634157896,"action_logp":-0.4554572701,"action_dist_inputs":[0.2755330503,-0.2745631337],"value_targets":71.5292205811} 
+{"eps_id":329428710,"obs":[0.0860287696,-0.3363312483,-0.0356584899,0.2686615884],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.079302147,-0.5309266448,-0.0302852578,0.5498877764],"action_prob":0.265076071,"action_logp":-1.3277384043,"action_dist_inputs":[-0.5077812076,0.5119688511],"value_targets":71.2416381836} +{"eps_id":329428710,"obs":[0.079302147,-0.5309266448,-0.0302852578,0.5498877764],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0686836094,-0.3353926837,-0.0192875024,0.2478187978],"action_prob":0.9032418728,"action_logp":-0.1017648801,"action_dist_inputs":[-1.1136957407,1.120080471],"value_targets":70.9511489868} +{"eps_id":329428710,"obs":[0.0686836094,-0.3353926837,-0.0192875024,0.2478187978],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0619757585,-0.1400006562,-0.0143311266,-0.0508848913],"action_prob":0.73820436,"action_logp":-0.3035345972,"action_dist_inputs":[-0.5163318515,0.5203244686],"value_targets":70.6577301025} +{"eps_id":329428710,"obs":[0.0619757585,-0.1400006562,-0.0143311266,-0.0508848913],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0591757484,-0.3349142075,-0.0153488247,0.2372422069],"action_prob":0.6479068398,"action_logp":-0.4340083599,"action_dist_inputs":[0.3051749468,-0.3046762347],"value_targets":70.3613433838} +{"eps_id":329428710,"obs":[0.0591757484,-0.3349142075,-0.0153488247,0.2372422069],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0524774641,-0.5298135281,-0.0106039802,0.5250444412],"action_prob":0.2678179145,"action_logp":-1.3174479008,"action_dist_inputs":[-0.5009335279,0.5047882795],"value_targets":70.061958313} 
+{"eps_id":329428710,"obs":[0.0524774641,-0.5298135281,-0.0106039802,0.5250444412],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0418811925,-0.334544003,-0.0001030922,0.2290390432],"action_prob":0.9048355222,"action_logp":-0.1000020951,"action_dist_inputs":[-1.122979641,1.1291667223],"value_targets":69.7595596313} +{"eps_id":329428710,"obs":[0.0418811925,-0.334544003,-0.0001030922,0.2290390432],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0351903103,-0.1394205689,0.0044776886,-0.0636764094],"action_prob":0.7492998242,"action_logp":-0.2886160612,"action_dist_inputs":[-0.5455745459,0.5493069887],"value_targets":69.4540939331} +{"eps_id":329428710,"obs":[0.0351903103,-0.1394205689,0.0044776886,-0.0636764094],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0324019007,0.055636894,0.0032041604,-0.3549432456],"action_prob":0.3706616163,"action_logp":-0.9924657345,"action_dist_inputs":[0.2648291588,-0.2645502687],"value_targets":69.1455535889} +{"eps_id":329428710,"obs":[0.0324019007,0.055636894,0.0032041604,-0.3549432456],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0335146375,0.2507131398,-0.0038947044,-0.6466140747],"action_prob":0.1409157813,"action_logp":-1.9595928192,"action_dist_inputs":[0.9022685885,-0.9054359794],"value_targets":68.8338928223} +{"eps_id":329428710,"obs":[0.0335146375,0.2507131398,-0.0038947044,-0.6466140747],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0385289006,0.0556456707,-0.0168269854,-0.355160147],"action_prob":0.9217927456,"action_logp":-0.0814348757,"action_dist_inputs":[1.2304961681,-1.2364617586],"value_targets":68.5190811157} 
+{"eps_id":329428710,"obs":[0.0385289006,0.0556456707,-0.0168269854,-0.355160147],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0396418124,-0.1392330378,-0.0239301883,-0.0678303689],"action_prob":0.8696178198,"action_logp":-0.1397014856,"action_dist_inputs":[0.9471263885,-0.9504575133],"value_targets":68.2010955811} +{"eps_id":329428710,"obs":[0.0396418124,-0.1392330378,-0.0239301883,-0.0678303689],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0368571542,0.0562236831,-0.0252867956,-0.3679662943],"action_prob":0.3120974004,"action_logp":-1.1644399166,"action_dist_inputs":[0.3951464593,-0.395185411],"value_targets":67.8798904419} +{"eps_id":329428710,"obs":[0.0368571542,0.0562236831,-0.0252867956,-0.3679662943],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0379816256,-0.138530001,-0.0326461233,-0.0833626911],"action_prob":0.8775382042,"action_logp":-0.1306347847,"action_dist_inputs":[0.9828801751,-0.9864413738],"value_targets":67.5554504395} +{"eps_id":329428710,"obs":[0.0379816256,-0.138530001,-0.0326461233,-0.0833626911],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0352110267,-0.3331691325,-0.034313377,0.1988441944],"action_prob":0.7200101614,"action_logp":-0.3284899592,"action_dist_inputs":[0.4720876515,-0.4724245071],"value_targets":67.227722168} +{"eps_id":329428710,"obs":[0.0352110267,-0.3331691325,-0.034313377,0.1988441944],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0285476446,-0.1375736445,-0.0303364918,-0.1044625118],"action_prob":0.6407062411,"action_logp":-0.445184201,"action_dist_inputs":[-0.2876257002,0.2908049822],"value_targets":66.8966903687} 
+{"eps_id":329428710,"obs":[0.0285476446,-0.1375736445,-0.0303364918,-0.1044625118],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0257961713,0.057969626,-0.0324257426,-0.4065600038],"action_prob":0.2610444725,"action_logp":-1.3430645466,"action_dist_inputs":[0.5199466348,-0.5206002593],"value_targets":66.5623168945} +{"eps_id":329428710,"obs":[0.0257961713,0.057969626,-0.0324257426,-0.4065600038],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0269555636,-0.1366778761,-0.0405569412,-0.1242737323],"action_prob":0.8903061748,"action_logp":-0.1161898747,"action_dist_inputs":[1.0448867083,-1.0489853621],"value_targets":66.2245635986} +{"eps_id":329428710,"obs":[0.0269555636,-0.1366778761,-0.0405569412,-0.1242737323],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.024222007,-0.3311960399,-0.0430424176,0.1553430855],"action_prob":0.7699302435,"action_logp":-0.2614553571,"action_dist_inputs":[0.603445828,-0.6044715047],"value_targets":65.883392334} +{"eps_id":329428710,"obs":[0.024222007,-0.3311960399,-0.0430424176,0.1553430855],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0175980851,-0.5256761312,-0.0399355553,0.4341424704],"action_prob":0.4493254721,"action_logp":-0.8000077605,"action_dist_inputs":[-0.1004038602,0.1029925868],"value_targets":65.5387802124} +{"eps_id":329428710,"obs":[0.0175980851,-0.5256761312,-0.0399355553,0.4341424704],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0070845634,-0.7202106118,-0.0312527046,0.7139735818],"action_prob":0.1633841246,"action_logp":-1.8116512299,"action_dist_inputs":[-0.8139664531,0.8192945123],"value_targets":65.1906890869} 
+{"eps_id":329428710,"obs":[0.0070845634,-0.7202106118,-0.0312527046,0.7139735818],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0073196488,-0.5246702433,-0.0169732347,0.4116194844],"action_prob":0.9270626307,"action_logp":-0.075734131,"action_dist_inputs":[-1.2676230669,1.2747967243],"value_targets":64.8390808105} +{"eps_id":329428710,"obs":[-0.0073196488,-0.5246702433,-0.0169732347,0.4116194844],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.017813053,-0.7195475101,-0.0087408442,0.6989033818],"action_prob":0.155483678,"action_logp":-1.8612145185,"action_dist_inputs":[-0.8435451984,0.8486781716],"value_targets":64.4839172363} +{"eps_id":329428710,"obs":[-0.017813053,-0.7195475101,-0.0087408442,0.6989033818],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0322040021,-0.5243054628,0.0052372236,0.4034817517],"action_prob":0.9304375052,"action_logp":-0.0721003413,"action_dist_inputs":[-1.2931661606,1.3002636433],"value_targets":64.1251678467} +{"eps_id":329428710,"obs":[-0.0322040021,-0.5243054628,0.0052372236,0.4034817517],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0426901132,-0.3292581737,0.0133068589,0.1124545783],"action_prob":0.860034585,"action_logp":-0.1507826596,"action_dist_inputs":[-0.9052866101,0.9102904201],"value_targets":63.7627983093} +{"eps_id":329428710,"obs":[-0.0426901132,-0.3292581737,0.0133068589,0.1124545783],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0492752753,-0.1343294084,0.0155559499,-0.1760005951],"action_prob":0.6118651628,"action_logp":-0.4912433028,"action_dist_inputs":[-0.2265614718,0.2285978198],"value_targets":63.3967666626} 
+{"eps_id":329428710,"obs":[-0.0492752753,-0.1343294084,0.0155559499,-0.1760005951],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0519618653,0.0605664998,0.012035938,-0.4637358189],"action_prob":0.2520805597,"action_logp":-1.3780065775,"action_dist_inputs":[0.5429075956,-0.5446389318],"value_targets":63.0270347595} +{"eps_id":329428710,"obs":[-0.0519618653,0.0605664998,0.012035938,-0.4637358189],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.050750535,-0.1347234547,0.0027612215,-0.1672836393],"action_prob":0.8921602368,"action_logp":-0.1141095236,"action_dist_inputs":[1.0540822744,-1.0589174032],"value_targets":62.6535720825} +{"eps_id":329428710,"obs":[-0.050750535,-0.1347234547,0.0027612215,-0.1672836393],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0534450039,-0.3298848271,-0.0005844512,0.1262691021],"action_prob":0.7567951679,"action_logp":-0.2786626816,"action_dist_inputs":[0.5666949153,-0.5684936047],"value_targets":62.2763366699} +{"eps_id":329428710,"obs":[-0.0534450039,-0.3298848271,-0.0005844512,0.1262691021],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0600427017,-0.1347544938,0.0019409307,-0.1665981561],"action_prob":0.6044217944,"action_logp":-0.5034829974,"action_dist_inputs":[-0.2109992653,0.2129245847],"value_targets":61.8952865601} +{"eps_id":329428710,"obs":[-0.0600427017,-0.1347544938,0.0019409307,-0.1665981561],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0627377927,-0.3299041688,-0.0013910325,0.1266964376],"action_prob":0.7573097944,"action_logp":-0.277982831,"action_dist_inputs":[0.5680469275,-0.5699398518],"value_targets":61.5103912354} 
+{"eps_id":329428710,"obs":[-0.0627377927,-0.3299041688,-0.0013910325,0.1266964376],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0693358704,-0.1347623318,0.0011428964,-0.1664250195],"action_prob":0.6040747762,"action_logp":-0.5040572882,"action_dist_inputs":[-0.2103229016,0.2121496201],"value_targets":61.1216087341} +{"eps_id":329428710,"obs":[-0.0693358704,-0.1347623318,0.0011428964,-0.1664250195],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.072031118,-0.3299006224,-0.0021856038,0.1266182512],"action_prob":0.7582282424,"action_logp":-0.2767708302,"action_dist_inputs":[0.570499301,-0.5724908113],"value_targets":60.7288970947} +{"eps_id":329428710,"obs":[-0.072031118,-0.3299006224,-0.0021856038,0.1266182512],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0786291361,-0.1347474307,0.0003467609,-0.1667534113],"action_prob":0.6029245257,"action_logp":-0.5059632659,"action_dist_inputs":[-0.2079709023,0.2096947134],"value_targets":60.3322181702} +{"eps_id":329428710,"obs":[-0.0786291361,-0.1347474307,0.0003467609,-0.1667534113],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0813240781,0.0603695586,-0.0029883073,-0.4593269229],"action_prob":0.2404315323,"action_logp":-1.42531991,"action_dist_inputs":[0.5741102099,-0.576204896],"value_targets":59.9315338135} +{"eps_id":329428710,"obs":[-0.0813240781,0.0603695586,-0.0029883073,-0.4593269229],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0801166892,-0.1347100288,-0.0121748457,-0.1675874144],"action_prob":0.8982750773,"action_logp":-0.1072789654,"action_dist_inputs":[1.0864924192,-1.0917109251],"value_targets":59.526802063} 
+{"eps_id":329428710,"obs":[-0.0801166892,-0.1347100288,-0.0121748457,-0.1675874144],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0828108937,-0.3296556175,-0.0155265946,0.1212299615],"action_prob":0.7760520577,"action_logp":-0.2535356879,"action_dist_inputs":[0.6202711463,-0.6225346327],"value_targets":59.117980957} +{"eps_id":329428710,"obs":[-0.0828108937,-0.3296556175,-0.0155265946,0.1212299615],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0894040018,-0.5245516896,-0.013101995,0.4089742303],"action_prob":0.4353218973,"action_logp":-0.8316695094,"action_dist_inputs":[-0.1293687969,0.1308012456],"value_targets":58.7050323486} +{"eps_id":329428710,"obs":[-0.0894040018,-0.5245516896,-0.013101995,0.4089742303],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0998950377,-0.7194854617,-0.0049225101,0.6974979043],"action_prob":0.153664723,"action_logp":-1.8729821444,"action_dist_inputs":[-0.8508689404,0.8552734256],"value_targets":58.2879104614} +{"eps_id":329428710,"obs":[-0.0998950377,-0.7194854617,-0.0049225101,0.6974979043],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1142847463,-0.5242956281,0.0090274476,0.4032694399],"action_prob":0.9285399318,"action_logp":-0.0741418973,"action_dist_inputs":[-1.2789392471,1.2855348587],"value_targets":57.8665771484} +{"eps_id":329428710,"obs":[-0.1142847463,-0.5242956281,0.0090274476,0.4032694399],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1247706562,-0.3293028474,0.017092837,0.113446258],"action_prob":0.8615953922,"action_logp":-0.1489695311,"action_dist_inputs":[-0.9121537209,0.9164503217],"value_targets":57.4409866333} 
+{"eps_id":329428710,"obs":[-0.1247706562,-0.3293028474,0.017092837,0.113446258],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1313567162,-0.1344299316,0.0193617623,-0.1737953573],"action_prob":0.6281294227,"action_logp":-0.4650090635,"action_dist_inputs":[-0.2614771128,0.2627233267],"value_targets":57.0110969543} +{"eps_id":329428710,"obs":[-0.1313567162,-0.1344299316,0.0193617623,-0.1737953573],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1340453178,-0.3298235536,0.0158858541,0.1249321476],"action_prob":0.7386043668,"action_logp":-0.3029928803,"action_dist_inputs":[0.5181375742,-0.5205896497],"value_targets":56.5768661499} +{"eps_id":329428710,"obs":[-0.1340453178,-0.3298235536,0.0158858541,0.1249321476],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1406417787,-0.1349327564,0.0183844976,-0.1626969278],"action_prob":0.6440601945,"action_logp":-0.4399631321,"action_dist_inputs":[-0.2958914936,0.2971390486],"value_targets":56.1382484436} +{"eps_id":329428710,"obs":[-0.1406417787,-0.1349327564,0.0183844976,-0.1626969278],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1433404386,-0.3303129971,0.015130559,0.1357286572],"action_prob":0.7274557352,"action_logp":-0.3182021081,"action_dist_inputs":[0.4896637201,-0.4920885265],"value_targets":55.6952018738} +{"eps_id":329428710,"obs":[-0.1433404386,-0.3303129971,0.015130559,0.1357286572],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1499467045,-0.5256483555,0.0178451315,0.4331463873],"action_prob":0.3406646848,"action_logp":-1.0768566132,"action_dist_inputs":[-0.3295449615,0.3307886422],"value_targets":55.2476768494} 
+{"eps_id":329428710,"obs":[-0.1499467045,-0.5256483555,0.0178451315,0.4331463873],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1604596674,-0.330783546,0.0265080594,0.1461419016],"action_prob":0.8806215525,"action_logp":-0.1271273196,"action_dist_inputs":[-0.9970547557,1.0012743473],"value_targets":54.7956352234} +{"eps_id":329428710,"obs":[-0.1604596674,-0.330783546,0.0265080594,0.1461419016],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.167075336,-0.5262749195,0.0294308979,0.4470683336],"action_prob":0.3030774593,"action_logp":-1.1937668324,"action_dist_inputs":[-0.4157153964,0.4169702828],"value_targets":54.3390235901} +{"eps_id":329428710,"obs":[-0.167075336,-0.5262749195,0.0294308979,0.4470683336],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1776008308,-0.3315813839,0.0383722633,0.1638059765],"action_prob":0.8914470077,"action_logp":-0.1149093062,"action_dist_inputs":[-1.0506798029,1.0549271107],"value_targets":53.8778038025} +{"eps_id":329428710,"obs":[-0.1776008308,-0.3315813839,0.0383722633,0.1638059765],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1842324585,-0.5272310376,0.0416483842,0.4683431089],"action_prob":0.2602828145,"action_logp":-1.3459864855,"action_dist_inputs":[-0.5215903521,0.5229088068],"value_targets":53.4119224548} +{"eps_id":329428710,"obs":[-0.1842324585,-0.5272310376,0.0416483842,0.4683431089],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1947770864,-0.332721442,0.0510152467,0.1890728176],"action_prob":0.9024241567,"action_logp":-0.1026706621,"action_dist_inputs":[-1.1100555658,1.1143988371],"value_targets":52.9413375854} 
+{"eps_id":329428710,"obs":[-0.1947770864,-0.332721442,0.0510152467,0.1890728176],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2014315128,-0.5285347104,0.0547967032,0.4974025786],"action_prob":0.2162498981,"action_logp":-1.5313205719,"action_dist_inputs":[-0.6431090236,0.6445465088],"value_targets":52.4659957886} +{"eps_id":329428710,"obs":[-0.2014315128,-0.5285347104,0.0547967032,0.4974025786],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2120022029,-0.3342265189,0.0647447556,0.2224798948],"action_prob":0.9127021432,"action_logp":-0.0913456902,"action_dist_inputs":[-1.1712802649,1.175804019],"value_targets":51.9858551025} +{"eps_id":329428710,"obs":[-0.2120022029,-0.3342265189,0.0647447556,0.2224798948],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2186867446,-0.1400868297,0.0691943541,-0.0490978099],"action_prob":0.824762404,"action_logp":-0.1926599443,"action_dist_inputs":[-0.773662746,0.7752898335],"value_targets":51.5008621216} +{"eps_id":329428710,"obs":[-0.2186867446,-0.1400868297,0.0691943541,-0.0490978099],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2214884758,-0.3361292481,0.0682123974,0.2645890117],"action_prob":0.4400615394,"action_logp":-0.8208407164,"action_dist_inputs":[-0.1212191433,0.1196930781],"value_targets":51.0109710693} +{"eps_id":329428710,"obs":[-0.2214884758,-0.3361292481,0.0682123974,0.2645890117],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2282110602,-0.1420438439,0.0735041723,-0.0058231447],"action_prob":0.8518117666,"action_logp":-0.1603897363,"action_dist_inputs":[-0.8734956384,0.8753867745],"value_targets":50.5161323547} 
+{"eps_id":329428710,"obs":[-0.2282110602,-0.1420438439,0.0735041723,-0.0058231447],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2310519367,0.051951196,0.0733877122,-0.2744391859],"action_prob":0.6372209787,"action_logp":-0.4506387413,"action_dist_inputs":[-0.2822724283,0.2810503542],"value_targets":50.0162963867} +{"eps_id":329428710,"obs":[-0.2310519367,0.051951196,0.0733877122,-0.2744391859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2300129086,-0.1441369504,0.0678989291,0.040457949],"action_prob":0.7179030776,"action_logp":-0.3314207494,"action_dist_inputs":[0.4649041891,-0.4691793621],"value_targets":49.5114097595} +{"eps_id":329428710,"obs":[-0.2300129086,-0.1441369504,0.0678989291,0.040457949],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2328956574,0.0499489605,0.068708092,-0.2300531268],"action_prob":0.6902506351,"action_logp":-0.3707005382,"action_dist_inputs":[-0.4011010528,0.4001900554],"value_targets":49.0014266968} +{"eps_id":329428710,"obs":[-0.2328956574,0.0499489605,0.068708092,-0.2300531268],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2318966687,0.2440252453,0.0641070232,-0.5002964139],"action_prob":0.3337706625,"action_logp":-1.0973011255,"action_dist_inputs":[0.3436017334,-0.3475780487],"value_targets":48.486289978} +{"eps_id":329428710,"obs":[-0.2318966687,0.2440252453,0.0641070232,-0.5002964139],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2270161659,0.0480609164,0.0541010983,-0.1881191432],"action_prob":0.8775407076,"action_logp":-0.1306319535,"action_dist_inputs":[0.9814500213,-0.987894237],"value_targets":47.9659461975} 
+{"eps_id":329428710,"obs":[-0.2270161659,0.0480609164,0.0541010983,-0.1881191432],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2260549515,-0.147791639,0.0503387153,0.1211278215],"action_prob":0.6338638663,"action_logp":-0.4559210241,"action_dist_inputs":[0.2725611031,-0.276268065],"value_targets":47.4403495789} +{"eps_id":329428710,"obs":[-0.2260549515,-0.147791639,0.0503387153,0.1211278215],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2290107757,0.0465743132,0.0527612716,-0.1552584618],"action_prob":0.7538749576,"action_logp":-0.282528758,"action_dist_inputs":[-0.5598741174,0.5595126748],"value_targets":46.9094467163} +{"eps_id":329428710,"obs":[-0.2290107757,0.0465743132,0.0527612716,-0.1552584618],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2280792892,-0.1492618173,0.0496561006,0.1535915732],"action_prob":0.5840229392,"action_logp":-0.5378149748,"action_dist_inputs":[0.1679162979,-0.1713940054],"value_targets":46.3731765747} +{"eps_id":329428710,"obs":[-0.2280792892,-0.1492618173,0.0496561006,0.1535915732],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2310645282,0.045115266,0.052727934,-0.1230217665],"action_prob":0.7818020582,"action_logp":-0.2461536825,"action_dist_inputs":[-0.6381585598,0.638040483],"value_targets":45.8314933777} +{"eps_id":329428710,"obs":[-0.2310645282,0.045115266,0.052727934,-0.1230217665],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2301622331,-0.1507208943,0.0502674989,0.1858190447],"action_prob":0.5303852558,"action_logp":-0.6341516376,"action_dist_inputs":[0.059222471,-0.062468376],"value_targets":45.2843360901} 
+{"eps_id":329428710,"obs":[-0.2301622331,-0.1507208943,0.0502674989,0.1858190447],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2331766486,0.043647144,0.0539838783,-0.0905923173],"action_prob":0.8061870933,"action_logp":-0.2154394537,"action_dist_inputs":[-0.7126389146,0.7127833962],"value_targets":44.7316513062} +{"eps_id":329428710,"obs":[-0.2331766486,0.043647144,0.0539838783,-0.0905923173],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2323037088,0.2379554212,0.0521720313,-0.3657665849],"action_prob":0.5263085961,"action_logp":-0.641867578,"action_dist_inputs":[-0.0541669689,0.0511647053],"value_targets":44.1733856201} +{"eps_id":329428710,"obs":[-0.2323037088,0.2379554212,0.0521720313,-0.3657665849],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2275445908,0.0421323813,0.044856701,-0.05710008],"action_prob":0.8007429242,"action_logp":-0.2222152948,"action_dist_inputs":[0.692597568,-0.6983466148],"value_targets":43.6094818115} +{"eps_id":329428710,"obs":[-0.2275445908,0.0421323813,0.044856701,-0.05710008],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2267019451,-0.1536030769,0.0437146984,0.249391228],"action_prob":0.4396981001,"action_logp":-0.8216669559,"action_dist_inputs":[-0.1225692332,0.1198181659],"value_targets":43.0398788452} +{"eps_id":329428710,"obs":[-0.2267019451,-0.1536030769,0.0437146984,0.249391228],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2297740132,0.040868219,0.0487025231,-0.0291890111],"action_prob":0.8376992345,"action_logp":-0.1770961434,"action_dist_inputs":[-0.8202578425,0.8209499717],"value_targets":42.4645233154} 
+{"eps_id":329428710,"obs":[-0.2297740132,0.040868219,0.0487025231,-0.0291890111],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2289566398,0.2352591306,0.048118744,-0.3061167598],"action_prob":0.6106703877,"action_logp":-0.4931979477,"action_dist_inputs":[-0.2263218462,0.2238090783],"value_targets":41.8833580017} +{"eps_id":329428710,"obs":[-0.2289566398,0.2352591306,0.048118744,-0.3061167598],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.224251464,0.4296635389,0.0419964083,-0.5832443833],"action_prob":0.2557005882,"action_logp":-1.3637480736,"action_dist_inputs":[0.531517148,-0.5369189382],"value_targets":41.2963218689} +{"eps_id":329428710,"obs":[-0.224251464,0.4296635389,0.0419964083,-0.5832443833],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2156581879,0.2339791805,0.0303315204,-0.2776333392],"action_prob":0.9046282768,"action_logp":-0.1002311707,"action_dist_inputs":[1.1211630106,-1.1285792589],"value_targets":40.7033538818} +{"eps_id":329428710,"obs":[-0.2156581879,0.2339791805,0.0303315204,-0.2776333392],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2109786123,0.0384379402,0.0247788541,0.0244597029],"action_prob":0.7408832312,"action_logp":-0.2999122739,"action_dist_inputs":[0.5226687193,-0.5278953314],"value_targets":40.1044006348} +{"eps_id":329428710,"obs":[-0.2109786123,0.0384379402,0.0247788541,0.0244597029],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2102098465,0.2331959456,0.025268048,-0.2603033781],"action_prob":0.6413553953,"action_logp":-0.4441715181,"action_dist_inputs":[-0.2916600704,0.2895917296],"value_targets":39.4993934631} 
+{"eps_id":329428710,"obs":[-0.2102098465,0.2331959456,0.025268048,-0.2603033781],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2055459321,0.4279482365,0.0200619791,-0.5449105501],"action_prob":0.2714946866,"action_logp":-1.3038127422,"action_dist_inputs":[0.4909739792,-0.4960784316],"value_targets":38.8882751465} +{"eps_id":329428710,"obs":[-0.2055459321,0.4279482365,0.0200619791,-0.5449105501],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1969869584,0.2325502187,0.009163769,-0.2459746748],"action_prob":0.9035969377,"action_logp":-0.1013718992,"action_dist_inputs":[1.115298152,-1.1225466728],"value_targets":38.2709846497} +{"eps_id":329428710,"obs":[-0.1969869584,0.2325502187,0.009163769,-0.2459746748],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1923359632,0.0372985862,0.0042442754,0.0495845824],"action_prob":0.7395785451,"action_logp":-0.301674813,"action_dist_inputs":[0.5193903446,-0.5243887305],"value_targets":37.6474609375} +{"eps_id":329428710,"obs":[-0.1923359632,0.0372985862,0.0042442754,0.0495845824],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1915899813,-0.1578839719,0.0052359672,0.3436035812],"action_prob":0.3616308868,"action_logp":-1.0171312094,"action_dist_inputs":[-0.2850648761,0.2832277715],"value_targets":37.0176353455} +{"eps_id":329428710,"obs":[-0.1915899813,-0.1578839719,0.0052359672,0.3436035812],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1947476715,0.0371631086,0.0121080391,0.0525763556],"action_prob":0.8573496342,"action_logp":-0.1539094746,"action_dist_inputs":[-0.8959687948,0.8974804282],"value_targets":36.3814506531} 
+{"eps_id":329428710,"obs":[-0.1947476715,0.0371631086,0.0121080391,0.0525763556],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1940044016,-0.1581303477,0.0131595656,0.349054724],"action_prob":0.3431449831,"action_logp":-1.0696022511,"action_dist_inputs":[-0.3255383968,0.3237718344],"value_targets":35.7388381958} +{"eps_id":329428710,"obs":[-0.1940044016,-0.1581303477,0.0131595656,0.349054724],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1971670091,0.0368019938,0.0201406609,0.060550347],"action_prob":0.8633413315,"action_logp":-0.1469451189,"action_dist_inputs":[-0.9208479524,0.9224761724],"value_targets":35.0897369385} +{"eps_id":329428710,"obs":[-0.1971670091,0.0368019938,0.0201406609,0.060550347],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1964309663,0.2316294611,0.0213516671,-0.2257106304],"action_prob":0.6807963848,"action_logp":-0.3844920397,"action_dist_inputs":[-0.3795403838,0.3778935373],"value_targets":34.4340782166} +{"eps_id":329428710,"obs":[-0.1964309663,0.2316294611,0.0213516671,-0.2257106304],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.191798389,0.0362089723,0.0168374553,0.0736301988],"action_prob":0.6904880404,"action_logp":-0.3703566194,"action_dist_inputs":[0.3988072276,-0.4035946131],"value_targets":33.7717971802} +{"eps_id":329428710,"obs":[-0.191798389,0.0362089723,0.0168374553,0.0736301988],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1910742074,-0.1591502726,0.0183100589,0.3715775013],"action_prob":0.3085310161,"action_logp":-1.1759328842,"action_dist_inputs":[-0.4042488635,0.4027469754],"value_targets":33.1028251648} 
+{"eps_id":329428710,"obs":[-0.1910742074,-0.1591502726,0.0183100589,0.3715775013],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1942572147,-0.3545275033,0.0257416088,0.6699770689],"action_prob":0.1268962324,"action_logp":-2.0643856525,"action_dist_inputs":[-0.9633767605,0.9653078914],"value_targets":32.4270935059} +{"eps_id":329428710,"obs":[-0.1942572147,-0.3545275033,0.0257416088,0.6699770689],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2013477534,-0.1597727239,0.0391411521,0.3855088949],"action_prob":0.9234704375,"action_logp":-0.0796164721,"action_dist_inputs":[-1.242659688,1.2478028536],"value_targets":31.7445411682} +{"eps_id":329428710,"obs":[-0.2013477534,-0.1597727239,0.0391411521,0.3855088949],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.204543218,0.0347723402,0.0468513295,0.1054193676],"action_prob":0.8837978244,"action_logp":-0.1235269159,"action_dist_inputs":[-1.0133426189,1.0155546665],"value_targets":31.0550918579} +{"eps_id":329428710,"obs":[-0.204543218,0.0347723402,0.0468513295,0.1054193676],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2038477659,0.2291926593,0.0489597172,-0.1721218526],"action_prob":0.7628093958,"action_logp":-0.2707470953,"action_dist_inputs":[-0.5845841765,0.5835599303],"value_targets":30.3586788177} +{"eps_id":329428710,"obs":[-0.2038477659,0.2291926593,0.0489597172,-0.1721218526],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1992639154,0.0334054045,0.045517277,0.1355955303],"action_prob":0.5483816266,"action_logp":-0.6007838249,"action_dist_inputs":[0.0949297398,-0.0992041528],"value_targets":29.6552295685} 
+{"eps_id":329428710,"obs":[-0.1992639154,0.0334054045,0.045517277,0.1355955303],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.198595807,0.2278468311,0.0482291877,-0.1423870623],"action_prob":0.7847710848,"action_logp":-0.2423632145,"action_dist_inputs":[-0.6471818089,0.6465079784],"value_targets":28.9446773529} +{"eps_id":329428710,"obs":[-0.198595807,0.2278468311,0.0482291877,-0.1423870623],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.194038868,0.4222460687,0.0453814492,-0.4194729626],"action_prob":0.4978719056,"action_logp":-0.6974124312,"action_dist_inputs":[0.0022647334,-0.006247499],"value_targets":28.2269458771} +{"eps_id":329428710,"obs":[-0.194038868,0.4222460687,0.0453814492,-0.4194729626],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1855939478,0.2265114337,0.0369919874,-0.1128357425],"action_prob":0.8114137053,"action_logp":-0.2089772671,"action_dist_inputs":[0.7263483405,-0.7328737974],"value_targets":27.5019664764} +{"eps_id":329428710,"obs":[-0.1855939478,0.2265114337,0.0369919874,-0.1128357425],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1810637116,0.4210843444,0.0347352736,-0.3936222494],"action_prob":0.5206412673,"action_logp":-0.6526940465,"action_dist_inputs":[-0.0431530215,0.0394590087],"value_targets":26.7696628571} +{"eps_id":329428710,"obs":[-0.1810637116,0.4210843444,0.0347352736,-0.3936222494],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1726420373,0.615696609,0.0268628281,-0.6751544476],"action_prob":0.1974427104,"action_logp":-1.6223068237,"action_dist_inputs":[0.6979954243,-0.7043592334],"value_targets":26.0299625397} 
+{"eps_id":329428710,"obs":[-0.1726420373,0.615696609,0.0268628281,-0.6751544476],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1603281051,0.8104351759,0.0133597394,-0.9592601657],"action_prob":0.0790962502,"action_logp":-2.5370898247,"action_dist_inputs":[1.2233415842,-1.2313485146],"value_targets":25.2827911377} +{"eps_id":329428710,"obs":[-0.1603281051,0.8104351759,0.0133597394,-0.9592601657],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1441193968,0.6151362062,-0.0058254646,-0.6624101996],"action_prob":0.9519569874,"action_logp":-0.0492354073,"action_dist_inputs":[1.4885470867,-1.4978764057],"value_targets":24.5280704498} +{"eps_id":329428710,"obs":[-0.1441193968,0.6151362062,-0.0058254646,-0.6624101996],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1318166703,0.4200957716,-0.0190736689,-0.3715672195],"action_prob":0.9295270443,"action_logp":-0.0730793625,"action_dist_inputs":[1.2857424021,-1.2937043905],"value_targets":23.7657279968} +{"eps_id":329428710,"obs":[-0.1318166703,0.4200957716,-0.0190736689,-0.3715672195],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1234147549,0.2252499014,-0.0265050121,-0.0849589109],"action_prob":0.8493490815,"action_logp":-0.163285017,"action_dist_inputs":[0.861692369,-0.8678122759],"value_targets":22.9956855774} +{"eps_id":329428710,"obs":[-0.1234147549,0.2252499014,-0.0265050121,-0.0849589109],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1189097613,0.0305177215,-0.0282041915,0.1992452145],"action_prob":0.577037096,"action_logp":-0.5498486757,"action_dist_inputs":[0.1536172777,-0.1570048183],"value_targets":22.2178649902} 
+{"eps_id":329428710,"obs":[-0.1189097613,0.0305177215,-0.0282041915,0.1992452145],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1182994023,0.2260314822,-0.0242192876,-0.1021997184],"action_prob":0.7620638013,"action_logp":-0.2717249691,"action_dist_inputs":[-0.5820022225,0.582025528],"value_targets":21.4321861267} +{"eps_id":329428710,"obs":[-0.1182994023,0.2260314822,-0.0242192876,-0.1021997184],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1137787774,0.0312648416,-0.0262632817,0.1827447861],"action_prob":0.6011467576,"action_logp":-0.5089161396,"action_dist_inputs":[0.2033920437,-0.2068536133],"value_targets":20.6385707855} +{"eps_id":329428710,"obs":[-0.1137787774,0.0312648416,-0.0262632817,0.1827447861],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.11315348,0.2267525494,-0.0226083864,-0.1181061789],"action_prob":0.7501808405,"action_logp":-0.2874409556,"action_dist_inputs":[-0.5498188734,0.5497583747],"value_targets":19.8369407654} +{"eps_id":329428710,"obs":[-0.11315348,0.2267525494,-0.0226083864,-0.1181061789],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1086184233,0.0319617055,-0.0249705091,0.1673591286],"action_prob":0.6242306232,"action_logp":-0.4712353647,"action_dist_inputs":[0.2520090342,-0.2555353045],"value_targets":19.0272140503} +{"eps_id":329428710,"obs":[-0.1086184233,0.0319617055,-0.0249705091,0.1673591286],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1079791933,-0.1627940834,-0.0216233265,0.4520611465],"action_prob":0.2626000345,"action_logp":-1.3371231556,"action_dist_inputs":[-0.5163188577,0.5161793828],"value_targets":18.2093067169} 
+{"eps_id":329428710,"obs":[-0.1079791933,-0.1627940834,-0.0216233265,0.4520611465],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1112350747,-0.3576036692,-0.0125821037,0.7378504872],"action_prob":0.1142594665,"action_logp":-2.16928339,"action_dist_inputs":[-1.0224624872,1.0254898071],"value_targets":17.3831367493} +{"eps_id":329428710,"obs":[-0.1112350747,-0.3576036692,-0.0125821037,0.7378504872],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1183871478,-0.1623102278,0.0021749057,0.4412344694],"action_prob":0.9281844497,"action_logp":-0.0745248124,"action_dist_inputs":[-1.2766371965,1.2824915648],"value_targets":16.5486240387} +{"eps_id":329428710,"obs":[-0.1183871478,-0.1623102278,0.0021749057,0.4412344694],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1216333508,0.0327808745,0.0109995948,0.1492379159],"action_prob":0.8911539912,"action_logp":-0.1152380481,"action_dist_inputs":[-1.049737215,1.0528455973],"value_targets":15.7056808472} +{"eps_id":329428710,"obs":[-0.1216333508,0.0327808745,0.0109995948,0.1492379159],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.120977737,0.2277435958,0.0139843533,-0.1399546713],"action_prob":0.7664856315,"action_logp":-0.2659393251,"action_dist_inputs":[-0.5943243504,0.5942478776],"value_targets":14.8542232513} +{"eps_id":329428710,"obs":[-0.120977737,0.2277435958,0.0139843533,-0.1399546713],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1164228618,0.0324241817,0.0111852596,0.157107085],"action_prob":0.5778313875,"action_logp":-0.5484731793,"action_dist_inputs":[0.1551935971,-0.1586837322],"value_targets":13.9941644669} 
+{"eps_id":329428710,"obs":[-0.1164228618,0.0324241817,0.0111852596,0.157107085],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1157743782,0.2273842245,0.0143274013,-0.1320262849],"action_prob":0.7737918496,"action_logp":-0.2564523518,"action_dist_inputs":[-0.6148927808,0.6149545312],"value_targets":13.125418663} +{"eps_id":329428710,"obs":[-0.1157743782,0.2273842245,0.0143274013,-0.1320262849],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1112266928,0.422298044,0.0116868755,-0.4201548994],"action_prob":0.4360138774,"action_logp":-0.8300812244,"action_dist_inputs":[0.1269935071,-0.1303621531],"value_targets":12.2478981018} +{"eps_id":329428710,"obs":[-0.1112266928,0.422298044,0.0116868755,-0.4201548994],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1027807295,0.2270124555,0.0032837777,-0.1238106638],"action_prob":0.8533684015,"action_logp":-0.1585639268,"action_dist_inputs":[0.8776316047,-0.8836361766],"value_targets":11.3615131378} +{"eps_id":329428710,"obs":[-0.1027807295,0.2270124555,0.0032837777,-0.1238106638],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0982404873,0.4220872223,0.0008075645,-0.4154557884],"action_prob":0.4238248467,"action_logp":-0.8584350348,"action_dist_inputs":[0.1519152224,-0.155176267],"value_targets":10.4661741257} +{"eps_id":329428710,"obs":[-0.0982404873,0.4220872223,0.0008075645,-0.4154557884],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.089798741,0.2269538194,-0.0075015514,-0.122518383],"action_prob":0.8606634736,"action_logp":-0.150051713,"action_dist_inputs":[0.9074391723,-0.9133726358],"value_targets":9.5617923737} 
+{"eps_id":329428710,"obs":[-0.089798741,0.2269538194,-0.0075015514,-0.122518383],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0852596611,0.0319401473,-0.0099519193,0.1677884758],"action_prob":0.599521935,"action_logp":-0.5116227269,"action_dist_inputs":[0.200132817,-0.2033408433],"value_targets":8.6482753754} +{"eps_id":329428710,"obs":[-0.0852596611,0.0319401473,-0.0099519193,0.1677884758],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0846208632,0.2272031307,-0.0065961494,-0.1280173063],"action_prob":0.7598874569,"action_logp":-0.274584949,"action_dist_inputs":[-0.5758921504,0.5761705041],"value_targets":7.7255306244} +{"eps_id":329428710,"obs":[-0.0846208632,0.2272031307,-0.0065961494,-0.1280173063],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0800767988,0.032176286,-0.0091564953,0.1625773311],"action_prob":0.6070807576,"action_logp":-0.4990934432,"action_dist_inputs":[0.2159319967,-0.2191257626],"value_targets":6.7934651375} +{"eps_id":329428710,"obs":[-0.0800767988,0.032176286,-0.0091564953,0.1625773311],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0794332698,0.2274281085,-0.0059049488,-0.1329800934],"action_prob":0.7564678192,"action_logp":-0.2790952623,"action_dist_inputs":[-0.5665596724,0.5668513179],"value_targets":5.8519849777} +{"eps_id":329428710,"obs":[-0.0794332698,0.2274281085,-0.0059049488,-0.1329800934],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0748847127,0.032391239,-0.0085645514,0.1578340679],"action_prob":0.614195168,"action_logp":-0.4874424934,"action_dist_inputs":[0.2309024632,-0.234078899],"value_targets":4.9009947777} 
+{"eps_id":329428710,"obs":[-0.0748847127,0.032391239,-0.0085645514,0.1578340679],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0742368847,0.2276347578,-0.0054078698,-0.1375384182],"action_prob":0.7530956864,"action_logp":-0.2835629582,"action_dist_inputs":[-0.5574420691,0.5577493906],"value_targets":3.9403989315} +{"eps_id":329428710,"obs":[-0.0742368847,0.2276347578,-0.0054078698,-0.1375384182],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0696841925,0.0325906798,-0.0081586381,0.1534335166],"action_prob":0.6210371852,"action_logp":-0.4763643444,"action_dist_inputs":[0.2453979254,-0.2485548556],"value_targets":2.970099926} +{"eps_id":329428710,"obs":[-0.0696841925,0.0325906798,-0.0081586381,0.1534335166],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0690323785,0.2278284878,-0.005089968,-0.1418120712],"action_prob":0.7496833205,"action_logp":-0.2881043851,"action_dist_inputs":[-0.5482997894,0.5486243367],"value_targets":1.9900000095} +{"eps_id":329428710,"obs":[-0.0690323785,0.2278284878,-0.005089968,-0.1418120712],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.0644758046,0.0327798091,-0.0079262089,0.1492607147],"action_prob":0.6277673244,"action_logp":-0.4655856788,"action_dist_inputs":[0.2597570717,-0.2628933489],"value_targets":1.0} +{"eps_id":1065225377,"obs":[0.0169242918,0.0414283946,0.002293352,0.0150648775],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0177528597,0.2365173846,0.0025946496,-0.2768935859],"action_prob":0.5786419511,"action_logp":-0.5470713973,"action_dist_inputs":[-0.1585931033,0.1586077958],"value_targets":86.6020355225} 
+{"eps_id":1065225377,"obs":[0.0177528597,0.2365173846,0.0025946496,-0.2768935859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0224832073,0.0413585082,-0.0029432222,0.01660656],"action_prob":0.7939415574,"action_logp":-0.230745405,"action_dist_inputs":[0.6728135347,-0.6760365963],"value_targets":86.4666976929} +{"eps_id":1065225377,"obs":[0.0224832073,0.0413585082,-0.0029432222,0.01660656],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0233103782,0.2365225405,-0.0026110909,-0.2770035267],"action_prob":0.5686523318,"action_logp":-0.5644860268,"action_dist_inputs":[-0.1381568015,0.1381979734],"value_targets":86.3300018311} +{"eps_id":1065225377,"obs":[0.0233103782,0.2365225405,-0.0026110909,-0.2770035267],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0280408282,0.4316816628,-0.0081511615,-0.5705088377],"action_prob":0.1993520707,"action_logp":-1.6126828194,"action_dist_inputs":[0.6935700774,-0.6967788935],"value_targets":86.1919174194} +{"eps_id":1065225377,"obs":[0.0280408282,0.4316816628,-0.0081511615,-0.5705088377],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0366744623,0.2366749495,-0.0195613392,-0.2804049253],"action_prob":0.9174178243,"action_logp":-0.0861922726,"action_dist_inputs":[1.2009501457,-1.2068194151],"value_targets":86.052444458} +{"eps_id":1065225377,"obs":[0.0366744623,0.2366749495,-0.0195613392,-0.2804049253],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0414079614,0.041837424,-0.0251694378,0.0060447762],"action_prob":0.8227456212,"action_logp":-0.1951082349,"action_dist_inputs":[0.7659117579,-0.7691493034],"value_targets":85.9115600586} 
+{"eps_id":1065225377,"obs":[0.0414079614,0.041837424,-0.0251694378,0.0060447762],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.04224471,0.2373111248,-0.0250485428,-0.2944719493],"action_prob":0.4945852757,"action_logp":-0.7040356994,"action_dist_inputs":[0.0108077442,-0.010852064],"value_targets":85.7692489624} +{"eps_id":1065225377,"obs":[0.04224471,0.2373111248,-0.0250485428,-0.2944719493],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.046990931,0.4327810705,-0.0309379809,-0.594948411],"action_prob":0.1621682942,"action_logp":-1.8191206455,"action_dist_inputs":[0.8194230199,-0.8227595687],"value_targets":85.62550354} +{"eps_id":1065225377,"obs":[0.046990931,0.4327810705,-0.0309379809,-0.594948411],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0556465536,0.2381054759,-0.0428369492,-0.3121690452],"action_prob":0.926076889,"action_logp":-0.0767980367,"action_dist_inputs":[1.2609125376,-1.2670183182],"value_targets":85.4803085327} +{"eps_id":1065225377,"obs":[0.0556465536,0.2381054759,-0.0428369492,-0.3121690452],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.060408663,0.0436191484,-0.0490803309,-0.0332973897],"action_prob":0.8617815375,"action_logp":-0.148753494,"action_dist_inputs":[0.9133313894,-0.9168345928],"value_targets":85.3336486816} +{"eps_id":1065225377,"obs":[0.060408663,0.0436191484,-0.0490803309,-0.0332973897],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.061281044,-0.150765866,-0.0497462787,0.2435051948],"action_prob":0.6309454441,"action_logp":-0.4605358541,"action_dist_inputs":[0.2679401338,-0.268334806],"value_targets":85.1855010986} 
+{"eps_id":1065225377,"obs":[0.061281044,-0.150765866,-0.0497462787,0.2435051948],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.058265727,0.0450300425,-0.0448761731,-0.0644447133],"action_prob":0.7451684475,"action_logp":-0.2941449583,"action_dist_inputs":[-0.5351161957,0.5378914475],"value_targets":85.0358581543} +{"eps_id":1065225377,"obs":[0.058265727,0.0450300425,-0.0448761731,-0.0644447133],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0591663308,-0.1494207233,-0.0461650677,0.213748619],"action_prob":0.6699897051,"action_logp":-0.4004929364,"action_dist_inputs":[0.3537391126,-0.3543993533],"value_targets":84.8847045898} +{"eps_id":1065225377,"obs":[0.0591663308,-0.1494207233,-0.0461650677,0.213748619],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0561779141,0.0463297926,-0.0418900959,-0.0931320339],"action_prob":0.7137251496,"action_logp":-0.3372573256,"action_dist_inputs":[-0.4554937184,0.4580518305],"value_targets":84.7320251465} +{"eps_id":1065225377,"obs":[0.0561779141,0.0463297926,-0.0418900959,-0.0931320339],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0571045093,-0.148167491,-0.0437527373,0.1860458702],"action_prob":0.7041956186,"action_logp":-0.3506990671,"action_dist_inputs":[0.4332203567,-0.4341374934],"value_targets":84.5778045654} +{"eps_id":1065225377,"obs":[0.0571045093,-0.148167491,-0.0437527373,0.1860458702],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0541411601,0.0475522652,-0.0400318205,-0.1201120093],"action_prob":0.6792005897,"action_logp":-0.3868387938,"action_dist_inputs":[-0.3738779724,0.3762224317],"value_targets":84.4220275879} 
+{"eps_id":1065225377,"obs":[0.0541411601,0.0475522652,-0.0400318205,-0.1201120093],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0550922044,-0.1469739527,-0.0424340591,0.1596773267],"action_prob":0.7344629765,"action_logp":-0.3086156845,"action_dist_inputs":[0.5081067681,-0.5092784762],"value_targets":84.2646713257} +{"eps_id":1065225377,"obs":[0.0550922044,-0.1469739527,-0.0424340591,0.1596773267],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0521527268,-0.3414635062,-0.039240513,0.4386771917],"action_prob":0.3588574231,"action_logp":-1.0248301029,"action_dist_inputs":[-0.2890992165,0.2912274003],"value_targets":84.1057281494} +{"eps_id":1065225377,"obs":[0.0521527268,-0.3414635062,-0.039240513,0.4386771917],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0453234576,-0.1458087713,-0.0304669682,0.1338872463],"action_prob":0.8748682737,"action_logp":-0.1336819381,"action_dist_inputs":[-0.9699437618,0.9747623801],"value_targets":83.9451828003} +{"eps_id":1065225377,"obs":[0.0453234576,-0.1458087713,-0.0304669682,0.1338872463],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0424072817,0.0497360416,-0.0277892239,-0.1682497561],"action_prob":0.6261789799,"action_logp":-0.4681190252,"action_dist_inputs":[-0.2569725811,0.2588866353],"value_targets":83.7830123901} +{"eps_id":1065225377,"obs":[0.0424072817,0.0497360416,-0.0277892239,-0.1682497561],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0434020013,-0.1449773461,-0.0311542191,0.1155385822],"action_prob":0.7680518031,"action_logp":-0.2638981044,"action_dist_inputs":[0.5978557467,-0.5994870663],"value_targets":83.6192016602} 
+{"eps_id":1065225377,"obs":[0.0434020013,-0.1449773461,-0.0311542191,0.1155385822],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0405024551,0.0505768247,-0.0288434476,-0.1868081987],"action_prob":0.5936914086,"action_logp":-0.5213956237,"action_dist_inputs":[-0.1887547523,0.1904920936],"value_targets":83.453742981} +{"eps_id":1065225377,"obs":[0.0405024551,0.0505768247,-0.0288434476,-0.1868081987],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0415139906,-0.1441208273,-0.032579612,0.0966380164],"action_prob":0.7865516543,"action_logp":-0.240096882,"action_dist_inputs":[0.651212275,-0.6530511975],"value_targets":83.286605835} +{"eps_id":1065225377,"obs":[0.0415139906,-0.1441208273,-0.032579612,0.0966380164],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0386315733,-0.3387610614,-0.0306468513,0.3788667023],"action_prob":0.442573458,"action_logp":-0.8151488304,"action_dist_inputs":[-0.1145897508,0.1161344871],"value_targets":83.1177825928} +{"eps_id":1065225377,"obs":[0.0386315733,-0.3387610614,-0.0306468513,0.3788667023],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0318563543,-0.533434689,-0.0230695158,0.6617312431],"action_prob":0.1513370275,"action_logp":-1.8882459402,"action_dist_inputs":[-0.8598764539,0.8642762899],"value_targets":82.9472579956} +{"eps_id":1065225377,"obs":[0.0318563543,-0.533434689,-0.0230695158,0.6617312431],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0211876612,-0.3379994631,-0.0098348912,0.3618746102],"action_prob":0.9302486181,"action_logp":-0.0723034069,"action_dist_inputs":[-1.2919384241,1.2985761166],"value_targets":82.7750091553} 
+{"eps_id":1065225377,"obs":[0.0211876612,-0.3379994631,-0.0098348912,0.3618746102],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0144276712,-0.1427391022,-0.0025973993,0.0661068261],"action_prob":0.8566859365,"action_logp":-0.1546838731,"action_dist_inputs":[-0.8918756247,0.8961572647],"value_targets":82.601020813} +{"eps_id":1065225377,"obs":[0.0144276712,-0.1427391022,-0.0025973993,0.0661068261],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0115728891,-0.3378237188,-0.0012752628,0.357969135],"action_prob":0.423112452,"action_logp":-0.8601173162,"action_dist_inputs":[-0.1543813646,0.1556281447],"value_targets":82.4252700806} +{"eps_id":1065225377,"obs":[0.0115728891,-0.3378237188,-0.0012752628,0.357969135],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0048164143,-0.1426836699,0.0058841198,0.0648843497],"action_prob":0.8614724874,"action_logp":-0.1491121799,"action_dist_inputs":[-0.9116722345,0.9159020782],"value_targets":82.2477493286} +{"eps_id":1065225377,"obs":[0.0048164143,-0.1426836699,0.0058841198,0.0648843497],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0019627411,0.0523534305,0.007181807,-0.2259363085],"action_prob":0.5953761935,"action_logp":-0.5185617805,"action_dist_inputs":[-0.1925148666,0.1937208325],"value_targets":82.0684280396} +{"eps_id":1065225377,"obs":[0.0019627411,0.0523534305,0.007181807,-0.2259363085],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0030098096,-0.1428704262,0.0026630806,0.069003351],"action_prob":0.7773396373,"action_logp":-0.2518779039,"action_dist_inputs":[0.6239886284,-0.6262412667],"value_targets":81.8873062134} 
+{"eps_id":1065225377,"obs":[0.0030098096,-0.1428704262,0.0026630806,0.069003351],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0001524013,-0.3380304575,0.0040431479,0.3625253141],"action_prob":0.4048915207,"action_logp":-0.9041360617,"action_dist_inputs":[-0.1919643432,0.1931601614],"value_targets":81.7043457031} +{"eps_id":1065225377,"obs":[0.0001524013,-0.3380304575,0.0040431479,0.3625253141],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0066082077,-0.5332096219,0.0112936534,0.656480372],"action_prob":0.1321405619,"action_logp":-2.0238890648,"action_dist_inputs":[-0.9389855266,0.9431779385],"value_targets":81.5195465088} +{"eps_id":1065225377,"obs":[-0.0066082077,-0.5332096219,0.0112936534,0.656480372],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0172723997,-0.3382467031,0.0244232602,0.367374897],"action_prob":0.935767889,"action_logp":-0.0663878098,"action_dist_inputs":[-1.336114645,1.3427503109],"value_targets":81.3328704834} +{"eps_id":1065225377,"obs":[-0.0172723997,-0.3382467031,0.0244232602,0.367374897],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0240373351,-0.5337070227,0.0317707583,0.6676576734],"action_prob":0.1166023239,"action_logp":-2.1489861012,"action_dist_inputs":[-1.0103901625,1.014616251],"value_targets":81.144317627} +{"eps_id":1065225377,"obs":[-0.0240373351,-0.5337070227,0.0317707583,0.6676576734],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0347114764,-0.3390409946,0.0451239124,0.3851449788],"action_prob":0.9396633506,"action_logp":-0.0622335784,"action_dist_inputs":[-1.3694047928,1.3761780262],"value_targets":80.9538574219} 
+{"eps_id":1065225377,"obs":[-0.0347114764,-0.3390409946,0.0451239124,0.3851449788],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0414922945,-0.1445877105,0.0528268106,0.1070239097],"action_prob":0.8998045325,"action_logp":-0.1055776924,"action_dist_inputs":[-1.095340848,1.0997138023],"value_targets":80.76146698} +{"eps_id":1065225377,"obs":[-0.0414922945,-0.1445877105,0.0528268106,0.1070239097],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0443840511,0.0497389771,0.0549672917,-0.1685356349],"action_prob":0.7525243759,"action_logp":-0.2843219042,"action_dist_inputs":[-0.5553306341,0.5567904115],"value_targets":80.5671386719} +{"eps_id":1065225377,"obs":[-0.0443840511,0.0497389771,0.0549672917,-0.1685356349],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0433892682,0.2440328151,0.0515965782,-0.4433839619],"action_prob":0.3840007782,"action_logp":-0.957110703,"action_dist_inputs":[0.2354056686,-0.2371955663],"value_targets":80.3708496094} +{"eps_id":1065225377,"obs":[-0.0433892682,0.2440328151,0.0515965782,-0.4433839619],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0385086127,0.4383881688,0.0427288972,-0.7193663716],"action_prob":0.1390384883,"action_logp":-1.9730044603,"action_dist_inputs":[0.9093490839,-0.9139499068],"value_targets":80.1725769043} +{"eps_id":1065225377,"obs":[-0.0385086127,0.4383881688,0.0427288972,-0.7193663716],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0297408495,0.2427018583,0.0283415709,-0.4135463536],"action_prob":0.9284231067,"action_logp":-0.074267745,"action_dist_inputs":[1.2779303789,-1.284784317],"value_targets":79.9722976685} 
+{"eps_id":1065225377,"obs":[-0.0297408495,0.2427018583,0.0283415709,-0.4135463536],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.024886813,0.0471898876,0.0200706441,-0.1120648608],"action_prob":0.8631258607,"action_logp":-0.1471947581,"action_dist_inputs":[0.9185166359,-0.9229818583],"value_targets":79.7699966431} +{"eps_id":1065225377,"obs":[-0.024886813,0.0471898876,0.0200706441,-0.1120648608],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0239430144,-0.1482138187,0.0178293455,0.1868820488],"action_prob":0.6020433903,"action_logp":-0.5074257255,"action_dist_inputs":[0.2062952965,-0.2076912075],"value_targets":79.5656509399} +{"eps_id":1065225377,"obs":[-0.0239430144,-0.1482138187,0.0178293455,0.1868820488],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0269072913,0.0466485582,0.021566987,-0.1001235172],"action_prob":0.7860133052,"action_logp":-0.2407815903,"action_dist_inputs":[-0.6495272517,0.6515325904],"value_targets":79.3592453003} +{"eps_id":1065225377,"obs":[-0.0269072913,0.0466485582,0.021566987,-0.1001235172],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0259743202,0.2414548844,0.0195645168,-0.3859248459],"action_prob":0.4221311212,"action_logp":-0.8624392748,"action_dist_inputs":[0.1563654095,-0.1576656848],"value_targets":79.1507568359} +{"eps_id":1065225377,"obs":[-0.0259743202,0.2414548844,0.0195645168,-0.3859248459],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0211452227,0.0460607335,0.0118460199,-0.0871381164],"action_prob":0.8555521369,"action_logp":-0.1560082734,"action_dist_inputs":[0.8872609138,-0.8915675879],"value_targets":78.9401550293} 
+{"eps_id":1065225377,"obs":[-0.0211452227,0.0460607335,0.0118460199,-0.0871381164],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0202240087,0.2410109043,0.0101032574,-0.3760601878],"action_prob":0.4214645922,"action_logp":-0.8640195131,"action_dist_inputs":[0.1577777117,-0.1589862406],"value_targets":78.727432251} +{"eps_id":1065225377,"obs":[-0.0202240087,0.2410109043,0.0101032574,-0.3760601878],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0154037904,0.045746915,0.0025820534,-0.0802088603],"action_prob":0.8576236367,"action_logp":-0.1535899341,"action_dist_inputs":[0.8957148194,-0.8999764323],"value_targets":78.5125579834} +{"eps_id":1065225377,"obs":[-0.0154037904,0.045746915,0.0025820534,-0.0802088603],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0144888517,0.2408317626,0.0009778761,-0.3720760345],"action_prob":0.4113606513,"action_logp":-0.8882849216,"action_dist_inputs":[0.178588748,-0.1797544658],"value_targets":78.2955093384} +{"eps_id":1065225377,"obs":[-0.0144888517,0.2408317626,0.0009778761,-0.3720760345],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0096722171,0.4359397888,-0.0064636446,-0.6644504666],"action_prob":0.1376291364,"action_logp":-1.9831926823,"action_dist_inputs":[0.91543293,-0.9196899533],"value_targets":78.0762710571} +{"eps_id":1065225377,"obs":[-0.0096722171,0.4359397888,-0.0064636446,-0.6644504666],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0009534206,0.2409083545,-0.0197526533,-0.3738096952],"action_prob":0.9314122796,"action_logp":-0.0710532889,"action_dist_inputs":[1.3009111881,-1.3076759577],"value_targets":77.8548202515} 
+{"eps_id":1065225377,"obs":[-0.0009534206,0.2409083545,-0.0197526533,-0.3738096952],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0038647465,0.4363052547,-0.0272288471,-0.672654748],"action_prob":0.1233500168,"action_logp":-2.0927293301,"action_dist_inputs":[0.9783763885,-0.9827053547],"value_targets":77.6311340332} +{"eps_id":1065225377,"obs":[0.0038647465,0.4363052547,-0.0272288471,-0.672654748],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0125908516,0.2415721416,-0.0406819433,-0.3886676729],"action_prob":0.9351310134,"action_logp":-0.0670686066,"action_dist_inputs":[1.3306978941,-1.3376201391],"value_targets":77.4051818848} +{"eps_id":1065225377,"obs":[0.0125908516,0.2415721416,-0.0406819433,-0.3886676729],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0174222942,0.0470505431,-0.0484552979,-0.1090838835],"action_prob":0.892419517,"action_logp":-0.1138189211,"action_dist_inputs":[1.0555890799,-1.0601080656],"value_targets":77.1769561768} +{"eps_id":1065225377,"obs":[0.0174222942,0.0470505431,-0.0484552979,-0.1090838835],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0183633044,0.242832154,-0.0506369732,-0.4166519046],"action_prob":0.2645607293,"action_logp":-1.3296844959,"action_dist_inputs":[0.5104311705,-0.5119659901],"value_targets":76.9464187622} +{"eps_id":1065225377,"obs":[0.0183633044,0.242832154,-0.0506369732,-0.4166519046],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0232199486,0.0484630354,-0.0589700118,-0.1403528303],"action_prob":0.9035072923,"action_logp":-0.1014711037,"action_dist_inputs":[1.1160120964,-1.1208050251],"value_targets":76.7135543823} 
+{"eps_id":1065225377,"obs":[0.0232199486,0.0484630354,-0.0589700118,-0.1403528303],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0241892096,-0.1457669288,-0.0617770702,0.1331585497],"action_prob":0.7833877206,"action_logp":-0.2441275567,"action_dist_inputs":[0.6418212056,-0.643697381],"value_targets":76.4783401489} +{"eps_id":1065225377,"obs":[0.0241892096,-0.1457669288,-0.0617770702,0.1331585497],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.02127387,-0.3399520814,-0.0591138974,0.4057297111],"action_prob":0.4464584887,"action_logp":-0.8064088821,"action_dist_inputs":[-0.1067796052,0.1082108468],"value_targets":76.2407455444} +{"eps_id":1065225377,"obs":[0.02127387,-0.3399520814,-0.0591138974,0.4057297111],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0144748287,-0.144043833,-0.0509993024,0.0950112864],"action_prob":0.8390828371,"action_logp":-0.1754458696,"action_dist_inputs":[-0.8236143589,0.8278052211],"value_targets":76.0007553101} +{"eps_id":1065225377,"obs":[0.0144748287,-0.144043833,-0.0509993024,0.0950112864],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0115939528,0.0517705716,-0.0490990765,-0.2133157253],"action_prob":0.5122430921,"action_logp":-0.6689559221,"action_dist_inputs":[-0.023947414,0.0250348635],"value_targets":75.7583389282} +{"eps_id":1065225377,"obs":[0.0115939528,0.0517705716,-0.0490990765,-0.2133157253],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0126293637,0.247558862,-0.0533653907,-0.5210735798],"action_prob":0.1710451841,"action_logp":-1.7658275366,"action_dist_inputs":[0.787815094,-0.7904227972],"value_targets":75.5134735107} 
+{"eps_id":1065225377,"obs":[0.0126293637,0.247558862,-0.0533653907,-0.5210735798],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0175805409,0.0532271191,-0.0637868643,-0.2456729114],"action_prob":0.9212895632,"action_logp":-0.0819808617,"action_dist_inputs":[1.2271419764,-1.2328567505],"value_targets":75.26612854} +{"eps_id":1065225377,"obs":[0.0175805409,0.0532271191,-0.0637868643,-0.2456729114],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0186450835,-0.1409285367,-0.068700321,0.0262276847],"action_prob":0.8571236134,"action_logp":-0.1541731209,"action_dist_inputs":[0.8942903876,-0.8973117471],"value_targets":75.0162963867} +{"eps_id":1065225377,"obs":[0.0186450835,-0.1409285367,-0.068700321,0.0262276847],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0158265121,-0.3350014687,-0.0681757703,0.2964683473],"action_prob":0.6424985528,"action_logp":-0.4423906803,"action_dist_inputs":[0.2932487428,-0.2929765284],"value_targets":74.7639312744} +{"eps_id":1065225377,"obs":[0.0158265121,-0.3350014687,-0.0681757703,0.2964683473],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0091264835,-0.1389771998,-0.0622464009,-0.0169131383],"action_prob":0.7180412412,"action_logp":-0.331228286,"action_dist_inputs":[-0.4656717479,0.4690944552],"value_targets":74.5090255737} +{"eps_id":1065225377,"obs":[0.0091264835,-0.1389771998,-0.0622464009,-0.0169131383],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0063469396,-0.3331537843,-0.0625846609,0.2554990351],"action_prob":0.689683497,"action_logp":-0.3715224862,"action_dist_inputs":[0.3992037773,-0.3994362652],"value_targets":74.2515411377} 
+{"eps_id":1065225377,"obs":[0.0063469396,-0.3331537843,-0.0625846609,0.2554990351],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0003161363,-0.1371966898,-0.057474684,-0.0562490299],"action_prob":0.6716617942,"action_logp":-0.3980003297,"action_dist_inputs":[-0.3563141227,0.3593967855],"value_targets":73.9914550781} +{"eps_id":1065225377,"obs":[-0.0003161363,-0.1371966898,-0.057474684,-0.0562490299],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0030600701,-0.3314494491,-0.0585996658,0.2177607417],"action_prob":0.7285217047,"action_logp":-0.3167378902,"action_dist_inputs":[0.4932044148,-0.4939305782],"value_targets":73.7287445068} +{"eps_id":1065225377,"obs":[-0.0030600701,-0.3314494491,-0.0585996658,0.2177607417],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0096890591,-0.1355409026,-0.0542444475,-0.0928161293],"action_prob":0.622104466,"action_logp":-0.4746472239,"action_dist_inputs":[-0.2478842884,0.2506060004],"value_targets":73.4633712769} +{"eps_id":1065225377,"obs":[-0.0096890591,-0.1355409026,-0.0542444475,-0.0928161293],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0123998774,0.0603148825,-0.0561007708,-0.4021077454],"action_prob":0.2391903102,"action_logp":-1.430495739,"action_dist_inputs":[0.5779568553,-0.5791670084],"value_targets":73.1953277588} +{"eps_id":1065225377,"obs":[-0.0123998774,0.0603148825,-0.0561007708,-0.4021077454],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.01119358,0.2561857998,-0.0641429275,-0.7119368911],"action_prob":0.0997612178,"action_logp":-2.3049757481,"action_dist_inputs":[1.0975986719,-1.1022820473],"value_targets":72.9245758057} 
+{"eps_id":1065225377,"obs":[-0.01119358,0.2561857998,-0.0641429275,-0.7119368911],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0060698637,0.062007919,-0.0783816651,-0.4401141107],"action_prob":0.9355285764,"action_logp":-0.0666435659,"action_dist_inputs":[1.333747983,-1.3411422968],"value_targets":72.6510848999} +{"eps_id":1065225377,"obs":[-0.0060698637,0.062007919,-0.0783816651,-0.4401141107],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0048297052,-0.1319223195,-0.0871839449,-0.173132509],"action_prob":0.9106708169,"action_logp":-0.093573764,"action_dist_inputs":[1.1583362818,-1.1635172367],"value_targets":72.3748321533} +{"eps_id":1065225377,"obs":[-0.0048297052,-0.1319223195,-0.0871839449,-0.173132509],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0074681519,0.0643322319,-0.0906465948,-0.4919958711],"action_prob":0.1574285626,"action_logp":-1.848783493,"action_dist_inputs":[0.8375043273,-0.8399822712],"value_targets":72.0957946777} +{"eps_id":1065225377,"obs":[-0.0074681519,0.0643322319,-0.0906465948,-0.4919958711],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0061815069,-0.1294020414,-0.100486517,-0.2292011529],"action_prob":0.9177128077,"action_logp":-0.085870795,"action_dist_inputs":[1.2029664516,-1.2087028027],"value_targets":71.8139266968} +{"eps_id":1065225377,"obs":[-0.0061815069,-0.1294020414,-0.100486517,-0.2292011529],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0087695476,-0.3229551613,-0.1050705388,0.0301706176],"action_prob":0.86886549,"action_logp":-0.1405669302,"action_dist_inputs":[0.9438649416,-0.9471003413],"value_targets":71.5292205811} 
+{"eps_id":1065225377,"obs":[-0.0087695476,-0.3229551613,-0.1050705388,0.0301706176],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0152286515,-0.5164257288,-0.1044671237,0.2879420519],"action_prob":0.7337822914,"action_logp":-0.3095428944,"action_dist_inputs":[0.50690943,-0.5069887042],"value_targets":71.2416381836} +{"eps_id":1065225377,"obs":[-0.0152286515,-0.5164257288,-0.1044671237,0.2879420519],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.025557166,-0.7099148035,-0.0987082869,0.5459359288],"action_prob":0.45131284,"action_logp":-0.7955945134,"action_dist_inputs":[-0.0959774703,0.0993902087],"value_targets":70.9511489868} +{"eps_id":1065225377,"obs":[-0.025557166,-0.7099148035,-0.0987082869,0.5459359288],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0397554599,-0.5135546327,-0.0877895653,0.2238562256],"action_prob":0.8035671711,"action_logp":-0.2186945081,"action_dist_inputs":[-0.7013770938,0.7073628902],"value_targets":70.6577301025} +{"eps_id":1065225377,"obs":[-0.0397554599,-0.5135546327,-0.0877895653,0.2238562256],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0500265546,-0.3172947466,-0.0833124369,-0.0951768234],"action_prob":0.4879693687,"action_logp":-0.7175026536,"action_dist_inputs":[0.025384441,-0.0227473788],"value_targets":70.3613433838} +{"eps_id":1065225377,"obs":[-0.0500265546,-0.3172947466,-0.0833124369,-0.0951768234],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0563724488,-0.5111299157,-0.0852159783,0.1701019257],"action_prob":0.8024309278,"action_logp":-0.2201095074,"action_dist_inputs":[0.6998800635,-0.70167768],"value_targets":70.061958313} 
+{"eps_id":1065225377,"obs":[-0.0563724488,-0.5111299157,-0.0852159783,0.1701019257],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0665950477,-0.3148981929,-0.0818139389,-0.1482007802],"action_prob":0.4179447889,"action_logp":-0.8724059463,"action_dist_inputs":[0.1665096283,-0.1647063941],"value_targets":69.7595596313} +{"eps_id":1065225377,"obs":[-0.0665950477,-0.3148981929,-0.0818139389,-0.1482007802],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0728930086,-0.5087590218,-0.0847779512,0.1175913736],"action_prob":0.8273006082,"action_logp":-0.1895871758,"action_dist_inputs":[0.782033205,-0.7845821977],"value_targets":69.4540939331} +{"eps_id":1065225377,"obs":[-0.0728930086,-0.5087590218,-0.0847779512,0.1175913736],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.083068192,-0.7025704384,-0.0824261233,0.3823694885],"action_prob":0.6454193592,"action_logp":-0.4378549755,"action_dist_inputs":[0.2999409437,-0.2990236282],"value_targets":69.1455535889} +{"eps_id":1065225377,"obs":[-0.083068192,-0.7025704384,-0.0824261233,0.3823694885],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0971195996,-0.5063807964,-0.0747787356,0.0648782626],"action_prob":0.6475813985,"action_logp":-0.4345107377,"action_dist_inputs":[-0.3019841313,0.3064408004],"value_targets":68.8338928223} +{"eps_id":1065225377,"obs":[-0.0971195996,-0.5063807964,-0.0747787356,0.0648782626],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1072472185,-0.700355351,-0.0734811723,0.3330625594],"action_prob":0.6856943369,"action_logp":-0.3773233593,"action_dist_inputs":[0.3900819719,-0.3899838924],"value_targets":68.5190811157} 
+{"eps_id":1065225377,"obs":[-0.1072472185,-0.700355351,-0.0734811723,0.3330625594],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1212543249,-0.5042686462,-0.0668199211,0.0181421805],"action_prob":0.5990938544,"action_logp":-0.512337029,"action_dist_inputs":[-0.1989395171,0.2027514726],"value_targets":68.2010955811} +{"eps_id":1065225377,"obs":[-0.1212543249,-0.5042686462,-0.0668199211,0.0181421805],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.131339699,-0.6983718872,-0.0664570779,0.289016813],"action_prob":0.7176539302,"action_logp":-0.3317678273,"action_dist_inputs":[0.4661026001,-0.4667513669],"value_targets":67.8798904419} +{"eps_id":1065225377,"obs":[-0.131339699,-0.6983718872,-0.0664570779,0.289016813],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1453071386,-0.5023682714,-0.0606767423,-0.0238635615],"action_prob":0.5535157323,"action_logp":-0.5914651155,"action_dist_inputs":[-0.1058674604,0.1090185791],"value_targets":67.5554504395} +{"eps_id":1065225377,"obs":[-0.1453071386,-0.5023682714,-0.0606767423,-0.0238635615],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1553544998,-0.6965699792,-0.0611540116,0.2490748614],"action_prob":0.7436344028,"action_logp":-0.296205759,"action_dist_inputs":[0.5318077803,-0.5331370831],"value_targets":67.227722168} +{"eps_id":1065225377,"obs":[-0.1553544998,-0.6965699792,-0.0611540116,0.2490748614],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1692858934,-0.5006303787,-0.0561725162,-0.0622531362],"action_prob":0.5109468102,"action_logp":-0.6714898348,"action_dist_inputs":[-0.0206541568,0.0231400467],"value_targets":66.8966903687} 
+{"eps_id":1065225377,"obs":[-0.1692858934,-0.5006303787,-0.0561725162,-0.0622531362],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1792985052,-0.6949038506,-0.057417579,0.2121913731],"action_prob":0.7653566599,"action_logp":-0.267413348,"action_dist_inputs":[0.5901610255,-0.5921142697],"value_targets":66.5623168945} +{"eps_id":1065225377,"obs":[-0.1792985052,-0.6949038506,-0.057417579,0.2121913731],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1931965798,-0.8891598582,-0.0531737506,0.4862237275],"action_prob":0.5289264917,"action_logp":-0.636905849,"action_dist_inputs":[0.0588285439,-0.0570068285],"value_targets":66.2245635986} +{"eps_id":1065225377,"obs":[-0.1931965798,-0.8891598582,-0.0531737506,0.4862237275],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2109797895,-0.6933295131,-0.0434492752,0.1772676408],"action_prob":0.7442882657,"action_logp":-0.2953268886,"action_dist_inputs":[-0.5315690637,0.5368087292],"value_targets":65.883392334} +{"eps_id":1065225377,"obs":[-0.2109797895,-0.6933295131,-0.0434492752,0.1772676408],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.224846378,-0.8878036141,-0.0399039239,0.455933392],"action_prob":0.5453340411,"action_logp":-0.6063567996,"action_dist_inputs":[0.0915557891,-0.0902796686],"value_targets":65.5387802124} +{"eps_id":1065225377,"obs":[-0.224846378,-0.8878036141,-0.0399039239,0.455933392],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2426024377,-0.6921408772,-0.0307852551,0.1509442776],"action_prob":0.7350293994,"action_logp":-0.3078448176,"action_dist_inputs":[-0.5077181458,0.5125731826],"value_targets":65.1906890869} 
+{"eps_id":1065225377,"obs":[-0.2426024377,-0.6921408772,-0.0307852551,0.1509442776],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2564452589,-0.4965919256,-0.0277663693,-0.1512895823],"action_prob":0.4475516081,"action_logp":-0.8039634228,"action_dist_inputs":[0.1057035178,-0.1048647016],"value_targets":64.8390808105} +{"eps_id":1065225377,"obs":[-0.2564452589,-0.4965919256,-0.0277663693,-0.1512895823],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2663770914,-0.6913055182,-0.03079216,0.1325059533],"action_prob":0.7928292155,"action_logp":-0.2321474403,"action_dist_inputs":[0.6693810225,-0.672683239],"value_targets":64.4839172363} +{"eps_id":1065225377,"obs":[-0.2663770914,-0.6913055182,-0.03079216,0.1325059533],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2802032232,-0.8859731555,-0.0281420425,0.4153175354],"action_prob":0.5724811554,"action_logp":-0.5577754378,"action_dist_inputs":[0.1461863369,-0.1457951069],"value_targets":64.1251678467} +{"eps_id":1065225377,"obs":[-0.2802032232,-0.8859731555,-0.0281420425,0.4153175354],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2979226708,-0.6904639006,-0.01983569,0.1138970852],"action_prob":0.7143386602,"action_logp":-0.3363981247,"action_dist_inputs":[-0.4562017024,0.4603484869],"value_targets":63.7627983093} +{"eps_id":1065225377,"obs":[-0.2979226708,-0.6904639006,-0.01983569,0.1138970852],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3117319643,-0.4950633943,-0.0175577495,-0.1849772781],"action_prob":0.4278811812,"action_logp":-0.8489097357,"action_dist_inputs":[0.1452772021,-0.1452238858],"value_targets":63.3967666626} 
+{"eps_id":1065225377,"obs":[-0.3117319643,-0.4950633943,-0.0175577495,-0.1849772781],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3216332197,-0.2996946871,-0.0212572943,-0.4831468761],"action_prob":0.1982352138,"action_logp":-1.618301034,"action_dist_inputs":[0.6967259645,-0.7006351352],"value_targets":63.0270347595} +{"eps_id":1065225377,"obs":[-0.3216332197,-0.2996946871,-0.0212572943,-0.4831468761],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3276271224,-0.494510293,-0.0309202317,-0.1972387135],"action_prob":0.8955895901,"action_logp":-0.110273011,"action_dist_inputs":[1.071269393,-1.0778838396],"value_targets":62.6535720825} +{"eps_id":1065225377,"obs":[-0.3276271224,-0.494510293,-0.0309202317,-0.1972387135],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3375173211,-0.2989600301,-0.0348650068,-0.4995130002],"action_prob":0.183359623,"action_logp":-1.696305871,"action_dist_inputs":[0.7447494864,-0.7489999533],"value_targets":62.2763366699} +{"eps_id":1065225377,"obs":[-0.3375173211,-0.2989600301,-0.0348650068,-0.4995130002],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3434965312,-0.4935735464,-0.0448552668,-0.218018353],"action_prob":0.9010737538,"action_logp":-0.104168199,"action_dist_inputs":[1.1011668444,-1.108045578],"value_targets":61.8952865601} +{"eps_id":1065225377,"obs":[-0.3434965312,-0.4935735464,-0.0448552668,-0.218018353],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3533679843,-0.6880265474,-0.0492156334,0.0601847395],"action_prob":0.833216846,"action_logp":-0.1824613661,"action_dist_inputs":[0.801972568,-0.8066266179],"value_targets":61.5103912354} 
+{"eps_id":1065225377,"obs":[-0.3533679843,-0.6880265474,-0.0492156334,0.0601847395],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3671285212,-0.8824095726,-0.04801194,0.3369426429],"action_prob":0.6693319678,"action_logp":-0.4014751017,"action_dist_inputs":[0.351844281,-0.3533209264],"value_targets":61.1216087341} +{"eps_id":1065225377,"obs":[-0.3671285212,-0.8824095726,-0.04801194,0.3369426429],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3847767115,-0.6866384149,-0.0412730835,0.0295142476],"action_prob":0.5985507369,"action_logp":-0.5132440329,"action_dist_inputs":[-0.1985545754,0.2008753717],"value_targets":60.7288970947} +{"eps_id":1065225377,"obs":[-0.3847767115,-0.6866384149,-0.0412730835,0.0295142476],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3985094726,-0.8811449409,-0.0406828001,0.3088946939],"action_prob":0.6834726334,"action_logp":-0.3805686831,"action_dist_inputs":[0.3839035332,-0.3858733773],"value_targets":60.3322181702} +{"eps_id":1065225377,"obs":[-0.3985094726,-0.8811449409,-0.0406828001,0.3088946939],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4161323905,-0.6854676604,-0.0345049053,0.0036641946],"action_prob":0.5816651583,"action_logp":-0.5418602824,"action_dist_inputs":[-0.1638883203,0.1657247394],"value_targets":59.9315338135} +{"eps_id":1065225377,"obs":[-0.4161323905,-0.6854676604,-0.0345049053,0.0036641946],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.429841727,-0.4898682535,-0.0344316214,-0.2997026443],"action_prob":0.3060956299,"action_logp":-1.1838576794,"action_dist_inputs":[0.4080199301,-0.4104167223],"value_targets":59.526802063} 
+{"eps_id":1065225377,"obs":[-0.429841727,-0.4898682535,-0.0344316214,-0.2997026443],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4396390915,-0.6844829321,-0.0404256769,-0.0180745553],"action_prob":0.8539119959,"action_logp":-0.1579271108,"action_dist_inputs":[0.8799673915,-0.8856519461],"value_targets":59.117980957} +{"eps_id":1065225377,"obs":[-0.4396390915,-0.6844829321,-0.0404256769,-0.0180745553],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4533287585,-0.8790025115,-0.0407871678,0.2615845203],"action_prob":0.7162230015,"action_logp":-0.3337636888,"action_dist_inputs":[0.4614727795,-0.4643300176],"value_targets":58.7050323486} +{"eps_id":1065225377,"obs":[-0.4533287585,-0.8790025115,-0.0407871678,0.2615845203],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4709087908,-0.6833227873,-0.0355554745,-0.0436790474],"action_prob":0.5325335264,"action_logp":-0.6301094294,"action_dist_inputs":[-0.0647390857,0.0655790716],"value_targets":58.2879104614} +{"eps_id":1065225377,"obs":[-0.4709087908,-0.6833227873,-0.0355554745,-0.0436790474],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4845752716,-0.8779173493,-0.0364290588,0.2375771701],"action_prob":0.7268002629,"action_logp":-0.3191035688,"action_dist_inputs":[0.4875924587,-0.4908560216],"value_targets":57.8665771484} +{"eps_id":1065225377,"obs":[-0.4845752716,-0.8779173493,-0.0364290588,0.2375771701],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5021336079,-1.0725004673,-0.0316775143,0.518550396],"action_prob":0.4831631482,"action_logp":-0.7274008989,"action_dist_inputs":[-0.0334961191,0.0338767245],"value_targets":57.4409866333} 
+{"eps_id":1065225377,"obs":[-0.5021336079,-1.0725004673,-0.0316775143,0.518550396],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.523583591,-0.8769471645,-0.0213065054,0.2160559744],"action_prob":0.7524833679,"action_logp":-0.284376353,"action_dist_inputs":[-0.5540471077,0.5578539968],"value_targets":57.0110969543} +{"eps_id":1065225377,"obs":[-0.523583591,-0.8769471645,-0.0213065054,0.2160559744],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5411225557,-1.0717581511,-0.0169853866,0.5019424558],"action_prob":0.4753054976,"action_logp":-0.7437975407,"action_dist_inputs":[-0.0494014472,0.0494570471],"value_targets":56.5768661499} +{"eps_id":1065225377,"obs":[-0.5411225557,-1.0717581511,-0.0169853866,0.5019424558],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5625576973,-0.876400888,-0.0069465372,0.2039554864],"action_prob":0.7589225173,"action_logp":-0.2758556008,"action_dist_inputs":[-0.5716307163,0.5751504898],"value_targets":56.1382484436} +{"eps_id":1065225377,"obs":[-0.5625576973,-0.876400888,-0.0069465372,0.2039554864],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5800857544,-0.6811802983,-0.0028674277,-0.090910621],"action_prob":0.5424383879,"action_logp":-0.6116807461,"action_dist_inputs":[-0.0851593465,0.0850036442],"value_targets":55.6952018738} +{"eps_id":1065225377,"obs":[-0.5800857544,-0.6811802983,-0.0028674277,-0.090910621],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5937093496,-0.4860173762,-0.0046856399,-0.3844968379],"action_prob":0.2892431319,"action_logp":-1.2404876947,"action_dist_inputs":[0.4475517571,-0.4515111446],"value_targets":55.2476768494} 
+{"eps_id":1065225377,"obs":[-0.5937093496,-0.4860173762,-0.0046856399,-0.3844968379],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6034296751,-0.6810724735,-0.0123755774,-0.0932949558],"action_prob":0.8610266447,"action_logp":-0.1496298313,"action_dist_inputs":[0.9085677862,-0.9152754545],"value_targets":54.7956352234} +{"eps_id":1065225377,"obs":[-0.6034296751,-0.6810724735,-0.0123755774,-0.0932949558],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6170511246,-0.8760148883,-0.0142414756,0.1954579353],"action_prob":0.7200222015,"action_logp":-0.3284732103,"action_dist_inputs":[0.4702067077,-0.4743652046],"value_targets":54.3390235901} +{"eps_id":1065225377,"obs":[-0.6170511246,-0.8760148883,-0.0142414756,0.1954579353],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6345714331,-0.6806921363,-0.0103323171,-0.1016833112],"action_prob":0.5304985046,"action_logp":-0.6339381337,"action_dist_inputs":[-0.0614032857,0.0607423969],"value_targets":53.8778038025} +{"eps_id":1065225377,"obs":[-0.6345714331,-0.6806921363,-0.0103323171,-0.1016833112],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6481853127,-0.4854236841,-0.0123659838,-0.3976080716],"action_prob":0.2810074389,"action_logp":-1.2693741322,"action_dist_inputs":[0.4675626755,-0.4719070792],"value_targets":53.4119224548} +{"eps_id":1065225377,"obs":[-0.6481853127,-0.4854236841,-0.0123659838,-0.3976080716],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6578937769,-0.6803680062,-0.0203181449,-0.1088493988],"action_prob":0.8658384085,"action_logp":-0.1440569758,"action_dist_inputs":[0.9288418889,-0.9358112216],"value_targets":52.9413375854} 
+{"eps_id":1065225377,"obs":[-0.6578937769,-0.6803680062,-0.0203181449,-0.1088493988],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6715011001,-0.8751929998,-0.0224951319,0.1773546338],"action_prob":0.7317948937,"action_logp":-0.3122549951,"action_dist_inputs":[0.4995869994,-0.5041611791],"value_targets":52.4659957886} +{"eps_id":1065225377,"obs":[-0.6715011001,-0.8751929998,-0.0224951319,0.1773546338],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6890049577,-1.0699859858,-0.018948039,0.4628570974],"action_prob":0.4929800332,"action_logp":-0.7072865963,"action_dist_inputs":[-0.0146740992,0.0134075256],"value_targets":51.9858551025} +{"eps_id":1065225377,"obs":[-0.6890049577,-1.0699859858,-0.018948039,0.4628570974],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7104046941,-0.8746014237,-0.0096908975,0.1642625034],"action_prob":0.7334682941,"action_logp":-0.3099708855,"action_dist_inputs":[-0.505079031,0.5072122216],"value_targets":51.5008621216} +{"eps_id":1065225377,"obs":[-0.7104046941,-0.8746014237,-0.0096908975,0.1642625034],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.72789675,-1.0695832968,-0.0064056478,0.4538725317],"action_prob":0.4790353477,"action_logp":-0.7359808683,"action_dist_inputs":[-0.0426999182,0.0412079319],"value_targets":51.0109710693} +{"eps_id":1065225377,"obs":[-0.72789675,-1.0695832968,-0.0064056478,0.4538725317],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7492883801,-0.8743713498,0.0026718029,0.1591773778],"action_prob":0.7424292564,"action_logp":-0.2978276908,"action_dist_inputs":[-0.5283673406,0.5302659869],"value_targets":50.5161323547} 
+{"eps_id":1065225377,"obs":[-0.7492883801,-0.8743713498,0.0026718029,0.1591773778],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7667758465,-0.6792877316,0.0058553503,-0.1326614618],"action_prob":0.5422065854,"action_logp":-0.6121082306,"action_dist_inputs":[-0.0854300708,0.0837988406],"value_targets":50.0162963867} +{"eps_id":1065225377,"obs":[-0.7667758465,-0.6792877316,0.0058553503,-0.1326614618],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7803615928,-0.8744930625,0.0032021212,0.1618629843],"action_prob":0.6994484067,"action_logp":-0.3574632406,"action_dist_inputs":[0.4198473096,-0.4248253405],"value_targets":49.5114097595} +{"eps_id":1065225377,"obs":[-0.7803615928,-0.8744930625,0.0032021212,0.1618629843],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7978514433,-1.0696607828,0.0064393808,0.4555543661],"action_prob":0.4490056932,"action_logp":-0.800719738,"action_dist_inputs":[-0.1032222062,0.1014668569],"value_targets":49.0014266968} +{"eps_id":1065225377,"obs":[-0.7978514433,-1.0696607828,0.0064393808,0.4555543661],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8192446232,-1.264873147,0.0155504681,0.7502600551],"action_prob":0.2421866655,"action_logp":-1.4180464745,"action_dist_inputs":[-0.5695880055,0.5711403489],"value_targets":48.486289978} +{"eps_id":1065225377,"obs":[-0.8192446232,-1.264873147,0.0155504681,0.7502600551],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8445420861,-1.069969058,0.0305556692,0.4625109732],"action_prob":0.8630450368,"action_logp":-0.147288397,"action_dist_inputs":[-0.9183778167,0.9224373698],"value_targets":47.9659461975} 
+{"eps_id":1065225377,"obs":[-0.8445420861,-1.069969058,0.0305556692,0.4625109732],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8659415245,-0.8752920032,0.0398058891,0.17961362],"action_prob":0.7826163769,"action_logp":-0.2451126724,"action_dist_inputs":[-0.6397602558,0.6412185431],"value_targets":47.4403495789} +{"eps_id":1065225377,"obs":[-0.8659415245,-0.8752920032,0.0398058891,0.17961362],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8834473491,-1.0709602833,0.0433981605,0.4845834076],"action_prob":0.36951828,"action_logp":-0.9955550432,"action_dist_inputs":[-0.2679991722,0.2662847042],"value_targets":46.9094467163} +{"eps_id":1065225377,"obs":[-0.8834473491,-1.0709602833,0.0433981605,0.4845834076],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9048665166,-0.8764767647,0.053089831,0.2058878988],"action_prob":0.8014094234,"action_logp":-0.2213833332,"action_dist_inputs":[-0.6968447566,0.6982819438],"value_targets":46.3731765747} +{"eps_id":1065225377,"obs":[-0.9048665166,-0.8764767647,0.053089831,0.2058878988],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9223960638,-0.6821526289,0.0572075881,-0.0695865676],"action_prob":0.6704080701,"action_logp":-0.3998687267,"action_dist_inputs":[-0.3558039069,0.3542273343],"value_targets":45.8314933777} +{"eps_id":1065225377,"obs":[-0.9223960638,-0.6821526289,0.0572075881,-0.0695865676],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9360391498,-0.8780460954,0.0558158569,0.2405828089],"action_prob":0.5378964543,"action_logp":-0.6200892329,"action_dist_inputs":[0.0735991597,-0.078277953],"value_targets":45.2843360901} 
+{"eps_id":1065225377,"obs":[-0.9360391498,-0.8780460954,0.0558158569,0.2405828089],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.953600049,-0.6837641001,0.0606275126,-0.0339850746],"action_prob":0.698700428,"action_logp":-0.3585331738,"action_dist_inputs":[-0.4212692976,0.419847697],"value_targets":44.7316513062} +{"eps_id":1065225377,"obs":[-0.953600049,-0.6837641001,0.0606275126,-0.0339850746],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9672753215,-0.4895615876,0.0599478111,-0.3069400489],"action_prob":0.5053752661,"action_logp":-0.68245399,"action_dist_inputs":[-0.0129893459,0.0085125417],"value_targets":44.1733856201} +{"eps_id":1065225377,"obs":[-0.9672753215,-0.4895615876,0.0599478111,-0.3069400489],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9770665765,-0.6854842305,0.0538090095,0.0040301546],"action_prob":0.7178990841,"action_logp":-0.3314262331,"action_dist_inputs":[0.4635890722,-0.4704752266],"value_targets":43.6094818115} +{"eps_id":1065225377,"obs":[-0.9770665765,-0.6854842305,0.0538090095,0.0040301546],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9907762408,-0.491173625,0.0538896136,-0.2712017],"action_prob":0.5334545374,"action_logp":-0.6283814311,"action_dist_inputs":[-0.069153145,0.0648652986],"value_targets":43.0398788452} +{"eps_id":1065225377,"obs":[-0.9907762408,-0.491173625,0.0538896136,-0.2712017],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0005997419,-0.6870214939,0.0484655797,0.0379795209],"action_prob":0.6923539042,"action_logp":-0.3676579893,"action_dist_inputs":[0.4021897912,-0.408957541],"value_targets":42.4645233154} 
+{"eps_id":1065225377,"obs":[-1.0005997419,-0.6870214939,0.0484655797,0.0379795209],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0143401623,-0.8828037381,0.0492251702,0.3455513418],"action_prob":0.441986233,"action_logp":-0.8164765239,"action_dist_inputs":[-0.1186180413,0.1144868881],"value_targets":41.8833580017} +{"eps_id":1065225377,"obs":[-1.0143401623,-0.8828037381,0.0492251702,0.3455513418],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0319962502,-0.6884152889,0.0561361983,0.0687882602],"action_prob":0.7503965497,"action_logp":-0.2871534824,"action_dist_inputs":[-0.5509141684,0.5498141646],"value_targets":41.2963218689} +{"eps_id":1065225377,"obs":[-1.0319962502,-0.6884152889,0.0561361983,0.0687882602],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0457645655,-0.4941412508,0.057511963,-0.2056684345],"action_prob":0.5941513777,"action_logp":-0.5206211805,"action_dist_inputs":[-0.1925651282,0.188588798],"value_targets":40.7033538818} +{"eps_id":1065225377,"obs":[-1.0457645655,-0.4941412508,0.057511963,-0.2056684345],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0556473732,-0.6900364757,0.0533985943,0.1045877412],"action_prob":0.6234371066,"action_logp":-0.4725074172,"action_dist_inputs":[0.2488117814,-0.2553511262],"value_targets":40.1044006348} +{"eps_id":1065225377,"obs":[-1.0556473732,-0.6900364757,0.0533985943,0.1045877412],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0694481134,-0.4957187772,0.0554903485,-0.1707815528],"action_prob":0.6183200479,"action_logp":-0.4807490408,"action_dist_inputs":[-0.2431202084,0.2393036634],"value_targets":39.4993934631} 
+{"eps_id":1065225377,"obs":[-1.0694481134,-0.4957187772,0.0554903485,-0.1707815528],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0793625116,-0.3014331758,0.0520747192,-0.4454556406],"action_prob":0.4073789716,"action_logp":-0.8980113864,"action_dist_inputs":[0.1842034757,-0.1906077564],"value_targets":38.8882751465} +{"eps_id":1065225377,"obs":[-1.0793625116,-0.3014331758,0.0520747192,-0.4454556406],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0853911638,-0.4972516894,0.0431656055,-0.1368229538],"action_prob":0.7912600636,"action_logp":-0.2341285497,"action_dist_inputs":[0.6621649861,-0.6703727245],"value_targets":38.2709846497} +{"eps_id":1065225377,"obs":[-1.0853911638,-0.4972516894,0.0431656055,-0.1368229538],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.0953361988,-0.6929644942,0.0404291451,0.1691595167],"action_prob":0.5771898031,"action_logp":-0.5495840907,"action_dist_inputs":[0.1524831057,-0.15876472],"value_targets":37.6474609375} +{"eps_id":1065225377,"obs":[-1.0953361988,-0.6929644942,0.0404291451,0.1691595167],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1091954708,-0.4984438419,0.0438123345,-0.1105000973],"action_prob":0.6480635405,"action_logp":-0.4337665737,"action_dist_inputs":[-0.3070629537,0.3034749627],"value_targets":37.0176353455} +{"eps_id":1065225377,"obs":[-1.1091954708,-0.4984438419,0.0438123345,-0.1105000973],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1191643476,-0.303976208,0.0416023359,-0.3890449405],"action_prob":0.4482657313,"action_logp":-0.8023690581,"action_dist_inputs":[0.1007470191,-0.1069332063],"value_targets":36.3814506531} 
+{"eps_id":1065225377,"obs":[-1.1191643476,-0.303976208,0.0416023359,-0.3890449405],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1252439022,-0.4996631742,0.0338214338,-0.0835408717],"action_prob":0.7597839236,"action_logp":-0.2747211754,"action_dist_inputs":[0.5716980696,-0.5797972083],"value_targets":35.7388381958} +{"eps_id":1065225377,"obs":[-1.1252439022,-0.4996631742,0.0338214338,-0.0835408717],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1352370977,-0.6952531934,0.0321506187,0.2196180075],"action_prob":0.5403810143,"action_logp":-0.6154807806,"action_dist_inputs":[0.0778896213,-0.0839870125],"value_targets":35.0897369385} +{"eps_id":1065225377,"obs":[-1.1352370977,-0.6952531934,0.0321506187,0.2196180075],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1491421461,-0.8908196688,0.0365429781,0.522266686],"action_prob":0.331036061,"action_logp":-1.105527997,"action_dist_inputs":[-0.3534920514,0.3500109613],"value_targets":34.4340782166} +{"eps_id":1065225377,"obs":[-1.1491421461,-0.8908196688,0.0365429781,0.522266686],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1669585705,-0.6962305903,0.0469883122,0.2413192242],"action_prob":0.7975793481,"action_logp":-0.2261739224,"action_dist_inputs":[-0.6860719919,0.6851615906],"value_targets":33.7717971802} +{"eps_id":1065225377,"obs":[-1.1669585705,-0.6962305903,0.0469883122,0.2413192242],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1808831692,-0.5018102527,0.0518146977,-0.03617993],"action_prob":0.691398859,"action_logp":-0.3690384328,"action_dist_inputs":[-0.4050703943,0.4015968442],"value_targets":33.1028251648} 
+{"eps_id":1065225377,"obs":[-1.1808831692,-0.5018102527,0.0518146977,-0.03617993],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1909193993,-0.3074681461,0.0510910973,-0.3120751381],"action_prob":0.5206705332,"action_logp":-0.6526378393,"action_dist_inputs":[-0.044324588,0.0384045169],"value_targets":32.4270935059} +{"eps_id":1065225377,"obs":[-1.1909193993,-0.3074681461,0.0510910973,-0.3120751381],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.1970686913,-0.5032793283,0.0448495932,-0.0037273914],"action_prob":0.6863503456,"action_logp":-0.3763670921,"action_dist_inputs":[0.3875912726,-0.3955201209],"value_targets":31.7445411682} +{"eps_id":1065225377,"obs":[-1.1970686913,-0.5032793283,0.0448495932,-0.0037273914],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.207134366,-0.3088283241,0.0447750464,-0.2819292247],"action_prob":0.5369395018,"action_logp":-0.6218698621,"action_dist_inputs":[-0.0769125521,0.071115151],"value_targets":31.0550918579} +{"eps_id":1065225377,"obs":[-1.207134366,-0.3088283241,0.0447750464,-0.2819292247],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.213310957,-0.5045593977,0.0391364619,0.0245325807],"action_prob":0.6690307856,"action_logp":-0.4019251764,"action_dist_inputs":[0.3479851484,-0.355819732],"value_targets":30.3586788177} +{"eps_id":1065225377,"obs":[-1.213310957,-0.5045593977,0.0391364619,0.0245325807],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2234021425,-0.3100199103,0.0396271124,-0.2555499971],"action_prob":0.5507227182,"action_logp":-0.596523881,"action_dist_inputs":[-0.1046468467,0.098944217],"value_targets":29.6552295685} 
+{"eps_id":1065225377,"obs":[-1.2234021425,-0.3100199103,0.0396271124,-0.2555499971],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2296024561,-0.5056845546,0.0345161147,0.0493639112],"action_prob":0.6532819271,"action_logp":-0.4257464707,"action_dist_inputs":[0.312872529,-0.3206242919],"value_targets":28.9446773529} +{"eps_id":1065225377,"obs":[-1.2296024561,-0.5056845546,0.0345161147,0.0493639112],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2397161722,-0.3110741079,0.0355033912,-0.2322321832],"action_prob":0.5625737906,"action_logp":-0.575232923,"action_dist_inputs":[-0.1286226511,0.1229917556],"value_targets":28.2269458771} +{"eps_id":1065225377,"obs":[-1.2397161722,-0.3110741079,0.0355033912,-0.2322321832],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.245937705,-0.5066848993,0.0308587477,0.0714349076],"action_prob":0.6388413906,"action_logp":-0.4480991066,"action_dist_inputs":[0.281329751,-0.289008975],"value_targets":27.5019664764} +{"eps_id":1065225377,"obs":[-1.245937705,-0.5066848993,0.0308587477,0.0714349076],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2560713291,-0.3120186329,0.0322874449,-0.2113544345],"action_prob":0.5729564428,"action_logp":-0.556945622,"action_dist_inputs":[-0.1497506797,0.1441729367],"value_targets":26.7696628571} +{"eps_id":1065225377,"obs":[-1.2560713291,-0.3120186329,0.0322874449,-0.2113544345],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.262311697,-0.1173728555,0.028060358,-0.4936800897],"action_prob":0.3745687902,"action_logp":-0.981979847,"action_dist_inputs":[0.2525253594,-0.2601406574],"value_targets":26.0299625397} 
+{"eps_id":1065225377,"obs":[-1.262311697,-0.1173728555,0.028060358,-0.4936800897],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2646591663,-0.3128790855,0.0181867555,-0.1922875196],"action_prob":0.803458333,"action_logp":-0.2188299447,"action_dist_inputs":[0.6994137764,-0.7086372972],"value_targets":25.2827911377} +{"eps_id":1065225377,"obs":[-1.2646591663,-0.3128790855,0.0181867555,-0.1922875196],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2709168196,-0.118021965,0.0143410051,-0.4791782498],"action_prob":0.3741386533,"action_logp":-0.9831287861,"action_dist_inputs":[0.2534686327,-0.2610336244],"value_targets":24.5280704498} +{"eps_id":1065225377,"obs":[-1.2709168196,-0.118021965,0.0143410051,-0.4791782498],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2732771635,-0.3133434057,0.0047574402,-0.1820101142],"action_prob":0.8050133586,"action_logp":-0.2168963999,"action_dist_inputs":[0.7043632865,-0.7135647535],"value_targets":23.7657279968} +{"eps_id":1065225377,"obs":[-1.2732771635,-0.3133434057,0.0047574402,-0.1820101142],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2795441151,-0.1182898507,0.0011172377,-0.4731884599],"action_prob":0.3681125641,"action_logp":-0.9993665218,"action_dist_inputs":[0.2663823068,-0.2739401162],"value_targets":22.9956855774} +{"eps_id":1065225377,"obs":[-1.2795441151,-0.1182898507,0.0011172377,-0.4731884599],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2819098234,0.0768162981,-0.0083465315,-0.7655190229],"action_prob":0.1896635592,"action_logp":-1.6625034809,"action_dist_inputs":[0.7214967608,-0.7307010293],"value_targets":22.2178649902} 
+{"eps_id":1065225377,"obs":[-1.2819098234,0.0768162981,-0.0083465315,-0.7655190229],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2803735733,-0.1181897298,-0.0236569121,-0.4754740298],"action_prob":0.9087480903,"action_logp":-0.0956873745,"action_dist_inputs":[1.1441924572,-1.1542513371],"value_targets":21.4321861267} +{"eps_id":1065225377,"obs":[-1.2803735733,-0.1181897298,-0.0236569121,-0.4754740298],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2827373743,0.0772581473,-0.0331663936,-0.7755182981],"action_prob":0.1718453616,"action_logp":-1.7611602545,"action_dist_inputs":[0.7816985846,-0.7909061909],"value_targets":20.6385707855} +{"eps_id":1065225377,"obs":[-1.2827373743,0.0772581473,-0.0331663936,-0.7755182981],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2811921835,-0.1173922941,-0.048676759,-0.4934523404],"action_prob":0.918088913,"action_logp":-0.085461013,"action_dist_inputs":[1.2033355236,-1.2133252621],"value_targets":19.8369407654} +{"eps_id":1065225377,"obs":[-1.2811921835,-0.1173922941,-0.048676759,-0.4934523404],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2835400105,-0.3117951453,-0.0585458055,-0.2164987922],"action_prob":0.8508099318,"action_logp":-0.1615665406,"action_dist_inputs":[0.8658695221,-0.8750983477],"value_targets":19.0272140503} +{"eps_id":1065225377,"obs":[-1.2835400105,-0.3117951453,-0.0585458055,-0.2164987922],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2897759676,-0.506033361,-0.0628757775,0.0571567602],"action_prob":0.7179454565,"action_logp":-0.331361711,"action_dist_inputs":[0.4632183611,-0.4710747004],"value_targets":18.2093067169} 
+{"eps_id":1065225377,"obs":[-1.2897759676,-0.506033361,-0.0628757775,0.0571567602],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.2998965979,-0.7002000809,-0.0617326461,0.3293579817],"action_prob":0.5343328714,"action_logp":-0.6267362833,"action_dist_inputs":[0.0657773316,-0.0717706382],"value_targets":17.3831367493} +{"eps_id":1065225377,"obs":[-1.2998965979,-0.7002000809,-0.0617326461,0.3293579817],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3139005899,-0.5042561293,-0.0551454872,0.0178640746],"action_prob":0.6366994381,"action_logp":-0.4514575899,"action_dist_inputs":[-0.2824545801,0.2786126435],"value_targets":16.5486240387} +{"eps_id":1065225377,"obs":[-1.3139005899,-0.5042561293,-0.0551454872,0.0178640746],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3239856958,-0.3083884716,-0.0547882058,-0.2916953564],"action_prob":0.4474514127,"action_logp":-0.8041872978,"action_dist_inputs":[0.1023477912,-0.1086255908],"value_targets":15.7056808472} +{"eps_id":1065225377,"obs":[-1.3239856958,-0.3083884716,-0.0547882058,-0.2916953564],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3301534653,-0.1125298813,-0.060622111,-0.6011421084],"action_prob":0.2454157472,"action_logp":-1.4048016071,"action_dist_inputs":[0.5574728251,-0.5657404065],"value_targets":14.8542232513} +{"eps_id":1065225377,"obs":[-1.3301534653,-0.1125298813,-0.060622111,-0.6011421084],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3324041367,-0.3067537546,-0.0726449564,-0.3281534612],"action_prob":0.8857992291,"action_logp":-0.1212649792,"action_dist_inputs":[1.0195220709,-1.0290099382],"value_targets":13.9941644669} 
+{"eps_id":1065225377,"obs":[-1.3324041367,-0.3067537546,-0.0726449564,-0.3281534612],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3385391235,-0.50077039,-0.0792080238,-0.059234947],"action_prob":0.7876526713,"action_logp":-0.2386980653,"action_dist_inputs":[0.6512103677,-0.6596237421],"value_targets":13.125418663} +{"eps_id":1065225377,"obs":[-1.3385391235,-0.50077039,-0.0792080238,-0.059234947],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3485546112,-0.6946725249,-0.0803927183,0.2074435949],"action_prob":0.6295959353,"action_logp":-0.462677002,"action_dist_inputs":[0.2618158162,-0.2686679363],"value_targets":12.2478981018} +{"eps_id":1065225377,"obs":[-1.3485546112,-0.6946725249,-0.0803927183,0.2074435949],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3624479771,-0.8885583878,-0.0762438476,0.4737230539],"action_prob":0.4537871182,"action_logp":-0.7901270986,"action_dist_inputs":[-0.0950677097,0.0903129131],"value_targets":11.3615131378} +{"eps_id":1065225377,"obs":[-1.3624479771,-0.8885583878,-0.0762438476,0.4737230539],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3802192211,-0.6924471259,-0.0667693913,0.1580168009],"action_prob":0.6893606782,"action_logp":-0.3719906807,"action_dist_inputs":[-0.3998662233,0.3972659111],"value_targets":10.4661741257} +{"eps_id":1065225377,"obs":[-1.3802192211,-0.6924471259,-0.0667693913,0.1580168009],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.3940681219,-0.4964359403,-0.0636090562,-0.1549604833],"action_prob":0.5288499594,"action_logp":-0.6370505095,"action_dist_inputs":[-0.0603324957,0.0551956668],"value_targets":9.5617923737} 
+{"eps_id":1065225377,"obs":[-1.3940681219,-0.4964359403,-0.0636090562,-0.1549604833],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4039968252,-0.3004636467,-0.0667082593,-0.467012912],"action_prob":0.3291364908,"action_logp":-1.1112827063,"action_dist_inputs":[0.3523114026,-0.3597816825],"value_targets":8.6482753754} +{"eps_id":1065225377,"obs":[-1.4039968252,-0.3004636467,-0.0667082593,-0.467012912],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4100061655,-0.1044657379,-0.0760485232,-0.7799535394],"action_prob":0.1642264277,"action_logp":-1.8065091372,"action_dist_inputs":[0.8090758324,-0.8180357218],"value_targets":7.7255306244} +{"eps_id":1065225377,"obs":[-1.4100061655,-0.1044657379,-0.0760485232,-0.7799535394],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4120954275,-0.2984643877,-0.0916475952,-0.5121333599],"action_prob":0.9202561975,"action_logp":-0.0831032023,"action_dist_inputs":[1.2180546522,-1.2277786732],"value_targets":6.7934651375} +{"eps_id":1065225377,"obs":[-1.4120954275,-0.2984643877,-0.0916475952,-0.5121333599],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4180647135,-0.4921840131,-0.1018902585,-0.2496800721],"action_prob":0.8638669848,"action_logp":-0.146336481,"action_dist_inputs":[0.9193941355,-0.9283921719],"value_targets":5.8519849777} +{"eps_id":1065225377,"obs":[-1.4180647135,-0.4921840131,-0.1018902585,-0.2496800721],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4279084206,-0.6857144833,-0.1068838611,0.0092061423],"action_prob":0.7575938702,"action_logp":-0.2776078582,"action_dist_inputs":[0.5658095479,-0.5737234354],"value_targets":4.9009947777} 
+{"eps_id":1065225377,"obs":[-1.4279084206,-0.6857144833,-0.1068838611,0.0092061423],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4416227341,-0.8791541457,-0.1066997349,0.2663457394],"action_prob":0.6058934927,"action_logp":-0.5010510683,"action_dist_inputs":[0.2118669599,-0.2182160318],"value_targets":3.9403989315} +{"eps_id":1065225377,"obs":[-1.4416227341,-0.8791541457,-0.1066997349,0.2663457394],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4592057467,-0.6826839447,-0.1013728231,-0.0579937771],"action_prob":0.5522790551,"action_logp":-0.5937017798,"action_dist_inputs":[-0.107052967,0.1028304473],"value_targets":2.970099926} +{"eps_id":1065225377,"obs":[-1.4592057467,-0.6826839447,-0.1013728231,-0.0579937771],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-1.4728595018,-0.8762173057,-0.1025326997,0.20106332],"action_prob":0.6397118568,"action_logp":-0.4467374384,"action_dist_inputs":[0.2836571336,-0.2904565334],"value_targets":1.9900000095} +{"eps_id":1065225377,"obs":[-1.4728595018,-0.8762173057,-0.1025326997,0.20106332],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-1.4903838634,-1.0697348118,-0.0985114351,0.4597232044],"action_prob":0.481746912,"action_logp":-0.7303363681,"action_dist_inputs":[-0.0389072634,0.0341375098],"value_targets":1.0} +{"eps_id":768633253,"obs":[-0.0128158424,-0.0483878925,0.0421475321,0.0473228581],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0137836002,-0.2440880686,0.0430939905,0.3530001342],"action_prob":0.3157020509,"action_logp":-1.1529563665,"action_dist_inputs":[-0.3864299655,0.3871644437],"value_targets":86.6020355225} 
+{"eps_id":768633253,"obs":[-0.0137836002,-0.2440880686,0.0430939905,0.3530001342],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0186653621,-0.0496045612,0.0501539931,0.0742114186],"action_prob":0.8941495419,"action_logp":-0.1118822321,"action_dist_inputs":[-1.0649197102,1.0689263344],"value_targets":86.4666976929} +{"eps_id":768633253,"obs":[-0.0186653621,-0.0496045612,0.0501539931,0.0742114186],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0196574535,0.1447638422,0.0516382195,-0.2022354752],"action_prob":0.734611094,"action_logp":-0.3084140122,"action_dist_inputs":[-0.5085784197,0.5095663667],"value_targets":86.3300018311} +{"eps_id":768633253,"obs":[-0.0196574535,0.1447638422,0.0516382195,-0.2022354752],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0167621765,0.3391107023,0.0475935116,-0.4781922996],"action_prob":0.3584933579,"action_logp":-1.02584517,"action_dist_inputs":[0.2898245752,-0.2920848727],"value_targets":86.1919174194} +{"eps_id":768633253,"obs":[-0.0167621765,0.3391107023,0.0475935116,-0.4781922996],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0099799624,0.1433502585,0.0380296633,-0.1708969325],"action_prob":0.8703786135,"action_logp":-0.1388269812,"action_dist_inputs":[0.9496970773,-0.9546128511],"value_targets":86.052444458} +{"eps_id":768633253,"obs":[-0.0099799624,0.1433502585,0.0380296633,-0.1708969325],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0071129566,-0.0522947833,0.0346117243,0.1335364729],"action_prob":0.6212910414,"action_logp":-0.4759556055,"action_dist_inputs":[0.246506691,-0.2485250831],"value_targets":85.9115600586} 
+{"eps_id":768633253,"obs":[-0.0071129566,-0.0522947833,0.0346117243,0.1335364729],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0081588523,-0.2478949875,0.0372824557,0.4369348586],"action_prob":0.2237353176,"action_logp":-1.4972915649,"action_dist_inputs":[-0.6212610006,0.6227687597],"value_targets":85.7692489624} +{"eps_id":768633253,"obs":[-0.0081588523,-0.2478949875,0.0372824557,0.4369348586],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0131167518,-0.0533200577,0.0460211523,0.1562338024],"action_prob":0.9122587442,"action_logp":-0.0918315947,"action_dist_inputs":[-1.168422699,1.173109293],"value_targets":85.62550354} +{"eps_id":768633253,"obs":[-0.0131167518,-0.0533200577,0.0460211523,0.1562338024],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0141831534,0.1411138028,0.0491458289,-0.1215827465],"action_prob":0.8097763658,"action_logp":-0.2109971344,"action_dist_inputs":[-0.7233934999,0.7251644135],"value_targets":85.4803085327} +{"eps_id":768633253,"obs":[-0.0141831534,0.1411138028,0.0491458289,-0.1215827465],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0113608772,-0.0546765588,0.0467141718,0.1861912459],"action_prob":0.5087525249,"action_logp":-0.6757935882,"action_dist_inputs":[0.0167311728,-0.0182824768],"value_targets":85.3336486816} +{"eps_id":768633253,"obs":[-0.0113608772,-0.0546765588,0.0467141718,0.1861912459],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0124544082,0.1397469789,0.050437998,-0.0913969353],"action_prob":0.8315332532,"action_logp":-0.184484005,"action_dist_inputs":[-0.7972251177,0.7993077636],"value_targets":85.1855010986} 
+{"eps_id":768633253,"obs":[-0.0124544082,0.1397469789,0.050437998,-0.0913969353],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0096594691,0.3341110647,0.0486100614,-0.3677498102],"action_prob":0.5476159453,"action_logp":-0.6021810174,"action_dist_inputs":[-0.0961441547,0.0948986709],"value_targets":85.0358581543} +{"eps_id":768633253,"obs":[-0.0096594691,0.3341110647,0.0486100614,-0.3677498102],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0029772476,0.1383333057,0.0412550643,-0.0601443313],"action_prob":0.7942522764,"action_logp":-0.2303541601,"action_dist_inputs":[0.6732864976,-0.6774635315],"value_targets":84.8847045898} +{"eps_id":768633253,"obs":[-0.0029772476,0.1383333057,0.0412550643,-0.0601443313],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0002105815,-0.0573551357,0.0400521755,0.2452641129],"action_prob":0.420548141,"action_logp":-0.8661963344,"action_dist_inputs":[-0.1607293934,0.1597943008],"value_targets":84.7320251465} +{"eps_id":768633253,"obs":[-0.0002105815,-0.0573551357,0.0400521755,0.2452641129],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0013576843,0.13717255,0.044957459,-0.0345212147],"action_prob":0.8587604165,"action_logp":-0.1522652954,"action_dist_inputs":[-0.9011695385,0.9038626552],"value_targets":84.5778045654} +{"eps_id":768633253,"obs":[-0.0013576843,0.13717255,0.044957459,-0.0345212147],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0013857668,0.3316219449,0.044267036,-0.3126875162],"action_prob":0.6288912296,"action_logp":-0.4637969434,"action_dist_inputs":[-0.2640430629,0.2634200156],"value_targets":84.4220275879} 
+{"eps_id":768633253,"obs":[0.0013857668,0.3316219449,0.044267036,-0.3126875162],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0080182059,0.1358982176,0.0380132832,-0.0063793096],"action_prob":0.7417643666,"action_logp":-0.2987236679,"action_dist_inputs":[0.5256999731,-0.5294590592],"value_targets":84.2646713257} +{"eps_id":768633253,"obs":[0.0080182059,0.1358982176,0.0380132832,-0.0063793096],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0107361702,-0.0597476922,0.0378856994,0.2980507612],"action_prob":0.3433187008,"action_logp":-1.0690960884,"action_dist_inputs":[-0.3244222403,0.3241174519],"value_targets":84.1057281494} +{"eps_id":768633253,"obs":[0.0107361702,-0.0597476922,0.0378856994,0.2980507612],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0095412163,0.1348143071,0.0438467115,0.0175529309],"action_prob":0.8785589337,"action_logp":-0.1294723004,"action_dist_inputs":[-0.9877877831,0.991066277],"value_targets":83.9451828003} +{"eps_id":768633253,"obs":[0.0095412163,0.1348143071,0.0438467115,0.0175529309],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0122375023,0.3292809129,0.0441977717,-0.2609798312],"action_prob":0.699809432,"action_logp":-0.3569472134,"action_dist_inputs":[-0.4231780469,0.423212558],"value_targets":83.7830123901} +{"eps_id":768633253,"obs":[0.0122375023,0.3292809129,0.0441977717,-0.2609798312],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0188231207,0.133556813,0.0389781743,0.0453095324],"action_prob":0.6722558141,"action_logp":-0.3971163034,"action_dist_inputs":[0.3575543761,-0.3608513176],"value_targets":83.6192016602} 
+{"eps_id":768633253,"obs":[0.0188231207,0.133556813,0.0389781743,0.0453095324],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0214942563,0.328098774,0.0398843661,-0.2348251045],"action_prob":0.7251365185,"action_logp":-0.3213953078,"action_dist_inputs":[-0.4848512411,0.4852343202],"value_targets":83.453742981} +{"eps_id":768633253,"obs":[0.0214942563,0.328098774,0.0398843661,-0.2348251045],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0280562323,0.1324303448,0.0351878628,0.0701670349],"action_prob":0.6417961121,"action_logp":-0.4434845746,"action_dist_inputs":[0.2900648415,-0.2931036353],"value_targets":83.286605835} +{"eps_id":768633253,"obs":[0.0280562323,0.1324303448,0.0351878628,0.0701670349],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0307048392,0.3270306289,0.0365912057,-0.2112095803],"action_prob":0.7464984655,"action_logp":-0.2923617065,"action_dist_inputs":[-0.539655149,0.540368557],"value_targets":83.1177825928} +{"eps_id":768633253,"obs":[0.0307048392,0.3270306289,0.0365912057,-0.2112095803],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0372454524,0.1314051151,0.0323670134,0.0927876756],"action_prob":0.611682713,"action_logp":-0.4915415943,"action_dist_inputs":[0.2258051932,-0.2285856009],"value_targets":82.9472579956} +{"eps_id":768633253,"obs":[0.0372454524,0.1314051151,0.0323670134,0.0927876756],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0398735553,0.326048553,0.0342227668,-0.1895103306],"action_prob":0.764944911,"action_logp":-0.2679514289,"action_dist_inputs":[-0.589477241,0.5905066133],"value_targets":82.7750091553} 
+{"eps_id":768633253,"obs":[0.0398735553,0.326048553,0.0342227668,-0.1895103306],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0463945232,0.5206646323,0.0304325595,-0.471203953],"action_prob":0.4183328748,"action_logp":-0.8714778423,"action_dist_inputs":[0.1635508984,-0.1660699844],"value_targets":82.601020813} +{"eps_id":768633253,"obs":[0.0463945232,0.5206646323,0.0304325595,-0.471203953],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.056807816,0.7153437734,0.0210084803,-0.7541416883],"action_prob":0.1495145112,"action_logp":-1.9003617764,"action_dist_inputs":[0.8666111231,-0.871802628],"value_targets":82.4252700806} +{"eps_id":768633253,"obs":[0.056807816,0.7153437734,0.0210084803,-0.7541416883],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0711146891,0.9101698995,0.005925647,-1.0401403904],"action_prob":0.0714199021,"action_logp":-2.6391787529,"action_dist_inputs":[1.278958559,-1.2861214876],"value_targets":82.2477493286} +{"eps_id":768633253,"obs":[0.0711146891,0.9101698995,0.005925647,-1.0401403904],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0893180892,0.7149696946,-0.0148771601,-0.7456030846],"action_prob":0.950316906,"action_logp":-0.0509597585,"action_dist_inputs":[1.4711549282,-1.4799765348],"value_targets":82.0684280396} +{"eps_id":768633253,"obs":[0.0893180892,0.7149696946,-0.0148771601,-0.7456030846],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1036174819,0.5200561881,-0.0297892224,-0.4576388597],"action_prob":0.934679985,"action_logp":-0.067551069,"action_dist_inputs":[1.3268566132,-1.3340491056],"value_targets":81.8873062134} 
+{"eps_id":768633253,"obs":[0.1036174819,0.5200561881,-0.0297892224,-0.4576388597],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1140186116,0.3253677487,-0.038941998,-0.1744925231],"action_prob":0.886847198,"action_logp":-0.1200825572,"action_dist_inputs":[1.0269534588,-1.0319797993],"value_targets":81.7043457031} +{"eps_id":768633253,"obs":[0.1140186116,0.3253677487,-0.038941998,-0.1744925231],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1205259636,0.1308241487,-0.04243185,0.1056555957],"action_prob":0.715731442,"action_logp":-0.3344502747,"action_dist_inputs":[0.4605646431,-0.4628210068],"value_targets":81.5195465088} +{"eps_id":768633253,"obs":[0.1205259636,0.1308241487,-0.04243185,0.1056555957],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1231424436,-0.0636648536,-0.0403187387,0.3846552968],"action_prob":0.3450450301,"action_logp":-1.0640803576,"action_dist_inputs":[-0.3198623955,0.3210290968],"value_targets":81.3328704834} +{"eps_id":768633253,"obs":[0.1231424436,-0.0636648536,-0.0403187387,0.3846552968],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1218691468,0.132005617,-0.0326256305,0.0795376077],"action_prob":0.8719193339,"action_logp":-0.1370583326,"action_dist_inputs":[-0.9568715096,0.961165607],"value_targets":81.144317627} +{"eps_id":768633253,"obs":[0.1218691468,0.132005617,-0.0326256305,0.0795376077],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1245092601,0.3275797069,-0.0310348794,-0.223257646],"action_prob":0.6357491016,"action_logp":-0.4529512525,"action_dist_inputs":[-0.2779761851,0.2789849937],"value_targets":80.9538574219} 
+{"eps_id":768633253,"obs":[0.1245092601,0.3275797069,-0.0310348794,-0.223257646],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1310608536,0.1329147667,-0.035500031,0.0594763793],"action_prob":0.7613806129,"action_logp":-0.2726219296,"action_dist_inputs":[0.5789039135,-0.5813595653],"value_targets":80.76146698} +{"eps_id":768633253,"obs":[0.1310608536,0.1329147667,-0.035500031,0.0594763793],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1337191463,-0.0616806783,-0.0343105048,0.3407508135],"action_prob":0.403301388,"action_logp":-0.9080711007,"action_dist_inputs":[-0.1954299361,0.1962980628],"value_targets":80.5671386719} +{"eps_id":768633253,"obs":[0.1337191463,-0.0616806783,-0.0343105048,0.3407508135],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1324855387,0.1339122206,-0.0274954885,0.0374488272],"action_prob":0.8566888571,"action_logp":-0.1546805054,"action_dist_inputs":[-0.8919866681,0.8960698843],"value_targets":80.3708496094} +{"eps_id":768633253,"obs":[0.1324855387,0.1339122206,-0.0274954885,0.0374488272],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.135163784,-0.06080487,-0.0267465115,0.3213314116],"action_prob":0.4220999479,"action_logp":-0.8625131845,"action_dist_inputs":[-0.1567091048,0.157449767],"value_targets":80.1725769043} +{"eps_id":768633253,"obs":[0.135163784,-0.06080487,-0.0267465115,0.3213314116],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1339476854,0.1346875578,-0.0203198846,0.0203351155],"action_prob":0.8524523973,"action_logp":-0.159637928,"action_dist_inputs":[-0.8749812245,0.8789851665],"value_targets":79.9722976685} 
+{"eps_id":768633253,"obs":[0.1339476854,0.1346875578,-0.0203198846,0.0203351155],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1366414428,-0.0601371787,-0.0199131817,0.306538254],"action_prob":0.4348371327,"action_logp":-0.832783699,"action_dist_inputs":[-0.1307435185,0.131398797],"value_targets":79.7699966431} +{"eps_id":768633253,"obs":[0.1366414428,-0.0601371787,-0.0199131817,0.306538254],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1354386955,-0.2549698055,-0.0137824165,0.592875123],"action_prob":0.1501257271,"action_logp":-1.896282196,"action_dist_inputs":[-0.8648299575,0.8687855005],"value_targets":79.5656509399} +{"eps_id":768633253,"obs":[0.1354386955,-0.2549698055,-0.0137824165,0.592875123],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1303392947,-0.0596576482,-0.0019249136,0.2958828509],"action_prob":0.9284619093,"action_logp":-0.0742259026,"action_dist_inputs":[-1.2783967257,1.2849024534],"value_targets":79.3592453003} +{"eps_id":768633253,"obs":[0.1303392947,-0.0596576482,-0.0019249136,0.2958828509],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1291461438,-0.2547520995,0.0039927433,0.5879580975],"action_prob":0.141756326,"action_logp":-1.9536457062,"action_dist_inputs":[-0.8984004259,0.9023779631],"value_targets":79.1507568359} +{"eps_id":768633253,"obs":[0.1291461438,-0.2547520995,0.0039927433,0.5879580975],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1240511015,-0.0596862957,0.0157519057,0.2965355515],"action_prob":0.9312400818,"action_logp":-0.0712381452,"action_dist_inputs":[-1.2996524572,1.3062440157],"value_targets":78.9401550293} 
+{"eps_id":768633253,"obs":[0.1240511015,-0.0596862957,0.0157519057,0.2965355515],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1228573769,0.1352075934,0.0216826163,0.0088618472],"action_prob":0.8707071543,"action_logp":-0.1384495497,"action_dist_inputs":[-0.9515641928,0.9556618929],"value_targets":78.727432251} +{"eps_id":768633253,"obs":[0.1228573769,0.1352075934,0.0216826163,0.0088618472],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1255615354,0.3300119936,0.0218598526,-0.276901871],"action_prob":0.6406878233,"action_logp":-0.4452129304,"action_dist_inputs":[-0.2887610495,0.2895897627],"value_targets":78.5125579834} +{"eps_id":768633253,"obs":[0.1255615354,0.3300119936,0.0218598526,-0.276901871],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1321617663,0.5248153806,0.0163218156,-0.562610805],"action_prob":0.2612810135,"action_logp":-1.3421587944,"action_dist_inputs":[0.5184431076,-0.5208780169],"value_targets":78.2955093384} +{"eps_id":768633253,"obs":[0.1321617663,0.5248153806,0.0163218156,-0.562610805],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1426580697,0.3294682205,0.0050695995,-0.2648307383],"action_prob":0.8967703581,"action_logp":-0.1089554429,"action_dist_inputs":[1.0783959627,-1.0834475756],"value_targets":78.0762710571} +{"eps_id":768633253,"obs":[0.1426580697,0.3294682205,0.0050695995,-0.2648307383],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1492474377,0.5245174766,-0.0002270155,-0.5559103489],"action_prob":0.2484392524,"action_logp":-1.3925569057,"action_dist_inputs":[0.5523258448,-0.5546278358],"value_targets":77.8548202515} 
+{"eps_id":768633253,"obs":[0.1492474377,0.5245174766,-0.0002270155,-0.5559103489],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1597377956,0.3293986917,-0.0113452226,-0.2632989585],"action_prob":0.9011975527,"action_logp":-0.1040308028,"action_dist_inputs":[1.1027956009,-1.1078064442],"value_targets":77.6311340332} +{"eps_id":768633253,"obs":[0.1597377956,0.3293986917,-0.0113452226,-0.2632989585],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1663257629,0.1344404966,-0.0166112017,0.0257840678],"action_prob":0.7736151814,"action_logp":-0.256680727,"action_dist_inputs":[0.6132988334,-0.615539372],"value_targets":77.4051818848} +{"eps_id":768633253,"obs":[0.1663257629,0.1344404966,-0.0166112017,0.0257840678],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1690145731,-0.0604393445,-0.0160955209,0.3131800294],"action_prob":0.4200272262,"action_logp":-0.8674357533,"action_dist_inputs":[-0.1608038843,0.161857754],"value_targets":77.1769561768} +{"eps_id":768633253,"obs":[0.1690145731,-0.0604393445,-0.0160955209,0.3131800294],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1678057909,0.1349081546,-0.0098319203,0.0154648563],"action_prob":0.8564505577,"action_logp":-0.1549586654,"action_dist_inputs":[-0.890894413,0.8952230215],"value_targets":76.9464187622} +{"eps_id":768633253,"obs":[0.1678057909,0.1349081546,-0.0098319203,0.0154648563],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1705039442,-0.0600714236,-0.0095226234,0.3050295413],"action_prob":0.4220692515,"action_logp":-0.8625859022,"action_dist_inputs":[-0.1566274911,0.1576573104],"value_targets":76.7135543823} 
+{"eps_id":768633253,"obs":[0.1705039442,-0.0600714236,-0.0095226234,0.3050295413],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1693025231,0.1351849288,-0.0034220321,0.0093586948],"action_prob":0.8572394252,"action_logp":-0.1540380418,"action_dist_inputs":[-0.8941081166,0.8984403014],"value_targets":76.4783401489} +{"eps_id":768633253,"obs":[0.1693025231,0.1351849288,-0.0034220321,0.0093586948],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1720062196,0.3303557932,-0.0032348582,-0.2844019532],"action_prob":0.5821433067,"action_logp":-0.5410386324,"action_dist_inputs":[-0.1652677655,0.1663103104],"value_targets":76.2407455444} +{"eps_id":768633253,"obs":[0.1720062196,0.3303557932,-0.0032348582,-0.2844019532],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1786133349,0.1352801174,-0.0089228973,0.0072589628],"action_prob":0.7823537588,"action_logp":-0.2454482913,"action_dist_inputs":[0.6386157274,-0.6408204436],"value_targets":76.0007553101} +{"eps_id":768633253,"obs":[0.1786133349,0.1352801174,-0.0089228973,0.0072589628],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1813189387,0.3305288851,-0.0087777181,-0.2882258296],"action_prob":0.5650407076,"action_logp":-0.5708574653,"action_dist_inputs":[-0.1302909255,0.131354481],"value_targets":75.7583389282} +{"eps_id":768633253,"obs":[0.1813189387,0.3305288851,-0.0087777181,-0.2882258296],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1879295111,0.1355332136,-0.0145422351,0.0016758064],"action_prob":0.7923007607,"action_logp":-0.2328141928,"action_dist_inputs":[0.6683350205,-0.6705152988],"value_targets":75.5134735107} 
+{"eps_id":768633253,"obs":[0.1879295111,0.1355332136,-0.0145422351,0.0016758064],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1906401813,-0.0593771972,-0.0145087186,0.2897352278],"action_prob":0.4585143924,"action_logp":-0.7797635794,"action_dist_inputs":[-0.0826354027,0.0836892948],"value_targets":75.26612854} +{"eps_id":768633253,"obs":[0.1906401813,-0.0593771972,-0.0145087186,0.2897352278],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1894526333,0.1359486133,-0.0087140137,-0.0074880412],"action_prob":0.8439552784,"action_logp":-0.1696557999,"action_dist_inputs":[-0.8418151736,0.8461412191],"value_targets":75.0162963867} +{"eps_id":768633253,"obs":[0.1894526333,0.1359486133,-0.0087140137,-0.0074880412],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1921716034,-0.0590472966,-0.0088637751,0.2824328244],"action_prob":0.4608459473,"action_logp":-0.7746914625,"action_dist_inputs":[-0.0779517591,0.0789858103],"value_targets":74.7639312744} +{"eps_id":768633253,"obs":[0.1921716034,-0.0590472966,-0.0088637751,0.2824328244],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1909906566,0.1361999512,-0.0032151183,-0.013032468],"action_prob":0.8444444537,"action_logp":-0.1690763384,"action_dist_inputs":[-0.8436746597,0.8480012417],"value_targets":74.5090255737} +{"eps_id":768633253,"obs":[0.1909906566,0.1361999512,-0.0032151183,-0.013032468],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1937146634,-0.0588757396,-0.0034757678,0.2786343098],"action_prob":0.4576983154,"action_logp":-0.7815449834,"action_dist_inputs":[-0.0842840374,0.0853280872],"value_targets":74.2515411377} 
+{"eps_id":768633253,"obs":[0.1937146634,-0.0588757396,-0.0034757678,0.2786343098],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1925371438,0.1362956166,0.0020969186,-0.0151428301],"action_prob":0.8468515277,"action_logp":-0.1662299037,"action_dist_inputs":[-0.8528818488,0.8572359681],"value_targets":73.9914550781} +{"eps_id":768633253,"obs":[0.1925371438,0.1362956166,0.0020969186,-0.0151428301],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1952630579,0.3313874304,0.001794062,-0.3071634173],"action_prob":0.5509123802,"action_logp":-0.5961795449,"action_dist_inputs":[-0.1016372442,0.1027205139],"value_targets":73.7287445068} +{"eps_id":768633253,"obs":[0.1952630579,0.3313874304,0.001794062,-0.3071634173],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2018908113,0.1362399757,-0.0043492066,-0.0139152305],"action_prob":0.7947420478,"action_logp":-0.2297376841,"action_dist_inputs":[0.6758285165,-0.677921474],"value_targets":73.4633712769} +{"eps_id":768633253,"obs":[0.2018908113,0.1362399757,-0.0043492066,-0.0139152305],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2046156079,0.3314240277,-0.0046275109,-0.3079672158],"action_prob":0.5371277332,"action_logp":-0.6215193868,"action_dist_inputs":[-0.0738271326,0.074957557],"value_targets":73.1953277588} +{"eps_id":768633253,"obs":[0.2046156079,0.3314240277,-0.0046275109,-0.3079672158],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2112440914,0.5266116261,-0.0107868547,-0.6021059155],"action_prob":0.1978445202,"action_logp":-1.6202738285,"action_dist_inputs":[0.6988826394,-0.7009382844],"value_targets":72.9245758057} 
+{"eps_id":768633253,"obs":[0.2112440914,0.5266116261,-0.0107868547,-0.6021059155],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2217763215,0.3316421807,-0.0228289738,-0.3128400743],"action_prob":0.9105316401,"action_logp":-0.0937266052,"action_dist_inputs":[1.1576069593,-1.1625370979],"value_targets":72.6510848999} +{"eps_id":768633253,"obs":[0.2217763215,0.3316421807,-0.0228289738,-0.3128400743],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2284091562,0.136852771,-0.0290857758,-0.0274432283],"action_prob":0.823048234,"action_logp":-0.1947404742,"action_dist_inputs":[0.7675349116,-0.7696027756],"value_targets":72.3748321533} +{"eps_id":768633253,"obs":[0.2284091562,0.136852771,-0.0290857758,-0.0274432283],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2311462164,-0.0578402579,-0.0296346396,0.2559227645],"action_prob":0.5453847051,"action_logp":-0.606263876,"action_dist_inputs":[0.0915597975,-0.0904801786],"value_targets":72.0957946777} +{"eps_id":768633253,"obs":[0.2311462164,-0.0578402579,-0.0296346396,0.2559227645],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2299894094,0.1376919895,-0.0245161839,-0.0459581278],"action_prob":0.8002098799,"action_logp":-0.2228812575,"action_dist_inputs":[-0.6916769147,0.6959293485],"value_targets":71.8139266968} +{"eps_id":768633253,"obs":[0.2299894094,0.1376919895,-0.0245161839,-0.0459581278],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2327432483,0.3331567645,-0.0254353471,-0.3462741971],"action_prob":0.4341346323,"action_logp":-0.8344005942,"action_dist_inputs":[0.1329902858,-0.1320112497],"value_targets":71.5292205811} 
+{"eps_id":768633253,"obs":[0.2327432483,0.3331567645,-0.0254353471,-0.3462741971],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.239406392,0.1384056658,-0.0323608294,-0.0617192499],"action_prob":0.8437398076,"action_logp":-0.1699111164,"action_dist_inputs":[0.842042923,-0.8442787528],"value_targets":71.2416381836} +{"eps_id":768633253,"obs":[0.239406392,0.1384056658,-0.0323608294,-0.0617192499],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2421745062,-0.0562377125,-0.0335952155,0.2205805331],"action_prob":0.6081241369,"action_logp":-0.4973762333,"action_dist_inputs":[0.2201619148,-0.2192720771],"value_targets":70.9511489868} +{"eps_id":768633253,"obs":[0.2421745062,-0.0562377125,-0.0335952155,0.2205805331],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2410497516,-0.2508637607,-0.0291836057,0.502479732],"action_prob":0.2419110537,"action_logp":-1.4191851616,"action_dist_inputs":[-0.569099009,0.5731314421],"value_targets":70.6577301025} +{"eps_id":768633253,"obs":[0.2410497516,-0.2508637607,-0.0291836057,0.502479732],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2360324711,-0.0553428717,-0.0191340111,0.2007445246],"action_prob":0.9101893306,"action_logp":-0.0941026434,"action_dist_inputs":[-1.1547076702,1.1612409353],"value_targets":70.3613433838} +{"eps_id":768633253,"obs":[0.2360324711,-0.0553428717,-0.0191340111,0.2007445246],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2349256128,0.140047431,-0.0151191205,-0.0979123414],"action_prob":0.7588787079,"action_logp":-0.2759133279,"action_dist_inputs":[-0.5713045597,0.5752373934],"value_targets":70.061958313} 
+{"eps_id":768633253,"obs":[0.2349256128,0.140047431,-0.0151191205,-0.0979123414],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2377265692,-0.0548545979,-0.0170773678,0.1899624467],"action_prob":0.6272780299,"action_logp":-0.466365397,"action_dist_inputs":[0.2606232464,-0.2599339187],"value_targets":69.7595596313} +{"eps_id":768633253,"obs":[0.2377265692,-0.0548545979,-0.0170773678,0.1899624467],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2366294712,0.1405074447,-0.0132781183,-0.1080584005],"action_prob":0.7495833635,"action_logp":-0.2882377803,"action_dist_inputs":[-0.5462570786,0.5501342416],"value_targets":69.4540939331} +{"eps_id":768633253,"obs":[0.2366294712,0.1405074447,-0.0132781183,-0.1080584005],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2394396216,-0.0544217303,-0.0154392859,0.1804059446],"action_prob":0.6384063959,"action_logp":-0.448780179,"action_dist_inputs":[0.2845480442,-0.2839061618],"value_targets":69.1455535889} +{"eps_id":768633253,"obs":[0.2394396216,-0.0544217303,-0.0154392859,0.1804059446],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2383511811,0.1409177035,-0.0118311672,-0.1171072647],"action_prob":0.7406928539,"action_logp":-0.3001692593,"action_dist_inputs":[-0.5228721499,0.5267005563],"value_targets":68.8338928223} +{"eps_id":768633253,"obs":[0.2383511811,0.1409177035,-0.0118311672,-0.1171072647],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2411695421,-0.054032743,-0.0141733121,0.171819672],"action_prob":0.6484288573,"action_logp":-0.4332029819,"action_dist_inputs":[0.3063704073,-0.3057698309],"value_targets":68.5190811157} 
+{"eps_id":768633253,"obs":[0.2411695421,-0.054032743,-0.0141733121,0.171819672],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2400888801,0.1412891746,-0.010736919,-0.1253005862],"action_prob":0.7320581675,"action_logp":-0.3118953407,"action_dist_inputs":[-0.5006522536,0.5044374466],"value_targets":68.2010955811} +{"eps_id":768633253,"obs":[0.2400888801,0.1412891746,-0.010736919,-0.1253005862],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2429146618,-0.0536773317,-0.0132429302,0.1639757156],"action_prob":0.6576564312,"action_logp":-0.4190726578,"action_dist_inputs":[0.326715678,-0.3261520565],"value_targets":67.8798904419} +{"eps_id":768633253,"obs":[0.2429146618,-0.0536773317,-0.0132429302,0.1639757156],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2418411225,0.1416316628,-0.0099634165,-0.1328554004],"action_prob":0.7235121131,"action_logp":-0.3236379623,"action_dist_inputs":[-0.4791023731,0.482848078],"value_targets":67.5554504395} +{"eps_id":768633253,"obs":[0.2418411225,0.1416316628,-0.0099634165,-0.1328554004],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2446737587,0.3368948996,-0.0126205245,-0.4286649227],"action_prob":0.3336417079,"action_logp":-1.097687602,"action_dist_inputs":[0.3461446464,-0.3456151485],"value_targets":67.227722168} +{"eps_id":768633253,"obs":[0.2446737587,0.3368948996,-0.0126205245,-0.4286649227],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2514116466,0.1419539452,-0.0211938228,-0.1399870962],"action_prob":0.8692592978,"action_logp":-0.1401137859,"action_dist_inputs":[0.9458841681,-0.9485414028],"value_targets":66.8966903687} 
+{"eps_id":768633253,"obs":[0.2514116466,0.1419539452,-0.0211938228,-0.1399870962],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.254250735,-0.0528581515,-0.0239935648,0.1459348947],"action_prob":0.6945742369,"action_logp":-0.3644562066,"action_dist_inputs":[0.4110400677,-0.4105523229],"value_targets":66.5623168945} +{"eps_id":768633253,"obs":[0.254250735,-0.0528581515,-0.0239935648,0.1459348947],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2531935573,0.1425990462,-0.0210748669,-0.1542199254],"action_prob":0.6773030162,"action_logp":-0.3896364868,"action_dist_inputs":[-0.3688868284,0.3725183606],"value_targets":66.2245635986} +{"eps_id":768633253,"obs":[0.2531935573,0.1425990462,-0.0210748669,-0.1542199254],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2560455501,0.3380163014,-0.0241592657,-0.45347628],"action_prob":0.2890827656,"action_logp":-1.2410422564,"action_dist_inputs":[0.4501195252,-0.4497234523],"value_targets":65.883392334} +{"eps_id":768633253,"obs":[0.2560455501,0.3380163014,-0.0241592657,-0.45347628],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2628058791,0.1432441771,-0.0332287923,-0.1685054749],"action_prob":0.8811817765,"action_logp":-0.1264913678,"action_dist_inputs":[1.0003926754,-1.0032764673],"value_targets":65.5387802124} +{"eps_id":768633253,"obs":[0.2628058791,0.1432441771,-0.0332287923,-0.1685054749],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2656707466,-0.051386781,-0.0365989022,0.1135123223],"action_prob":0.7428388596,"action_logp":-0.2972761393,"action_dist_inputs":[0.5305291414,-0.530246973],"value_targets":65.1906890869} 
+{"eps_id":768633253,"obs":[0.2656707466,-0.051386781,-0.0365989022,0.1135123223],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2646430135,0.1442399621,-0.0343286544,-0.1904889494],"action_prob":0.5996443033,"action_logp":-0.511418581,"action_dist_inputs":[-0.2002874315,0.2036958486],"value_targets":64.8390808105} +{"eps_id":768633253,"obs":[0.2646430135,0.1442399621,-0.0343286544,-0.1904889494],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2675278187,0.339835763,-0.0381384343,-0.4938004613],"action_prob":0.2358895689,"action_logp":-1.444391489,"action_dist_inputs":[0.5877292752,-0.5876192451],"value_targets":64.4839172363} +{"eps_id":768633253,"obs":[0.2675278187,0.339835763,-0.0381384343,-0.4938004613],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2743245363,0.1452718824,-0.0480144434,-0.2133768201],"action_prob":0.8942933083,"action_logp":-0.1117214784,"action_dist_inputs":[1.0660358667,-1.0693298578],"value_targets":64.1251678467} +{"eps_id":768633253,"obs":[0.2743245363,0.1452718824,-0.0480144434,-0.2133768201],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2772299647,-0.0491319001,-0.0522819795,0.0637819543],"action_prob":0.7953244448,"action_logp":-0.2290051728,"action_dist_inputs":[0.6786044836,-0.678719461],"value_targets":63.7627983093} +{"eps_id":768633253,"obs":[0.2772299647,-0.0491319001,-0.0522819795,0.0637819543],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2762473524,-0.2434667945,-0.0510063395,0.3395220935],"action_prob":0.5169716477,"action_logp":-0.6597672701,"action_dist_inputs":[0.0354749411,-0.0324377269],"value_targets":63.3967666626} 
+{"eps_id":768633253,"obs":[0.2762473524,-0.2434667945,-0.0510063395,0.3395220935],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2713780105,-0.0476575792,-0.044215899,0.0312008895],"action_prob":0.804692328,"action_logp":-0.2172952443,"action_dist_inputs":[-0.7050719261,0.7108120322],"value_targets":63.0270347595} +{"eps_id":768633253,"obs":[0.2713780105,-0.0476575792,-0.044215899,0.0312008895],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2704248428,-0.2421184778,-0.0435918793,0.3096119165],"action_prob":0.5512169003,"action_logp":-0.5956269503,"action_dist_inputs":[0.1041877866,-0.1014008075],"value_targets":62.6535720825} +{"eps_id":768633253,"obs":[0.2704248428,-0.2421184778,-0.0435918793,0.3096119165],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2655824721,-0.0464034192,-0.0373996422,0.0035062532],"action_prob":0.7864648104,"action_logp":-0.2402073294,"action_dist_inputs":[-0.649083972,0.6546621323],"value_targets":62.2763366699} +{"eps_id":768633253,"obs":[0.2655824721,-0.0464034192,-0.0373996422,0.0035062532],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.264654398,-0.2409695983,-0.0373295173,0.284158498],"action_prob":0.579490602,"action_logp":-0.5456058383,"action_dist_inputs":[0.1616211534,-0.159061417],"value_targets":61.8952865601} +{"eps_id":768633253,"obs":[0.264654398,-0.2409695983,-0.0373295173,0.284158498],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2598350346,-0.0453356653,-0.0316463485,-0.0200602859],"action_prob":0.7693684697,"action_logp":-0.2621853054,"action_dist_inputs":[-0.5996593833,0.6050890684],"value_targets":61.5103912354} 
+{"eps_id":768633253,"obs":[0.2598350346,-0.0453356653,-0.0316463485,-0.0200602859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.258928299,-0.2399898171,-0.0320475511,0.2624722719],"action_prob":0.6027895808,"action_logp":-0.5061871409,"action_dist_inputs":[0.2097274959,-0.2073744237],"value_targets":61.1216087341} +{"eps_id":768633253,"obs":[0.258928299,-0.2399898171,-0.0320475511,0.2624722719],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2541285157,-0.0444254167,-0.0267981067,-0.0401440673],"action_prob":0.7536076307,"action_logp":-0.2828834057,"action_dist_inputs":[-0.5563269258,0.5616198182],"value_targets":60.7288970947} +{"eps_id":768633253,"obs":[0.2541285157,-0.0444254167,-0.0267981067,-0.0401440673],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2532399893,-0.2391530424,-0.0276009887,0.2439647913],"action_prob":0.622046113,"action_logp":-0.4747410417,"action_dist_inputs":[0.2502034009,-0.2480387986],"value_targets":60.3322181702} +{"eps_id":768633253,"obs":[0.2532399893,-0.2391530424,-0.0276009887,0.2439647913],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2484569401,-0.0436479487,-0.0227216929,-0.0572948083],"action_prob":0.739256084,"action_logp":-0.3021108806,"action_dist_inputs":[-0.5184699893,0.5236355662],"value_targets":59.9315338135} +{"eps_id":768633253,"obs":[0.2484569401,-0.0436479487,-0.0227216929,-0.0572948083],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2475839853,-0.2384368628,-0.0238675885,0.2281335443],"action_prob":0.6380630136,"action_logp":-0.4493182302,"action_dist_inputs":[0.2844794393,-0.282487303],"value_targets":59.526802063} 
+{"eps_id":768633253,"obs":[0.2475839853,-0.2384368628,-0.0238675885,0.2281335443],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2428152412,-0.0429821052,-0.0193049181,-0.0719814971],"action_prob":0.7262892723,"action_logp":-0.3198068738,"action_dist_inputs":[-0.4854150414,0.4904617965],"value_targets":59.117980957} +{"eps_id":768633253,"obs":[0.2428152412,-0.0429821052,-0.0193049181,-0.0719814971],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2419556081,-0.2378220558,-0.0207445472,0.2145486623],"action_prob":0.6515036225,"action_logp":-0.4284723401,"action_dist_inputs":[0.3137442172,-0.3119106889],"value_targets":58.7050323486} +{"eps_id":768633253,"obs":[0.2419556081,-0.2378220558,-0.0207445472,0.2145486623],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2371991575,-0.042409759,-0.0164535753,-0.0846051574],"action_prob":0.7146157622,"action_logp":-0.3360102475,"action_dist_inputs":[-0.456486702,0.4614217877],"value_targets":58.2879104614} +{"eps_id":768633253,"obs":[0.2371991575,-0.042409759,-0.0164535753,-0.0846051574],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2363509685,-0.2372920364,-0.0181456786,0.202841565],"action_prob":0.6629068255,"action_logp":-0.411120832,"action_dist_inputs":[0.3389809132,-0.3372942209],"value_targets":57.8665771484} +{"eps_id":768633253,"obs":[0.2363509685,-0.2372920364,-0.0181456786,0.202841565],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2316051275,-0.4321498573,-0.0140888467,0.4897456169],"action_prob":0.2958991826,"action_logp":-1.2177364826,"action_dist_inputs":[-0.4310366511,0.4358661175],"value_targets":57.4409866333} 
+{"eps_id":768633253,"obs":[0.2316051275,-0.4321498573,-0.0140888467,0.4897456169],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2229621261,-0.2368320078,-0.0042939344,0.1926559359],"action_prob":0.8993570209,"action_logp":-0.1060751677,"action_dist_inputs":[-1.0915464163,1.0985543728],"value_targets":57.0110969543} +{"eps_id":768633253,"obs":[0.2229621261,-0.2368320078,-0.0042939344,0.1926559359],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.218225494,-0.0416488871,-0.0004408154,-0.1013784409],"action_prob":0.7171650529,"action_logp":-0.3324492574,"action_dist_inputs":[-0.4628577232,0.4675849974],"value_targets":56.5768661499} +{"eps_id":768633253,"obs":[0.218225494,-0.0416488871,-0.0004408154,-0.1013784409],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2173925042,0.1534793824,-0.0024683843,-0.3942004144],"action_prob":0.3421275318,"action_logp":-1.0725717545,"action_dist_inputs":[0.327655822,-0.3261717558],"value_targets":56.1382484436} +{"eps_id":768633253,"obs":[0.2173925042,0.1534793824,-0.0024683843,-0.3942004144],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2204620987,-0.0416074619,-0.0103523927,-0.1022967547],"action_prob":0.8574698567,"action_logp":-0.1537692398,"action_dist_inputs":[0.8962730765,-0.898159802],"value_targets":55.6952018738} +{"eps_id":768633253,"obs":[0.2204620987,-0.0416074619,-0.0103523927,-0.1022967547],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2196299434,-0.2365795374,-0.0123983277,0.187102139],"action_prob":0.6756530404,"action_logp":-0.3920755982,"action_dist_inputs":[0.3676354885,-0.3662303388],"value_targets":55.2476768494} 
+{"eps_id":768633253,"obs":[0.2196299434,-0.2365795374,-0.0123983277,0.187102139],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2148983628,-0.4315219223,-0.0086562848,0.4758482277],"action_prob":0.3053404391,"action_logp":-1.1863279343,"action_dist_inputs":[-0.408690691,0.4133040011],"value_targets":54.7956352234} +{"eps_id":768633253,"obs":[0.2148983628,-0.4315219223,-0.0086562848,0.4758482277],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2062679231,-0.2362788171,0.0008606797,0.1804496199],"action_prob":0.8976635933,"action_logp":-0.1079598889,"action_dist_inputs":[-1.0823349953,1.0891952515],"value_targets":54.3390235901} +{"eps_id":768633253,"obs":[0.2062679231,-0.2362788171,0.0008606797,0.1804496199],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2015423477,-0.4314130843,0.0044696722,0.4734039307],"action_prob":0.2882573605,"action_logp":-1.2439016104,"action_dist_inputs":[-0.4496671557,0.4541956782],"value_targets":53.8778038025} +{"eps_id":768633253,"obs":[0.2015423477,-0.4314130843,0.0044696722,0.4734039307],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1929140836,-0.2363545299,0.0139377508,0.1821331233],"action_prob":0.9044085145,"action_logp":-0.1004741043,"action_dist_inputs":[-1.1201874018,1.1270104647],"value_targets":53.4119224548} +{"eps_id":768633253,"obs":[0.1929140836,-0.2363545299,0.0139377508,0.1821331233],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1881869882,-0.0414347574,0.0175804142,-0.1061205566],"action_prob":0.738755703,"action_logp":-0.3027879894,"action_dist_inputs":[-0.5175080895,0.5220032334],"value_targets":52.9413375854} 
+{"eps_id":768633253,"obs":[0.1881869882,-0.0414347574,0.0175804142,-0.1061205566],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.18735829,0.1534309089,0.0154580027,-0.3932054937],"action_prob":0.3669238091,"action_logp":-1.0026010275,"action_dist_inputs":[0.2733632624,-0.2720731497],"value_targets":52.4659957886} +{"eps_id":768633253,"obs":[0.18735829,0.1534309089,0.0154580027,-0.3932054937],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1904269159,-0.0419069566,0.0075938925,-0.095689252],"action_prob":0.8507139683,"action_logp":-0.1616793126,"action_dist_inputs":[0.8691171408,-0.8710947037],"value_targets":51.9858551025} +{"eps_id":768633253,"obs":[0.1904269159,-0.0419069566,0.0075938925,-0.095689252],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1895887703,-0.2371369153,0.005680107,0.1993798316],"action_prob":0.6374399066,"action_logp":-0.4502952397,"action_dist_inputs":[0.2827932537,-0.2814765573],"value_targets":51.5008621216} +{"eps_id":768633253,"obs":[0.1895887703,-0.2371369153,0.005680107,0.1993798316],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1848460436,-0.0420966707,0.0096677039,-0.0915058851],"action_prob":0.7466086745,"action_logp":-0.2922141254,"action_dist_inputs":[-0.5380414724,0.5425644517],"value_targets":51.0109710693} +{"eps_id":768633253,"obs":[0.1848460436,-0.0420966707,0.0096677039,-0.0915058851],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1840040982,0.1528853923,0.0078375861,-0.3811230361],"action_prob":0.3721501827,"action_logp":-0.988457799,"action_dist_inputs":[0.2621588409,-0.2608445883],"value_targets":50.5161323547} 
+{"eps_id":768633253,"obs":[0.1840040982,0.1528853923,0.0078375861,-0.3811230361],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1870618165,-0.042346973,0.0002151254,-0.085979268],"action_prob":0.8510087729,"action_logp":-0.1613328308,"action_dist_inputs":[0.8702722192,-0.8722627759],"value_targets":50.0162963867} +{"eps_id":768633253,"obs":[0.1870618165,-0.042346973,0.0002151254,-0.085979268],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1862148792,-0.2374720126,-0.0015044599,0.2067715228],"action_prob":0.6381177902,"action_logp":-0.4492324293,"action_dist_inputs":[0.2842527032,-0.2829511464],"value_targets":49.5114097595} +{"eps_id":768633253,"obs":[0.1862148792,-0.2374720126,-0.0015044599,0.2067715228],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.181465432,-0.0423285738,0.0026309707,-0.0863856003],"action_prob":0.743917644,"action_logp":-0.295824945,"action_dist_inputs":[-0.5309684277,0.5354628563],"value_targets":49.0014266968} +{"eps_id":768633253,"obs":[0.181465432,-0.0423285738,0.0026309707,-0.0863856003],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1806188673,0.1527555585,0.0009032586,-0.3782373071],"action_prob":0.3656709194,"action_logp":-1.0060214996,"action_dist_inputs":[0.2760473192,-0.2747865915],"value_targets":48.486289978} +{"eps_id":768633253,"obs":[0.1806188673,0.1527555585,0.0009032586,-0.3782373071],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1836739779,0.3478646874,-0.0066614873,-0.670635283],"action_prob":0.1455991864,"action_logp":-1.9268977642,"action_dist_inputs":[0.8837321401,-0.8858108521],"value_targets":47.9659461975} 
+{"eps_id":768633253,"obs":[0.1836739779,0.3478646874,-0.0066614873,-0.670635283],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1906312704,0.1528359652,-0.0200741924,-0.3800571859],"action_prob":0.9168289304,"action_logp":-0.0868343636,"action_dist_inputs":[1.1974362135,-1.2025856972],"value_targets":47.4403495789} +{"eps_id":768633253,"obs":[0.1906312704,0.1528359652,-0.0200741924,-0.3800571859],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1936879903,-0.0419952534,-0.0276753362,-0.0937706456],"action_prob":0.8651085496,"action_logp":-0.1449003071,"action_dist_inputs":[0.9280744195,-0.9303104281],"value_targets":46.9094467163} +{"eps_id":768633253,"obs":[0.1936879903,-0.0419952534,-0.0276753362,-0.0937706456],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1928480864,-0.2367098331,-0.0295507498,0.1900538653],"action_prob":0.6959653497,"action_logp":-0.3624554276,"action_dist_inputs":[0.4145939946,-0.4135642946],"value_targets":46.3731765747} +{"eps_id":768633253,"obs":[0.1928480864,-0.2367098331,-0.0295507498,0.1900538653],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1881138831,-0.0411778539,-0.0257496722,-0.1118027046],"action_prob":0.667481184,"action_logp":-0.4042440653,"action_dist_inputs":[-0.34627226,0.3505426347],"value_targets":45.8314933777} +{"eps_id":768633253,"obs":[0.1881138831,-0.0411778539,-0.0257496722,-0.1118027046],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1872903258,0.1543034315,-0.0279857256,-0.412496984],"action_prob":0.2865883708,"action_logp":-1.2497082949,"action_dist_inputs":[0.4564117789,-0.4555998743],"value_targets":45.2843360901} 
+{"eps_id":768633253,"obs":[0.1872903258,0.1543034315,-0.0279857256,-0.412496984],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1903764009,-0.0404108688,-0.036235664,-0.128766641],"action_prob":0.8777924776,"action_logp":-0.1303450763,"action_dist_inputs":[0.9845092893,-0.9871798754],"value_targets":44.7316513062} +{"eps_id":768633253,"obs":[0.1903764009,-0.0404108688,-0.036235664,-0.128766641],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1895681769,-0.2349954993,-0.0388109982,0.1522677541],"action_prob":0.7441959977,"action_logp":-0.2954508066,"action_dist_inputs":[0.5342227817,-0.5336702466],"value_targets":44.1733856201} +{"eps_id":768633253,"obs":[0.1895681769,-0.2349954993,-0.0388109982,0.1522677541],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1848682612,-0.4295408428,-0.0357656442,0.4324584603],"action_prob":0.4115735292,"action_logp":-0.8877675533,"action_dist_inputs":[-0.1767838448,0.1806803048],"value_targets":43.6094818115} +{"eps_id":768633253,"obs":[0.1848682612,-0.4295408428,-0.0357656442,0.4324584603],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1762774438,-0.2339312285,-0.0271164738,0.1287188083],"action_prob":0.8567263484,"action_logp":-0.154636696,"action_dist_inputs":[-0.8910185695,0.8973436952],"value_targets":43.0398788452} +{"eps_id":768633253,"obs":[0.1762774438,-0.2339312285,-0.0271164738,0.1287188083],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1715988219,-0.4286544621,-0.0245420989,0.4127250016],"action_prob":0.4235047996,"action_logp":-0.859190464,"action_dist_inputs":[-0.1523664296,0.1560357362],"value_targets":42.4645233154} 
+{"eps_id":768633253,"obs":[0.1715988219,-0.4286544621,-0.0245420989,0.4127250016],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1630257368,-0.6234200597,-0.0162875988,0.6975708008],"action_prob":0.14497298,"action_logp":-1.9312078953,"action_dist_inputs":[-0.8841969371,0.8903887868],"value_targets":41.8833580017} +{"eps_id":768633253,"obs":[0.1630257368,-0.6234200597,-0.0162875988,0.6975708008],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1505573392,-0.4280760884,-0.0023361826,0.3998053372],"action_prob":0.936099112,"action_logp":-0.0660339072,"action_dist_inputs":[-1.3382441998,1.3461434841],"value_targets":41.2963218689} +{"eps_id":768633253,"obs":[0.1505573392,-0.4280760884,-0.0023361826,0.3998053372],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1419958174,-0.2329210788,0.0056599244,0.1063867956],"action_prob":0.8676917553,"action_logp":-0.1419187337,"action_dist_inputs":[-0.9373143315,0.9433876872],"value_targets":40.7033538818} +{"eps_id":768633253,"obs":[0.1419958174,-0.2329210788,0.0056599244,0.1063867956],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1373373866,-0.4281236827,0.0077876602,0.4008500278],"action_prob":0.3851565123,"action_logp":-0.9541054964,"action_dist_inputs":[-0.2321564704,0.2355615348],"value_targets":40.1044006348} +{"eps_id":768633253,"obs":[0.1373373866,-0.4281236827,0.0077876602,0.4008500278],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1287749261,-0.2331130505,0.0158046614,0.1106324941],"action_prob":0.8768171668,"action_logp":-0.1314567924,"action_dist_inputs":[-0.9783039093,0.9843252897],"value_targets":39.4993934631} 
+{"eps_id":768633253,"obs":[0.1287749261,-0.2331130505,0.0158046614,0.1106324941],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1241126582,-0.428457886,0.0180173106,0.408259511],"action_prob":0.3550948799,"action_logp":-1.0353702307,"action_dist_inputs":[-0.2966674864,0.3000506461],"value_targets":38.8882751465} +{"eps_id":768633253,"obs":[0.1241126582,-0.428457886,0.0180173106,0.408259511],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1155434996,-0.2335959524,0.0261825006,0.1213108748],"action_prob":0.8880156875,"action_logp":-0.118765898,"action_dist_inputs":[-1.0323129892,1.0383174419],"value_targets":38.2709846497} +{"eps_id":768633253,"obs":[0.1155434996,-0.2335959524,0.0261825006,0.1213108748],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1108715832,-0.4290830493,0.0286087189,0.4221377969],"action_prob":0.3157657087,"action_logp":-1.1527547836,"action_dist_inputs":[-0.3849458396,0.3883541524],"value_targets":37.6474609375} +{"eps_id":768633253,"obs":[0.1108715832,-0.4290830493,0.0286087189,0.4221377969],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1022899225,-0.2343778461,0.0370514728,0.1386093199],"action_prob":0.9002464414,"action_logp":-0.1050867289,"action_dist_inputs":[-1.0969674587,1.1029978991],"value_targets":37.0176353455} +{"eps_id":768633253,"obs":[0.1022899225,-0.2343778461,0.0370514728,0.1386093199],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.09760236,-0.0398056284,0.0398236588,-0.1421580613],"action_prob":0.7306232452,"action_logp":-0.313857317,"action_dist_inputs":[-0.4971531928,0.5006337762],"value_targets":36.3814506531} 
+{"eps_id":768633253,"obs":[0.09760236,-0.0398056284,0.0398236588,-0.1421580613],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0968062505,0.1547240168,0.0369804986,-0.422016114],"action_prob":0.3563928604,"action_logp":-1.0317215919,"action_dist_inputs":[0.2956230938,-0.2954316437],"value_targets":35.7388381958} +{"eps_id":768633253,"obs":[0.0968062505,0.1547240168,0.0369804986,-0.422016114],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.09990073,-0.0409018137,0.0285401773,-0.1179082096],"action_prob":0.858246088,"action_logp":-0.1528643966,"action_dist_inputs":[0.8989346027,-0.9018638134],"value_targets":35.0897369385} +{"eps_id":768633253,"obs":[0.09990073,-0.0409018137,0.0285401773,-0.1179082096],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0990826935,-0.2364208102,0.0261820126,0.1836405247],"action_prob":0.6306652427,"action_logp":-0.4609801173,"action_dist_inputs":[0.2677033544,-0.2673684061],"value_targets":34.4340782166} +{"eps_id":768633253,"obs":[0.0990826935,-0.2364208102,0.0261820126,0.1836405247],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0943542793,-0.0416830815,0.0298548229,-0.1006693244],"action_prob":0.7683019042,"action_logp":-0.2635725439,"action_dist_inputs":[-0.5975282192,0.601219058],"value_targets":33.7717971802} +{"eps_id":768633253,"obs":[0.0943542793,-0.0416830815,0.0298548229,-0.1006693244],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.093520619,0.1529985666,0.0278414357,-0.3837856352],"action_prob":0.3986935318,"action_logp":-0.9195622802,"action_dist_inputs":[0.2056790739,-0.2052327096],"value_targets":33.1028251648} 
+{"eps_id":768633253,"obs":[0.093520619,0.1529985666,0.0278414357,-0.3837856352],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0965805873,-0.042507384,0.0201657228,-0.0824560598],"action_prob":0.848310411,"action_logp":-0.1645086855,"action_dist_inputs":[0.8593354821,-0.8620744348],"value_targets":32.4270935059} +{"eps_id":768633253,"obs":[0.0965805873,-0.042507384,0.0201657228,-0.0824560598],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0957304388,-0.2379125208,0.018516602,0.2165203691],"action_prob":0.5932189822,"action_logp":-0.5221916437,"action_dist_inputs":[0.1889187992,-0.188370049],"value_targets":31.7445411682} +{"eps_id":768633253,"obs":[0.0957304388,-0.2379125208,0.018516602,0.2165203691],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0909721926,-0.0430600978,0.0228470098,-0.0702645853],"action_prob":0.7925816178,"action_logp":-0.2324597687,"action_dist_inputs":[-0.6683632135,0.6721946001],"value_targets":31.0550918579} +{"eps_id":768633253,"obs":[0.0909721926,-0.0430600978,0.0228470098,-0.0702645853],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0901109874,0.151726976,0.0214417186,-0.3556525111],"action_prob":0.4330899715,"action_logp":-0.8368097544,"action_dist_inputs":[0.1349391341,-0.1343159527],"value_targets":30.3586788177} +{"eps_id":768633253,"obs":[0.0901109874,0.151726976,0.0214417186,-0.3556525111],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0931455269,-0.0436931662,0.0143286679,-0.0562862903],"action_prob":0.8395029306,"action_logp":-0.1749453098,"action_dist_inputs":[0.8259658813,-0.8285687566],"value_targets":29.6552295685} 
+{"eps_id":768633253,"obs":[0.0931455269,-0.0436931662,0.0143286679,-0.0562862903],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0922716632,-0.2390176058,0.013202942,0.2408827692],"action_prob":0.5628226399,"action_logp":-0.5747907162,"action_dist_inputs":[0.1266614348,-0.1259641498],"value_targets":28.9446773529} +{"eps_id":768633253,"obs":[0.0922716632,-0.2390176058,0.013202942,0.2408827692],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0874913111,-0.0440867171,0.0180205964,-0.0476065166],"action_prob":0.8090415597,"action_logp":-0.2119050026,"action_dist_inputs":[-0.7199338078,0.7238606215],"value_targets":28.2269458771} +{"eps_id":768633253,"obs":[0.0874913111,-0.0440867171,0.0180205964,-0.0476065166],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0866095796,0.1507722586,0.0170684662,-0.3345497549],"action_prob":0.4609369934,"action_logp":-0.7744939327,"action_dist_inputs":[0.0786589533,-0.0779121146],"value_targets":27.5019664764} +{"eps_id":768633253,"obs":[0.0866095796,0.1507722586,0.0170684662,-0.3345497549],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0896250233,0.345647186,0.0103774713,-0.6218016744],"action_prob":0.1682876796,"action_logp":-1.7820804119,"action_dist_inputs":[0.7976531386,-0.8001586199],"value_targets":26.7696628571} +{"eps_id":768633253,"obs":[0.0896250233,0.345647186,0.0103774713,-0.6218016744],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.09653797,0.1503818631,-0.0020585617,-0.325868547],"action_prob":0.9160636067,"action_logp":-0.087669462,"action_dist_inputs":[1.1923388243,-1.1976883411],"value_targets":26.0299625397} 
+{"eps_id":768633253,"obs":[0.09653797,0.1503818631,-0.0020585617,-0.325868547],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0995456055,-0.0447107181,-0.0085759321,-0.0338355117],"action_prob":0.8417295218,"action_logp":-0.1722965837,"action_dist_inputs":[0.8343153596,-0.8368374109],"value_targets":25.2827911377} +{"eps_id":768633253,"obs":[0.0995456055,-0.0447107181,-0.0085759321,-0.0338355117],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0986513868,0.1505331546,-0.0092526432,-0.3292118609],"action_prob":0.422647953,"action_logp":-0.8612157106,"action_dist_inputs":[0.1563396752,-0.1555728912],"value_targets":24.5280704498} +{"eps_id":768633253,"obs":[0.0986513868,0.1505331546,-0.0092526432,-0.3292118609],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1016620547,-0.0444558598,-0.0158368796,-0.03946108],"action_prob":0.8485616446,"action_logp":-0.1642125547,"action_dist_inputs":[0.8603796959,-0.8629844189],"value_targets":23.7657279968} +{"eps_id":768633253,"obs":[0.1016620547,-0.0444558598,-0.0158368796,-0.03946108],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1007729322,-0.2393471748,-0.016626101,0.2481833845],"action_prob":0.6026453972,"action_logp":-0.5064263344,"action_dist_inputs":[0.2085861564,-0.2079137266],"value_targets":22.9956855774} +{"eps_id":768633253,"obs":[0.1007729322,-0.2393471748,-0.016626101,0.2481833845],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0959859937,-0.0439917743,-0.0116624339,-0.0496970639],"action_prob":0.7745229602,"action_logp":-0.2555079758,"action_dist_inputs":[-0.6150892973,0.6189396977],"value_targets":22.2178649902} 
+{"eps_id":768633253,"obs":[0.0959859937,-0.0439917743,-0.0116624339,-0.0496970639],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0951061547,-0.2389445752,-0.0126563748,0.2392835617],"action_prob":0.609852016,"action_logp":-0.4945389926,"action_dist_inputs":[0.2236225009,-0.2230676562],"value_targets":21.4321861267} +{"eps_id":768633253,"obs":[0.0951061547,-0.2389445752,-0.0126563748,0.2392835617],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0903272629,-0.4338834584,-0.0078707039,0.5279476047],"action_prob":0.2289353162,"action_logp":-1.4743157625,"action_dist_inputs":[-0.6052848697,0.6090479493],"value_targets":20.6385707855} +{"eps_id":768633253,"obs":[0.0903272629,-0.4338834584,-0.0078707039,0.5279476047],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.081649594,-0.2386516482,0.0026882482,0.2327950001],"action_prob":0.9152547121,"action_logp":-0.088552855,"action_dist_inputs":[-1.1866818666,1.1928700209],"value_targets":19.8369407654} +{"eps_id":768633253,"obs":[0.081649594,-0.2386516482,0.0026882482,0.2327950001],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0768765658,-0.04356822,0.0073441481,-0.0590387471],"action_prob":0.7874957323,"action_logp":-0.2388972938,"action_dist_inputs":[-0.6531009078,0.6567951441],"value_targets":19.0272140503} +{"eps_id":768633253,"obs":[0.0768765658,-0.04356822,0.0073441481,-0.0590387471],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0760051981,-0.2387946993,0.0061633731,0.2359522283],"action_prob":0.5827748179,"action_logp":-0.5399543643,"action_dist_inputs":[0.1673059314,-0.1668690145],"value_targets":18.2093067169} 
+{"eps_id":768633253,"obs":[0.0760051981,-0.2387946993,0.0061633731,0.2359522283],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0712293014,-0.0437613428,0.0108824177,-0.0547802225],"action_prob":0.7957285643,"action_logp":-0.2284971476,"action_dist_inputs":[-0.6780616045,0.6817467809],"value_targets":17.3831367493} +{"eps_id":768633253,"obs":[0.0712293014,-0.0437613428,0.0108824177,-0.0547802225],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0703540742,-0.2390376329,0.0097868135,0.2413162291],"action_prob":0.5673294067,"action_logp":-0.5668151975,"action_dist_inputs":[0.1357052922,-0.1352580935],"value_targets":16.5486240387} +{"eps_id":768633253,"obs":[0.0703540742,-0.2390376329,0.0097868135,0.2413162291],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0655733272,-0.4342980087,0.0146131376,0.5370700359],"action_prob":0.1941992939,"action_logp":-1.6388703585,"action_dist_inputs":[-0.709629178,0.713322401],"value_targets":15.7056808472} +{"eps_id":768633253,"obs":[0.0655733272,-0.4342980087,0.0146131376,0.5370700359],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0568873659,-0.239384532,0.0253545381,0.2490271479],"action_prob":0.9243607521,"action_logp":-0.0786528587,"action_dist_inputs":[-1.2484489679,1.2546781301],"value_targets":14.8542232513} +{"eps_id":768633253,"obs":[0.0568873659,-0.239384532,0.0253545381,0.2490271479],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0520996749,-0.0446336679,0.0303350817,-0.0355518088],"action_prob":0.8308820724,"action_logp":-0.1852673888,"action_dist_inputs":[-0.7940798402,0.7978120446],"value_targets":13.9941644669} 
+{"eps_id":768633253,"obs":[0.0520996749,-0.0446336679,0.0303350817,-0.0355518088],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0512070023,0.1500404328,0.0296240449,-0.3185114563],"action_prob":0.5140689611,"action_logp":-0.6653978825,"action_dist_inputs":[-0.0278531052,0.0284375325],"value_targets":13.125418663} +{"eps_id":768633253,"obs":[0.0512070023,0.1500404328,0.0296240449,-0.3185114563],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0542078093,-0.0454906374,0.0232538171,-0.0166353527],"action_prob":0.8115193844,"action_logp":-0.2088469714,"action_dist_inputs":[0.7286452055,-0.7312679887],"value_targets":12.2478981018} +{"eps_id":768633253,"obs":[0.0542078093,-0.0454906374,0.0232538171,-0.0166353527],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0532979965,-0.2409382164,0.0229211096,0.2832927704],"action_prob":0.4691522717,"action_logp":-0.7568278909,"action_dist_inputs":[-0.0614172332,0.0621304773],"value_targets":11.3615131378} +{"eps_id":768633253,"obs":[0.0532979965,-0.2409382164,0.0229211096,0.2832927704],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0484792329,-0.0461505726,0.0285869651,-0.0020737408],"action_prob":0.852193296,"action_logp":-0.1599419266,"action_dist_inputs":[-0.8739913702,0.8779165745],"value_targets":10.4661741257} +{"eps_id":768633253,"obs":[0.0484792329,-0.0461505726,0.0285869651,-0.0020737408],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0475562215,-0.2416705936,0.0285454895,0.2994898856],"action_prob":0.4297977984,"action_logp":-0.8444404006,"action_dist_inputs":[-0.1409257203,0.1417505443],"value_targets":9.5617923737} 
+{"eps_id":768633253,"obs":[0.0475562215,-0.2416705936,0.0285454895,0.2994898856],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0427228101,-0.0469669066,0.0345352888,0.0159445759],"action_prob":0.866032958,"action_logp":-0.1438323408,"action_dist_inputs":[-0.9311466813,0.9351822138],"value_targets":8.6482753754} +{"eps_id":768633253,"obs":[0.0427228101,-0.0469669066,0.0345352888,0.0159445759],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0417834707,-0.2425666749,0.0348541774,0.3193206489],"action_prob":0.3841978312,"action_logp":-0.9565976858,"action_dist_inputs":[-0.2353984267,0.2363697141],"value_targets":7.7255306244} +{"eps_id":768633253,"obs":[0.0417834707,-0.2425666749,0.0348541774,0.3193206489],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0369321369,-0.0479580089,0.0412405916,0.0378299803],"action_prob":0.879761219,"action_logp":-0.1281047165,"action_dist_inputs":[-0.9929935336,0.9971776605],"value_targets":6.7934651375} +{"eps_id":768633253,"obs":[0.0369321369,-0.0479580089,0.0412405916,0.0378299803],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0359729789,0.1465490311,0.0419971906,-0.2415612042],"action_prob":0.6662529111,"action_logp":-0.4060859084,"action_dist_inputs":[-0.3450632393,0.3462227583],"value_targets":5.8519849777} +{"eps_id":768633253,"obs":[0.0359729789,0.1465490311,0.0419971906,-0.2415612042],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0389039591,-0.0491468869,0.0371659659,0.0640673786],"action_prob":0.7190217376,"action_logp":-0.3298636973,"action_dist_inputs":[0.468775034,-0.4708391726],"value_targets":4.9009947777} 
+{"eps_id":768633253,"obs":[0.0389039591,-0.0491468869,0.0371659659,0.0640673786],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0379210189,0.1454230398,0.0384473167,-0.2166616768],"action_prob":0.6968421936,"action_logp":-0.3611962795,"action_dist_inputs":[-0.4154644907,0.4168409109],"value_targets":3.9403989315} +{"eps_id":768633253,"obs":[0.0379210189,0.1454230398,0.0384473167,-0.2166616768],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0408294797,-0.0502268523,0.0341140814,0.0878970027],"action_prob":0.6925555468,"action_logp":-0.3673668504,"action_dist_inputs":[0.405107826,-0.4069859684],"value_targets":2.970099926} +{"eps_id":768633253,"obs":[0.0408294797,-0.0502268523,0.0341140814,0.0878970027],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.039824944,0.1443899274,0.0358720236,-0.1938307136],"action_prob":0.723489821,"action_logp":-0.3236688375,"action_dist_inputs":[-0.4801285565,0.4817101359],"value_targets":1.9900000095} +{"eps_id":768633253,"obs":[0.039824944,0.1443899274,0.0358720236,-0.1938307136],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[0.0427127406,0.3389808536,0.0319954082,-0.4749852121],"action_prob":0.3352512121,"action_logp":-1.092875123,"action_dist_inputs":[0.3414151073,-0.3431140482],"value_targets":1.0} +{"eps_id":894432818,"obs":[-0.0083768768,-0.0138207199,-0.0309758633,-0.0052513955],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0086532915,0.1817314625,-0.0310808923,-0.3075444102],"action_prob":0.4386285841,"action_logp":-0.8241022825,"action_dist_inputs":[0.1231386214,-0.1235911548],"value_targets":86.6020355225} 
+{"eps_id":894432818,"obs":[-0.0086532915,0.1817314625,-0.0310808923,-0.3075444102],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0050186622,-0.0129341446,-0.0372317806,-0.0248233303],"action_prob":0.858453393,"action_logp":-0.1526229233,"action_dist_inputs":[0.8993439078,-0.9031592011],"value_targets":86.4666976929} +{"eps_id":894432818,"obs":[-0.0050186622,-0.0129341446,-0.0372317806,-0.0248233303],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.005277345,-0.2075029314,-0.0377282463,0.2558839619],"action_prob":0.6098353267,"action_logp":-0.4945663214,"action_dist_inputs":[0.2229846567,-0.2236354202],"value_targets":86.3300018311} +{"eps_id":894432818,"obs":[-0.005277345,-0.2075029314,-0.0377282463,0.2558839619],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0094274031,-0.4020664692,-0.0326105654,0.536432147],"action_prob":0.2363304943,"action_logp":-1.4425240755,"action_dist_inputs":[-0.5851873755,0.5877165198],"value_targets":86.1919174194} +{"eps_id":894432818,"obs":[-0.0094274031,-0.4020664692,-0.0326105654,0.536432147],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0174687337,-0.2065015286,-0.0218819249,0.2336548716],"action_prob":0.9061444998,"action_logp":-0.098556526,"action_dist_inputs":[-1.1311529875,1.1362891197],"value_targets":86.052444458} +{"eps_id":894432818,"obs":[-0.0174687337,-0.2065015286,-0.0218819249,0.2336548716],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0215987638,-0.0110738724,-0.0172088277,-0.0658492073],"action_prob":0.7635598779,"action_logp":-0.2697637081,"action_dist_inputs":[-0.5849666595,0.5873299241],"value_targets":85.9115600586} 
+{"eps_id":894432818,"obs":[-0.0215987638,-0.0110738724,-0.0172088277,-0.0658492073],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0218202416,-0.2059449255,-0.0185258109,0.2213549465],"action_prob":0.6317048073,"action_logp":-0.4593330324,"action_dist_inputs":[0.2692560852,-0.2702814639],"value_targets":85.7692489624} +{"eps_id":894432818,"obs":[-0.0218202416,-0.2059449255,-0.0185258109,0.2213549465],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0259391386,-0.010563137,-0.0140987122,-0.0771137252],"action_prob":0.7550463676,"action_logp":-0.2809761465,"action_dist_inputs":[-0.5617282987,0.5639817715],"value_targets":85.62550354} +{"eps_id":894432818,"obs":[-0.0259391386,-0.010563137,-0.0140987122,-0.0771137252],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0261504017,0.1847580671,-0.0156409871,-0.3742112815],"action_prob":0.3573666811,"action_logp":-1.0289928913,"action_dist_inputs":[0.2928320467,-0.2939797938],"value_targets":85.4803085327} +{"eps_id":894432818,"obs":[-0.0261504017,0.1847580671,-0.0156409871,-0.3742112815],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0224552415,-0.0101382602,-0.0231252126,-0.0865008235],"action_prob":0.8783249855,"action_logp":-0.1297385991,"action_dist_inputs":[0.9861387014,-0.9905247688],"value_targets":85.3336486816} +{"eps_id":894432818,"obs":[-0.0224552415,-0.0101382602,-0.0231252126,-0.0865008235],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0226580054,-0.2049212307,-0.0248552281,0.1987973005],"action_prob":0.6754407883,"action_logp":-0.3923897743,"action_dist_inputs":[0.36580652,-0.3670911193],"value_targets":85.1855010986} 
+{"eps_id":894432818,"obs":[-0.0226580054,-0.2049212307,-0.0248552281,0.1987973005],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.02675643,-0.3996790349,-0.0208792835,0.4835370779],"action_prob":0.2835510969,"action_logp":-1.2603629827,"action_dist_inputs":[-0.4624429345,0.4644716978],"value_targets":85.0358581543} +{"eps_id":894432818,"obs":[-0.02675643,-0.3996790349,-0.0208792835,0.4835370779],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0347500108,-0.2042687088,-0.0112085417,0.1843474358],"action_prob":0.895634234,"action_logp":-0.1102231517,"action_dist_inputs":[-1.0724414587,1.0771896839],"value_targets":84.8847045898} +{"eps_id":894432818,"obs":[-0.0347500108,-0.2042687088,-0.0112085417,0.1843474358],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.038835384,-0.008988196,-0.007521593,-0.1118501872],"action_prob":0.7223892212,"action_logp":-0.3251912296,"action_dist_inputs":[-0.4772229493,0.4791209102],"value_targets":84.7320251465} +{"eps_id":894432818,"obs":[-0.038835384,-0.008988196,-0.007521593,-0.1118501872],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0390151478,-0.2040015608,-0.0097585963,0.1784502715],"action_prob":0.6805673838,"action_logp":-0.3848284781,"action_dist_inputs":[0.3774155378,-0.378964901],"value_targets":84.5778045654} +{"eps_id":894432818,"obs":[-0.0390151478,-0.2040015608,-0.0097585963,0.1784502715],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0430951789,-0.0087413341,-0.0061895913,-0.1172951162],"action_prob":0.7173599601,"action_logp":-0.3321775198,"action_dist_inputs":[-0.464792043,0.4666117132],"value_targets":84.4220275879} 
+{"eps_id":894432818,"obs":[-0.0430951789,-0.0087413341,-0.0061895913,-0.1172951162],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0432700068,-0.2037740499,-0.0085354932,0.1734286398],"action_prob":0.6854942441,"action_logp":-0.3776152134,"action_dist_inputs":[0.3887526393,-0.3903850913],"value_targets":84.2646713257} +{"eps_id":894432818,"obs":[-0.0432700068,-0.2037740499,-0.0085354932,0.1734286398],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0473454893,-0.0085309902,-0.005066921,-0.121934697],"action_prob":0.7130063176,"action_logp":-0.338264972,"action_dist_inputs":[-0.4541420937,0.4558882713],"value_targets":84.1057281494} +{"eps_id":894432818,"obs":[-0.0473454893,-0.0085309902,-0.005066921,-0.121934697],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0475161076,-0.2035799772,-0.0075056148,0.1691453606],"action_prob":0.689627707,"action_logp":-0.3716033697,"action_dist_inputs":[0.398334831,-0.4000445306],"value_targets":83.9451828003} +{"eps_id":894432818,"obs":[-0.0475161076,-0.2035799772,-0.0075056148,0.1691453606],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0515877083,-0.0083514079,-0.0041227075,-0.1258958727],"action_prob":0.7092368603,"action_logp":-0.343565762,"action_dist_inputs":[-0.445001781,0.4466786087],"value_targets":83.7830123901} +{"eps_id":894432818,"obs":[-0.0515877083,-0.0083514079,-0.0041227075,-0.1258958727],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0517547354,-0.2034140527,-0.0066406252,0.1654835343],"action_prob":0.6931109428,"action_logp":-0.3665651977,"action_dist_inputs":[0.4064609408,-0.4082427621],"value_targets":83.6192016602} 
+{"eps_id":894432818,"obs":[-0.0517547354,-0.2034140527,-0.0066406252,0.1654835343],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0558230169,-0.0081976783,-0.0033309546,-0.1292869002],"action_prob":0.7059667706,"action_logp":-0.348187089,"action_dist_inputs":[-0.4371322691,0.4387432039],"value_targets":83.453742981} +{"eps_id":894432818,"obs":[-0.0558230169,-0.0081976783,-0.0033309546,-0.1292869002],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0559869707,-0.2032717615,-0.0059166928,0.1623432934],"action_prob":0.69606179,"action_logp":-0.3623168766,"action_dist_inputs":[0.4133822322,-0.4152318537],"value_targets":83.286605835} +{"eps_id":894432818,"obs":[-0.0559869707,-0.2032717615,-0.0059166928,0.1623432934],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.060052406,-0.3983085155,-0.0026698268,0.453153789],"action_prob":0.2968809307,"action_logp":-1.2144241333,"action_dist_inputs":[-0.4303236008,0.4318715632],"value_targets":83.1177825928} +{"eps_id":894432818,"obs":[-0.060052406,-0.3983085155,-0.0026698268,0.453153789],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.068018578,-0.2031489015,0.0063932491,0.1596305072],"action_prob":0.893343091,"action_logp":-0.1127845421,"action_dist_inputs":[-1.0604716539,1.0648819208],"value_targets":82.9472579956} +{"eps_id":894432818,"obs":[-0.068018578,-0.2031489015,0.0063932491,0.1596305072],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.072081551,-0.3983618021,0.0095858593,0.4543234706],"action_prob":0.2778360546,"action_logp":-1.2807240486,"action_dist_inputs":[-0.4768620431,0.4783588052],"value_targets":82.7750091553} 
+{"eps_id":894432818,"obs":[-0.072081551,-0.3983618021,0.0095858593,0.4543234706],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0800487921,-0.5936179757,0.0186723284,0.7500124574],"action_prob":0.10106574,"action_logp":-2.2919840813,"action_dist_inputs":[-1.0905098915,1.0949287415],"value_targets":82.601020813} +{"eps_id":894432818,"obs":[-0.0800487921,-0.5936179757,0.0186723284,0.7500124574],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0919211507,-0.398758471,0.0336725786,0.4632635117],"action_prob":0.9411511421,"action_logp":-0.0606515482,"action_dist_inputs":[-1.3826005459,1.3895306587],"value_targets":82.4252700806} +{"eps_id":894432818,"obs":[-0.0919211507,-0.398758471,0.0336725786,0.4632635117],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0998963192,-0.2041281611,0.0429378487,0.1813814193],"action_prob":0.9096679091,"action_logp":-0.0946756974,"action_dist_inputs":[-1.1525268555,1.1570594311],"value_targets":82.2477493286} +{"eps_id":894432818,"obs":[-0.0998963192,-0.2041281611,0.0429378487,0.1813814193],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1039788797,-0.009646087,0.0465654768,-0.0974529684],"action_prob":0.7988623977,"action_logp":-0.224566564,"action_dist_inputs":[-0.6887844205,0.6904149055],"value_targets":82.0684280396} +{"eps_id":894432818,"obs":[-0.1039788797,-0.009646087,0.0465654768,-0.0974529684],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1041718051,0.1847786158,0.0446164161,-0.3750886023],"action_prob":0.4679659009,"action_logp":-0.7593598366,"action_dist_inputs":[0.0633558184,-0.0649563819],"value_targets":81.8873062134} 
+{"eps_id":894432818,"obs":[-0.1041718051,0.1847786158,0.0446164161,-0.3750886023],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1004762277,-0.0109477378,0.0371146463,-0.0686783567],"action_prob":0.8341382146,"action_logp":-0.181356132,"action_dist_inputs":[0.8053328395,-0.8099119067],"value_targets":81.7043457031} +{"eps_id":894432818,"obs":[-0.1004762277,-0.0109477378,0.0371146463,-0.0686783567],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1006951854,-0.2065815926,0.0357410796,0.2354795486],"action_prob":0.5037671924,"action_logp":-0.6856410503,"action_dist_inputs":[0.0068381932,-0.0082308883],"value_targets":81.5195465088} +{"eps_id":894432818,"obs":[-0.1006951854,-0.2065815926,0.0357410796,0.2354795486],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1048268154,-0.0119880373,0.0404506698,-0.0457187407],"action_prob":0.8319379091,"action_logp":-0.1839974672,"action_dist_inputs":[-0.7987132668,0.8007110357],"value_targets":81.3328704834} +{"eps_id":894432818,"obs":[-0.1048268154,-0.0119880373,0.0404506698,-0.0457187407],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1050665826,-0.2076659799,0.0395362936,0.2594473362],"action_prob":0.4548321664,"action_logp":-0.7878267765,"action_dist_inputs":[-0.0911873132,0.0899777859],"value_targets":81.144317627} +{"eps_id":894432818,"obs":[-0.1050665826,-0.2076659799,0.0395362936,0.2594473362],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1092199013,-0.0131300893,0.0447252393,-0.0205078349],"action_prob":0.849085331,"action_logp":-0.1635956019,"action_dist_inputs":[-0.8626285791,0.8648162484],"value_targets":80.9538574219} 
+{"eps_id":894432818,"obs":[-0.1092199013,-0.0131300893,0.0447252393,-0.0205078349],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.109482497,0.1813228726,0.0443150848,-0.2987507582],"action_prob":0.5981373191,"action_logp":-0.5139349103,"action_dist_inputs":[-0.1993548125,0.1983550787],"value_targets":80.76146698} +{"eps_id":894432818,"obs":[-0.109482497,0.1813228726,0.0443150848,-0.2987507582],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1058560461,-0.0144018317,0.0383400694,0.007572439],"action_prob":0.7680384517,"action_logp":-0.263915509,"action_dist_inputs":[0.5965926051,-0.6006756425],"value_targets":80.5671386719} +{"eps_id":894432818,"obs":[-0.1058560461,-0.0144018317,0.0383400694,0.007572439],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1061440781,0.1801498979,0.0384915173,-0.2727714777],"action_prob":0.6293248534,"action_logp":-0.4631077349,"action_dist_inputs":[-0.2650476694,0.2642737925],"value_targets":80.3708496094} +{"eps_id":894432818,"obs":[-0.1061440781,0.1801498979,0.0384915173,-0.2727714777],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1025410816,-0.0154995453,0.0330360867,0.0317990221],"action_prob":0.7480247021,"action_logp":-0.2903192639,"action_dist_inputs":[0.5420983434,-0.5460067391],"value_targets":80.1725769043} +{"eps_id":894432818,"obs":[-0.1025410816,-0.0154995453,0.0330360867,0.0317990221],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1028510705,0.1791334599,0.0336720683,-0.2502803504],"action_prob":0.6551762819,"action_logp":-0.4228509367,"action_dist_inputs":[-0.321221292,0.3206497729],"value_targets":79.9722976685} 
+{"eps_id":894432818,"obs":[-0.1028510705,0.1791334599,0.0336720683,-0.2502803504],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0992684066,0.3737587929,0.0286664627,-0.5321551561],"action_prob":0.2712681293,"action_logp":-1.3046475649,"action_dist_inputs":[0.4922243357,-0.4959737659],"value_targets":79.7699966431} +{"eps_id":894432818,"obs":[-0.0992684066,0.3737587929,0.0286664627,-0.5321551561],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0917932242,0.1782456189,0.0180233605,-0.2305790782],"action_prob":0.9004954696,"action_logp":-0.1048101336,"action_dist_inputs":[1.098272562,-1.1044690609],"value_targets":79.5656509399} +{"eps_id":894432818,"obs":[-0.0917932242,0.1782456189,0.0180233605,-0.2305790782],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0882283151,-0.0171291847,0.013411778,0.0677341074],"action_prob":0.731849432,"action_logp":-0.3121805191,"action_dist_inputs":[0.500205934,-0.5038199425],"value_targets":79.3592453003} +{"eps_id":894432818,"obs":[-0.0882283151,-0.0171291847,0.013411778,0.0677341074],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0885709003,0.1777979434,0.0147664603,-0.2206873447],"action_prob":0.6686903238,"action_logp":-0.4024342597,"action_dist_inputs":[-0.3512708247,0.3509966135],"value_targets":79.1507568359} +{"eps_id":894432818,"obs":[-0.0885709003,0.1777979434,0.0147664603,-0.2206873447],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0850149393,0.3727057278,0.0103527131,-0.5086759925],"action_prob":0.2749188542,"action_logp":-1.2912793159,"action_dist_inputs":[0.4831372797,-0.4866704047],"value_targets":78.9401550293} 
+{"eps_id":894432818,"obs":[-0.0850149393,0.3727057278,0.0103527131,-0.5086759925],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0775608271,0.1774394661,0.0001791938,-0.2127486318],"action_prob":0.9019123912,"action_logp":-0.1032378674,"action_dist_inputs":[1.1062861681,-1.1123708487],"value_targets":78.727432251} +{"eps_id":894432818,"obs":[-0.0775608271,0.1774394661,0.0001791938,-0.2127486318],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0740120336,-0.0176850408,-0.0040757791,0.0799908116],"action_prob":0.7411748767,"action_logp":-0.2995186448,"action_dist_inputs":[0.5243044496,-0.527779758],"value_targets":78.5125579834} +{"eps_id":894432818,"obs":[-0.0740120336,-0.0176850408,-0.0040757791,0.0799908116],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0743657351,-0.2127483338,-0.0024759627,0.3713850379],"action_prob":0.3481489122,"action_logp":-1.0551249981,"action_dist_inputs":[-0.3136771619,0.3135088086],"value_targets":78.2955093384} +{"eps_id":894432818,"obs":[-0.0743657351,-0.2127483338,-0.0024759627,0.3713850379],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.078620702,-0.0175912902,0.0049517383,0.0779224411],"action_prob":0.8736497164,"action_logp":-0.1350757778,"action_dist_inputs":[-0.9652984738,0.9683232307],"value_targets":78.0762710571} +{"eps_id":894432818,"obs":[-0.078620702,-0.0175912902,0.0049517383,0.0779224411],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0789725259,0.1774593294,0.0065101869,-0.2131940871],"action_prob":0.6668441296,"action_logp":-0.4051989317,"action_dist_inputs":[-0.3470512629,0.3468947709],"value_targets":77.8548202515} 
+{"eps_id":894432818,"obs":[-0.0789725259,0.1774593294,0.0065101869,-0.2131940871],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.075423345,0.3724876046,0.0022463051,-0.5038163066],"action_prob":0.2693122029,"action_logp":-1.3118839264,"action_dist_inputs":[0.4973350763,-0.500779748],"value_targets":77.6311340332} +{"eps_id":894432818,"obs":[-0.075423345,0.3724876046,0.0022463051,-0.5038163066],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0679735914,0.1773340553,-0.007830021,-0.2104263306],"action_prob":0.9040729403,"action_logp":-0.1008452401,"action_dist_inputs":[1.1186404228,-1.1246820688],"value_targets":77.4051818848} +{"eps_id":894432818,"obs":[-0.0679735914,0.1773340553,-0.007830021,-0.2104263306],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0644269064,-0.0176750645,-0.0120385475,0.0797763839],"action_prob":0.7522249818,"action_logp":-0.2847198546,"action_dist_inputs":[0.5535446405,-0.556969285],"value_targets":77.1769561768} +{"eps_id":894432818,"obs":[-0.0644269064,-0.0176750645,-0.0120385475,0.0797763839],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0647804141,0.1776173711,-0.0104430206,-0.2166803032],"action_prob":0.6348655224,"action_logp":-0.4543420374,"action_dist_inputs":[-0.2766424417,0.2765052915],"value_targets":76.9464187622} +{"eps_id":894432818,"obs":[-0.0647804141,0.1776173711,-0.0104430206,-0.2166803032],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.061228063,-0.0173537415,-0.0147766257,0.072690241],"action_prob":0.7637321949,"action_logp":-0.2695380449,"action_dist_inputs":[0.5848949552,-0.5883563757],"value_targets":76.7135543823} 
+{"eps_id":894432818,"obs":[-0.061228063,-0.0173537415,-0.0147766257,0.072690241],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0615751371,0.1779769063,-0.0133228209,-0.2246179134],"action_prob":0.6176288724,"action_logp":-0.4818675518,"action_dist_inputs":[-0.2398421615,0.2396539897],"value_targets":76.4783401489} +{"eps_id":894432818,"obs":[-0.0615751371,0.1779769063,-0.0133228209,-0.2246179134],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0580155998,-0.0169521291,-0.0178151801,0.063832894],"action_prob":0.7768024802,"action_logp":-0.252569139,"action_dist_inputs":[0.6218076944,-0.6253215671],"value_targets":76.2407455444} +{"eps_id":894432818,"obs":[-0.0580155998,-0.0169521291,-0.0178151801,0.063832894],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0583546422,-0.21181418,-0.0165385213,0.3508422077],"action_prob":0.40384987,"action_logp":-0.9067121148,"action_dist_inputs":[-0.1948529929,0.1945964843],"value_targets":76.0007553101} +{"eps_id":894432818,"obs":[-0.0583546422,-0.21181418,-0.0165385213,0.3508422077],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0625909269,-0.0164609849,-0.0095216772,0.0529903807],"action_prob":0.8569309711,"action_logp":-0.15439789,"action_dist_inputs":[-0.8935760856,0.8964541554],"value_targets":75.7583389282} +{"eps_id":894432818,"obs":[-0.0625909269,-0.0164609849,-0.0095216772,0.0529903807],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0629201457,-0.211445123,-0.0084618693,0.3426539898],"action_prob":0.4030371904,"action_logp":-0.9087264538,"action_dist_inputs":[-0.1965776682,0.1962482482],"value_targets":75.5134735107} 
+{"eps_id":894432818,"obs":[-0.0629201457,-0.211445123,-0.0084618693,0.3426539898],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0671490505,-0.0162038133,-0.0016087898,0.0473147593],"action_prob":0.8584913015,"action_logp":-0.1525787115,"action_dist_inputs":[-0.8999858499,0.9028299451],"value_targets":75.26612854} +{"eps_id":894432818,"obs":[-0.0671490505,-0.0162038133,-0.0016087898,0.0473147593],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0674731284,-0.211302653,-0.0006624946,0.3394896686],"action_prob":0.3945096731,"action_logp":-0.9301116467,"action_dist_inputs":[-0.2143776566,0.2140172422],"value_targets":75.0162963867} +{"eps_id":894432818,"obs":[-0.0674731284,-0.211302653,-0.0006624946,0.3394896686],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0716991797,-0.4064151645,0.0061272988,0.6319636106],"action_prob":0.1378574967,"action_logp":-1.9815347195,"action_dist_inputs":[-0.9151758552,0.9180241227],"value_targets":74.7639312744} +{"eps_id":894432818,"obs":[-0.0716991797,-0.4064151645,0.0061272988,0.6319636106],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.07982748,-0.2113792449,0.0187665708,0.3412166238],"action_prob":0.9283680916,"action_logp":-0.0743269548,"action_dist_inputs":[-1.2780989408,1.2837888002],"value_targets":74.5090255737} +{"eps_id":894432818,"obs":[-0.07982748,-0.2113792449,0.0187665708,0.3412166238],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0840550661,-0.0165292621,0.0255909022,0.0545101501],"action_prob":0.8741149902,"action_logp":-0.1345433444,"action_dist_inputs":[-0.9674552083,0.9703876972],"value_targets":74.2515411377} 
+{"eps_id":894432818,"obs":[-0.0840550661,-0.0165292621,0.0255909022,0.0545101501],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0843856484,-0.2120086253,0.0266811065,0.3551560938],"action_prob":0.3265020847,"action_logp":-1.1193189621,"action_dist_inputs":[-0.3621441722,0.3619044721],"value_targets":73.9914550781} +{"eps_id":894432818,"obs":[-0.0843856484,-0.2120086253,0.0266811065,0.3551560938],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0886258259,-0.0172759946,0.0337842293,0.071004428],"action_prob":0.8828279972,"action_logp":-0.1246248633,"action_dist_inputs":[-1.0081993341,1.011288166],"value_targets":73.7287445068} +{"eps_id":894432818,"obs":[-0.0886258259,-0.0172759946,0.0337842293,0.071004428],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0889713466,0.1773457229,0.0352043174,-0.2108308971],"action_prob":0.7102212906,"action_logp":-0.3421786427,"action_dist_inputs":[-0.4482612014,0.4481979311],"value_targets":73.4633712769} +{"eps_id":894432818,"obs":[-0.0889713466,0.1773457229,0.0352043174,-0.2108308971],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0854244307,-0.0182614308,0.0309876986,0.0927459672],"action_prob":0.6717178226,"action_logp":-0.3979169726,"action_dist_inputs":[0.3563277423,-0.3596370518],"value_targets":73.1953277588} +{"eps_id":894432818,"obs":[-0.0854244307,-0.0182614308,0.0309876986,0.0927459672],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0857896581,0.176402986,0.0328426175,-0.1900015473],"action_prob":0.7317044139,"action_logp":-0.3123786151,"action_dist_inputs":[-0.5015709996,0.5017166138],"value_targets":72.9245758057} 
+{"eps_id":894432818,"obs":[-0.0857896581,0.176402986,0.0328426175,-0.1900015473],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0822615996,-0.019173054,0.0290425867,0.1128583029],"action_prob":0.6446421742,"action_logp":-0.4390598834,"action_dist_inputs":[0.2962168753,-0.2993532419],"value_targets":72.6510848999} +{"eps_id":894432818,"obs":[-0.0822615996,-0.019173054,0.0290425867,0.1128583029],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0826450586,-0.214698866,0.031299755,0.4145606458],"action_prob":0.2491770834,"action_logp":-1.3895914555,"action_dist_inputs":[-0.5513282418,0.5516777635],"value_targets":72.3748321533} +{"eps_id":894432818,"obs":[-0.0826450586,-0.214698866,0.031299755,0.4145606458],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0869390368,-0.0200342014,0.039590966,0.1319073141],"action_prob":0.9002596736,"action_logp":-0.1050720215,"action_dist_inputs":[-1.0982187986,1.1018941402],"value_targets":72.0957946777} +{"eps_id":894432818,"obs":[-0.0869390368,-0.0200342014,0.039590966,0.1319073141],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0873397216,0.1744989157,0.0422291122,-0.1480270177],"action_prob":0.7823879123,"action_logp":-0.2454046458,"action_dist_inputs":[-0.6395238638,0.6401126981],"value_targets":71.8139266968} +{"eps_id":894432818,"obs":[-0.0873397216,0.1744989157,0.0422291122,-0.1480270177],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.083849743,0.3689914942,0.0392685719,-0.4270941317],"action_prob":0.4473947287,"action_logp":-0.8043140173,"action_dist_inputs":[0.1042321771,-0.1069704741],"value_targets":71.5292205811} 
+{"eps_id":894432818,"obs":[-0.083849743,0.3689914942,0.0392685719,-0.4270941317],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0764699131,0.1733360142,0.0307266898,-0.1222951487],"action_prob":0.8435198069,"action_logp":-0.1701718867,"action_dist_inputs":[0.8396217227,-0.8450320363],"value_targets":71.2416381836} +{"eps_id":894432818,"obs":[-0.0764699131,0.1733360142,0.0307266898,-0.1222951487],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0730031952,0.3680045903,0.0282807872,-0.4051279426],"action_prob":0.4645078182,"action_logp":-0.7667769194,"action_dist_inputs":[0.069851324,-0.0723565742],"value_targets":70.9511489868} +{"eps_id":894432818,"obs":[-0.0730031952,0.3680045903,0.0282807872,-0.4051279426],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0656431019,0.1724932194,0.0201782286,-0.1036647335],"action_prob":0.8395406008,"action_logp":-0.1749004722,"action_dist_inputs":[0.8247739077,-0.8300395608],"value_targets":70.6577301025} +{"eps_id":894432818,"obs":[-0.0656431019,0.1724932194,0.0201782286,-0.1036647335],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0621932372,0.3673202693,0.0181049332,-0.3899138272],"action_prob":0.4715528488,"action_logp":-0.751724124,"action_dist_inputs":[0.0557947904,-0.0581169166],"value_targets":70.3613433838} +{"eps_id":894432818,"obs":[-0.0621932372,0.3673202693,0.0181049332,-0.3899138272],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0548468307,0.1719461083,0.0103066564,-0.0915780291],"action_prob":0.8393662572,"action_logp":-0.1751081198,"action_dist_inputs":[0.8241852522,-0.8293352723],"value_targets":70.061958313} 
+{"eps_id":894432818,"obs":[-0.0548468307,0.1719461083,0.0103066564,-0.0915780291],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0514079072,0.3669188321,0.008475096,-0.3809914589],"action_prob":0.4687102139,"action_logp":-0.7577705979,"action_dist_inputs":[0.0615666024,-0.0637562051],"value_targets":69.7595596313} +{"eps_id":894432818,"obs":[-0.0514079072,0.3669188321,0.008475096,-0.3809914589],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0440695323,0.1716775596,0.0008552665,-0.0856484473],"action_prob":0.8430016041,"action_logp":-0.1707863957,"action_dist_inputs":[0.8378344774,-0.8428990841],"value_targets":69.4540939331} +{"eps_id":894432818,"obs":[-0.0440695323,0.1716775596,0.0008552665,-0.0856484473],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0406359807,-0.0234566443,-0.0008577024,0.2073041946],"action_prob":0.54403615,"action_logp":-0.6087395549,"action_dist_inputs":[0.0872473419,-0.0893549025],"value_targets":69.1455535889} +{"eps_id":894432818,"obs":[-0.0406359807,-0.0234566443,-0.0008577024,0.2073041946],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0411051139,0.1716775596,0.0032883815,-0.08564917],"action_prob":0.8002766371,"action_logp":-0.2227977812,"action_dist_inputs":[-0.6933157444,0.6947084665],"value_targets":68.8338928223} +{"eps_id":894432818,"obs":[-0.0411051139,0.1716775596,0.0032883815,-0.08564917],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0376715623,-0.0234913714,0.0015753983,0.2080694288],"action_prob":0.5384407043,"action_logp":-0.6190778613,"action_dist_inputs":[0.0760056078,-0.0780613646],"value_targets":68.5190811157} 
+{"eps_id":894432818,"obs":[-0.0376715623,-0.0234913714,0.0015753983,0.2080694288],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0381413884,0.1716080159,0.0057367869,-0.0841161236],"action_prob":0.8036526442,"action_logp":-0.2185881734,"action_dist_inputs":[-0.7039120197,0.7053694129],"value_targets":68.2010955811} +{"eps_id":894432818,"obs":[-0.0381413884,0.1716080159,0.0057367869,-0.0841161236],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0347092301,0.3666472733,0.0040544644,-0.3749835789],"action_prob":0.46991539,"action_logp":-0.755202651,"action_dist_inputs":[0.059247043,-0.061236944],"value_targets":67.8798904419} +{"eps_id":894432818,"obs":[-0.0347092301,0.3666472733,0.0040544644,-0.3749835789],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0273762848,0.1714679599,-0.0034452071,-0.0810250118],"action_prob":0.8434555531,"action_logp":-0.1702481061,"action_dist_inputs":[0.839630425,-0.8445364833],"value_targets":67.5554504395} +{"eps_id":894432818,"obs":[-0.0273762848,0.1714679599,-0.0034452071,-0.0810250118],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0239469241,-0.023604434,-0.0050657075,0.2105689496],"action_prob":0.5472761393,"action_logp":-0.6028017998,"action_dist_inputs":[0.0938702673,-0.095800899],"value_targets":67.227722168} +{"eps_id":894432818,"obs":[-0.0239469241,-0.023604434,-0.0050657075,0.2105689496],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0244190134,0.1715895832,-0.0008543282,-0.0837076232],"action_prob":0.7991623282,"action_logp":-0.2241911888,"action_dist_inputs":[-0.6897546649,0.6913125515],"value_targets":66.8966903687} 
+{"eps_id":894432818,"obs":[-0.0244190134,0.1715895832,-0.0008543282,-0.0837076232],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.020987222,0.3667237759,-0.0025284807,-0.3766599596],"action_prob":0.4539544284,"action_logp":-0.7897584438,"action_dist_inputs":[0.0914021134,-0.0933035314],"value_targets":66.5623168945} +{"eps_id":894432818,"obs":[-0.020987222,0.3667237759,-0.0025284807,-0.3766599596],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0136527466,0.1716378182,-0.0100616803,-0.084775351],"action_prob":0.8508626223,"action_logp":-0.1615045667,"action_dist_inputs":[0.8682742715,-0.8731088042],"value_targets":66.2245635986} +{"eps_id":894432818,"obs":[-0.0136527466,0.1716378182,-0.0100616803,-0.084775351],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0102199903,-0.0233384687,-0.0117571866,0.2047161907],"action_prob":0.570622921,"action_logp":-0.5610266328,"action_dist_inputs":[0.1412576735,-0.143135488],"value_targets":65.883392334} +{"eps_id":894432818,"obs":[-0.0102199903,-0.0233384687,-0.0117571866,0.2047161907],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0106867598,-0.218290329,-0.0076628635,0.4936672449],"action_prob":0.2127295732,"action_logp":-1.5477335453,"action_dist_inputs":[-0.653485477,0.655064702],"value_targets":65.5387802124} +{"eps_id":894432818,"obs":[-0.0106867598,-0.218290329,-0.0076628635,0.4936672449],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0150525663,-0.023061147,0.0022104816,0.1985792071],"action_prob":0.9080189466,"action_logp":-0.0964900032,"action_dist_inputs":[-1.1425491571,1.147133112],"value_targets":65.1906890869} 
+{"eps_id":894432818,"obs":[-0.0150525663,-0.023061147,0.0022104816,0.1985792071],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0155137889,0.1720291227,0.0061820657,-0.0934056044],"action_prob":0.7983380556,"action_logp":-0.2252231687,"action_dist_inputs":[-0.6871629357,0.688776195],"value_targets":64.8390808105} +{"eps_id":894432818,"obs":[-0.0155137889,0.1720291227,0.0061820657,-0.0934056044],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0120732067,-0.0231808927,0.0043139532,0.201221332],"action_prob":0.5470330715,"action_logp":-0.6032459736,"action_dist_inputs":[0.0934279561,-0.0952623188],"value_targets":64.4839172363} +{"eps_id":894432818,"obs":[-0.0120732067,-0.0231808927,0.0043139532,0.201221332],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0125368247,0.1718790978,0.0083383797,-0.0900976285],"action_prob":0.8028894067,"action_logp":-0.2195382714,"action_dist_inputs":[-0.7013803124,0.7030720115],"value_targets":64.1251678467} +{"eps_id":894432818,"obs":[-0.0125368247,0.1718790978,0.0083383797,-0.0900976285],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0090992423,0.3668805361,0.0065364274,-0.3801381588],"action_prob":0.463791281,"action_logp":-0.7683206797,"action_dist_inputs":[0.0716668665,-0.073422052],"value_targets":63.7627983093} +{"eps_id":894432818,"obs":[-0.0090992423,0.3668805361,0.0065364274,-0.3801381588],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0017616319,0.1716663837,-0.0010663358,-0.0854014903],"action_prob":0.8446326256,"action_logp":-0.1688535362,"action_dist_inputs":[0.8442137837,-0.8488954902],"value_targets":63.3967666626} 
+{"eps_id":894432818,"obs":[-0.0017616319,0.1716663837,-0.0010663358,-0.0854014903],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0016716958,0.3668036163,-0.0027743657,-0.378420651],"action_prob":0.4488331974,"action_logp":-0.8011039495,"action_dist_inputs":[0.1018514931,-0.1035345495],"value_targets":63.0270347595} +{"eps_id":894432818,"obs":[0.0016716958,0.3668036163,-0.0027743657,-0.378420651],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0090077678,0.1717211604,-0.0103427786,-0.0866137892],"action_prob":0.8520798087,"action_logp":-0.1600750983,"action_dist_inputs":[0.8731850386,-0.8778219819],"value_targets":62.6535720825} +{"eps_id":894432818,"obs":[0.0090077678,0.1717211604,-0.0103427786,-0.0866137892],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0124421911,-0.0232510176,-0.0120750545,0.2027881145],"action_prob":0.5761538148,"action_logp":-0.5513806343,"action_dist_inputs":[0.1526715457,-0.1543324143],"value_targets":62.2763366699} +{"eps_id":894432818,"obs":[0.0124421911,-0.0232510176,-0.0120750545,0.2027881145],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0119771706,0.1720415205,-0.0080192927,-0.0936793163],"action_prob":0.7859928608,"action_logp":-0.2408075482,"action_dist_inputs":[-0.6495711207,0.6513672471],"value_targets":61.8952865601} +{"eps_id":894432818,"obs":[0.0119771706,0.1720415205,-0.0080192927,-0.0936793163],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0154180015,0.367277503,-0.0098928791,-0.3888815045],"action_prob":0.4167806208,"action_logp":-0.8751952648,"action_dist_inputs":[0.1671657562,-0.1688376963],"value_targets":61.5103912354} 
+{"eps_id":894432818,"obs":[0.0154180015,0.367277503,-0.0098928791,-0.3888815045],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0227635503,0.562538445,-0.0176705085,-0.6846670508],"action_prob":0.1361903846,"action_logp":-1.993701458,"action_dist_inputs":[0.921335876,-0.9259627461],"value_targets":61.1216087341} +{"eps_id":894432818,"obs":[0.0227635503,0.562538445,-0.0176705085,-0.6846670508],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0340143219,0.3676662445,-0.0313638486,-0.3975992501],"action_prob":0.9340635538,"action_logp":-0.0682108104,"action_dist_inputs":[1.3219323158,-1.3289210796],"value_targets":60.7288970947} +{"eps_id":894432818,"obs":[0.0340143219,0.3676662445,-0.0313638486,-0.3975992501],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0413676463,0.1730029732,-0.0393158346,-0.1149673983],"action_prob":0.8822391033,"action_logp":-0.125292182,"action_dist_inputs":[1.0045624971,-1.0092445612],"value_targets":60.3322181702} +{"eps_id":894432818,"obs":[0.0413676463,0.1730029732,-0.0393158346,-0.1149673983],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0448277034,0.3686655462,-0.0416151844,-0.4197903574],"action_prob":0.3122959137,"action_logp":-1.1638040543,"action_dist_inputs":[0.3938041329,-0.3956033885],"value_targets":59.9315338135} +{"eps_id":894432818,"obs":[0.0448277034,0.3686655462,-0.0416151844,-0.4197903574],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.052201014,0.1741572171,-0.0500109904,-0.1405117959],"action_prob":0.8954123855,"action_logp":-0.1104709283,"action_dist_inputs":[1.0712165833,-1.0760421753],"value_targets":59.526802063} 
+{"eps_id":894432818,"obs":[0.052201014,0.1741572171,-0.0500109904,-0.1405117959],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0556841604,-0.0202141143,-0.0528212264,0.1359835714],"action_prob":0.7413173914,"action_logp":-0.2993264496,"action_dist_inputs":[0.5254219174,-0.5274048448],"value_targets":59.117980957} +{"eps_id":894432818,"obs":[0.0556841604,-0.0202141143,-0.0528212264,0.1359835714],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.055279877,-0.2145412564,-0.0501015559,0.4115455151],"action_prob":0.3688333333,"action_logp":-0.9974104166,"action_dist_inputs":[-0.2679774463,0.2692477107],"value_targets":58.7050323486} +{"eps_id":894432818,"obs":[0.055279877,-0.2145412564,-0.0501015559,0.4115455151],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0509890541,-0.4089184701,-0.0418706425,0.6880217791],"action_prob":0.1327358931,"action_logp":-2.0193939209,"action_dist_inputs":[-0.93641752,0.9405646324],"value_targets":58.2879104614} +{"eps_id":894432818,"obs":[0.0509890541,-0.4089184701,-0.0418706425,0.6880217791],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0428106822,-0.2132411599,-0.028110208,0.3824567199],"action_prob":0.9311198592,"action_logp":-0.0713672414,"action_dist_inputs":[-1.2987574339,1.305262208],"value_targets":57.8665771484} +{"eps_id":894432818,"obs":[0.0428106822,-0.2132411599,-0.028110208,0.3824567199],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0385458581,-0.0177315976,-0.0204610731,0.0810449049],"action_prob":0.8687183261,"action_logp":-0.1407363713,"action_dist_inputs":[-0.9428309202,0.9468429685],"value_targets":57.4409866333} 
+{"eps_id":894432818,"obs":[0.0385458581,-0.0177315976,-0.0204610731,0.0810449049],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0381912291,0.1776776016,-0.0188401751,-0.2180226743],"action_prob":0.6140655875,"action_logp":-0.4876534939,"action_dist_inputs":[-0.2317734808,0.2326608747],"value_targets":57.0110969543} +{"eps_id":894432818,"obs":[0.0381912291,0.1776776016,-0.0188401751,-0.2180226743],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0417447798,-0.0171700362,-0.0232006293,0.0686582252],"action_prob":0.7790890932,"action_logp":-0.249629885,"action_dist_inputs":[0.6289336085,-0.6314322948],"value_targets":56.5768661499} +{"eps_id":894432818,"obs":[0.0417447798,-0.0171700362,-0.0232006293,0.0686582252],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0414013788,0.1782767177,-0.0218274649,-0.2312533855],"action_prob":0.5864146352,"action_logp":-0.5337281227,"action_dist_inputs":[-0.1741869301,0.1749763191],"value_targets":56.1382484436} +{"eps_id":894432818,"obs":[0.0414013788,0.1782767177,-0.0218274649,-0.2312533855],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0449669138,-0.0165266376,-0.026452532,0.0544652827],"action_prob":0.7953666449,"action_logp":-0.22895208,"action_dist_inputs":[0.6774926782,-0.6800906658],"value_targets":55.6952018738} +{"eps_id":894432818,"obs":[0.0449669138,-0.0165266376,-0.026452532,0.0544652827],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0446363799,0.1789644212,-0.0253632274,-0.2464448512],"action_prob":0.5534306169,"action_logp":-0.5916188955,"action_dist_inputs":[-0.1069346815,0.1076068357],"value_targets":55.2476768494} 
+{"eps_id":894432818,"obs":[0.0446363799,0.1789644212,-0.0253632274,-0.2464448512],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0482156686,-0.0157862771,-0.0302921236,0.0381312482],"action_prob":0.8121368885,"action_logp":-0.2080864012,"action_dist_inputs":[0.7306178212,-0.7333375812],"value_targets":54.7956352234} +{"eps_id":894432818,"obs":[0.0482156686,-0.0157862771,-0.0302921236,0.0381312482],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0478999428,-0.2104610354,-0.0295294989,0.3211048841],"action_prob":0.4855479002,"action_logp":-0.7224773169,"action_dist_inputs":[-0.0286465231,0.0291780122],"value_targets":54.3390235901} +{"eps_id":894432818,"obs":[0.0478999428,-0.2104610354,-0.0295294989,0.3211048841],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0436907224,-0.0149312783,-0.0231074002,0.0192577131],"action_prob":0.8322403431,"action_logp":-0.1836340278,"action_dist_inputs":[-0.7989945412,0.8025944233],"value_targets":53.8778038025} +{"eps_id":894432818,"obs":[0.0436907224,-0.0149312783,-0.0231074002,0.0192577131],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0433920957,-0.2097143531,-0.0227222461,0.3045613468],"action_prob":0.5018885732,"action_logp":-0.6893771291,"action_dist_inputs":[0.0039663799,-0.0035880487],"value_targets":53.4119224548} +{"eps_id":894432818,"obs":[0.0433920957,-0.2097143531,-0.0227222461,0.3045613468],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.03919781,-0.0142760817,-0.0166310202,0.0047998573],"action_prob":0.8272840977,"action_logp":-0.1896070987,"action_dist_inputs":[-0.7815043926,0.7849956751],"value_targets":52.9413375854} 
+{"eps_id":894432818,"obs":[0.03919781,-0.0142760817,-0.0166310202,0.0047998573],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0389122888,0.181080386,-0.0165350232,-0.2930836082],"action_prob":0.4880415201,"action_logp":-0.7173547745,"action_dist_inputs":[0.0240503959,-0.0237924941],"value_targets":52.4659957886} +{"eps_id":894432818,"obs":[0.0389122888,0.181080386,-0.0165350232,-0.2930836082],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0425338969,-0.0138019603,-0.0223966949,-0.0056611304],"action_prob":0.8362228274,"action_logp":-0.1788601279,"action_dist_inputs":[0.8136523962,-0.8167360425],"value_targets":51.9858551025} +{"eps_id":894432818,"obs":[0.0425338969,-0.0138019603,-0.0223966949,-0.0056611304],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0422578566,-0.2085956633,-0.0225099176,0.2798720896],"action_prob":0.5449020863,"action_logp":-0.6071491838,"action_dist_inputs":[0.0901224166,-0.0899711326],"value_targets":51.5008621216} +{"eps_id":894432818,"obs":[0.0422578566,-0.2085956633,-0.0225099176,0.2798720896],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.038085945,-0.4033893943,-0.0169124752,0.5653712749],"action_prob":0.1923271567,"action_logp":-1.6485574245,"action_dist_inputs":[-0.7158276439,0.7191314697],"value_targets":51.0109710693} +{"eps_id":894432818,"obs":[0.038085945,-0.4033893943,-0.0169124752,0.5653712749],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0300181545,-0.2080343068,-0.0056050499,0.2674085498],"action_prob":0.919901669,"action_logp":-0.0834885165,"action_dist_inputs":[-1.2175811529,1.2234303951],"value_targets":50.5161323547} 
+{"eps_id":894432818,"obs":[0.0300181545,-0.2080343068,-0.0056050499,0.2674085498],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0258574691,-0.4030758142,-0.0002568791,0.5583183169],"action_prob":0.1827322543,"action_logp":-1.6997332573,"action_dist_inputs":[-0.7473601103,0.7505846024],"value_targets":50.0162963867} +{"eps_id":894432818,"obs":[0.0258574691,-0.4030758142,-0.0002568791,0.5583183169],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.017795952,-0.2079502642,0.0109094875,0.2655544877],"action_prob":0.9230909944,"action_logp":-0.0800274462,"action_dist_inputs":[-1.2396245003,1.2454807758],"value_targets":49.5114097595} +{"eps_id":894432818,"obs":[0.017795952,-0.2079502642,0.0109094875,0.2655544877],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0136369476,-0.0129857082,0.0162205771,-0.0236676261],"action_prob":0.8334963918,"action_logp":-0.1821258813,"action_dist_inputs":[-0.803694427,0.806918323],"value_targets":49.0014266968} +{"eps_id":894432818,"obs":[0.0136369476,-0.0129857082,0.0162205771,-0.0236676261],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0133772334,-0.2083364725,0.0157472249,0.2740886211],"action_prob":0.4815442562,"action_logp":-0.7307571173,"action_dist_inputs":[-0.0369214341,0.0369351059],"value_targets":48.486289978} +{"eps_id":894432818,"obs":[0.0133772334,-0.2083364725,0.0157472249,0.2740886211],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0092105037,-0.0134427072,0.021228997,-0.0135862958],"action_prob":0.8437934518,"action_logp":-0.169847548,"action_dist_inputs":[-0.8417208791,0.8450074792],"value_targets":47.9659461975} 
+{"eps_id":894432818,"obs":[0.0092105037,-0.0134427072,0.021228997,-0.0135862958],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0089416495,-0.2088625729,0.0209572706,0.2857183218],"action_prob":0.4508500397,"action_logp":-0.7966204882,"action_dist_inputs":[-0.0985691696,0.098667562],"value_targets":47.4403495789} +{"eps_id":894432818,"obs":[0.0089416495,-0.2088625729,0.0209572706,0.2857183218],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0047643981,-0.0140456846,0.0266716368,-0.000281865],"action_prob":0.8551418185,"action_logp":-0.1564879268,"action_dist_inputs":[-0.886066854,0.8894457817],"value_targets":46.9094467163} +{"eps_id":894432818,"obs":[0.0047643981,-0.0140456846,0.0266716368,-0.000281865],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0044834842,0.1806838065,0.0266660005,-0.2844316661],"action_prob":0.5859524608,"action_logp":-0.5345166326,"action_dist_inputs":[-0.173521176,0.1737367064],"value_targets":46.3731765747} +{"eps_id":894432818,"obs":[0.0044834842,0.1806838065,0.0266660005,-0.2844316661],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0080971606,-0.0148081221,0.0209773667,0.0165408347],"action_prob":0.7818812728,"action_logp":-0.2460523695,"action_dist_inputs":[0.6368384361,-0.6398251057],"value_targets":45.8314933777} +{"eps_id":894432818,"obs":[0.0080971606,-0.0148081221,0.0209773667,0.0165408347],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0078009982,0.1800068021,0.0213081837,-0.2694503665],"action_prob":0.6015155911,"action_logp":-0.5083028078,"action_dist_inputs":[-0.2057165205,0.2060675323],"value_targets":45.2843360901} 
+{"eps_id":894432818,"obs":[0.0078009982,0.1800068021,0.0213081837,-0.2694503665],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0114011345,0.3748182952,0.015919175,-0.555337131],"action_prob":0.2254757881,"action_logp":-1.4895424843,"action_dist_inputs":[0.6155769229,-0.6184592247],"value_targets":44.7316513062} +{"eps_id":894432818,"obs":[0.0114011345,0.3748182952,0.015919175,-0.555337131],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0188974999,0.1794764847,0.0048124329,-0.2576815188],"action_prob":0.9084750414,"action_logp":-0.0959878415,"action_dist_inputs":[1.1448071003,-1.150349021],"value_targets":44.1733856201} +{"eps_id":894432818,"obs":[0.0188974999,0.1794764847,0.0048124329,-0.2576815188],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0224870294,0.3745294213,-0.0003411972,-0.5488426685],"action_prob":0.2143717855,"action_logp":-1.5400434732,"action_dist_inputs":[0.6479787827,-0.6507931352],"value_targets":43.6094818115} +{"eps_id":894432818,"obs":[0.0224870294,0.3745294213,-0.0003411972,-0.5488426685],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0299776178,0.5696561337,-0.0113180503,-0.8416330814],"action_prob":0.0878017396,"action_logp":-2.4326739311,"action_dist_inputs":[1.1676089764,-1.1731668711],"value_targets":43.0398788452} +{"eps_id":894432818,"obs":[0.0299776178,0.5696561337,-0.0113180503,-0.8416330814],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.041370742,0.3746905029,-0.0281507112,-0.5525307655],"action_prob":0.9434645176,"action_logp":-0.0581965372,"action_dist_inputs":[1.4034210443,-1.4112682343],"value_targets":42.4645233154} 
+{"eps_id":894432818,"obs":[0.041370742,0.3746905029,-0.0281507112,-0.5525307655],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0488645509,0.1799749732,-0.0392013267,-0.2688483894],"action_prob":0.91991961,"action_logp":-0.083468996,"action_dist_inputs":[1.217777729,-1.2234773636],"value_targets":41.8833580017} +{"eps_id":894432818,"obs":[0.0488645509,0.1799749732,-0.0392013267,-0.2688483894],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0524640493,-0.014566252,-0.0445782952,0.0112170083],"action_prob":0.8409936428,"action_logp":-0.1731711775,"action_dist_inputs":[0.8313243389,-0.8343157172],"value_targets":41.2963218689} +{"eps_id":894432818,"obs":[0.0524640493,-0.014566252,-0.0445782952,0.0112170083],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0521727242,0.181165725,-0.0443539545,-0.2951909602],"action_prob":0.4316240549,"action_logp":-0.840200305,"action_dist_inputs":[0.1377222836,-0.1375059485],"value_targets":40.7033538818} +{"eps_id":894432818,"obs":[0.0521727242,0.181165725,-0.0443539545,-0.2951909602],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0557960384,-0.0132967569,-0.0502577722,-0.0168198999],"action_prob":0.8589766026,"action_logp":-0.1520136297,"action_dist_inputs":[0.9017906189,-0.9050249457],"value_targets":40.1044006348} +{"eps_id":894432818,"obs":[0.0557960384,-0.0132967569,-0.0502577722,-0.0168198999],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0555301048,0.1825085729,-0.0505941734,-0.3249266148],"action_prob":0.3718928993,"action_logp":-0.989149332,"action_dist_inputs":[0.2620242834,-0.2620803118],"value_targets":39.4993934631} 
+{"eps_id":894432818,"obs":[0.0555301048,0.1825085729,-0.0505941734,-0.3249266148],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0591802746,-0.0118578505,-0.0570927039,-0.0486183837],"action_prob":0.8752111197,"action_logp":-0.1332901418,"action_dist_inputs":[0.9721587896,-0.9756829143],"value_targets":38.8882751465} +{"eps_id":894432818,"obs":[0.0591802746,-0.0118578505,-0.0570927039,-0.0486183837],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0589431189,0.1840343177,-0.0580650717,-0.358754009],"action_prob":0.3113778234,"action_logp":-1.1667482853,"action_dist_inputs":[0.396648854,-0.3970369697],"value_targets":38.2709846497} +{"eps_id":894432818,"obs":[0.0589431189,0.1840343177,-0.0580650717,-0.358754009],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0626238063,-0.0102161895,-0.0652401522,-0.0849308446],"action_prob":0.8894412518,"action_logp":-0.117161788,"action_dist_inputs":[1.0405887365,-1.0444574356],"value_targets":37.6474609375} +{"eps_id":894432818,"obs":[0.0626238063,-0.0102161895,-0.0652401522,-0.0849308446],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0624194816,0.1857773364,-0.0669387653,-0.3974626958],"action_prob":0.2541980445,"action_logp":-1.3696416616,"action_dist_inputs":[0.5377750993,-0.5385714173],"value_targets":37.0176353455} +{"eps_id":894432818,"obs":[0.0624194816,0.1857773364,-0.0669387653,-0.3974626958],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0661350265,-0.0083342418,-0.0748880208,-0.1266127378],"action_prob":0.9015174508,"action_logp":-0.1036758944,"action_dist_inputs":[1.1049625874,-1.1092374325],"value_targets":36.3814506531} 
+{"eps_id":894432818,"obs":[0.0661350265,-0.0083342418,-0.0748880208,-0.1266127378],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0659683421,0.1877762079,-0.0774202794,-0.4419504106],"action_prob":0.2041918486,"action_logp":-1.5886952877,"action_dist_inputs":[0.6795012355,-0.6807968616],"value_targets":35.7388381958} +{"eps_id":894432818,"obs":[0.0659683421,0.1877762079,-0.0774202794,-0.4419504106],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0697238669,-0.0061697029,-0.0862592831,-0.1746418178],"action_prob":0.9114156961,"action_logp":-0.0927561894,"action_dist_inputs":[1.1631484032,-1.1678961515],"value_targets":35.0897369385} +{"eps_id":894432818,"obs":[0.0697238669,-0.0061697029,-0.0862592831,-0.1746418178],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0696004704,-0.1999580115,-0.0897521228,0.0896310955],"action_prob":0.8362936974,"action_logp":-0.1787753999,"action_dist_inputs":[0.8145058155,-0.8164004087],"value_targets":34.4340782166} +{"eps_id":894432818,"obs":[0.0696004704,-0.1999580115,-0.0897521228,0.0896310955],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0656013116,-0.3936865032,-0.0879594982,0.3527024984],"action_prob":0.6091558337,"action_logp":-0.4956811965,"action_dist_inputs":[0.2226011455,-0.2211638987],"value_targets":33.7717971802} +{"eps_id":894432818,"obs":[0.0656013116,-0.3936865032,-0.0879594982,0.3527024984],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0577275828,-0.5874547362,-0.0809054524,0.6164044142],"action_prob":0.2701930702,"action_logp":-1.3086185455,"action_dist_inputs":[-0.4946168065,0.4990264773],"value_targets":33.1028251648} 
+{"eps_id":894432818,"obs":[0.0577275828,-0.5874547362,-0.0809054524,0.6164044142],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0459784903,-0.3913012445,-0.0685773641,0.2993763685],"action_prob":0.8923884034,"action_logp":-0.1138538197,"action_dist_inputs":[-1.0544852018,1.0608875751],"value_targets":32.4270935059} +{"eps_id":894432818,"obs":[0.0459784903,-0.3913012445,-0.0685773641,0.2993763685],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0381524637,-0.5853821039,-0.0625898317,0.5696673989],"action_prob":0.30440256,"action_logp":-1.1894042492,"action_dist_inputs":[-0.4112009108,0.4152191579],"value_targets":31.7445411682} +{"eps_id":894432818,"obs":[0.0381524637,-0.5853821039,-0.0625898317,0.5696673989],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0264448225,-0.7795729637,-0.0511964858,0.841993928],"action_prob":0.115512535,"action_logp":-2.1583762169,"action_dist_inputs":[-1.0147241354,1.0209050179],"value_targets":31.0550918579} +{"eps_id":894432818,"obs":[0.0264448225,-0.7795729637,-0.0511964858,0.841993928],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0108533623,-0.5837909579,-0.034356609,0.533660531],"action_prob":0.9385238886,"action_logp":-0.0634469911,"action_dist_inputs":[-1.3589293957,1.3667303324],"value_targets":30.3586788177} +{"eps_id":894432818,"obs":[0.0108533623,-0.5837909579,-0.034356609,0.533660531],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0008224571,-0.3882031143,-0.0236833971,0.2303530425],"action_prob":0.8872220516,"action_logp":-0.1196600124,"action_dist_inputs":[-1.0283466578,1.034327507],"value_targets":29.6552295685} 
+{"eps_id":894432818,"obs":[-0.0008224571,-0.3882031143,-0.0236833971,0.2303530425],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0085865194,-0.1927508861,-0.0190763362,-0.0697053522],"action_prob":0.6883157492,"action_logp":-0.3735076189,"action_dist_inputs":[-0.3944296837,0.3978273273],"value_targets":28.9446773529} +{"eps_id":894432818,"obs":[-0.0085865194,-0.1927508861,-0.0190763362,-0.0697053522],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0124415373,-0.387594223,-0.0204704441,0.2168983668],"action_prob":0.6943265796,"action_logp":-0.364812851,"action_dist_inputs":[0.4100174606,-0.4104076326],"value_targets":28.2269458771} +{"eps_id":894432818,"obs":[-0.0124415373,-0.387594223,-0.0204704441,0.2168983668],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0201934222,-0.1921856999,-0.0161324758,-0.0821708813],"action_prob":0.6759466529,"action_logp":-0.39164114,"action_dist_inputs":[-0.3660031557,0.3692028224],"value_targets":27.5019664764} +{"eps_id":894432818,"obs":[-0.0201934222,-0.1921856999,-0.0161324758,-0.0821708813],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0240371358,-0.387072742,-0.0177758932,0.2053788751],"action_prob":0.7041810751,"action_logp":-0.3507197201,"action_dist_inputs":[0.4333323538,-0.4339556694],"value_targets":26.7696628571} +{"eps_id":894432818,"obs":[-0.0240371358,-0.387072742,-0.0177758932,0.2053788751],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0317785889,-0.191701144,-0.0136683164,-0.0928580388],"action_prob":0.6651245356,"action_logp":-0.4077809751,"action_dist_inputs":[-0.3416029513,0.3446127474],"value_targets":26.0299625397} 
+{"eps_id":894432818,"obs":[-0.0317785889,-0.191701144,-0.0136683164,-0.0928580388],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.035612613,0.0036140168,-0.015525477,-0.3898217678],"action_prob":0.2875702381,"action_logp":-1.2462881804,"action_dist_inputs":[0.4531872869,-0.4540269375],"value_targets":25.2827911377} +{"eps_id":894432818,"obs":[-0.035612613,0.0036140168,-0.015525477,-0.3898217678],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0355403312,-0.1912841797,-0.0233219117,-0.1020740271],"action_prob":0.8828625083,"action_logp":-0.1245858222,"action_dist_inputs":[1.0077719688,-1.0120487213],"value_targets":24.5280704498} +{"eps_id":894432818,"obs":[-0.0355403312,-0.1912841797,-0.0233219117,-0.1020740271],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.039366018,0.0041641095,-0.0253633931,-0.4020227492],"action_prob":0.2636460066,"action_logp":-1.3331480026,"action_dist_inputs":[0.5129977465,-0.514105916],"value_targets":23.7657279968} +{"eps_id":894432818,"obs":[-0.039366018,0.0041641095,-0.0253633931,-0.4020227492],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0392827354,-0.1905890852,-0.0334038474,-0.1174429581],"action_prob":0.8895568848,"action_logp":-0.1170317903,"action_dist_inputs":[1.0408506393,-1.0453726053],"value_targets":22.9956855774} +{"eps_id":894432818,"obs":[-0.0392827354,-0.1905890852,-0.0334038474,-0.1174429581],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0430945158,-0.3852168918,-0.0357527062,0.1645169854],"action_prob":0.7636711001,"action_logp":-0.2696180642,"action_dist_inputs":[0.5857281685,-0.5871846676],"value_targets":22.2178649902} 
+{"eps_id":894432818,"obs":[-0.0430945158,-0.3852168918,-0.0357527062,0.1645169854],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0507988557,-0.1896018684,-0.0324623659,-0.1392270923],"action_prob":0.5625437498,"action_logp":-0.5752863884,"action_dist_inputs":[-0.1246174723,0.1268747151],"value_targets":21.4321861267} +{"eps_id":894432818,"obs":[-0.0507988557,-0.1896018684,-0.0324623659,-0.1392270923],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.054590892,-0.3842442036,-0.0352469087,0.1430403143],"action_prob":0.7804825306,"action_logp":-0.2478429526,"action_dist_inputs":[0.63333112,-0.6351494789],"value_targets":20.6385707855} +{"eps_id":894432818,"obs":[-0.054590892,-0.3842442036,-0.0352469087,0.1430403143],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0622757748,-0.1886356473,-0.0323861018,-0.1605506986],"action_prob":0.5292872787,"action_logp":-0.6362239122,"action_dist_inputs":[-0.0576835871,0.0595997982],"value_targets":19.8369407654} +{"eps_id":894432818,"obs":[-0.0622757748,-0.1886356473,-0.0323861018,-0.1605506986],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.066048488,-0.3832793534,-0.0355971158,0.1217418835],"action_prob":0.7960926294,"action_logp":-0.2280397266,"action_dist_inputs":[0.6799355745,-0.6821142435],"value_targets":19.0272140503} +{"eps_id":894432818,"obs":[-0.066048488,-0.3832793534,-0.0355971158,0.1217418835],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0737140775,-0.5778737068,-0.0331622809,0.4029851556],"action_prob":0.5052932501,"action_logp":-0.6826162934,"action_dist_inputs":[0.0113643743,-0.0098095583],"value_targets":18.2093067169} 
+{"eps_id":894432818,"obs":[-0.0737140775,-0.5778737068,-0.0331622809,0.4029851556],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0852715522,-0.7725099921,-0.0251025762,0.6850311756],"action_prob":0.194906503,"action_logp":-1.6352353096,"action_dist_inputs":[-0.706887126,0.7115513086],"value_targets":17.3831367493} +{"eps_id":894432818,"obs":[-0.0852715522,-0.7725099921,-0.0251025762,0.6850311756],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1007217467,-0.5770486593,-0.0114019532,0.3845521808],"action_prob":0.9164810777,"action_logp":-0.0872138739,"action_dist_inputs":[-1.1943885088,1.2010800838],"value_targets":16.5486240387} +{"eps_id":894432818,"obs":[-0.1007217467,-0.5770486593,-0.0114019532,0.3845521808],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1122627258,-0.3817667365,-0.0037109097,0.0882961899],"action_prob":0.8162487745,"action_logp":-0.2030361295,"action_dist_inputs":[-0.7433387637,0.7477976084],"value_targets":15.7056808472} +{"eps_id":894432818,"obs":[-0.1122627258,-0.3817667365,-0.0037109097,0.0882961899],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1198980585,-0.1865917891,-0.0019449859,-0.2055552155],"action_prob":0.5154300928,"action_logp":-0.6627536416,"action_dist_inputs":[-0.0303259715,0.0314138569],"value_targets":14.8542232513} +{"eps_id":894432818,"obs":[-0.1198980585,-0.1865917891,-0.0019449859,-0.2055552155],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1236298904,-0.3816858828,-0.0060560904,0.0865135267],"action_prob":0.7981311083,"action_logp":-0.2254823744,"action_dist_inputs":[0.6858962774,-0.6887581348],"value_targets":13.9941644669} 
+{"eps_id":894432818,"obs":[-0.1236298904,-0.3816858828,-0.0060560904,0.0865135267],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1312636137,-0.5767204762,-0.0043258197,0.3772796094],"action_prob":0.4915655553,"action_logp":-0.7101599574,"action_dist_inputs":[-0.0164053012,0.0173356533],"value_targets":13.125418663} +{"eps_id":894432818,"obs":[-0.1312636137,-0.5767204762,-0.0043258197,0.3772796094],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1427980214,-0.3815373778,0.0032197726,0.0832358822],"action_prob":0.8186681271,"action_logp":-0.2000765204,"action_dist_inputs":[-0.7515816689,0.7557681799],"value_targets":12.2478981018} +{"eps_id":894432818,"obs":[-0.1427980214,-0.3815373778,0.0032197726,0.0832358822],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.150428772,-0.1864617169,0.0048844903,-0.2084294558],"action_prob":0.5258319974,"action_logp":-0.6427735686,"action_dist_inputs":[-0.0513024032,0.0521175526],"value_targets":11.3615131378} +{"eps_id":894432818,"obs":[-0.150428772,-0.1864617169,0.0048844903,-0.2084294558],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1541579962,-0.3816531599,0.000715901,0.0857902616],"action_prob":0.793243587,"action_logp":-0.2316249311,"action_dist_inputs":[0.6707541943,-0.6738349199],"value_targets":10.4661741257} +{"eps_id":894432818,"obs":[-0.1541579962,-0.3816531599,0.000715901,0.0857902616],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1617910713,-0.1865414977,0.0024317061,-0.206666708],"action_prob":0.5256267786,"action_logp":-0.6431639194,"action_dist_inputs":[-0.0509441532,0.0516527034],"value_targets":9.5617923737} 
+{"eps_id":894432818,"obs":[-0.1617910713,-0.1865414977,0.0024317061,-0.206666708],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1655218899,-0.3816981316,-0.0017016281,0.086782299],"action_prob":0.7944043279,"action_logp":-0.2301626951,"action_dist_inputs":[0.6742483377,-0.6774327159],"value_targets":8.6482753754} +{"eps_id":894432818,"obs":[-0.1655218899,-0.3816981316,-0.0017016281,0.086782299],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1731558591,-0.5767956376,0.0000340179,0.3789278865],"action_prob":0.4769122601,"action_logp":-0.7404227257,"action_dist_inputs":[-0.0459151082,0.046501454],"value_targets":7.7255306244} +{"eps_id":894432818,"obs":[-0.1731558591,-0.5767956376,0.0000340179,0.3789278865],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1846917719,-0.3816741705,0.0076125753,0.086255677],"action_prob":0.824351728,"action_logp":-0.1931580007,"action_dist_inputs":[-0.771132946,0.7749806643],"value_targets":6.7934651375} +{"eps_id":894432818,"obs":[-0.1846917719,-0.3816741705,0.0076125753,0.086255677],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1923252493,-0.5769044161,0.0093376888,0.3813306391],"action_prob":0.4548828304,"action_logp":-0.787715435,"action_dist_inputs":[-0.0902293026,0.0907316804],"value_targets":5.8519849777} +{"eps_id":894432818,"obs":[-0.1923252493,-0.5769044161,0.0093376888,0.3813306391],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2038633376,-0.3819162846,0.0169643015,0.0916064307],"action_prob":0.835082233,"action_logp":-0.1802250892,"action_dist_inputs":[-0.8091645241,0.8129188418],"value_targets":4.9009947777} 
+{"eps_id":894432818,"obs":[-0.2038633376,-0.3819162846,0.0169643015,0.0916064307],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2115016729,-0.1870415509,0.0187964309,-0.1956763566],"action_prob":0.5763754249,"action_logp":-0.5509960651,"action_dist_inputs":[-0.153717041,0.1541944593],"value_targets":3.9403989315} +{"eps_id":894432818,"obs":[-0.2115016729,-0.1870415509,0.0187964309,-0.1956763566],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2152425051,0.0078065558,0.0148829026,-0.482371062],"action_prob":0.2354987711,"action_logp":-1.446049571,"action_dist_inputs":[0.5871177912,-0.5904000998],"value_targets":2.970099926} +{"eps_id":894432818,"obs":[-0.2152425051,0.0078065558,0.0148829026,-0.482371062],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2150863707,-0.1875222623,0.0052354815,-0.1850348711],"action_prob":0.9006795883,"action_logp":-0.1046057194,"action_dist_inputs":[1.0993050337,-1.1054931879],"value_targets":1.9900000095} +{"eps_id":894432818,"obs":[-0.2150863707,-0.1875222623,0.0052354815,-0.1850348711],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.2188368142,-0.3827187419,0.0015347842,0.1092950404],"action_prob":0.7719677091,"action_logp":-0.2588125467,"action_dist_inputs":[0.6080585122,-0.6113970876],"value_targets":1.0} +{"eps_id":1050046105,"obs":[0.0347725712,-0.0160310343,-0.0132595934,-0.0168379899],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0344519503,0.1792785376,-0.0135963531,-0.3136748075],"action_prob":0.4564121366,"action_logp":-0.7843590975,"action_dist_inputs":[0.0874277502,-0.0873674899],"value_targets":86.6020355225} 
+{"eps_id":1050046105,"obs":[0.0344519503,0.1792785376,-0.0135963531,-0.3136748075],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0380375199,-0.0156471133,-0.0198698491,-0.0253105555],"action_prob":0.8463360667,"action_logp":-0.1668387651,"action_dist_inputs":[0.851442337,-0.8547061682],"value_targets":86.4666976929} +{"eps_id":1050046105,"obs":[0.0380375199,-0.0156471133,-0.0198698491,-0.0253105555],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0377245806,-0.2104785591,-0.0203760602,0.2610375881],"action_prob":0.5743224621,"action_logp":-0.5545642376,"action_dist_inputs":[0.1497352123,-0.1497738063],"value_targets":86.3300018311} +{"eps_id":1050046105,"obs":[0.0377245806,-0.2104785591,-0.0203760602,0.2610375881],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.03351501,-0.0150717581,-0.0151553089,-0.0380019322],"action_prob":0.7926357388,"action_logp":-0.2323915362,"action_dist_inputs":[-0.6688676476,0.6720191836],"value_targets":86.1919174194} +{"eps_id":1050046105,"obs":[0.03351501,-0.0150717581,-0.0151553089,-0.0380019322],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0332135744,-0.2099731416,-0.0159153473,0.2498610765],"action_prob":0.5849553943,"action_logp":-0.5362196565,"action_dist_inputs":[0.1714945883,-0.1716551185],"value_targets":86.052444458} +{"eps_id":1050046105,"obs":[0.0332135744,-0.2099731416,-0.0159153473,0.2498610765],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0290141106,-0.0146275666,-0.0109181255,-0.0477990843],"action_prob":0.78789258,"action_logp":-0.2383935153,"action_dist_inputs":[-0.6546046138,0.6576643586],"value_targets":85.9115600586} 
+{"eps_id":1050046105,"obs":[0.0290141106,-0.0146275666,-0.0109181255,-0.0477990843],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0287215598,0.1806492209,-0.0118741076,-0.3439066708],"action_prob":0.4083532691,"action_logp":-0.895622611,"action_dist_inputs":[0.1852584183,-0.185518682],"value_targets":85.7692489624} +{"eps_id":1050046105,"obs":[0.0287215598,0.1806492209,-0.0118741076,-0.3439066708],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0323345438,-0.0143018076,-0.0187522415,-0.0549916513],"action_prob":0.8606978059,"action_logp":-0.1500117928,"action_dist_inputs":[0.9087700248,-0.912327826],"value_targets":85.62550354} +{"eps_id":1050046105,"obs":[0.0323345438,-0.0143018076,-0.0187522415,-0.0549916513],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0320485085,-0.2091499269,-0.0198520739,0.231716305],"action_prob":0.6212099791,"action_logp":-0.4760861695,"action_dist_inputs":[0.2471626699,-0.2475243211],"value_targets":85.4803085327} +{"eps_id":1050046105,"obs":[0.0320485085,-0.2091499269,-0.0198520739,0.231716305],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0278655086,-0.0137500195,-0.0152177485,-0.0671619326],"action_prob":0.7629650235,"action_logp":-0.2705430984,"action_dist_inputs":[-0.5830585957,0.5859459043],"value_targets":85.3336486816} +{"eps_id":1050046105,"obs":[0.0278655086,-0.0137500195,-0.0152177485,-0.0671619326],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0275905076,-0.2086505294,-0.0165609866,0.2206811309],"action_prob":0.6328017116,"action_logp":-0.4575981796,"action_dist_inputs":[0.2718823552,-0.2723727226],"value_targets":85.1855010986} 
+{"eps_id":1050046105,"obs":[0.0275905076,-0.2086505294,-0.0165609866,0.2206811309],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0234174971,-0.4035318792,-0.0121473642,0.5080943704],"action_prob":0.2443304807,"action_logp":-1.4092335701,"action_dist_inputs":[-0.5631479621,0.5659345388],"value_targets":85.0358581543} +{"eps_id":1050046105,"obs":[0.0234174971,-0.4035318792,-0.0121473642,0.5080943704],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0153468605,-0.2082409114,-0.0019854764,0.2116082907],"action_prob":0.9084831476,"action_logp":-0.0959789604,"action_dist_inputs":[-1.1449195147,1.150333643],"value_targets":84.8847045898} +{"eps_id":1050046105,"obs":[0.0153468605,-0.2082409114,-0.0019854764,0.2116082907],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0111820418,-0.0130906263,0.0022466895,-0.0817002878],"action_prob":0.7685046792,"action_logp":-0.2633086443,"action_dist_inputs":[-0.5985910296,0.6012960672],"value_targets":84.7320251465} +{"eps_id":1050046105,"obs":[0.0111820418,-0.0130906263,0.0022466895,-0.0817002878],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0109202294,-0.2082447112,0.0006126837,0.2116906345],"action_prob":0.6174641252,"action_logp":-0.4821343422,"action_dist_inputs":[0.2390775681,-0.2397209257],"value_targets":84.5778045654} +{"eps_id":1050046105,"obs":[0.0109202294,-0.2082447112,0.0006126837,0.2116906345],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0067553353,-0.0131315254,0.0048464965,-0.0807989687],"action_prob":0.7725752592,"action_logp":-0.2580258846,"action_dist_inputs":[-0.6101148725,0.6127951741],"value_targets":84.4220275879} 
+{"eps_id":1050046105,"obs":[0.0067553353,-0.0131315254,0.0048464965,-0.0807989687],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0064927046,-0.2083226144,0.003230517,0.213409096],"action_prob":0.6099994779,"action_logp":-0.4942971766,"action_dist_inputs":[0.2233258486,-0.2239841521],"value_targets":84.2646713257} +{"eps_id":1050046105,"obs":[0.0064927046,-0.2083226144,0.003230517,0.213409096],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0023262524,-0.0132469991,0.0074986988,-0.0782530308],"action_prob":0.7782847881,"action_logp":-0.2506627738,"action_dist_inputs":[-0.6265155077,0.6291834712],"value_targets":84.1057281494} +{"eps_id":1050046105,"obs":[0.0023262524,-0.0132469991,0.0074986988,-0.0782530308],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0020613123,0.181766659,0.0059336382,-0.3685607016],"action_prob":0.4004488885,"action_logp":-0.9151691198,"action_dist_inputs":[0.2014686763,-0.2021264732],"value_targets":83.9451828003} +{"eps_id":1050046105,"obs":[0.0020613123,0.181766659,0.0059336382,-0.3685607016],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0056966455,-0.0134391012,-0.0014375757,-0.074012734],"action_prob":0.8611169457,"action_logp":-0.1495249271,"action_dist_inputs":[0.9103732705,-0.9142249823],"value_targets":83.7830123901} +{"eps_id":1050046105,"obs":[0.0056966455,-0.0134391012,-0.0014375757,-0.074012734],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0054278634,-0.2085404098,-0.0029178304,0.2182162851],"action_prob":0.6127890348,"action_logp":-0.4897345901,"action_dist_inputs":[0.2291939259,-0.2298570126],"value_targets":83.6192016602} 
+{"eps_id":1050046105,"obs":[0.0054278634,-0.2085404098,-0.0029178304,0.2182162851],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0012570552,-0.0133768739,0.0014464954,-0.0753856152],"action_prob":0.774384737,"action_logp":-0.2556864321,"action_dist_inputs":[-0.6152964234,0.6179415584],"value_targets":83.453742981} +{"eps_id":1050046105,"obs":[0.0012570552,-0.0133768739,0.0014464954,-0.0753856152],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0009895178,0.1817243099,-0.0000612169,-0.3676118255],"action_prob":0.3916267753,"action_logp":-0.9374459982,"action_dist_inputs":[0.2198913395,-0.2205879539],"value_targets":83.286605835} +{"eps_id":1050046105,"obs":[0.0009895178,0.1817243099,-0.0000612169,-0.3676118255],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.004624004,-0.0133967707,-0.0074134534,-0.0749481991],"action_prob":0.8648385406,"action_logp":-0.1452124268,"action_dist_inputs":[0.9260800481,-0.9299929738],"value_targets":83.1177825928} +{"eps_id":1050046105,"obs":[0.004624004,-0.0133967707,-0.0074134534,-0.0749481991],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0043560686,-0.2084116638,-0.0089124171,0.2153865695],"action_prob":0.627396822,"action_logp":-0.466176033,"action_dist_inputs":[0.2601640224,-0.2609010935],"value_targets":82.9472579956} +{"eps_id":1050046105,"obs":[0.0043560686,-0.2084116638,-0.0089124171,0.2153865695],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0001878353,-0.013163439,-0.0046046861,-0.0800943524],"action_prob":0.7622515559,"action_logp":-0.271478653,"action_dist_inputs":[-0.5812481046,0.5838153362],"value_targets":82.7750091553} 
+{"eps_id":1050046105,"obs":[0.0001878353,-0.013163439,-0.0046046861,-0.0800943524],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0000754335,-0.2082190812,-0.006206573,0.2111322284],"action_prob":0.6293219924,"action_logp":-0.4631122351,"action_dist_inputs":[0.2642515898,-0.2650577724],"value_targets":82.601020813} +{"eps_id":1050046105,"obs":[-0.0000754335,-0.2082190812,-0.006206573,0.2111322284],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0042398148,-0.4032517374,-0.0019839283,0.5018508434],"action_prob":0.2382501364,"action_logp":-1.4344341755,"action_dist_inputs":[-0.579893291,0.5824040174],"value_targets":82.4252700806} +{"eps_id":1050046105,"obs":[-0.0042398148,-0.4032517374,-0.0019839283,0.5018508434],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0123048499,-0.2081018835,0.0080530895,0.20854339],"action_prob":0.9099096656,"action_logp":-0.0944099799,"action_dist_inputs":[-1.1536507607,1.1588810682],"value_targets":82.2477493286} +{"eps_id":1050046105,"obs":[-0.0123048499,-0.2081018835,0.0080530895,0.20854339],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0164668877,-0.0130960019,0.0122239571,-0.0815883651],"action_prob":0.7802678943,"action_logp":-0.2481179535,"action_dist_inputs":[-0.6323760152,0.6348522305],"value_targets":82.0684280396} +{"eps_id":1050046105,"obs":[-0.0164668877,-0.0130960019,0.0122239571,-0.0815883651],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0167288072,-0.2083910257,0.0105921896,0.2149260491],"action_prob":0.5932484269,"action_logp":-0.522141993,"action_dist_inputs":[0.1882836521,-0.1891272217],"value_targets":81.8873062134} 
+{"eps_id":1050046105,"obs":[-0.0167288072,-0.2083910257,0.0105921896,0.2149260491],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0208966285,-0.0134220989,0.0148907108,-0.0743969008],"action_prob":0.7900830507,"action_logp":-0.2356172204,"action_dist_inputs":[-0.6614636183,0.6639626026],"value_targets":81.7043457031} +{"eps_id":1050046105,"obs":[-0.0208966285,-0.0134220989,0.0148907108,-0.0743969008],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0211650711,0.1814832538,0.0134027721,-0.3623448014],"action_prob":0.4255971313,"action_logp":-0.8542621136,"action_dist_inputs":[0.1495185345,-0.1503193825],"value_targets":81.5195465088} +{"eps_id":1050046105,"obs":[-0.0211650711,0.1814832538,0.0134027721,-0.3623448014],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0175354052,-0.0138266124,0.0061558764,-0.0654660314],"action_prob":0.8534570336,"action_logp":-0.1584600657,"action_dist_inputs":[0.87900877,-0.8829679489],"value_targets":81.3328704834} +{"eps_id":1050046105,"obs":[-0.0175354052,-0.0138266124,0.0061558764,-0.0654660314],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0178119373,0.1812065393,0.0048465556,-0.356200397],"action_prob":0.4203864336,"action_logp":-0.8665809035,"action_dist_inputs":[0.160213232,-0.1609740257],"value_targets":81.144317627} +{"eps_id":1050046105,"obs":[-0.0178119373,0.1812065393,0.0048465556,-0.356200397],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0141878063,-0.0139839798,-0.0022774525,-0.061993178],"action_prob":0.8567323089,"action_logp":-0.154629752,"action_dist_inputs":[0.892226398,-0.896184206],"value_targets":80.9538574219} 
+{"eps_id":1050046105,"obs":[-0.0141878063,-0.0139839798,-0.0022774525,-0.061993178],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0144674862,-0.2090732008,-0.003517316,0.229970336],"action_prob":0.5934307575,"action_logp":-0.5218347311,"action_dist_inputs":[0.1886991262,-0.1894671619],"value_targets":80.76146698} +{"eps_id":1050046105,"obs":[-0.0144674862,-0.2090732008,-0.003517316,0.229970336],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0186489504,-0.0139011675,0.0010820907,-0.0638200045],"action_prob":0.7855873704,"action_logp":-0.2413236052,"action_dist_inputs":[-0.6480031013,0.6505264044],"value_targets":80.5671386719} +{"eps_id":1050046105,"obs":[-0.0186489504,-0.0139011675,0.0010820907,-0.0638200045],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0189269744,0.1812052578,-0.0001943094,-0.3561613262],"action_prob":0.4114928842,"action_logp":-0.8879635334,"action_dist_inputs":[0.1784977168,-0.1792995334],"value_targets":80.3708496094} +{"eps_id":1050046105,"obs":[-0.0189269744,0.1812052578,-0.0001943094,-0.3561613262],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0153028686,0.3763299584,-0.007317536,-0.6489055157],"action_prob":0.139524281,"action_logp":-1.9695166349,"action_dist_inputs":[0.9076155424,-0.9116311669],"value_targets":80.1725769043} +{"eps_id":1050046105,"obs":[-0.0153028686,0.3763299584,-0.007317536,-0.6489055157],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0077762692,0.1813107133,-0.020295646,-0.3585357964],"action_prob":0.9296240807,"action_logp":-0.0729749799,"action_dist_inputs":[1.2871574163,-1.2937715054],"value_targets":79.9722976685} 
+{"eps_id":1050046105,"obs":[-0.0077762692,0.1813107133,-0.020295646,-0.3585357964],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0041500549,-0.0135169225,-0.0274663623,-0.0723210499],"action_prob":0.874296844,"action_logp":-0.1343353242,"action_dist_inputs":[0.9676877856,-0.9718087316],"value_targets":79.7699966431} +{"eps_id":1050046105,"obs":[-0.0041500549,-0.0135169225,-0.0274663623,-0.0723210499],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0044203936,0.1819878072,-0.0289127827,-0.3735416234],"action_prob":0.3350438178,"action_logp":-1.0934939384,"action_dist_inputs":[0.3422353268,-0.3432244956],"value_targets":79.5656509399} +{"eps_id":1050046105,"obs":[-0.0044203936,0.1819878072,-0.0289127827,-0.3735416234],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0007806373,-0.0127117513,-0.0363836177,-0.0901135281],"action_prob":0.8843047023,"action_logp":-0.1229536086,"action_dist_inputs":[1.0147712231,-1.0190706253],"value_targets":79.3592453003} +{"eps_id":1050046105,"obs":[-0.0007806373,-0.0127117513,-0.0363836177,-0.0901135281],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0010348723,0.182912305,-0.038185887,-0.3940498531],"action_prob":0.2933509648,"action_logp":-1.2263855934,"action_dist_inputs":[0.438975662,-0.4401889145],"value_targets":79.1507568359} +{"eps_id":1050046105,"obs":[-0.0010348723,0.182912305,-0.038185887,-0.3940498531],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0026233739,-0.0116475709,-0.046066884,-0.1136466935],"action_prob":0.8944325447,"action_logp":-0.1115658209,"action_dist_inputs":[1.0661528111,-1.0706865788],"value_targets":78.9401550293} 
+{"eps_id":1050046105,"obs":[0.0026233739,-0.0116475709,-0.046066884,-0.1136466935],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0023904224,0.1841031611,-0.0483398177,-0.4205001593],"action_prob":0.2497337312,"action_logp":-1.3873599768,"action_dist_inputs":[0.5492601991,-0.5507725477],"value_targets":78.727432251} +{"eps_id":1050046105,"obs":[0.0023904224,0.1841031611,-0.0483398177,-0.4205001593],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0060724858,-0.0103017315,-0.0567498207,-0.1434398293],"action_prob":0.9040902853,"action_logp":-0.1008260548,"action_dist_inputs":[1.119346261,-1.1241755486],"value_targets":78.5125579834} +{"eps_id":1050046105,"obs":[0.0060724858,-0.0103017315,-0.0567498207,-0.1434398293],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0058664512,-0.2045669556,-0.0596186183,0.1308134198],"action_prob":0.7923903465,"action_logp":-0.2327011377,"action_dist_inputs":[0.6687485576,-0.670646131],"value_targets":78.2955093384} +{"eps_id":1050046105,"obs":[0.0058664512,-0.2045669556,-0.0596186183,0.1308134198],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0017751118,-0.3987864554,-0.0570023507,0.4041075706],"action_prob":0.4711170793,"action_logp":-0.7526486516,"action_dist_inputs":[-0.0570783168,0.0585821532],"value_targets":78.0762710571} +{"eps_id":1050046105,"obs":[0.0017751118,-0.3987864554,-0.0570023507,0.4041075706],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0062006176,-0.2029043883,-0.0489201955,0.0940120667],"action_prob":0.827521503,"action_logp":-0.1893201619,"action_dist_inputs":[-0.781935513,0.7862272859],"value_targets":77.8548202515} 
+{"eps_id":1050046105,"obs":[-0.0062006176,-0.2029043883,-0.0489201955,0.0940120667],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0102587054,-0.0071166409,-0.0470399559,-0.2136949897],"action_prob":0.4905519783,"action_logp":-0.7122240067,"action_dist_inputs":[0.019463649,-0.0183328167],"value_targets":77.6311340332} +{"eps_id":1050046105,"obs":[-0.0102587054,-0.0071166409,-0.0470399559,-0.2136949897],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0104010375,-0.2015356123,-0.0513138548,0.0637862086],"action_prob":0.8322262168,"action_logp":-0.1836509854,"action_dist_inputs":[0.799400866,-0.8020867705],"value_targets":77.4051818848} +{"eps_id":1050046105,"obs":[-0.0104010375,-0.2015356123,-0.0513138548,0.0637862086],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0144317504,-0.3958857358,-0.0500381328,0.3398480117],"action_prob":0.5662781596,"action_logp":-0.5686698556,"action_dist_inputs":[0.1337185949,-0.1329634935],"value_targets":77.1769561768} +{"eps_id":1050046105,"obs":[-0.0144317504,-0.3958857358,-0.0500381328,0.3398480117],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0223494638,-0.5902612805,-0.043241173,0.6163413525],"action_prob":0.221172452,"action_logp":-1.5088125467,"action_dist_inputs":[-0.6275129914,0.6313338876],"value_targets":76.9464187622} +{"eps_id":1050046105,"obs":[-0.0223494638,-0.5902612805,-0.043241173,0.6163413525],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0341546908,-0.3945627511,-0.0309143439,0.3103590012],"action_prob":0.9098750949,"action_logp":-0.094447948,"action_dist_inputs":[-1.153036952,1.1590739489],"value_targets":76.7135543823} 
+{"eps_id":1050046105,"obs":[-0.0341546908,-0.3945627511,-0.0309143439,0.3103590012],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0420459472,-0.199014321,-0.0247071628,0.0080891224],"action_prob":0.7764765024,"action_logp":-0.2529889047,"action_dist_inputs":[-0.6208360791,0.6244137883],"value_targets":76.4783401489} +{"eps_id":1050046105,"obs":[-0.0420459472,-0.199014321,-0.0247071628,0.0080891224],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0460262336,-0.0035469006,-0.0245453808,-0.2922856808],"action_prob":0.4036968052,"action_logp":-0.9070912004,"action_dist_inputs":[0.195099026,-0.1949861497],"value_targets":76.2407455444} +{"eps_id":1050046105,"obs":[-0.0460262336,-0.0035469006,-0.0245453808,-0.2922856808],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0460971706,-0.1983104348,-0.0303910952,-0.0074439896],"action_prob":0.8565198183,"action_logp":-0.1548777968,"action_dist_inputs":[0.8915526271,-0.8951283097],"value_targets":76.0007553101} +{"eps_id":1050046105,"obs":[-0.0460971706,-0.1983104348,-0.0303910952,-0.0074439896],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0500633791,-0.3929836452,-0.0305399746,0.2754973769],"action_prob":0.6317885518,"action_logp":-0.4592005014,"action_dist_inputs":[0.2698653936,-0.2700321972],"value_targets":75.7583389282} +{"eps_id":1050046105,"obs":[-0.0500633791,-0.3929836452,-0.0305399746,0.2754973769],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0579230525,-0.5876568556,-0.0250300262,0.5583936572],"action_prob":0.262314558,"action_logp":-1.3382109404,"action_dist_inputs":[-0.5153896809,0.5185835361],"value_targets":75.5134735107} 
+{"eps_id":1050046105,"obs":[-0.0579230525,-0.5876568556,-0.0250300262,0.5583936572],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0696761906,-0.3921926618,-0.0138621535,0.2579311728],"action_prob":0.9001594186,"action_logp":-0.1051834151,"action_dist_inputs":[-1.0966920853,1.1023045778],"value_targets":75.26612854} +{"eps_id":1050046105,"obs":[-0.0696761906,-0.3921926618,-0.0138621535,0.2579311728],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0775200427,-0.1968755871,-0.0087035298,-0.0390916504],"action_prob":0.7451117635,"action_logp":-0.2942210138,"action_dist_inputs":[-0.5348551869,0.5378540158],"value_targets":75.0162963867} +{"eps_id":1050046105,"obs":[-0.0775200427,-0.1968755871,-0.0087035298,-0.0390916504],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0814575553,-0.3918716609,-0.009485363,0.2508325577],"action_prob":0.6330341101,"action_logp":-0.457231015,"action_dist_inputs":[0.2723292112,-0.2729260921],"value_targets":74.7639312744} +{"eps_id":1050046105,"obs":[-0.0814575553,-0.3918716609,-0.009485363,0.2508325577],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0892949849,-0.5868569016,-0.0044687116,0.5405085683],"action_prob":0.2557786107,"action_logp":-1.363443017,"action_dist_inputs":[-0.532581389,0.5354449153],"value_targets":74.5090255737} +{"eps_id":1050046105,"obs":[-0.0892949849,-0.5868569016,-0.0044687116,0.5405085683],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.101032123,-0.3916724026,0.0063414606,0.2464209944],"action_prob":0.9033998847,"action_logp":-0.1015900001,"action_dist_inputs":[-1.1150830984,1.1205016375],"value_targets":74.2515411377} 
+{"eps_id":1050046105,"obs":[-0.101032123,-0.3916724026,0.0063414606,0.2464209944],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.108865574,-0.5868843198,0.0112698805,0.5410974026],"action_prob":0.2352926135,"action_logp":-1.4469254017,"action_dist_inputs":[-0.5879542828,0.5907091498],"value_targets":73.9914550781} +{"eps_id":1050046105,"obs":[-0.108865574,-0.5868843198,0.0112698805,0.5410974026],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1206032634,-0.3919225931,0.0220918283,0.2519866228],"action_prob":0.9095447063,"action_logp":-0.094811134,"action_dist_inputs":[-1.1513519287,1.1567360163],"value_targets":73.7287445068} +{"eps_id":1050046105,"obs":[-0.1206032634,-0.3919225931,0.0220918283,0.2519866228],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1284417063,-0.1971229613,0.0271315612,-0.0336470529],"action_prob":0.7926442623,"action_logp":-0.2323807776,"action_dist_inputs":[-0.6691111922,0.6718271971],"value_targets":73.4633712769} +{"eps_id":1050046105,"obs":[-0.1284417063,-0.1971229613,0.0271315612,-0.0336470529],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1323841661,-0.3926232755,0.026458621,0.2674711943],"action_prob":0.5417240858,"action_logp":-0.6129984856,"action_dist_inputs":[0.0833150297,-0.0839702263],"value_targets":73.1953277588} +{"eps_id":1050046105,"obs":[-0.1323841661,-0.3926232755,0.026458621,0.2674711943],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.140236631,-0.197888732,0.0318080448,-0.0167504717],"action_prob":0.811442852,"action_logp":-0.2089412808,"action_dist_inputs":[-0.7283368707,0.7310758829],"value_targets":72.9245758057} 
+{"eps_id":1050046105,"obs":[-0.140236631,-0.197888732,0.0318080448,-0.0167504717],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1441944093,-0.0032370365,0.0314730331,-0.2992303371],"action_prob":0.4993323088,"action_logp":-0.694483459,"action_dist_inputs":[0.0010589757,-0.0016118069],"value_targets":72.6510848999} +{"eps_id":1050046105,"obs":[-0.1441944093,-0.0032370365,0.0314730331,-0.2992303371],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1442591548,-0.198793143,0.0254884288,0.0032101078],"action_prob":0.8165245056,"action_logp":-0.2026983351,"action_dist_inputs":[0.7445037961,-0.7484720349],"value_targets":72.3748321533} +{"eps_id":1050046105,"obs":[-0.1442591548,-0.198793143,0.0254884288,0.0032101078],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1482350081,-0.3942711949,0.0255526304,0.3038247228],"action_prob":0.4805796444,"action_logp":-0.7327623367,"action_dist_inputs":[-0.0390879586,0.0386325605],"value_targets":72.0957946777} +{"eps_id":1050046105,"obs":[-0.1482350081,-0.3942711949,0.0255526304,0.3038247228],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1561204344,-0.19952254,0.0316291228,0.0193087682],"action_prob":0.8367434144,"action_logp":-0.1782377958,"action_dist_inputs":[-0.8156839013,0.8185107112],"value_targets":71.8139266968} +{"eps_id":1050046105,"obs":[-0.1561204344,-0.19952254,0.0316291228,0.0193087682],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1601108909,-0.395083487,0.0320153013,0.3218008578],"action_prob":0.4376428723,"action_logp":-0.8263520598,"action_dist_inputs":[-0.1255500913,0.125183776],"value_targets":71.5292205811} 
+{"eps_id":1050046105,"obs":[-0.1601108909,-0.395083487,0.0320153013,0.3218008578],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1680125594,-0.2004317194,0.0384513177,0.0393837392],"action_prob":0.8526136279,"action_logp":-0.1594487876,"action_dist_inputs":[-0.8761808276,0.8790683746],"value_targets":71.2416381836} +{"eps_id":1050046105,"obs":[-0.1680125594,-0.2004317194,0.0384513177,0.0393837392],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1720211953,-0.0058816457,0.0392389931,-0.2409237623],"action_prob":0.6115991473,"action_logp":-0.4916782379,"action_dist_inputs":[-0.2271437049,0.2268954366],"value_targets":70.9511489868} +{"eps_id":1050046105,"obs":[-0.1720211953,-0.0058816457,0.0392389931,-0.2409237623],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1721388251,0.188658461,0.0344205163,-0.520976007],"action_prob":0.246106267,"action_logp":-1.4019918442,"action_dist_inputs":[0.5579448938,-0.5615430474],"value_targets":70.6577301025} +{"eps_id":1050046105,"obs":[-0.1721388251,0.188658461,0.0344205163,-0.520976007],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1683656573,-0.0069306861,0.0240009967,-0.2176484168],"action_prob":0.9030928016,"action_logp":-0.1019299328,"action_dist_inputs":[1.1129083633,-1.119163394],"value_targets":70.3613433838} +{"eps_id":1050046105,"obs":[-0.1683656573,-0.0069306861,0.0240009967,-0.2176484168],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1685042679,0.1878400892,0.0196480285,-0.5026648045],"action_prob":0.2480088919,"action_logp":-1.3942906857,"action_dist_inputs":[0.5528845787,-0.5563752055],"value_targets":70.061958313} 
+{"eps_id":1050046105,"obs":[-0.1685042679,0.1878400892,0.0196480285,-0.5026648045],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1647474617,-0.0075532096,0.0095947329,-0.203855291],"action_prob":0.9044596553,"action_logp":-0.1004176065,"action_dist_inputs":[1.1207786798,-1.1270104647],"value_targets":69.7595596313} +{"eps_id":1050046105,"obs":[-0.1647474617,-0.0075532096,0.0095947329,-0.203855291],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1648985296,-0.2028110474,0.0055176266,0.0918387994],"action_prob":0.759049356,"action_logp":-0.2756884992,"action_dist_inputs":[0.572006166,-0.575468421],"value_targets":69.4540939331} +{"eps_id":1050046105,"obs":[-0.1648985296,-0.2028110474,0.0055176266,0.0918387994],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1689547598,-0.3980116546,0.0073544024,0.38625741],"action_prob":0.3764042258,"action_logp":-0.9770916104,"action_dist_inputs":[-0.2524375319,0.2524012029],"value_targets":69.1455535889} +{"eps_id":1050046105,"obs":[-0.1689547598,-0.3980116546,0.0073544024,0.38625741],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1769149899,-0.2029948682,0.0150795504,0.0959023312],"action_prob":0.8660828471,"action_logp":-0.1437747329,"action_dist_inputs":[-0.931851387,0.9349074364],"value_targets":68.8338928223} +{"eps_id":1050046105,"obs":[-0.1769149899,-0.2029948682,0.0150795504,0.0959023312],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1809748858,-0.0080922563,0.0169975981,-0.1919851303],"action_prob":0.6500159502,"action_logp":-0.430758357,"action_dist_inputs":[-0.30957973,0.3095296621],"value_targets":68.5190811157} 
+{"eps_id":1050046105,"obs":[-0.1809748858,-0.0080922563,0.0169975981,-0.1919851303],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1811367273,-0.2034531832,0.0131578948,0.1060109884],"action_prob":0.7326557636,"action_logp":-0.3110793233,"action_dist_inputs":[0.5023568869,-0.5057818294],"value_targets":68.2010955811} +{"eps_id":1050046105,"obs":[-0.1811367273,-0.2034531832,0.0131578948,0.1060109884],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1852057874,-0.0085222451,0.0152781149,-0.1824918091],"action_prob":0.66099298,"action_logp":-0.4140120149,"action_dist_inputs":[-0.3338742852,0.3338481486],"value_targets":67.8798904419} +{"eps_id":1050046105,"obs":[-0.1852057874,-0.0085222451,0.0152781149,-0.1824918091],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1853762418,-0.2038594335,0.0116282785,0.114971377],"action_prob":0.7235683203,"action_logp":-0.3235603273,"action_dist_inputs":[0.4794209599,-0.4828101099],"value_targets":67.5554504395} +{"eps_id":1050046105,"obs":[-0.1853762418,-0.2038594335,0.0116282785,0.114971377],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.189453423,-0.0089060199,0.0139277056,-0.17402035],"action_prob":0.6707504988,"action_logp":-0.3993580043,"action_dist_inputs":[-0.3557955027,0.3557859659],"value_targets":67.227722168} +{"eps_id":1050046105,"obs":[-0.189453423,-0.0089060199,0.0139277056,-0.17402035],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1896315515,-0.204224512,0.0104472991,0.1230235845],"action_prob":0.7147983909,"action_logp":-0.3357547522,"action_dist_inputs":[0.4577220976,-0.4610820711],"value_targets":66.8966903687} 
+{"eps_id":1050046105,"obs":[-0.1896315515,-0.204224512,0.0104472991,0.1230235845],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1937160343,-0.0092537757,0.0129077705,-0.1663451046],"action_prob":0.6796035767,"action_logp":-0.3862456083,"action_dist_inputs":[-0.3759745061,0.3759762645],"value_targets":66.5623168945} +{"eps_id":1050046105,"obs":[-0.1937160343,-0.0092537757,0.0129077705,-0.1663451046],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1939011067,-0.2045580894,0.0095808683,0.1303817481],"action_prob":0.7061910629,"action_logp":-0.347869426,"action_dist_inputs":[0.4368102551,-0.4401459098],"value_targets":66.2245635986} +{"eps_id":1050046105,"obs":[-0.1939011067,-0.2045580894,0.0095808683,0.1303817481],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1979922801,-0.3998159766,0.0121885035,0.4260718524],"action_prob":0.3121777773,"action_logp":-1.1641824245,"action_dist_inputs":[-0.3949741721,0.3949834704],"value_targets":65.883392334} +{"eps_id":1050046105,"obs":[-0.1979922801,-0.3998159766,0.0121885035,0.4260718524],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2059886009,-0.2048687637,0.0207099412,0.1372561008],"action_prob":0.8820496798,"action_logp":-0.1255069226,"action_dist_inputs":[-1.0044299364,1.0075550079],"value_targets":65.5387802124} +{"eps_id":1050046105,"obs":[-0.2059886009,-0.2048687637,0.0207099412,0.1372561008],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2100859731,-0.0100494698,0.0234550629,-0.1488218904],"action_prob":0.7154527903,"action_logp":-0.3348396719,"action_dist_inputs":[-0.4609963894,0.461019963],"value_targets":65.1906890869} 
+{"eps_id":1050046105,"obs":[-0.2100859731,-0.0100494698,0.0234550629,-0.1488218904],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.21028696,0.1847288907,0.0204786249,-0.4340139031],"action_prob":0.3405855,"action_logp":-1.0770890713,"action_dist_inputs":[0.3287257254,-0.3319603205],"value_targets":64.8390808105} +{"eps_id":1050046105,"obs":[-0.21028696,0.1847288907,0.0204786249,-0.4340139031],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.206592381,-0.0106769111,0.0117983464,-0.1349463761],"action_prob":0.8824738264,"action_logp":-0.1250261813,"action_dist_inputs":[1.0050026178,-1.0110652447],"value_targets":64.4839172363} +{"eps_id":1050046105,"obs":[-0.206592381,-0.0106769111,0.0117983464,-0.1349463761],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2068059146,0.1842740774,0.0090994192,-0.4238838553],"action_prob":0.33717677,"action_logp":-1.0871479511,"action_dist_inputs":[0.3363621831,-0.3395388126],"value_targets":64.1251678467} +{"eps_id":1050046105,"obs":[-0.2068059146,0.1842740774,0.0090994192,-0.4238838553],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2031204402,-0.0109755909,0.0006217415,-0.1283462942],"action_prob":0.8853481412,"action_logp":-0.1217743456,"action_dist_inputs":[1.0190117359,-1.0250684023],"value_targets":63.7627983093} +{"eps_id":1050046105,"obs":[-0.2031204402,-0.0109755909,0.0006217415,-0.1283462942],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2033399493,-0.2061064392,-0.0019451844,0.1645327061],"action_prob":0.6758648753,"action_logp":-0.3917620778,"action_dist_inputs":[0.3658273518,-0.3690055013],"value_targets":63.3967666626} 
+{"eps_id":1050046105,"obs":[-0.2033399493,-0.2061064392,-0.0019451844,0.1645327061],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2074620724,-0.0109567009,0.0013454697,-0.1287632287],"action_prob":0.7106726766,"action_logp":-0.341543287,"action_dist_inputs":[-0.4492823482,0.4493710101],"value_targets":63.0270347595} +{"eps_id":1050046105,"obs":[-0.2074620724,-0.0109567009,0.0013454697,-0.1287632287],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2076812088,-0.2060979009,-0.0012297948,0.1643438786],"action_prob":0.6745551825,"action_logp":-0.393701762,"action_dist_inputs":[0.3628235757,-0.3660371602],"value_targets":62.6535720825} +{"eps_id":1050046105,"obs":[-0.2076812088,-0.2060979009,-0.0012297948,0.1643438786],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2118031681,-0.0109583661,0.0020570825,-0.1287267655],"action_prob":0.7114519477,"action_logp":-0.3404474258,"action_dist_inputs":[-0.4511987567,0.4512474537],"value_targets":62.2763366699} +{"eps_id":1050046105,"obs":[-0.2118031681,-0.0109583661,0.0020570825,-0.1287267655],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2120223343,0.1841340512,-0.0005174528,-0.4207600057],"action_prob":0.3273949623,"action_logp":-1.1165879965,"action_dist_inputs":[0.3583730459,-0.361618042],"value_targets":61.8952865601} +{"eps_id":1050046105,"obs":[-0.2120223343,0.1841340512,-0.0005174528,-0.4207600057],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2083396614,-0.0109805595,-0.0089326529,-0.1282402426],"action_prob":0.8895823956,"action_logp":-0.1170031577,"action_dist_inputs":[1.0401674509,-1.0463149548],"value_targets":61.5103912354} 
+{"eps_id":1050046105,"obs":[-0.2083396614,-0.0109805595,-0.0089326529,-0.1282402426],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2085592747,-0.2059734166,-0.0114974575,0.1616111994],"action_prob":0.6935620904,"action_logp":-0.3659144938,"action_dist_inputs":[0.4067632556,-0.4100624025],"value_targets":61.1216087341} +{"eps_id":1050046105,"obs":[-0.2085592747,-0.2059734166,-0.0114974575,0.1616111994],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2126787305,-0.4009288847,-0.0082652336,0.4506449103],"action_prob":0.309702754,"action_logp":-1.1721422672,"action_dist_inputs":[-0.4007823765,0.4007269144],"value_targets":60.7288970947} +{"eps_id":1050046105,"obs":[-0.2126787305,-0.4009288847,-0.0082652336,0.4506449103],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2206973135,-0.5959329605,0.0007476646,0.7407110929],"action_prob":0.1217312664,"action_logp":-2.1059393883,"action_dist_inputs":[-0.9865753055,0.9895614982],"value_targets":60.3322181702} +{"eps_id":1050046105,"obs":[-0.2206973135,-0.5959329605,0.0007476646,0.7407110929],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2326159775,-0.400821358,0.0155618871,0.4482635558],"action_prob":0.9296327233,"action_logp":-0.0729656667,"action_dist_inputs":[-1.2876311541,1.293430686],"value_targets":59.9315338135} +{"eps_id":1050046105,"obs":[-0.2326159775,-0.400821358,0.0155618871,0.4482635558],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2406323999,-0.2059229612,0.0245271586,0.160526365],"action_prob":0.8872541189,"action_logp":-0.1196238399,"action_dist_inputs":[-1.0299869776,1.0330084562],"value_targets":59.526802063} 
+{"eps_id":1050046105,"obs":[-0.2406323999,-0.2059229612,0.0245271586,0.160526365],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2447508574,-0.0111605739,0.0277376845,-0.1243190765],"action_prob":0.7449179888,"action_logp":-0.2944811285,"action_dist_inputs":[-0.5359032154,0.5357857347],"value_targets":59.117980957} +{"eps_id":1050046105,"obs":[-0.2447508574,-0.0111605739,0.0277376845,-0.1243190765],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2449740767,-0.2066687047,0.0252513047,0.1769841462],"action_prob":0.6085801721,"action_logp":-0.4966266453,"action_dist_inputs":[0.2190262973,-0.2223215848],"value_targets":58.7050323486} +{"eps_id":1050046105,"obs":[-0.2449740767,-0.2066687047,0.0252513047,0.1769841462],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2491074502,-0.0119170472,0.0287909862,-0.1076271161],"action_prob":0.7615643144,"action_logp":-0.27238065,"action_dist_inputs":[-0.5806517005,0.5806232691],"value_targets":58.2879104614} +{"eps_id":1050046105,"obs":[-0.2491074502,-0.0119170472,0.0287909862,-0.1076271161],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2493457794,0.1827807426,0.0266384445,-0.3910893798],"action_prob":0.421176374,"action_logp":-0.8647035956,"action_dist_inputs":[0.1573778987,-0.1605683416],"value_targets":57.8665771484} +{"eps_id":1050046105,"obs":[-0.2493457794,0.1827807426,0.0266384445,-0.3910893798],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.245690167,-0.012708935,0.0188166574,-0.0901281238],"action_prob":0.8555313349,"action_logp":-0.1560325474,"action_dist_inputs":[0.8863249421,-0.8923355341],"value_targets":57.4409866333} 
+{"eps_id":1050046105,"obs":[-0.245690167,-0.012708935,0.0188166574,-0.0901281238],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.245944351,-0.208095476,0.0170140937,0.2084315717],"action_prob":0.5726299286,"action_logp":-0.5575156212,"action_dist_inputs":[0.144748643,-0.1478405893],"value_targets":57.0110969543} +{"eps_id":1050046105,"obs":[-0.245944351,-0.208095476,0.0170140937,0.2084315717],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2501062453,-0.0132208858,0.0211827252,-0.0788361058],"action_prob":0.7789068818,"action_logp":-0.2498638034,"action_dist_inputs":[-0.6295833588,0.6297240853],"value_targets":56.5768661499} +{"eps_id":1050046105,"obs":[-0.2501062453,-0.0132208858,0.0211827252,-0.0788361058],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2503706813,0.1815910935,0.0196060035,-0.3647612631],"action_prob":0.4517641068,"action_logp":-0.7945951223,"action_dist_inputs":[0.0952602923,-0.0982852355],"value_targets":56.1382484436} +{"eps_id":1050046105,"obs":[-0.2503706813,0.1815910935,0.0196060035,-0.3647612631],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2467388511,-0.0138039198,0.0123107778,-0.0659612715],"action_prob":0.8454914689,"action_logp":-0.1678371876,"action_dist_inputs":[0.8468879461,-0.852781117],"value_targets":55.6952018738} +{"eps_id":1050046105,"obs":[-0.2467388511,-0.0138039198,0.0123107778,-0.0659612715],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2470149398,-0.2091001868,0.0109915528,0.2305802107],"action_prob":0.5470821857,"action_logp":-0.6031562686,"action_dist_inputs":[0.0929663032,-0.0959220156],"value_targets":55.2476768494} 
+{"eps_id":1050046105,"obs":[-0.2470149398,-0.2091001868,0.0109915528,0.2305802107],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2511969507,-0.0141370138,0.0156031568,-0.0586154349],"action_prob":0.7896382213,"action_logp":-0.23618038,"action_dist_inputs":[-0.6612457633,0.6615001559],"value_targets":54.7956352234} +{"eps_id":1050046105,"obs":[-0.2511969507,-0.0141370138,0.0156031568,-0.0586154349],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2514796853,-0.2094791681,0.014430848,0.2389492691],"action_prob":0.52708143,"action_logp":-0.6404001713,"action_dist_inputs":[0.0527581796,-0.0556737781],"value_targets":54.3390235901} +{"eps_id":1050046105,"obs":[-0.2514796853,-0.2094791681,0.014430848,0.2389492691],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.255669266,-0.0145663144,0.0192098338,-0.0491470434],"action_prob":0.7989631295,"action_logp":-0.2244404703,"action_dist_inputs":[-0.6897600889,0.6900663972],"value_targets":53.8778038025} +{"eps_id":1050046105,"obs":[-0.255669266,-0.0145663144,0.0192098338,-0.0491470434],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2559605837,-0.2099583745,0.0182268918,0.2495343089],"action_prob":0.502989769,"action_logp":-0.6871854067,"action_dist_inputs":[0.0045505352,-0.0074086338],"value_targets":53.4119224548} +{"eps_id":1050046105,"obs":[-0.2559605837,-0.2099583745,0.0182268918,0.2495343089],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2601597607,-0.0151013946,0.0232175793,-0.0373441949],"action_prob":0.8093570471,"action_logp":-0.2115150988,"action_dist_inputs":[-0.7227278948,0.7231097817],"value_targets":52.9413375854} 
+{"eps_id":1050046105,"obs":[-0.2601597607,-0.0151013946,0.0232175793,-0.0373441949],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2604617774,0.1796800494,0.0224706959,-0.3226122558],"action_prob":0.5254328847,"action_logp":-0.6435328126,"action_dist_inputs":[-0.0523002855,0.0495191105],"value_targets":52.4659957886} +{"eps_id":1050046105,"obs":[-0.2604617774,0.1796800494,0.0224706959,-0.3226122558],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2568681836,-0.0157545526,0.0160184503,-0.0229285397],"action_prob":0.8098267317,"action_logp":-0.2109349668,"action_dist_inputs":[0.7216053605,-0.7272794843],"value_targets":51.9858551025} +{"eps_id":1050046105,"obs":[-0.2568681836,-0.0157545526,0.0160184503,-0.0229285397],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2571832836,0.179134056,0.0155598791,-0.3105147183],"action_prob":0.5328799486,"action_logp":-0.6294590831,"action_dist_inputs":[-0.0672000647,0.0645097345],"value_targets":51.5008621216} +{"eps_id":1050046105,"obs":[-0.2571832836,0.179134056,0.0155598791,-0.3105147183],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2536005974,-0.0162060838,0.0093495846,-0.0129655665],"action_prob":0.8080635667,"action_logp":-0.2131145597,"action_dist_inputs":[0.7159304023,-0.7215456963],"value_targets":51.0109710693} +{"eps_id":1050046105,"obs":[-0.2536005974,-0.0162060838,0.0093495846,-0.0129655665],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2539247274,-0.2114608586,0.0090902727,0.2826525271],"action_prob":0.4657096565,"action_logp":-0.7641928792,"action_dist_inputs":[-0.0700034648,0.0673736334],"value_targets":50.5161323547} 
+{"eps_id":1050046105,"obs":[-0.2539247274,-0.2114608586,0.0090902727,0.2826525271],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2581539452,-0.0164697431,0.0147433234,-0.0071495525],"action_prob":0.8217137456,"action_logp":-0.1963631511,"action_dist_inputs":[-0.7637038827,0.7642977834],"value_targets":50.0162963867} +{"eps_id":1050046105,"obs":[-0.2581539452,-0.0164697431,0.0147433234,-0.0071495525],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2584833205,0.1784376949,0.0146003328,-0.295144558],"action_prob":0.5553374887,"action_logp":-0.5881792903,"action_dist_inputs":[-0.1124255508,0.1098347753],"value_targets":49.5114097595} +{"eps_id":1050046105,"obs":[-0.2584833205,0.1784376949,0.0146003328,-0.295144558],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2549145818,-0.0168893244,0.0086974418,0.0021070931],"action_prob":0.7951577902,"action_logp":-0.2292146832,"action_dist_inputs":[0.6753842831,-0.6809163094],"value_targets":49.0014266968} +{"eps_id":1050046105,"obs":[-0.2549145818,-0.0168893244,0.0086974418,0.0021070931],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2552523613,-0.2121349275,0.0087395832,0.2975214422],"action_prob":0.4430258274,"action_logp":-0.8141272068,"action_dist_inputs":[-0.1157112569,0.1131795272],"value_targets":48.486289978} +{"eps_id":1050046105,"obs":[-0.2552523613,-0.2121349275,0.0087395832,0.2975214422],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2594950497,-0.0171386395,0.0146900117,0.0076075778],"action_prob":0.8294486403,"action_logp":-0.1869941056,"action_dist_inputs":[-0.7905103564,0.7912141681],"value_targets":47.9659461975} 
+{"eps_id":1050046105,"obs":[-0.2594950497,-0.0171386395,0.0146900117,0.0076075778],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.259837836,0.1777695864,0.0148421638,-0.2804045081],"action_prob":0.5781371593,"action_logp":-0.5479441881,"action_dist_inputs":[-0.1588108838,0.1563198268],"value_targets":47.4403495789} +{"eps_id":1050046105,"obs":[-0.259837836,0.1777695864,0.0148421638,-0.2804045081],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2562824488,-0.017560903,0.0092340736,0.0169223901],"action_prob":0.7801626921,"action_logp":-0.248252809,"action_dist_inputs":[0.6305841208,-0.6360304356],"value_targets":46.9094467163} +{"eps_id":1050046105,"obs":[-0.2562824488,-0.017560903,0.0092340736,0.0169223901],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2566336691,-0.2128140479,0.0095725209,0.3125044107],"action_prob":0.4189275503,"action_logp":-0.8700572848,"action_dist_inputs":[-0.1648025811,0.1623748392],"value_targets":46.3731765747} +{"eps_id":1050046105,"obs":[-0.2566336691,-0.2128140479,0.0095725209,0.3125044107],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2608899474,-0.4080710709,0.0158226099,0.6081907749],"action_prob":0.1625713259,"action_logp":-1.8166384697,"action_dist_inputs":[-0.8191953301,0.8200238943],"value_targets":45.8314933777} +{"eps_id":1050046105,"obs":[-0.2608899474,-0.4080710709,0.0158226099,0.6081907749],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.269051373,-0.2131738514,0.027986424,0.320533216],"action_prob":0.9133614898,"action_logp":-0.0906235203,"action_dist_inputs":[-1.1756258011,1.179762125],"value_targets":45.2843360901} 
+{"eps_id":1050046105,"obs":[-0.269051373,-0.2131738514,0.027986424,0.320533216],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2733148336,-0.0184614081,0.034397088,0.0368058532],"action_prob":0.8514237404,"action_logp":-0.1608453542,"action_dist_inputs":[-0.872420311,0.8733915091],"value_targets":44.7316513062} +{"eps_id":1050046105,"obs":[-0.2733148336,-0.0184614081,0.034397088,0.0368058532],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2736840546,-0.2140593082,0.0351332054,0.3401398957],"action_prob":0.3436609507,"action_logp":-1.0680997372,"action_dist_inputs":[-0.3246361315,0.3223859072],"value_targets":44.1733856201} +{"eps_id":1050046105,"obs":[-0.2736840546,-0.2140593082,0.0351332054,0.3401398957],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2779652476,-0.0194544028,0.0419360027,0.0587397218],"action_prob":0.8619147539,"action_logp":-0.1485988796,"action_dist_inputs":[-0.9150485992,0.9162369967],"value_targets":43.6094818115} +{"eps_id":1050046105,"obs":[-0.2779652476,-0.0194544028,0.0419360027,0.0587397218],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2783543468,0.1750419587,0.043110799,-0.2204227448],"action_prob":0.6944621801,"action_logp":-0.3646175861,"action_dist_inputs":[-0.4115594625,0.4095045924],"value_targets":43.0398788452} +{"eps_id":1050046105,"obs":[-0.2783543468,0.1750419587,0.043110799,-0.2204227448],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2748534977,-0.0206688587,0.0387023427,0.0855413675],"action_prob":0.6535315514,"action_logp":-0.4253644943,"action_dist_inputs":[0.3147770762,-0.319821924],"value_targets":42.4645233154} 
+{"eps_id":1050046105,"obs":[-0.2748534977,-0.0206688587,0.0387023427,0.0855413675],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2752668858,-0.2163235992,0.0404131711,0.3901793361],"action_prob":0.2815421224,"action_logp":-1.2674732208,"action_dist_inputs":[-0.4693239331,0.4675011635],"value_targets":41.8833580017} +{"eps_id":1050046105,"obs":[-0.2752668858,-0.2163235992,0.0404131711,0.3901793361],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2795933485,-0.0217978302,0.0482167564,0.1105071828],"action_prob":0.8781659007,"action_logp":-0.1299197227,"action_dist_inputs":[-0.9867274761,0.988447547],"value_targets":41.2963218689} +{"eps_id":1050046105,"obs":[-0.2795933485,-0.0217978302,0.0482167564,0.1105071828],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2800292969,0.172601223,0.0504269004,-0.1665822715],"action_prob":0.7529322505,"action_logp":-0.2837800086,"action_dist_inputs":[-0.5579360723,0.5563768744],"value_targets":40.7033538818} +{"eps_id":1050046105,"obs":[-0.2800292969,0.172601223,0.0504269004,-0.1665822715],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2765772939,-0.0232049301,0.0470952578,0.1415728331],"action_prob":0.5555010438,"action_logp":-0.5878847837,"action_dist_inputs":[0.1091373563,-0.1137854084],"value_targets":40.1044006348} +{"eps_id":1050046105,"obs":[-0.2765772939,-0.0232049301,0.0470952578,0.1415728331],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2770413756,0.1712120026,0.0499267131,-0.1358883232],"action_prob":0.7760725021,"action_logp":-0.2535093427,"action_dist_inputs":[-0.6220853925,0.6208384633],"value_targets":39.4993934631} 
+{"eps_id":1050046105,"obs":[-0.2770413756,0.1712120026,0.0499267131,-0.1358883232],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2736171484,-0.0245882068,0.0472089462,0.1721183658],"action_prob":0.5085395575,"action_logp":-0.6762122512,"action_dist_inputs":[0.0148820467,-0.0192796104],"value_targets":38.8882751465} +{"eps_id":1050046105,"obs":[-0.2736171484,-0.0245882068,0.0472089462,0.1721183658],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2741089165,0.1698273867,0.0506513156,-0.1053058356],"action_prob":0.7966098785,"action_logp":-0.2273901999,"action_dist_inputs":[-0.6830776334,0.6821615696],"value_targets":38.2709846497} +{"eps_id":1050046105,"obs":[-0.2741089165,0.1698273867,0.0506513156,-0.1053058356],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2707123458,0.3641882539,0.0485451967,-0.3815881312],"action_prob":0.5396225452,"action_logp":-0.6168854237,"action_dist_inputs":[-0.081471771,0.0773514062],"value_targets":37.6474609375} +{"eps_id":1050046105,"obs":[-0.2707123458,0.3641882539,0.0485451967,-0.3815881312],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2634285986,0.1684118211,0.0409134328,-0.0740027428],"action_prob":0.7817983031,"action_logp":-0.2461585253,"action_dist_inputs":[0.6347337365,-0.6414430737],"value_targets":37.0176353455} +{"eps_id":1050046105,"obs":[-0.2634285986,0.1684118211,0.0409134328,-0.0740027428],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2600603402,0.3629240692,0.0394333787,-0.3535017669],"action_prob":0.5665557384,"action_logp":-0.5681797862,"action_dist_inputs":[-0.1358240396,0.1319882274],"value_targets":36.3814506531} 
+{"eps_id":1050046105,"obs":[-0.2600603402,0.3629240692,0.0394333787,-0.3535017669],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2528018653,0.167264238,0.032363344,-0.0486497544],"action_prob":0.7667326331,"action_logp":-0.2656171024,"action_dist_inputs":[0.5917012095,-0.5982517004],"value_targets":35.7388381958} +{"eps_id":1050046105,"obs":[-0.2528018653,0.167264238,0.032363344,-0.0486497544],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2494565845,0.3619075418,0.0313903503,-0.3309486508],"action_prob":0.5866357088,"action_logp":-0.5333512425,"action_dist_inputs":[-0.1768292189,0.1732456386],"value_targets":35.0897369385} +{"eps_id":1050046105,"obs":[-0.2494565845,0.3619075418,0.0313903503,-0.3309486508],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2422184348,0.1663531363,0.0247713756,-0.0285343286],"action_prob":0.7550904751,"action_logp":-0.2809177339,"action_dist_inputs":[0.5597726703,-0.5661759377],"value_targets":34.4340782166} +{"eps_id":1050046105,"obs":[-0.2422184348,0.1663531363,0.0247713756,-0.0285343286],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.238891378,-0.0291151274,0.024200689,0.2718601525],"action_prob":0.3991625011,"action_logp":-0.9183866978,"action_dist_inputs":[-0.2061602622,0.2027955949],"value_targets":33.7717971802} +{"eps_id":1050046105,"obs":[-0.238891378,-0.0291151274,0.024200689,0.2718601525],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2394736707,0.1656532884,0.0296378937,-0.0130926007],"action_prob":0.8342247605,"action_logp":-0.1812524199,"action_dist_inputs":[-0.807836771,0.8080334663],"value_targets":33.1028251648} 
+{"eps_id":1050046105,"obs":[-0.2394736707,0.1656532884,0.0296378937,-0.0130926007],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2361606061,0.3603379428,0.0293760411,-0.296279043],"action_prob":0.6301693916,"action_logp":-0.4617666602,"action_dist_inputs":[-0.2680419683,0.2649016082],"value_targets":32.4270935059} +{"eps_id":1050046105,"obs":[-0.2361606061,0.3603379428,0.0293760411,-0.296279043],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2289538532,0.1648097783,0.0234504603,0.0055219401],"action_prob":0.717756331,"action_logp":-0.331625104,"action_dist_inputs":[0.4636127055,-0.4697467089],"value_targets":31.7445411682} +{"eps_id":1050046105,"obs":[-0.2289538532,0.1648097783,0.0234504603,0.0055219401],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2256576568,0.3595876992,0.0235608984,-0.2796708047],"action_prob":0.6438056827,"action_logp":-0.4403583109,"action_dist_inputs":[-0.2974163294,0.2945042253],"value_targets":31.0550918579} +{"eps_id":1050046105,"obs":[-0.2256576568,0.3595876992,0.0235608984,-0.2796708047],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2184658945,0.1641377211,0.017967483,0.020349063],"action_prob":0.7074712515,"action_logp":-0.3460583091,"action_dist_inputs":[0.4385734499,-0.4445606172],"value_targets":30.3586788177} +{"eps_id":1050046105,"obs":[-0.2184658945,0.1641377211,0.017967483,0.020349063],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2151831537,-0.0312372334,0.0183744635,0.3186463416],"action_prob":0.3463279903,"action_logp":-1.0603690147,"action_dist_inputs":[-0.3189657331,0.3162538707],"value_targets":29.6552295685} 
+{"eps_id":1050046105,"obs":[-0.2151831537,-0.0312372334,0.0183744635,0.3186463416],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2158078849,0.1636182666,0.0247473903,0.0318142213],"action_prob":0.8510218263,"action_logp":-0.1613175124,"action_dist_inputs":[-0.8708922863,0.8717452884],"value_targets":28.9446773529} +{"eps_id":1050046105,"obs":[-0.2158078849,0.1636182666,0.0247473903,0.0318142213],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2125355303,-0.0318496712,0.0253836755,0.3322013319],"action_prob":0.3221144378,"action_logp":-1.132848382,"action_dist_inputs":[-0.3732809126,0.3707906902],"value_targets":28.2269458771} +{"eps_id":1050046105,"obs":[-0.2125355303,-0.0318496712,0.0253836755,0.3322013319],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2131725252,-0.2273235619,0.0320277028,0.6327797174],"action_prob":0.1412731409,"action_logp":-1.9570600986,"action_dist_inputs":[-0.9018250108,0.9029306769],"value_targets":27.5019664764} +{"eps_id":1050046105,"obs":[-0.2131725252,-0.2273235619,0.0320277028,0.6327797174],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2177189887,-0.032662712,0.0446832962,0.3503527343],"action_prob":0.9135007858,"action_logp":-0.090471074,"action_dist_inputs":[-1.1763569117,1.1807910204],"value_targets":26.7696628571} +{"eps_id":1050046105,"obs":[-0.2177189887,-0.032662712,0.0446832962,0.3503527343],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2183722407,0.1617962271,0.0516903512,0.0720879808],"action_prob":0.8703018427,"action_logp":-0.1389151812,"action_dist_inputs":[-0.9510756135,0.9525543451],"value_targets":26.0299625397} 
+{"eps_id":1050046105,"obs":[-0.2183722407,0.1617962271,0.0516903512,0.0720879808],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2151363194,0.3561404943,0.0531321093,-0.2038488388],"action_prob":0.7496832609,"action_logp":-0.2881044745,"action_dist_inputs":[-0.5493389964,0.5475848317],"value_targets":25.2827911377} +{"eps_id":1050046105,"obs":[-0.2151363194,0.3561404943,0.0531321093,-0.2038488388],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2080135047,0.5504639149,0.049055133,-0.4793091714],"action_prob":0.4621636868,"action_logp":-0.7718361616,"action_dist_inputs":[0.0732518435,-0.0783833414],"value_targets":24.5280704498} +{"eps_id":1050046105,"obs":[-0.2080135047,0.5504639149,0.049055133,-0.4793091714],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1970042288,0.3546850085,0.0394689515,-0.1715776622],"action_prob":0.8186110854,"action_logp":-0.2001461983,"action_dist_inputs":[0.7497680783,-0.7571973205],"value_targets":23.7657279968} +{"eps_id":1050046105,"obs":[-0.1970042288,0.3546850085,0.0394689515,-0.1715776622],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.189910531,0.5492205024,0.0360373966,-0.4515527487],"action_prob":0.4829119146,"action_logp":-0.7279210091,"action_dist_inputs":[0.031792324,-0.0365866721],"value_targets":22.9956855774} +{"eps_id":1050046105,"obs":[-0.189910531,0.5492205024,0.0360373966,-0.4515527487],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1789261252,0.353607893,0.0270063411,-0.1477316767],"action_prob":0.8120085001,"action_logp":-0.2082444876,"action_dist_inputs":[0.7279286385,-0.735185504],"value_targets":22.2178649902} 
+{"eps_id":1050046105,"obs":[-0.1789261252,0.353607893,0.0270063411,-0.1477316767],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1718539596,0.1581098288,0.0240517072,0.1533473879],"action_prob":0.506544292,"action_logp":-0.6801434755,"action_dist_inputs":[0.010832997,-0.0153457904],"value_targets":21.4321861267} +{"eps_id":1050046105,"obs":[-0.1718539596,0.1581098288,0.0240517072,0.1533473879],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1686917692,0.3528792858,0.0271186568,-0.131651774],"action_prob":0.7844457626,"action_logp":-0.2427778542,"action_dist_inputs":[-0.6462154984,0.6455494165],"value_targets":20.6385707855} +{"eps_id":1050046105,"obs":[-0.1686917692,0.3528792858,0.0271186568,-0.131651774],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.161634177,0.5476024747,0.0244856197,-0.4156572819],"action_prob":0.5179526806,"action_logp":-0.6578713655,"action_dist_inputs":[-0.0380420871,0.0337995589],"value_targets":19.8369407654} +{"eps_id":1050046105,"obs":[-0.161634177,0.5476024747,0.0244856197,-0.4156572819],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1506821364,0.3521422148,0.0161724743,-0.1153566986],"action_prob":0.7961643934,"action_logp":-0.2279495597,"action_dist_inputs":[0.6777696013,-0.6847225428],"value_targets":19.0272140503} +{"eps_id":1050046105,"obs":[-0.1506821364,0.3521422148,0.0161724743,-0.1153566986],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1436392814,0.54702878,0.0138653405,-0.4028937817],"action_prob":0.5203025341,"action_logp":-0.65334481,"action_dist_inputs":[-0.0426324345,0.0386223979],"value_targets":18.2093067169} 
+{"eps_id":1050046105,"obs":[-0.1436392814,0.54702878,0.0138653405,-0.4028937817],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1326987147,0.3517129123,0.0058074654,-0.1058719084],"action_prob":0.7995255589,"action_logp":-0.2237367481,"action_dist_inputs":[0.6882653832,-0.695066452],"value_targets":17.3831367493} +{"eps_id":1050046105,"obs":[-0.1326987147,0.3517129123,0.0058074654,-0.1058719084],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1256644577,0.5467511415,0.0036900272,-0.3967169523],"action_prob":0.5134273767,"action_logp":-0.666646719,"action_dist_inputs":[-0.0287824385,0.0249399804],"value_targets":16.5486240387} +{"eps_id":1050046105,"obs":[-0.1256644577,0.5467511415,0.0036900272,-0.3967169523],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1147294343,0.7418205738,-0.0042443122,-0.68823421],"action_prob":0.1916655898,"action_logp":-1.6520031691,"action_dist_inputs":[0.7162767649,-0.7229468822],"value_targets":15.7056808472} +{"eps_id":1050046105,"obs":[-0.1147294343,0.7418205738,-0.0042443122,-0.68823421],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0998930186,0.5467577577,-0.0180089958,-0.3968904912],"action_prob":0.9242308736,"action_logp":-0.0787933469,"action_dist_inputs":[1.2465296984,-1.2547413111],"value_targets":14.8542232513} +{"eps_id":1050046105,"obs":[-0.0998930186,0.5467577577,-0.0180089958,-0.3968904912],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0889578611,0.3518958986,-0.0259468053,-0.1099394336],"action_prob":0.8358060122,"action_logp":-0.1793587357,"action_dist_inputs":[0.8103988171,-0.8169493675],"value_targets":13.9941644669} 
+{"eps_id":1050046105,"obs":[-0.0889578611,0.3518958986,-0.0259468053,-0.1099394336],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0819199458,0.1571552008,-0.0281455945,0.1744458079],"action_prob":0.562677443,"action_logp":-0.5750487447,"action_dist_inputs":[0.1241277456,-0.1279077381],"value_targets":13.125418663} +{"eps_id":1050046105,"obs":[-0.0819199458,0.1571552008,-0.0281455945,0.1744458079],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0787768438,-0.0375528745,-0.0246566776,0.4581185281],"action_prob":0.2426569164,"action_logp":-1.4161067009,"action_dist_inputs":[-0.5691313148,0.5690364838],"value_targets":12.2478981018} +{"eps_id":1050046105,"obs":[-0.0787768438,-0.0375528745,-0.0246566776,0.4581185281],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0795278996,0.1579088122,-0.0154943075,0.1577668041],"action_prob":0.8838064075,"action_logp":-0.1235172227,"action_dist_inputs":[-1.0129560232,1.016024828],"value_targets":11.3615131378} +{"eps_id":1050046105,"obs":[-0.0795278996,0.1579088122,-0.0154943075,0.1577668041],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0763697252,0.3532491326,-0.0123389717,-0.1397636533],"action_prob":0.7586507797,"action_logp":-0.2762137055,"action_dist_inputs":[-0.5726975799,0.5725992322],"value_targets":10.4661741257} +{"eps_id":1050046105,"obs":[-0.0763697252,0.3532491326,-0.0123389717,-0.1397636533],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0693047419,0.1583060473,-0.0151342452,0.1490011364],"action_prob":0.5820481181,"action_logp":-0.5412021279,"action_dist_inputs":[0.163692385,-0.1674944013],"value_targets":9.5617923737} 
+{"eps_id":1050046105,"obs":[-0.0693047419,0.1583060473,-0.0151342452,0.1490011364],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0661386177,0.3536414206,-0.0121542225,-0.1484176517],"action_prob":0.7527098656,"action_logp":-0.2840754688,"action_dist_inputs":[-0.5565992594,0.5565182567],"value_targets":8.6482753754} +{"eps_id":1050046105,"obs":[-0.0661386177,0.3536414206,-0.0121542225,-0.1484176517],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0590657927,0.5489352942,-0.0151225757,-0.4449100792],"action_prob":0.4035651386,"action_logp":-0.907417357,"action_dist_inputs":[0.1934280843,-0.1972039938],"value_targets":7.7255306244} +{"eps_id":1050046105,"obs":[-0.0590657927,0.5489352942,-0.0151225757,-0.4449100792],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0480870865,0.3540305197,-0.0240207762,-0.1570322365],"action_prob":0.8670648336,"action_logp":-0.1426414996,"action_dist_inputs":[0.9344383478,-0.9408141971],"value_targets":6.7934651375} +{"eps_id":1050046105,"obs":[-0.0480870865,0.3540305197,-0.0240207762,-0.1570322365],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0410064757,0.159260571,-0.0271614213,0.1279769838],"action_prob":0.6375115514,"action_logp":-0.4501828551,"action_dist_inputs":[0.2804020345,-0.2841777205],"value_targets":5.8519849777} +{"eps_id":1050046105,"obs":[-0.0410064757,0.159260571,-0.0271614213,0.1279769838],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0378212631,0.3547608852,-0.0246018823,-0.1731497943],"action_prob":0.7185906768,"action_logp":-0.3304633498,"action_dist_inputs":[-0.4688197374,0.4686621726],"value_targets":4.9009947777} 
+{"eps_id":1050046105,"obs":[-0.0378212631,0.3547608852,-0.0246018823,-0.1731497943],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0307260454,0.1599995196,-0.0280648787,0.1116716191],"action_prob":0.6646106243,"action_logp":-0.4085538983,"action_dist_inputs":[0.3400496542,-0.343859762],"value_targets":3.9403989315} +{"eps_id":1050046105,"obs":[-0.0307260454,0.1599995196,-0.0280648787,0.1116716191],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0275260564,-0.034709271,-0.0258314461,0.3953697979],"action_prob":0.3004277349,"action_logp":-1.202548027,"action_dist_inputs":[-0.4227510989,0.4225106835],"value_targets":2.970099926} +{"eps_id":1050046105,"obs":[-0.0275260564,-0.034709271,-0.0258314461,0.3953697979],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0282202419,0.1607694924,-0.0179240499,0.0946558788],"action_prob":0.8738145232,"action_logp":-0.134887144,"action_dist_inputs":[-0.9660459161,0.9690693617],"value_targets":1.9900000095} +{"eps_id":1050046105,"obs":[-0.0282202419,0.1607694924,-0.0179240499,0.0946558788],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.0250048507,-0.0340910293,-0.0160309318,0.3816303313],"action_prob":0.3038372993,"action_logp":-1.1912629604,"action_dist_inputs":[-0.4146879017,0.4144031107],"value_targets":1.0} +{"eps_id":1306964917,"obs":[0.0092445957,-0.0369887985,-0.0080326134,0.0393385179],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0085048191,0.1582474113,-0.0072458428,-0.2558678985],"action_prob":0.569418788,"action_logp":-0.5631391406,"action_dist_inputs":[-0.1395182312,0.1399619281],"value_targets":86.6020355225} 
+{"eps_id":1306964917,"obs":[0.0085048191,0.1582474113,-0.0072458428,-0.2558678985],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0116697671,-0.0367703401,-0.012363201,0.0345207788],"action_prob":0.8273358345,"action_logp":-0.1895445585,"action_dist_inputs":[0.7817291617,-0.7851330638],"value_targets":86.4666976929} +{"eps_id":1306964917,"obs":[0.0116697671,-0.0367703401,-0.012363201,0.0345207788],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0109343603,0.1585267037,-0.0116727855,-0.2620370984],"action_prob":0.5489407778,"action_logp":-0.5997647047,"action_dist_inputs":[-0.098005034,0.0983868465],"value_targets":86.3300018311} +{"eps_id":1306964917,"obs":[0.0109343603,0.1585267037,-0.0116727855,-0.2620370984],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0141048944,-0.0364267007,-0.0169135276,0.0269413721],"action_prob":0.8365744352,"action_logp":-0.1784397662,"action_dist_inputs":[0.8147390485,-0.8182188869],"value_targets":86.1919174194} +{"eps_id":1306964917,"obs":[0.0141048944,-0.0364267007,-0.0169135276,0.0269413721],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0133763608,-0.2313020676,-0.0163746998,0.3142402768],"action_prob":0.47824049,"action_logp":-0.737641573,"action_dist_inputs":[-0.0434027053,0.0436903201],"value_targets":86.052444458} +{"eps_id":1306964917,"obs":[0.0133763608,-0.2313020676,-0.0163746998,0.3142402768],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0087503195,-0.0359507278,-0.0100898948,0.0164386034],"action_prob":0.8569760919,"action_logp":-0.154345274,"action_dist_inputs":[-0.8932709694,0.8971272111],"value_targets":85.9115600586} 
+{"eps_id":1306964917,"obs":[0.0087503195,-0.0359507278,-0.0100898948,0.0164386034],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0080313049,-0.2309265435,-0.0097611221,0.3059210479],"action_prob":0.484277606,"action_logp":-0.725096941,"action_dist_inputs":[-0.0313701928,0.0315400325],"value_targets":85.7692489624} +{"eps_id":1306964917,"obs":[0.0080313049,-0.2309265435,-0.0097611221,0.3059210479],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0034127741,-0.0356668569,-0.0036427015,0.0101757599],"action_prob":0.8565001488,"action_logp":-0.1549007744,"action_dist_inputs":[-0.8913698792,0.8951508999],"value_targets":85.62550354} +{"eps_id":1306964917,"obs":[0.0034127741,-0.0356668569,-0.0036427015,0.0101757599],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0026994371,-0.2307363749,-0.0034391864,0.3017071486],"action_prob":0.4820455909,"action_logp":-0.729716599,"action_dist_inputs":[-0.0358759426,0.0359725915],"value_targets":85.4803085327} +{"eps_id":1306964917,"obs":[0.0026994371,-0.2307363749,-0.0034391864,0.3017071486],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0019152906,-0.0355655774,0.0025949567,0.0079415608],"action_prob":0.8585451841,"action_logp":-0.1525159776,"action_dist_inputs":[-0.8997585177,0.9035005569],"value_targets":85.3336486816} +{"eps_id":1306964917,"obs":[-0.0019152906,-0.0355655774,0.0025949567,0.0079415608],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0026266021,0.1595190614,0.0027537879,-0.28392151],"action_prob":0.5283808112,"action_logp":-0.6379380226,"action_dist_inputs":[-0.0567892194,0.0568561703],"value_targets":85.1855010986} 
+{"eps_id":1306964917,"obs":[-0.0026266021,0.1595190614,0.0027537879,-0.28392151],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0005637791,-0.0356420577,-0.0029246421,0.0096286852],"action_prob":0.8407414556,"action_logp":-0.1734710932,"action_dist_inputs":[0.8300230503,-0.8337324262],"value_targets":85.0358581543} +{"eps_id":1306964917,"obs":[0.0005637791,-0.0356420577,-0.0029246421,0.0096286852],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0001490621,0.1595217139,-0.0027320683,-0.2839755714],"action_prob":0.5188003182,"action_logp":-0.6562361717,"action_dist_inputs":[-0.037583936,0.037652839],"value_targets":84.8847045898} +{"eps_id":1306964917,"obs":[-0.0001490621,0.1595217139,-0.0027320683,-0.2839755714],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0030413722,-0.035561163,-0.0084115798,0.0078444323],"action_prob":0.8454321623,"action_logp":-0.1679073274,"action_dist_inputs":[0.8477421403,-0.851472795],"value_targets":84.7320251465} +{"eps_id":1306964917,"obs":[0.0030413722,-0.035561163,-0.0084115798,0.0078444323],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0023301488,-0.2305614799,-0.008254691,0.2978615761],"action_prob":0.498223871,"action_logp":-0.6967057586,"action_dist_inputs":[-0.0035360758,0.003568369],"value_targets":84.5778045654} +{"eps_id":1306964917,"obs":[0.0023301488,-0.2305614799,-0.008254691,0.2978615761],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0022810807,-0.0353228338,-0.0022974594,0.0025867424],"action_prob":0.8521544337,"action_logp":-0.1599875391,"action_dist_inputs":[-0.8739657998,0.8776332736],"value_targets":84.4220275879} 
+{"eps_id":1306964917,"obs":[-0.0022810807,-0.0353228338,-0.0022974594,0.0025867424],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0029875373,0.1598319858,-0.0022457244,-0.2908201814],"action_prob":0.5054001212,"action_logp":-0.6824048162,"action_dist_inputs":[-0.0108166356,0.0107846586],"value_targets":84.2646713257} +{"eps_id":1306964917,"obs":[-0.0029875373,0.1598319858,-0.0022457244,-0.2908201814],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0002091024,-0.0352578685,-0.008062128,0.0011536317],"action_prob":0.8498744369,"action_logp":-0.1626666933,"action_dist_inputs":[0.864897728,-0.8687186241],"value_targets":84.1057281494} +{"eps_id":1306964917,"obs":[0.0002091024,-0.0352578685,-0.008062128,0.0011536317],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0004960549,0.1599787772,-0.0080390554,-0.2940620482],"action_prob":0.488483876,"action_logp":-0.7164488435,"action_dist_inputs":[0.0230029859,-0.023069758],"value_targets":83.9451828003} +{"eps_id":1306964917,"obs":[-0.0004960549,0.1599787772,-0.0080390554,-0.2940620482],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0027035205,-0.0350276493,-0.013920296,-0.0039253295],"action_prob":0.8564612269,"action_logp":-0.154946208,"action_dist_inputs":[0.8911631703,-0.8950411081],"value_targets":83.7830123901} +{"eps_id":1306964917,"obs":[0.0027035205,-0.0350276493,-0.013920296,-0.0039253295],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0020029675,0.1602911502,-0.0139988028,-0.3009675741],"action_prob":0.4635952711,"action_logp":-0.7687433958,"action_dist_inputs":[0.0728666931,-0.0730103478],"value_targets":83.6192016602} 
+{"eps_id":1306964917,"obs":[0.0020029675,0.1602911502,-0.0139988028,-0.3009675741],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0052087903,-0.0346285105,-0.0200181548,-0.012732246],"action_prob":0.8648462892,"action_logp":-0.1452034563,"action_dist_inputs":[0.926082015,-0.93005687],"value_targets":83.453742981} +{"eps_id":1306964917,"obs":[0.0052087903,-0.0346285105,-0.0200181548,-0.012732246],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0045162202,0.1607747227,-0.0202727988,-0.3116632998],"action_prob":0.4306706786,"action_logp":-0.8424115777,"action_dist_inputs":[0.1394247562,-0.1396905631],"value_targets":83.286605835} +{"eps_id":1306964917,"obs":[0.0045162202,0.1607747227,-0.0202727988,-0.3116632998],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0077317148,-0.0340526327,-0.0265060663,-0.0254421309],"action_prob":0.8745412827,"action_logp":-0.1340557635,"action_dist_inputs":[0.968804419,-0.9729182124],"value_targets":83.1177825928} +{"eps_id":1306964917,"obs":[0.0077317148,-0.0340526327,-0.0265060663,-0.0254421309],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0070506618,0.1614392102,-0.0270149074,-0.3263687789],"action_prob":0.390116632,"action_logp":-0.9413095117,"action_dist_inputs":[0.223191753,-0.2236302197],"value_targets":82.9472579956} +{"eps_id":1306964917,"obs":[0.0070506618,0.1614392102,-0.0270149074,-0.3263687789],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0102794459,-0.0332879052,-0.0335422829,-0.0423260853],"action_prob":0.884988904,"action_logp":-0.1221801788,"action_dist_inputs":[1.0181236267,-1.0224231482],"value_targets":82.7750091553} 
+{"eps_id":1306964917,"obs":[0.0102794459,-0.0332879052,-0.0335422829,-0.0423260853],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0096136881,-0.2279132158,-0.0343888067,0.2395880818],"action_prob":0.6567715406,"action_logp":-0.4204190373,"action_dist_inputs":[0.3241351843,-0.3248049021],"value_targets":82.601020813} +{"eps_id":1306964917,"obs":[0.0096136881,-0.2279132158,-0.0343888067,0.2395880818],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0050554238,-0.0323173068,-0.029597044,-0.063740626],"action_prob":0.7650242448,"action_logp":-0.2678477466,"action_dist_inputs":[-0.5887040496,0.5917211175],"value_targets":82.4252700806} +{"eps_id":1306964917,"obs":[0.0050554238,-0.0323173068,-0.029597044,-0.063740626],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0044090776,-0.2270026803,-0.0308718551,0.2194592804],"action_prob":0.6862698197,"action_logp":-0.3764844239,"action_dist_inputs":[0.3908977211,-0.3918397725],"value_targets":82.2477493286} +{"eps_id":1306964917,"obs":[0.0044090776,-0.2270026803,-0.0308718551,0.2194592804],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.000130976,-0.0314533524,-0.0264826696,-0.0827998295],"action_prob":0.7429757714,"action_logp":-0.2970918417,"action_dist_inputs":[-0.5293431282,0.5321497321],"value_targets":82.0684280396} +{"eps_id":1306964917,"obs":[-0.000130976,-0.0314533524,-0.0264826696,-0.0827998295],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.000760043,0.1640380174,-0.0281386673,-0.3837190568],"action_prob":0.2887879908,"action_logp":-1.2420624495,"action_dist_inputs":[0.4500417113,-0.451236099],"value_targets":81.8873062134} 
+{"eps_id":1306964917,"obs":[-0.000760043,0.1640380174,-0.0281386673,-0.3837190568],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0025207172,-0.0306733623,-0.0358130485,-0.100039281],"action_prob":0.9054784775,"action_logp":-0.0992917642,"action_dist_inputs":[1.1273428202,-1.1322927475],"value_targets":81.7043457031} +{"eps_id":1306964917,"obs":[0.0025207172,-0.0306733623,-0.0358130485,-0.100039281],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0019072501,-0.2252642363,-0.0378138348,0.1811330765],"action_prob":0.7506662607,"action_logp":-0.2867941558,"action_dist_inputs":[0.550352037,-0.5518165231],"value_targets":81.5195465088} +{"eps_id":1306964917,"obs":[0.0019072501,-0.2252642363,-0.0378138348,0.1811330765],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0025980347,-0.0296221655,-0.0341911726,-0.1232348606],"action_prob":0.6681807041,"action_logp":-0.4031966031,"action_dist_inputs":[-0.3487960696,0.3511720896],"value_targets":81.3328704834} +{"eps_id":1306964917,"obs":[-0.0025980347,-0.0296221655,-0.0341911726,-0.1232348606],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0031904778,-0.2242380083,-0.0366558693,0.1584680527],"action_prob":0.7768839002,"action_logp":-0.2524643838,"action_dist_inputs":[0.6229067445,-0.6246919036],"value_targets":81.144317627} +{"eps_id":1306964917,"obs":[-0.0031904778,-0.2242380083,-0.0366558693,0.1584680527],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0076752384,-0.0286109559,-0.0334865078,-0.1455498338],"action_prob":0.6285348535,"action_logp":-0.4643638432,"action_dist_inputs":[-0.2619122565,0.2640240192],"value_targets":80.9538574219} 
+{"eps_id":1306964917,"obs":[-0.0076752384,-0.0286109559,-0.0334865078,-0.1455498338],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0082474574,-0.2232377529,-0.0363975056,0.1363835037],"action_prob":0.7998046279,"action_logp":-0.2233877629,"action_dist_inputs":[0.6914852858,-0.6935886741],"value_targets":80.76146698} +{"eps_id":1306964917,"obs":[-0.0082474574,-0.2232377529,-0.0363975056,0.1363835037],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0127122123,-0.0276138801,-0.0336698368,-0.1675564647],"action_prob":0.5855512023,"action_logp":-0.5352016687,"action_dist_inputs":[-0.1718829125,0.173721239],"value_targets":80.5671386719} +{"eps_id":1306964917,"obs":[-0.0127122123,-0.0276138801,-0.0336698368,-0.1675564647],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0132644903,-0.2222381085,-0.0370209627,0.1143173501],"action_prob":0.8201587796,"action_logp":-0.1982572973,"action_dist_inputs":[0.7574996948,-0.7599240541],"value_targets":80.3708496094} +{"eps_id":1306964917,"obs":[-0.0132644903,-0.2222381085,-0.0370209627,0.1143173501],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0177092515,-0.0266057868,-0.0347346179,-0.1898117363],"action_prob":0.5389013886,"action_logp":-0.6182226539,"action_dist_inputs":[-0.0771863312,0.0787342861],"value_targets":80.1725769043} +{"eps_id":1306964917,"obs":[-0.0177092515,-0.0266057868,-0.0347346179,-0.1898117363],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0182413682,-0.2212140411,-0.0385308526,0.0917146429],"action_prob":0.8384504914,"action_logp":-0.1761997789,"action_dist_inputs":[0.8219949007,-0.8247491717],"value_targets":79.9722976685} 
+{"eps_id":1306964917,"obs":[-0.0182413682,-0.2212140411,-0.0385308526,0.0917146429],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0226656478,-0.0255616065,-0.0366965607,-0.2128714472],"action_prob":0.4885109663,"action_logp":-0.7163933516,"action_dist_inputs":[0.0235979725,-0.0223662034],"value_targets":79.7699966431} +{"eps_id":1306964917,"obs":[-0.0226656478,-0.0255616065,-0.0366965607,-0.2128714472],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0231768806,-0.2201402038,-0.0409539863,0.0680135861],"action_prob":0.8550024629,"action_logp":-0.1566509157,"action_dist_inputs":[0.8856448531,-0.8887429833],"value_targets":79.5656509399} +{"eps_id":1306964917,"obs":[-0.0231768806,-0.2201402038,-0.0409539863,0.0680135861],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0275796857,-0.0244557578,-0.0395937152,-0.2373040766],"action_prob":0.4347888231,"action_logp":-0.8328948021,"action_dist_inputs":[0.1316091865,-0.1307297647],"value_targets":79.3592453003} +{"eps_id":1306964917,"obs":[-0.0275796857,-0.0244557578,-0.0395937152,-0.2373040766],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0280687995,-0.2189903408,-0.0443397984,0.0426316001],"action_prob":0.8699986935,"action_logp":-0.1392636001,"action_dist_inputs":[0.9487427473,-0.9522038698],"value_targets":79.1507568359} +{"eps_id":1306964917,"obs":[-0.0280687995,-0.2189903408,-0.0443397984,0.0426316001],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0324486084,-0.413449347,-0.0434871651,0.3210017979],"action_prob":0.6211539507,"action_logp":-0.4761762917,"action_dist_inputs":[0.2474641204,-0.2469849885],"value_targets":78.9401550293} 
+{"eps_id":1306964917,"obs":[-0.0324486084,-0.413449347,-0.0434871651,0.3210017979],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0407175943,-0.2177359462,-0.0370671302,0.0149280494],"action_prob":0.7762226462,"action_logp":-0.253315866,"action_dist_inputs":[-0.6198958755,0.6238921285],"value_targets":78.727432251} +{"eps_id":1306964917,"obs":[-0.0407175943,-0.2177359462,-0.0370671302,0.0149280494],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0450723134,-0.4123072624,-0.0367685705,0.2956892848],"action_prob":0.655618608,"action_logp":-0.4221760333,"action_dist_inputs":[0.321947962,-0.3218814731],"value_targets":78.5125579834} +{"eps_id":1306964917,"obs":[-0.0450723134,-0.4123072624,-0.0367685705,0.2956892848],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0533184595,-0.6068862677,-0.030854784,0.5765529871],"action_prob":0.2460075915,"action_logp":-1.4023928642,"action_dist_inputs":[-0.5581468344,0.561873138],"value_targets":78.2955093384} +{"eps_id":1306964917,"obs":[-0.0533184595,-0.6068862677,-0.030854784,0.5765529871],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0654561818,-0.4113457203,-0.0193237234,0.2743119001],"action_prob":0.9109876752,"action_logp":-0.0932258815,"action_dist_inputs":[-1.1597534418,1.1660013199],"value_targets":78.0762710571} +{"eps_id":1306964917,"obs":[-0.0654561818,-0.4113457203,-0.0193237234,0.2743119001],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.073683098,-0.6061866879,-0.0138374856,0.5608379841],"action_prob":0.2471003383,"action_logp":-1.3979607821,"action_dist_inputs":[-0.5553324819,0.5588050485],"value_targets":77.8548202515} 
+{"eps_id":1306964917,"obs":[-0.073683098,-0.6061866879,-0.0138374856,0.5608379841],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0858068317,-0.4108732939,-0.0026207254,0.2638278604],"action_prob":0.912432611,"action_logp":-0.0916410461,"action_dist_inputs":[-1.1687937975,1.174911499],"value_targets":77.6311340332} +{"eps_id":1306964917,"obs":[-0.0858068317,-0.4108732939,-0.0026207254,0.2638278604],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0940243006,-0.2157140374,0.0026558319,-0.029680511],"action_prob":0.7652036548,"action_logp":-0.2676132321,"action_dist_inputs":[-0.589060545,0.5923631787],"value_targets":77.4051818848} +{"eps_id":1306964917,"obs":[-0.0940243006,-0.2157140374,0.0026558319,-0.029680511],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0983385742,-0.4108739793,0.0020622218,0.2638391852],"action_prob":0.6531373262,"action_logp":-0.4259678423,"action_dist_inputs":[0.3161211014,-0.3167373836],"value_targets":77.1769561768} +{"eps_id":1306964917,"obs":[-0.0983385742,-0.4108739793,0.0020622218,0.2638391852],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1065560579,-0.6060252786,0.0073390054,0.5571718216],"action_prob":0.2273000628,"action_logp":-1.4814842939,"action_dist_inputs":[-0.6101995111,0.6134203076],"value_targets":76.9464187622} +{"eps_id":1306964917,"obs":[-0.1065560579,-0.6060252786,0.0073390054,0.5571718216],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1186765656,-0.4110071361,0.0184824411,0.2668101192],"action_prob":0.918171823,"action_logp":-0.085370712,"action_dist_inputs":[-1.2058718204,1.2118914127],"value_targets":76.7135543823} 
+{"eps_id":1306964917,"obs":[-0.1186765656,-0.4110071361,0.0184824411,0.2668101192],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1268967092,-0.2161537856,0.0238186438,-0.0199864674],"action_prob":0.7986184955,"action_logp":-0.2248719335,"action_dist_inputs":[-0.6872579455,0.6904242039],"value_targets":76.4783401489} +{"eps_id":1306964917,"obs":[-0.1268967092,-0.2161537856,0.0238186438,-0.0199864674],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1312197745,-0.0213813707,0.0234189145,-0.3050602078],"action_prob":0.4121302366,"action_logp":-0.8864158392,"action_dist_inputs":[0.1772902906,-0.1778757125],"value_targets":76.2407455444} +{"eps_id":1306964917,"obs":[-0.1312197745,-0.0213813707,0.0234189145,-0.3050602078],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.131647408,-0.2168290913,0.0173177104,-0.0050844713],"action_prob":0.868245244,"action_logp":-0.1412810534,"action_dist_inputs":[0.9405239224,-0.9450080991],"value_targets":76.0007553101} +{"eps_id":1306964917,"obs":[-0.131647408,-0.2168290913,0.0173177104,-0.0050844713],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1359839886,-0.4121950567,0.0172160212,0.2930116951],"action_prob":0.571614027,"action_logp":-0.5592913032,"action_dist_inputs":[0.1439723074,-0.1444671005],"value_targets":75.7583389282} +{"eps_id":1306964917,"obs":[-0.1359839886,-0.4121950567,0.0172160212,0.2930116951],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1442278922,-0.6075581908,0.0230762549,0.5910742283],"action_prob":0.1761717647,"action_logp":-1.7362958193,"action_dist_inputs":[-0.7696331143,0.7728695869],"value_targets":75.5134735107} 
+{"eps_id":1306964917,"obs":[-0.1442278922,-0.6075581908,0.0230762549,0.5910742283],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.156379059,-0.4127667844,0.0348977409,0.3057487309],"action_prob":0.9285705686,"action_logp":-0.0741089061,"action_dist_inputs":[-1.279389739,1.285546422],"value_targets":75.26612854} +{"eps_id":1306964917,"obs":[-0.156379059,-0.4127667844,0.0348977409,0.3057487309],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1646343917,-0.2181590796,0.0410127155,0.0242727492],"action_prob":0.8502894044,"action_logp":-0.1621785462,"action_dist_inputs":[-0.8667903543,0.8700823188],"value_targets":75.0162963867} +{"eps_id":1306964917,"obs":[-0.1646343917,-0.2181590796,0.0410127155,0.0242727492],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1689975709,-0.0236485526,0.0414981693,-0.2551933229],"action_prob":0.5475571156,"action_logp":-0.6022884846,"action_dist_inputs":[-0.0955304652,0.0952748805],"value_targets":74.7639312744} +{"eps_id":1306964917,"obs":[-0.1689975709,-0.0236485526,0.0414981693,-0.2551933229],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1694705486,0.1708570719,0.0363943018,-0.5345036387],"action_prob":0.1821231842,"action_logp":-1.7030719519,"action_dist_inputs":[0.7489786148,-0.7530497909],"value_targets":74.5090255737} +{"eps_id":1306964917,"obs":[-0.1694705486,0.1708570719,0.0363943018,-0.5345036387],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1660533994,-0.0247572679,0.0257042311,-0.2305788547],"action_prob":0.9240037799,"action_logp":-0.0790391192,"action_dist_inputs":[1.2455314398,-1.2525012493],"value_targets":74.2515411377} 
+{"eps_id":1306964917,"obs":[-0.1660533994,-0.0247572679,0.0257042311,-0.2305788547],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1665485501,-0.2202369124,0.0210926533,0.0700999573],"action_prob":0.8117611408,"action_logp":-0.2085491568,"action_dist_inputs":[0.7287959456,-0.7326985598],"value_targets":73.9914550781} +{"eps_id":1306964917,"obs":[-0.1665485501,-0.2202369124,0.0210926533,0.0700999573],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1709532887,-0.0254236106,0.0224946532,-0.2158542126],"action_prob":0.5969471335,"action_logp":-0.5159267187,"action_dist_inputs":[-0.1963341832,0.1964265555],"value_targets":73.7287445068} +{"eps_id":1306964917,"obs":[-0.1709532887,-0.0254236106,0.0224946532,-0.2158542126],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.171461761,-0.2208597958,0.0181775689,0.0838387758],"action_prob":0.8004097342,"action_logp":-0.2226314843,"action_dist_inputs":[0.6925289035,-0.6963282824],"value_targets":73.4633712769} +{"eps_id":1306964917,"obs":[-0.171461761,-0.2208597958,0.0181775689,0.0838387758],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1758789569,-0.026003072,0.0198543444,-0.2030541152],"action_prob":0.6181612015,"action_logp":-0.4810060263,"action_dist_inputs":[-0.2407873422,0.2409632653],"value_targets":73.1953277588} +{"eps_id":1306964917,"obs":[-0.1758789569,-0.026003072,0.0198543444,-0.2030541152],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1763990223,0.1688293964,0.015793262,-0.4894084334],"action_prob":0.2105082273,"action_logp":-1.5582305193,"action_dist_inputs":[0.6590758562,-0.6627886891],"value_targets":72.9245758057} 
+{"eps_id":1306964917,"obs":[-0.1763990223,0.1688293964,0.015793262,-0.4894084334],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1730224341,-0.0265117642,0.006005093,-0.1917902082],"action_prob":0.9203673601,"action_logp":-0.0829824135,"action_dist_inputs":[1.2202731371,-1.227075696],"value_targets":72.6510848999} +{"eps_id":1306964917,"obs":[-0.1730224341,-0.0265117642,0.006005093,-0.1917902082],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1735526621,-0.2217191011,0.0021692889,0.1027810127],"action_prob":0.7942607403,"action_logp":-0.230343461,"action_dist_inputs":[0.6735647321,-0.6772373915],"value_targets":72.3748321533} +{"eps_id":1306964917,"obs":[-0.1735526621,-0.2217191011,0.0021692889,0.1027810127],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1779870391,-0.026628308,0.004224909,-0.1892167181],"action_prob":0.621245265,"action_logp":-0.4760292768,"action_dist_inputs":[-0.2472925782,0.2475447059],"value_targets":72.0957946777} +{"eps_id":1306964917,"obs":[-0.1779870391,-0.026628308,0.004224909,-0.1892167181],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1785196066,-0.2218104452,0.0004405748,0.1047959924],"action_prob":0.7931879163,"action_logp":-0.2316951007,"action_dist_inputs":[0.670278132,-0.6739717126],"value_targets":71.8139266968} +{"eps_id":1306964917,"obs":[-0.1785196066,-0.2218104452,0.0004405748,0.1047959924],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1829558164,-0.0266948137,0.0025364948,-0.1877478957],"action_prob":0.6219962239,"action_logp":-0.4748212695,"action_dist_inputs":[-0.2489055693,0.2491243482],"value_targets":71.5292205811} 
+{"eps_id":1306964917,"obs":[-0.1829558164,-0.0266948137,0.0025364948,-0.1877478957],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.183489725,-0.2218529582,-0.0012184633,0.1057341099],"action_prob":0.7931870222,"action_logp":-0.2316962332,"action_dist_inputs":[0.6702589393,-0.6739851236],"value_targets":71.2416381836} +{"eps_id":1306964917,"obs":[-0.183489725,-0.2218529582,-0.0012184633,0.1057341099],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1879267842,-0.0267135706,0.0008962189,-0.1873329878],"action_prob":0.6208347082,"action_logp":-0.4766904116,"action_dist_inputs":[-0.2464587539,0.2466337383],"value_targets":70.9511489868} +{"eps_id":1306964917,"obs":[-0.1879267842,-0.0267135706,0.0008962189,-0.1873329878],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1884610504,-0.2218483388,-0.0028504408,0.1056325287],"action_prob":0.7942593694,"action_logp":-0.2303451747,"action_dist_inputs":[0.6735121012,-0.6772819161],"value_targets":70.6577301025} +{"eps_id":1306964917,"obs":[-0.1884610504,-0.2218483388,-0.0028504408,0.1056325287],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1928980201,-0.0266856495,-0.0007377903,-0.1879483312],"action_prob":0.6177434921,"action_logp":-0.4816820025,"action_dist_inputs":[-0.2399302721,0.2400510311],"value_targets":70.3613433838} +{"eps_id":1306964917,"obs":[-0.1928980201,-0.0266856495,-0.0007377903,-0.1879483312],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.193431735,-0.2217970341,-0.0044967569,0.104501754],"action_prob":0.7964115739,"action_logp":-0.2276391536,"action_dist_inputs":[0.6800953746,-0.6839200854],"value_targets":70.061958313} 
+{"eps_id":1306964917,"obs":[-0.193431735,-0.2217970341,-0.0044967569,0.104501754],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1978676617,-0.0266109332,-0.0024067219,-0.1895964891],"action_prob":0.6126400232,"action_logp":-0.4899777472,"action_dist_inputs":[-0.229183495,0.2292396277],"value_targets":69.7595596313} +{"eps_id":1306964917,"obs":[-0.1978676617,-0.0266109332,-0.0024067219,-0.1895964891],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1983998865,-0.2216983736,-0.0061986516,0.1023262516],"action_prob":0.7996532321,"action_logp":-0.2235770971,"action_dist_inputs":[0.6901189089,-0.6940097809],"value_targets":69.4540939331} +{"eps_id":1306964917,"obs":[-0.1983998865,-0.2216983736,-0.0061986516,0.1023262516],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2028338611,-0.0264881365,-0.0041521266,-0.1923058629],"action_prob":0.6053746343,"action_logp":-0.5019077659,"action_dist_inputs":[-0.213964954,0.2139456719],"value_targets":69.1455535889} +{"eps_id":1306964917,"obs":[-0.2028338611,-0.0264881365,-0.0041521266,-0.1923058629],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2033636123,-0.2215504497,-0.0079982439,0.0990643501],"action_prob":0.8039947748,"action_logp":-0.2181625217,"action_dist_inputs":[0.7037411928,-0.7077101469],"value_targets":68.8338928223} +{"eps_id":1306964917,"obs":[-0.2033636123,-0.2215504497,-0.0079982439,0.0990643501],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2077946216,-0.416556865,-0.0060169571,0.3892131448],"action_prob":0.4042728841,"action_logp":-0.9056651592,"action_dist_inputs":[-0.1938993931,0.1937931776],"value_targets":68.5190811157} 
+{"eps_id":1306964917,"obs":[-0.2077946216,-0.416556865,-0.0060169571,0.3892131448],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2161257565,-0.2213500291,0.0017673061,0.0946392268],"action_prob":0.8718910217,"action_logp":-0.1370908618,"action_dist_inputs":[-0.9572172761,0.9605652094],"value_targets":68.2010955811} +{"eps_id":1306964917,"obs":[-0.2161257565,-0.2213500291,0.0017673061,0.0946392268],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2205527574,-0.0262534469,0.0036600907,-0.1974855959],"action_prob":0.609161973,"action_logp":-0.4956710339,"action_dist_inputs":[-0.2219953239,0.2217959464],"value_targets":67.8798904419} +{"eps_id":1306964917,"obs":[-0.2205527574,-0.0262534469,0.0036600907,-0.1974855959],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2210778296,-0.2214275599,-0.0002896211,0.0963496789],"action_prob":0.7986752391,"action_logp":-0.2248008996,"action_dist_inputs":[0.6869731545,-0.6910617352],"value_targets":67.5554504395} +{"eps_id":1306964917,"obs":[-0.2210778296,-0.2214275599,-0.0002896211,0.0963496789],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2255063802,-0.0263014566,0.0016373725,-0.1964246035],"action_prob":0.6086509824,"action_logp":-0.4965102673,"action_dist_inputs":[-0.2209406644,0.2207046449],"value_targets":67.227722168} +{"eps_id":1306964917,"obs":[-0.2255063802,-0.0263014566,0.0016373725,-0.1964246035],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2260324061,-0.2214467973,-0.0022911197,0.0967743844],"action_prob":0.7994223237,"action_logp":-0.2238658965,"action_dist_inputs":[0.689281404,-0.6934063435],"value_targets":66.8966903687} 
+{"eps_id":1306964917,"obs":[-0.2260324061,-0.2214467973,-0.0022911197,0.0967743844],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2304613441,-0.0262920763,-0.000355632,-0.1966305077],"action_prob":0.6057932377,"action_logp":-0.5012165308,"action_dist_inputs":[-0.2149742842,0.2146890163],"value_targets":66.5623168945} +{"eps_id":1306964917,"obs":[-0.2304613441,-0.0262920763,-0.000355632,-0.1966305077],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2309871912,0.1688349545,-0.0042882422,-0.4894255996],"action_prob":0.1985605955,"action_logp":-1.6166609526,"action_dist_inputs":[0.6955702305,-0.6997447014],"value_targets":66.2245635986} +{"eps_id":1306964917,"obs":[-0.2309871912,0.1688349545,-0.0042882422,-0.4894255996],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2276104987,-0.0262262337,-0.0140767535,-0.1980972439],"action_prob":0.9252874851,"action_logp":-0.0776508227,"action_dist_inputs":[1.2546049356,-1.2618519068],"value_targets":65.883392334} +{"eps_id":1306964917,"obs":[-0.2276104987,-0.0262262337,-0.0140767535,-0.1980972439],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2281350195,-0.2211440355,-0.0180386994,0.0901120827],"action_prob":0.8180985451,"action_logp":-0.200772509,"action_dist_inputs":[0.7496224642,-0.7538952827],"value_targets":65.5387802124} +{"eps_id":1306964917,"obs":[-0.2281350195,-0.2211440355,-0.0180386994,0.0901120827],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2325578928,-0.0257682409,-0.0162364561,-0.2082070261],"action_prob":0.5590968132,"action_logp":-0.5814326406,"action_dist_inputs":[-0.1189819574,0.1185154691],"value_targets":65.1906890869} 
+{"eps_id":1306964917,"obs":[-0.2325578928,-0.0257682409,-0.0162364561,-0.2082070261],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2330732644,-0.2206543088,-0.0204005968,0.0793102235],"action_prob":0.8287748694,"action_logp":-0.1878067404,"action_dist_inputs":[0.7862685919,-0.7907004952],"value_targets":64.8390808105} +{"eps_id":1306964917,"obs":[-0.2330732644,-0.2206543088,-0.0204005968,0.0793102235],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2374863476,-0.0252459366,-0.0188143924,-0.219738692],"action_prob":0.5326095819,"action_logp":-0.6299666166,"action_dist_inputs":[-0.0656289682,0.0649948493],"value_targets":64.4839172363} +{"eps_id":1306964917,"obs":[-0.2374863476,-0.0252459366,-0.0188143924,-0.219738692],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2379912734,-0.2200939655,-0.0232091676,0.0669505149],"action_prob":0.8400592208,"action_logp":-0.1742828786,"action_dist_inputs":[0.8270304203,-0.8316383958],"value_targets":64.1251678467} +{"eps_id":1306964917,"obs":[-0.2379912734,-0.2200939655,-0.0232091676,0.0669505149],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2423931509,-0.0246470831,-0.0218701568,-0.2329637408],"action_prob":0.5017136931,"action_logp":-0.6897256374,"action_dist_inputs":[-0.0038387205,0.0030161226],"value_targets":63.7627983093} +{"eps_id":1306964917,"obs":[-0.2423931509,-0.0246470831,-0.0218701568,-0.2329637408],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2428860813,0.1707804203,-0.0265294313,-0.5324642062],"action_prob":0.1481680423,"action_logp":-1.9094082117,"action_dist_inputs":[0.8721190691,-0.8769231439],"value_targets":63.3967666626} 
+{"eps_id":1306964917,"obs":[-0.2428860813,0.1707804203,-0.0265294313,-0.5324642062],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2394704819,-0.0239585489,-0.0371787138,-0.2482572794],"action_prob":0.9361636639,"action_logp":-0.0659649372,"action_dist_inputs":[1.3388450146,-1.3466233015],"value_targets":63.0270347595} +{"eps_id":1306964917,"obs":[-0.2394704819,-0.0239585489,-0.0371787138,-0.2482572794],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2399496436,0.1716741025,-0.0421438627,-0.5524314642],"action_prob":0.1280998886,"action_logp":-2.0549449921,"action_dist_inputs":[0.9563986659,-0.9614660144],"value_targets":62.6535720825} +{"eps_id":1306964917,"obs":[-0.2399496436,0.1716741025,-0.0421438627,-0.5524314642],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2365161628,-0.0228314474,-0.0531924888,-0.273318857],"action_prob":0.9404773116,"action_logp":-0.0613677613,"action_dist_inputs":[1.3760082722,-1.3840218782],"value_targets":62.2763366699} +{"eps_id":1306964917,"obs":[-0.2365161628,-0.0228314474,-0.0531924888,-0.273318857],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2369727939,-0.2171556652,-0.0586588681,0.0021239764],"action_prob":0.8920726776,"action_logp":-0.114207685,"action_dist_inputs":[1.0533252954,-1.0587641001],"value_targets":61.8952865601} +{"eps_id":1306964917,"obs":[-0.2369727939,-0.2171556652,-0.0586588681,0.0021239764],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2413159162,-0.0212436505,-0.0586163886,-0.3084744215],"action_prob":0.306071043,"action_logp":-1.1839380264,"action_dist_inputs":[0.40835917,-0.4101930559],"value_targets":61.5103912354} 
+{"eps_id":1306964917,"obs":[-0.2413159162,-0.0212436505,-0.0586163886,-0.3084744215],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2417407781,-0.2154835463,-0.0647858754,-0.03483833],"action_prob":0.9060826898,"action_logp":-0.0986246839,"action_dist_inputs":[1.1304255724,-1.1362903118],"value_targets":61.1216087341} +{"eps_id":1306964917,"obs":[-0.2417407781,-0.2154835463,-0.0647858754,-0.03483833],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2460504472,-0.4096195102,-0.0654826462,0.2367209792],"action_prob":0.7555286288,"action_logp":-0.2803376019,"action_dist_inputs":[0.5629612207,-0.5653584003],"value_targets":60.7288970947} +{"eps_id":1306964917,"obs":[-0.2460504472,-0.4096195102,-0.0654826462,0.2367209792],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2542428374,-0.2136261016,-0.0607482232,-0.0758771747],"action_prob":0.627188921,"action_logp":-0.4665074646,"action_dist_inputs":[-0.2594121993,0.260763824],"value_targets":60.3322181702} +{"eps_id":1306964917,"obs":[-0.2542428374,-0.2136261016,-0.0607482232,-0.0758771747],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.258515358,-0.0176882595,-0.0622657686,-0.3870908022],"action_prob":0.2043511271,"action_logp":-1.5879155397,"action_dist_inputs":[0.6781701446,-0.6811480522],"value_targets":59.9315338135} +{"eps_id":1306964917,"obs":[-0.258515358,-0.0176882595,-0.0622657686,-0.3870908022],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2588691413,0.1782597601,-0.070007585,-0.6987375617],"action_prob":0.0754712,"action_logp":-2.5840041637,"action_dist_inputs":[1.2494074106,-1.256125927],"value_targets":59.526802063} 
+{"eps_id":1306964917,"obs":[-0.2588691413,0.1782597601,-0.070007585,-0.6987375617],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2553039491,-0.015825294,-0.0839823335,-0.4288884997],"action_prob":0.9513737559,"action_logp":-0.049848251,"action_dist_inputs":[1.4822382927,-1.4915058613],"value_targets":59.117980957} +{"eps_id":1306964917,"obs":[-0.2553039491,-0.015825294,-0.0839823335,-0.4288884997],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2556204498,-0.2096637189,-0.092560105,-0.1638174206],"action_prob":0.9338106513,"action_logp":-0.068481572,"action_dist_inputs":[1.3197718859,-1.3269828558],"value_targets":58.7050323486} +{"eps_id":1306964917,"obs":[-0.2556204498,-0.2096637189,-0.092560105,-0.1638174206],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2598137259,-0.4033471346,-0.0958364531,0.0982902497],"action_prob":0.879322648,"action_logp":-0.1286033839,"action_dist_inputs":[0.9908201694,-0.9952109456],"value_targets":58.2879104614} +{"eps_id":1306964917,"obs":[-0.2598137259,-0.4033471346,-0.0958364531,0.0982902497],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2678806484,-0.5969742537,-0.0938706473,0.3592652977],"action_prob":0.6880148649,"action_logp":-0.3739448488,"action_dist_inputs":[0.3950945437,-0.3957603276],"value_targets":57.8665771484} +{"eps_id":1306964917,"obs":[-0.2678806484,-0.5969742537,-0.0938706473,0.3592652977],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2798201442,-0.7906451225,-0.0866853446,0.6209352016],"action_prob":0.3275915384,"action_logp":-1.1159877777,"action_dist_inputs":[-0.3581077754,0.3609906733],"value_targets":57.4409866333} 
+{"eps_id":1306964917,"obs":[-0.2798201442,-0.7906451225,-0.0866853446,0.6209352016],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2956330478,-0.5944263935,-0.0742666349,0.3022586703],"action_prob":0.8704974651,"action_logp":-0.1386904269,"action_dist_inputs":[-0.9500108957,0.95535326],"value_targets":57.0110969543} +{"eps_id":1306964917,"obs":[-0.2956330478,-0.5944263935,-0.0742666349,0.3022586703],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3075215816,-0.7884156108,-0.0682214648,0.57062608],"action_prob":0.3832781613,"action_logp":-0.9589942694,"action_dist_inputs":[-0.2366987914,0.2389582545],"value_targets":56.5768661499} +{"eps_id":1306964917,"obs":[-0.3075215816,-0.7884156108,-0.0682214648,0.57062608],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.323289901,-0.9825178981,-0.0568089411,0.8410606384],"action_prob":0.1455378234,"action_logp":-1.9273192883,"action_dist_inputs":[-0.8825395107,0.8874966502],"value_targets":56.1382484436} +{"eps_id":1306964917,"obs":[-0.323289901,-0.9825178981,-0.0568089411,0.8410606384],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3429402411,-0.7866683602,-0.039987728,0.5310673714],"action_prob":0.9253368378,"action_logp":-0.0775974318,"action_dist_inputs":[-1.255073905,1.2620981932],"value_targets":55.6952018738} +{"eps_id":1306964917,"obs":[-0.3429402411,-0.7866683602,-0.039987728,0.5310673714],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3586736023,-0.5910074115,-0.0293663815,0.2260573357],"action_prob":0.8509344459,"action_logp":-0.1614201814,"action_dist_inputs":[-0.868671298,0.8732779026],"value_targets":55.2476768494} 
+{"eps_id":1306964917,"obs":[-0.3586736023,-0.5910074115,-0.0293663815,0.2260573357],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3704937696,-0.395478338,-0.0248452332,-0.0757422745],"action_prob":0.5795463324,"action_logp":-0.545509696,"action_dist_inputs":[-0.1598089188,0.1611024439],"value_targets":54.7956352234} +{"eps_id":1306964917,"obs":[-0.3704937696,-0.395478338,-0.0248452332,-0.0757422745],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3784033358,-0.5902354717,-0.0263600796,0.2089994997],"action_prob":0.7905658484,"action_logp":-0.2350063324,"action_dist_inputs":[0.6626579165,-0.6656816006],"value_targets":54.3390235901} +{"eps_id":1306964917,"obs":[-0.3784033358,-0.5902354717,-0.0263600796,0.2089994997],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3902080357,-0.7849707603,-0.0221800897,0.4932520092],"action_prob":0.4419195652,"action_logp":-0.8166273832,"action_dist_inputs":[-0.1161960289,0.1171792522],"value_targets":53.8778038025} +{"eps_id":1306964917,"obs":[-0.3902080357,-0.7849707603,-0.0221800897,0.4932520092],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4059074521,-0.5895431042,-0.0123150488,0.1936622411],"action_prob":0.8403420448,"action_logp":-0.1739462465,"action_dist_inputs":[-0.8283316493,0.8324438334],"value_targets":53.4119224548} +{"eps_id":1306964917,"obs":[-0.4059074521,-0.5895431042,-0.0123150488,0.1936622411],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4176983237,-0.3942471743,-0.008441804,-0.1028799713],"action_prob":0.5635230541,"action_logp":-0.5735470653,"action_dist_inputs":[-0.127379179,0.1280934811],"value_targets":52.9413375854} 
+{"eps_id":1306964917,"obs":[-0.4176983237,-0.3942471743,-0.008441804,-0.1028799713],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4255832434,-0.199005276,-0.0104994038,-0.3982142508],"action_prob":0.2068388164,"action_logp":-1.5758154392,"action_dist_inputs":[0.6703208685,-0.6737658381],"value_targets":52.4659957886} +{"eps_id":1306964917,"obs":[-0.4255832434,-0.199005276,-0.0104994038,-0.3982142508],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4295633733,-0.3939767182,-0.018463688,-0.1088599935],"action_prob":0.9165567756,"action_logp":-0.0871312767,"action_dist_inputs":[1.1947926283,-1.2016648054],"value_targets":51.9858551025} +{"eps_id":1306964917,"obs":[-0.4295633733,-0.3939767182,-0.018463688,-0.1088599935],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4374428988,-0.5888292789,-0.0206408892,0.1779410243],"action_prob":0.8069064021,"action_logp":-0.2145475745,"action_dist_inputs":[0.7131937146,-0.7168389559],"value_targets":51.5008621216} +{"eps_id":1306964917,"obs":[-0.4374428988,-0.5888292789,-0.0206408892,0.1779410243],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4492194951,-0.7836498618,-0.0170820672,0.4640416503],"action_prob":0.4756217301,"action_logp":-0.7431324124,"action_dist_inputs":[-0.0486607887,0.0489297248],"value_targets":51.0109710693} +{"eps_id":1306964917,"obs":[-0.4492194951,-0.7836498618,-0.0170820672,0.4640416503],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4648924768,-0.588290751,-0.0078012347,0.1660238355],"action_prob":0.8246154785,"action_logp":-0.1928380728,"action_dist_inputs":[-0.7722107172,0.7757254243],"value_targets":50.5161323547} 
+{"eps_id":1306964917,"obs":[-0.4648924768,-0.588290751,-0.0078012347,0.1660238355],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4766582847,-0.393058002,-0.0044807582,-0.1291099191],"action_prob":0.5340355039,"action_logp":-0.6272929907,"action_dist_inputs":[-0.0681574121,0.0681952909],"value_targets":50.0162963867} +{"eps_id":1306964917,"obs":[-0.4766582847,-0.393058002,-0.0044807582,-0.1291099191],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4845194519,-0.5881154537,-0.0070629562,0.1621560305],"action_prob":0.8048413396,"action_logp":-0.2171100825,"action_dist_inputs":[0.7064341903,-0.7103981376],"value_targets":49.5114097595} +{"eps_id":1306964917,"obs":[-0.4845194519,-0.5881154537,-0.0070629562,0.1621560305],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4962817729,-0.3928931057,-0.0038198358,-0.1327466816],"action_prob":0.532525897,"action_logp":-0.6301237941,"action_dist_inputs":[-0.0652068779,0.0650806129],"value_targets":49.0014266968} +{"eps_id":1306964917,"obs":[-0.4962817729,-0.3928931057,-0.0038198358,-0.1327466816],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5041396022,-0.587960124,-0.0064747692,0.158728689],"action_prob":0.8046192527,"action_logp":-0.2173860818,"action_dist_inputs":[0.7056744099,-0.7097446918],"value_targets":48.486289978} +{"eps_id":1306964917,"obs":[-0.5041396022,-0.587960124,-0.0064747692,0.158728689],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5158988237,-0.3927460909,-0.0033001956,-0.135989815],"action_prob":0.5315579176,"action_logp":-0.6319431067,"action_dist_inputs":[-0.0633419305,0.0630577579],"value_targets":47.9659461975} 
+{"eps_id":1306964917,"obs":[-0.5158988237,-0.3927460909,-0.0033001956,-0.135989815],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5237537622,-0.5878206491,-0.0060199918,0.1556501389],"action_prob":0.804089427,"action_logp":-0.2180448174,"action_dist_inputs":[0.703941226,-0.7081109285],"value_targets":47.4403495789} +{"eps_id":1306964917,"obs":[-0.5237537622,-0.5878206491,-0.0060199918,0.1556501389],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5355101824,-0.3926129937,-0.0029069891,-0.1389258653],"action_prob":0.5309861898,"action_logp":-0.6330192685,"action_dist_inputs":[-0.0622703098,0.0618334115],"value_targets":46.9094467163} +{"eps_id":1306964917,"obs":[-0.5355101824,-0.3926129937,-0.0029069891,-0.1389258653],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5433624387,-0.5876932144,-0.0056855064,0.1528385431],"action_prob":0.8033152223,"action_logp":-0.2190080881,"action_dist_inputs":[0.7014406919,-0.7057043314],"value_targets":46.3731765747} +{"eps_id":1306964917,"obs":[-0.5433624387,-0.5876932144,-0.0056855064,0.1528385431],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5551162958,-0.7827332616,-0.0026287355,0.4437223971],"action_prob":0.4693211317,"action_logp":-0.7564680576,"action_dist_inputs":[-0.061727643,0.0611422956],"value_targets":45.8314933777} +{"eps_id":1306964917,"obs":[-0.5551162958,-0.7827332616,-0.0026287355,0.4437223971],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5707709789,-0.9778179526,0.0062457123,0.7355755568],"action_prob":0.1763985008,"action_logp":-1.7350096703,"action_dist_inputs":[-0.7691257,0.7718154788],"value_targets":45.2843360901} 
+{"eps_id":1306964917,"obs":[-0.5707709789,-0.9778179526,0.0062457123,0.7355755568],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5903273225,-0.782782793,0.0209572222,0.4448647797],"action_prob":0.9147726893,"action_logp":-0.0890797004,"action_dist_inputs":[-1.1839309931,1.1894222498],"value_targets":44.7316513062} +{"eps_id":1306964917,"obs":[-0.5903273225,-0.782782793,0.0209572222,0.4448647797],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6059829593,-0.5879635215,0.0298545193,0.1588610262],"action_prob":0.8419857621,"action_logp":-0.1719921976,"action_dist_inputs":[-0.835254252,0.8378234506],"value_targets":44.1733856201} +{"eps_id":1306964917,"obs":[-0.6059829593,-0.5879635215,0.0298545193,0.1588610262],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6177422404,-0.3932814598,0.0330317393,-0.1242560595],"action_prob":0.6169058681,"action_logp":-0.4830388725,"action_dist_inputs":[-0.2385932952,0.2378422916],"value_targets":43.6094818115} +{"eps_id":1306964917,"obs":[-0.6177422404,-0.3932814598,0.0330317393,-0.1242560595],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6256078482,-0.5888606906,0.0305466186,0.178662315],"action_prob":0.7312259674,"action_logp":-0.3130327761,"action_dist_inputs":[0.4983298182,-0.5025218725],"value_targets":43.0398788452} +{"eps_id":1306964917,"obs":[-0.6256078482,-0.5888606906,0.0305466186,0.178662315],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6373850703,-0.7844061255,0.0341198631,0.4808229804],"action_prob":0.3497818708,"action_logp":-1.0504455566,"action_dist_inputs":[-0.3103435934,0.3096547127],"value_targets":42.4645233154} 
+{"eps_id":1306964917,"obs":[-0.6373850703,-0.7844061255,0.0341198631,0.4808229804],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6530731916,-0.5897819996,0.0437363237,0.1990860105],"action_prob":0.866710186,"action_logp":-0.1430506259,"action_dist_inputs":[-0.9347710013,0.9374074936],"value_targets":41.8833580017} +{"eps_id":1306964917,"obs":[-0.6530731916,-0.5897819996,0.0437363237,0.1990860105],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6648688316,-0.7855013609,0.0477180444,0.5052386522],"action_prob":0.3003276885,"action_logp":-1.2028810978,"action_dist_inputs":[-0.4231877327,0.4225501418],"value_targets":41.2963218689} +{"eps_id":1306964917,"obs":[-0.6648688316,-0.7855013609,0.0477180444,0.5052386522],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6805788875,-0.591083169,0.0578228161,0.2279671431],"action_prob":0.8808962703,"action_logp":-0.1268153787,"action_dist_inputs":[-0.999088347,1.0018571615],"value_targets":40.7033538818} +{"eps_id":1306964917,"obs":[-0.6805788875,-0.591083169,0.0578228161,0.2279671431],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6924005151,-0.3968332112,0.0623821579,-0.0459304526],"action_prob":0.7504994869,"action_logp":-0.2870163321,"action_dist_inputs":[-0.5508975387,0.5503802896],"value_targets":40.1044006348} +{"eps_id":1306964917,"obs":[-0.6924005151,-0.3968332112,0.0623821579,-0.0459304526],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7003371716,-0.5927916169,0.0614635497,0.2657639086],"action_prob":0.5537902117,"action_logp":-0.5909693837,"action_dist_inputs":[0.1061648354,-0.1098317802],"value_targets":39.4993934631} 
+{"eps_id":1306964917,"obs":[-0.7003371716,-0.5927916169,0.0614635497,0.2657639086],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7121930122,-0.3985983133,0.0667788312,-0.0069171176],"action_prob":0.7879005075,"action_logp":-0.238383472,"action_dist_inputs":[-0.6563085914,0.6560074687],"value_targets":38.8882751465} +{"eps_id":1306964917,"obs":[-0.7121930122,-0.3985983133,0.0667788312,-0.0069171176],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7201650143,-0.2044944614,0.0666404888,-0.2778055668],"action_prob":0.5198912024,"action_logp":-0.654135704,"action_dist_inputs":[-0.0415170863,0.0380898304],"value_targets":38.2709846497} +{"eps_id":1306964917,"obs":[-0.7201650143,-0.2044944614,0.0666404888,-0.2778055668],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7242549062,-0.0103833741,0.0610843748,-0.548748076],"action_prob":0.2179474831,"action_logp":-1.5235011578,"action_dist_inputs":[0.6357322931,-0.6419354677],"value_targets":37.6474609375} +{"eps_id":1306964917,"obs":[-0.7242549062,-0.0103833741,0.0610843748,-0.548748076],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7244625688,-0.2063078284,0.0501094125,-0.2374618948],"action_prob":0.9095306993,"action_logp":-0.0948265269,"action_dist_inputs":[1.1497499943,-1.1581685543],"value_targets":37.0176353455} +{"eps_id":1306964917,"obs":[-0.7244625688,-0.2063078284,0.0501094125,-0.2374618948],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7285887003,-0.4021085203,0.045360174,0.0705964863],"action_prob":0.7593987584,"action_logp":-0.2752282619,"action_dist_inputs":[0.5717111826,-0.5776748061],"value_targets":36.3814506531} 
+{"eps_id":1306964917,"obs":[-0.7285887003,-0.4021085203,0.045360174,0.0705964863],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.736630857,-0.2076652348,0.0467721038,-0.207437098],"action_prob":0.6039129496,"action_logp":-0.504325211,"action_dist_inputs":[-0.2123731375,0.2094229907],"value_targets":35.7388381958} +{"eps_id":1306964917,"obs":[-0.736630857,-0.2076652348,0.0467721038,-0.207437098],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7407841682,-0.013242228,0.0426233634,-0.48500669],"action_prob":0.2722423673,"action_logp":-1.3010625839,"action_dist_inputs":[0.4887416065,-0.4945338368],"value_targets":35.0897369385} +{"eps_id":1306964917,"obs":[-0.7407841682,-0.013242228,0.0426233634,-0.48500669],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7410490513,-0.2089389414,0.032923229,-0.1792007685],"action_prob":0.8945724368,"action_logp":-0.1114093885,"action_dist_inputs":[1.0651003122,-1.0732221603],"value_targets":34.4340782166} +{"eps_id":1306964917,"obs":[-0.7410490513,-0.2089389414,0.032923229,-0.1792007685],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7452278137,-0.4045161903,0.0293392129,0.1236837953],"action_prob":0.7114039063,"action_logp":-0.3405149281,"action_dist_inputs":[0.4482881427,-0.4539240301],"value_targets":33.7717971802} +{"eps_id":1306964917,"obs":[-0.7452278137,-0.4045161903,0.0293392129,0.1236837953],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.753318131,-0.2098265737,0.0318128876,-0.1596004367],"action_prob":0.6531334519,"action_logp":-0.425973773,"action_dist_inputs":[-0.3177538812,0.3150875866],"value_targets":33.1028251648} 
+{"eps_id":1306964917,"obs":[-0.753318131,-0.2098265737,0.0318128876,-0.1596004367],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7575146556,-0.4053891897,0.0286208801,0.1429464817],"action_prob":0.6857433915,"action_logp":-0.3772517443,"action_dist_inputs":[0.38737607,-0.3929177523],"value_targets":32.4270935059} +{"eps_id":1306964917,"obs":[-0.7575146556,-0.4053891897,0.0286208801,0.1429464817],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.765622437,-0.6009091139,0.0314798094,0.4445196092],"action_prob":0.3237524331,"action_logp":-1.1277761459,"action_dist_inputs":[-0.3695838749,0.3669961393],"value_targets":31.7445411682} +{"eps_id":1306964917,"obs":[-0.765622437,-0.6009091139,0.0314798094,0.4445196092],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7776406407,-0.4062463641,0.0403702036,0.1619237065],"action_prob":0.8635068536,"action_logp":-0.1467534602,"action_dist_inputs":[-0.9219416976,0.9227858186],"value_targets":31.0550918579} +{"eps_id":1306964917,"obs":[-0.7776406407,-0.4062463641,0.0403702036,0.1619237065],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7857655883,-0.2117249072,0.0436086766,-0.1177551374],"action_prob":0.7117837667,"action_logp":-0.3399811089,"action_dist_inputs":[-0.4532933235,0.4507698417],"value_targets":30.3586788177} +{"eps_id":1306964917,"obs":[-0.7857655883,-0.2117249072,0.0436086766,-0.1177551374],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7900000811,-0.0172540266,0.0412535742,-0.3963672519],"action_prob":0.3957121372,"action_logp":-0.9270682335,"action_dist_inputs":[0.2090076208,-0.2143559754],"value_targets":29.6552295685} 
+{"eps_id":1306964917,"obs":[-0.7900000811,-0.0172540266,0.0412535742,-0.3963672519],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7903451324,-0.2129362673,0.0333262309,-0.0909684971],"action_prob":0.8461385369,"action_logp":-0.1670721918,"action_dist_inputs":[0.8484449983,-0.8561850786],"value_targets":28.9446773529} +{"eps_id":1306964917,"obs":[-0.7903451324,-0.2129362673,0.0333262309,-0.0909684971],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7946038842,-0.0183074512,0.0315068588,-0.3729535043],"action_prob":0.4189745188,"action_logp":-0.8699451685,"action_dist_inputs":[0.1608919203,-0.1660925299],"value_targets":28.2269458771} +{"eps_id":1306964917,"obs":[-0.7946038842,-0.0183074512,0.0315068588,-0.3729535043],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7949700356,0.1763530821,0.0240477882,-0.6555379033],"action_prob":0.1624555886,"action_logp":-1.817350626,"action_dist_inputs":[0.8162125945,-0.823857069],"value_targets":27.5019664764} +{"eps_id":1306964917,"obs":[-0.7949700356,0.1763530821,0.0240477882,-0.6555379033],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7914429307,-0.0190952644,0.0109370304,-0.3553808033],"action_prob":0.9283232093,"action_logp":-0.0743753165,"action_dist_inputs":[1.2759447098,-1.2852678299],"value_targets":26.7696628571} +{"eps_id":1306964917,"obs":[-0.7914429307,-0.0190952644,0.0109370304,-0.3553808033],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7918248773,-0.2143709958,0.0038294147,-0.0592692867],"action_prob":0.8417445421,"action_logp":-0.1722787172,"action_dist_inputs":[0.8318460584,-0.839419663],"value_targets":26.0299625397} 
+{"eps_id":1306964917,"obs":[-0.7918248773,-0.2143709958,0.0038294147,-0.0592692867],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.796112299,-0.0193041638,0.002644029,-0.3507415652],"action_prob":0.4183814824,"action_logp":-0.8713616133,"action_dist_inputs":[0.1621923745,-0.1672286391],"value_targets":25.2827911377} +{"eps_id":1306964917,"obs":[-0.796112299,-0.0193041638,0.002644029,-0.3507415652],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7964983582,-0.2144636214,-0.0043708021,-0.0572260506],"action_prob":0.8440591097,"action_logp":-0.169532761,"action_dist_inputs":[0.8405872583,-0.8481582999],"value_targets":24.5280704498} +{"eps_id":1306964917,"obs":[-0.7964983582,-0.2144636214,-0.0043708021,-0.0572260506],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8007876277,-0.0192792695,-0.005515323,-0.351284802],"action_prob":0.4090179503,"action_logp":-0.8939962387,"action_dist_inputs":[0.1814866364,-0.1865398884],"value_targets":23.7657279968} +{"eps_id":1306964917,"obs":[-0.8007876277,-0.0192792695,-0.005515323,-0.351284802],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8011732101,-0.2143223584,-0.0125410194,-0.0603461079],"action_prob":0.8494125605,"action_logp":-0.1632102877,"action_dist_inputs":[0.8612031937,-0.8687974811],"value_targets":22.9956855774} +{"eps_id":1306964917,"obs":[-0.8011732101,-0.2143223584,-0.0125410194,-0.0603461079],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8054596782,-0.4092622697,-0.0137479408,0.2283538133],"action_prob":0.6075715423,"action_logp":-0.4982853234,"action_dist_inputs":[0.2160052508,-0.2211105376],"value_targets":22.2178649902} 
+{"eps_id":1306964917,"obs":[-0.8054596782,-0.4092622697,-0.0137479408,0.2283538133],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.813644886,-0.6041851044,-0.0091808653,0.5166686177],"action_prob":0.2823000252,"action_logp":-1.2647848129,"action_dist_inputs":[-0.4676883817,0.4653927386],"value_targets":21.4321861267} +{"eps_id":1306964917,"obs":[-0.813644886,-0.6041851044,-0.0091808653,0.5166686177],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8257285953,-0.7991765738,0.0011525077,0.8064444065],"action_prob":0.1316726059,"action_logp":-2.0274367332,"action_dist_inputs":[-0.9426255822,0.9436247945],"value_targets":20.6385707855} +{"eps_id":1306964917,"obs":[-0.8257285953,-0.7991765738,0.0011525077,0.8064444065],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8417121172,-0.604070425,0.0172813963,0.5141242146],"action_prob":0.9174734354,"action_logp":-0.0861316845,"action_dist_inputs":[-1.2020190954,1.206484437],"value_targets":19.8369407654} +{"eps_id":1306964917,"obs":[-0.8417121172,-0.604070425,0.0172813963,0.5141242146],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8537935615,-0.4091960788,0.0275638793,0.2269368023],"action_prob":0.8746299148,"action_logp":-0.1339544207,"action_dist_inputs":[-0.9707667232,0.9717638493],"value_targets":19.0272140503} +{"eps_id":1306964917,"obs":[-0.8537935615,-0.4091960788,0.0275638793,0.2269368023],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.861977458,-0.2144786566,0.0321026146,-0.0569256023],"action_prob":0.7552322149,"action_logp":-0.2807300389,"action_dist_inputs":[-0.5645860434,0.5621290803],"value_targets":18.2093067169} 
+{"eps_id":1306964917,"obs":[-0.861977458,-0.2144786566,0.0321026146,-0.0569256023],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8662670255,-0.019831365,0.0309641045,-0.3393095732],"action_prob":0.4753896594,"action_logp":-0.7436204553,"action_dist_inputs":[0.0466346443,-0.0518862121],"value_targets":17.3831367493} +{"eps_id":1306964917,"obs":[-0.8662670255,-0.019831365,0.0309641045,-0.3393095732],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8666636944,-0.2153799087,0.0241779126,-0.0370253585],"action_prob":0.8018773198,"action_logp":-0.2207996249,"action_dist_inputs":[0.6952053308,-0.7028639317],"value_targets":16.5486240387} +{"eps_id":1306964917,"obs":[-0.8666636944,-0.2153799087,0.0241779126,-0.0370253585],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8709712625,-0.0206128675,0.023437405,-0.3219829202],"action_prob":0.4916981757,"action_logp":-0.7098901868,"action_dist_inputs":[0.0140308049,-0.0191793889],"value_targets":15.7056808472} +{"eps_id":1306964917,"obs":[-0.8709712625,-0.0206128675,0.023437405,-0.3219829202],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8713835478,-0.2160605937,0.0169977471,-0.0220018774],"action_prob":0.7938763499,"action_logp":-0.2308275551,"action_dist_inputs":[0.6704286337,-0.6780229807],"value_targets":14.8542232513} +{"eps_id":1306964917,"obs":[-0.8713835478,-0.2160605937,0.0169977471,-0.0220018774],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8757047653,-0.4114221334,0.0165577102,0.2759951949],"action_prob":0.4978270531,"action_logp":-0.6975025535,"action_dist_inputs":[-0.0068841758,0.0018078117],"value_targets":13.9941644669} 
+{"eps_id":1306964917,"obs":[-0.8757047653,-0.4114221334,0.0165577102,0.2759951949],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8839331865,-0.216540277,0.0220776126,-0.0114197703],"action_prob":0.7807039618,"action_logp":-0.2475592345,"action_dist_inputs":[-0.6359861493,0.6337874532],"value_targets":13.125418663} +{"eps_id":1306964917,"obs":[-0.8839331865,-0.216540277,0.0220776126,-0.0114197703],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8882640004,-0.4119717777,0.0218492188,0.2881463468],"action_prob":0.4746323824,"action_logp":-0.7452147007,"action_dist_inputs":[-0.0533035211,0.048254136],"value_targets":12.2478981018} +{"eps_id":1306964917,"obs":[-0.8882640004,-0.4119717777,0.0218492188,0.2881463468],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8965034485,-0.2171681225,0.0276121441,0.0024337363],"action_prob":0.7913469672,"action_logp":-0.234018743,"action_dist_inputs":[-0.6676014066,0.6654623151],"value_targets":11.3615131378} +{"eps_id":1306964917,"obs":[-0.8965034485,-0.2171681225,0.0276121441,0.0024337363],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9008467793,-0.0224528238,0.0276608188,-0.2814109325],"action_prob":0.5527479649,"action_logp":-0.5928531289,"action_dist_inputs":[-0.1083907411,0.1033891365],"value_targets":10.4661741257} +{"eps_id":1306964917,"obs":[-0.9008467793,-0.0224528238,0.0276608188,-0.2814109325],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9012958407,0.1722638756,0.0220325999,-0.5652430058],"action_prob":0.2520192266,"action_logp":-1.3782498837,"action_dist_inputs":[0.5402031541,-0.5476689339],"value_targets":9.5617923737} 
+{"eps_id":1306964917,"obs":[-0.9012958407,0.1722638756,0.0220325999,-0.5652430058],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8978505731,-0.0231601503,0.0107277408,-0.2657010555],"action_prob":0.8964726329,"action_logp":-0.1092875376,"action_dist_inputs":[1.0747462511,-1.0838855505],"value_targets":8.6482753754} +{"eps_id":1306964917,"obs":[-0.8978505731,-0.0231601503,0.0107277408,-0.2657010555],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8983137608,-0.2184335589,0.0054137195,0.0303460974],"action_prob":0.7514230609,"action_logp":-0.2857864797,"action_dist_inputs":[0.5494106412,-0.5568056107],"value_targets":7.7255306244} +{"eps_id":1306964917,"obs":[-0.8983137608,-0.2184335589,0.0054137195,0.0303460974],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9026824236,-0.4136327207,0.0060206414,0.3247321844],"action_prob":0.4423397779,"action_logp":-0.8156769872,"action_dist_inputs":[-0.1182606965,0.1134108678],"value_targets":6.7934651375} +{"eps_id":1306964917,"obs":[-0.9026824236,-0.4136327207,0.0060206414,0.3247321844],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9109550714,-0.2185970098,0.0125152851,0.0339539796],"action_prob":0.8011796474,"action_logp":-0.2216700912,"action_dist_inputs":[-0.6978186369,0.6958646774],"value_targets":5.8519849777} +{"eps_id":1306964917,"obs":[-0.9109550714,-0.2185970098,0.0125152851,0.0339539796],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9153270125,-0.023656752,0.0131943645,-0.2547541261],"action_prob":0.5727633834,"action_logp":-0.557282567,"action_dist_inputs":[-0.1490010172,0.144133687],"value_targets":4.9009947777} 
+{"eps_id":1306964917,"obs":[-0.9153270125,-0.023656752,0.0131943645,-0.2547541261],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9158001542,-0.2189645767,0.0080992822,0.0420611612],"action_prob":0.7349100113,"action_logp":-0.3080071807,"action_dist_inputs":[0.5061487556,-0.5135301948],"value_targets":3.9403989315} +{"eps_id":1306964917,"obs":[-0.9158001542,-0.2189645767,0.0080992822,0.0420611612],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.9201794863,-0.0239597056,0.0089405058,-0.2480553985],"action_prob":0.5770518184,"action_logp":-0.5498232245,"action_dist_inputs":[-0.1577598751,0.1529225409],"value_targets":2.970099926} +{"eps_id":1306964917,"obs":[-0.9201794863,-0.0239597056,0.0089405058,-0.2480553985],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.920658648,-0.2192081958,0.0039793979,0.047434099],"action_prob":0.7320538163,"action_logp":-0.3119012117,"action_dist_inputs":[0.4988523126,-0.5062156916],"value_targets":1.9900000095} +{"eps_id":1306964917,"obs":[-0.920658648,-0.2192081958,0.0039793979,0.047434099],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.9250428081,-0.4143869877,0.0049280799,0.3413698971],"action_prob":0.4217315614,"action_logp":-0.8633862734,"action_dist_inputs":[-0.1602469534,0.1554223746],"value_targets":1.0} +{"eps_id":1237240122,"obs":[0.0139165418,0.0438513383,-0.0455932468,0.0170547832],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0147935692,0.2395964861,-0.0452521518,-0.2896575928],"action_prob":0.488527894,"action_logp":-0.7163587213,"action_dist_inputs":[0.0227766819,-0.0231197961],"value_targets":86.6020355225} 
+{"eps_id":1237240122,"obs":[0.0147935692,0.2395964861,-0.0452521518,-0.2896575928],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0195854995,0.0451480485,-0.0510453023,-0.0115831085],"action_prob":0.8654364347,"action_logp":-0.1445213407,"action_dist_inputs":[0.9285109639,-0.9326860309],"value_targets":86.4666976929} +{"eps_id":1237240122,"obs":[0.0195854995,0.0451480485,-0.0510453023,-0.0115831085],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0204884596,-0.149206087,-0.0512769632,0.2645675838],"action_prob":0.5856164694,"action_logp":-0.5350902081,"action_dist_inputs":[0.1726119518,-0.1732612699],"value_targets":86.3300018311} +{"eps_id":1237240122,"obs":[0.0204884596,-0.149206087,-0.0512769632,0.2645675838],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0175043382,0.0466088168,-0.045985613,-0.043837551],"action_prob":0.8025045991,"action_logp":-0.2200176716,"action_dist_inputs":[-0.6995300055,0.7024921179],"value_targets":86.1919174194} +{"eps_id":1237240122,"obs":[0.0175043382,0.0466088168,-0.045985613,-0.043837551],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0184365138,-0.1478245854,-0.0468623638,0.2339892238],"action_prob":0.6388258934,"action_logp":-0.4481233358,"action_dist_inputs":[0.2846393585,-0.2856323123],"value_targets":86.052444458} +{"eps_id":1237240122,"obs":[0.0184365138,-0.1478245854,-0.0468623638,0.2339892238],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0154800229,0.0479345247,-0.0421825796,-0.073099561],"action_prob":0.7732926607,"action_logp":-0.2570976615,"action_dist_inputs":[-0.6121611595,0.6148369908],"value_targets":85.9115600586} 
+{"eps_id":1237240122,"obs":[0.0154800229,0.0479345247,-0.0421825796,-0.073099561],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0164387133,0.2436350286,-0.0436445698,-0.3787871897],"action_prob":0.3156294227,"action_logp":-1.1531864405,"action_dist_inputs":[0.3863084316,-0.3876222372],"value_targets":85.7692489624} +{"eps_id":1237240122,"obs":[0.0164387133,0.2436350286,-0.0436445698,-0.3787871897],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0213114135,0.0491591953,-0.0512203164,-0.100178659],"action_prob":0.9038877487,"action_logp":-0.1010500938,"action_dist_inputs":[1.1180894375,-1.1230990887],"value_targets":85.62550354} +{"eps_id":1237240122,"obs":[0.0213114135,0.0491591953,-0.0512203164,-0.100178659],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0222945977,-0.1451926976,-0.0532238893,0.1759146452],"action_prob":0.7424871325,"action_logp":-0.2977496982,"action_dist_inputs":[0.5286463499,-0.53028965],"value_targets":85.4803085327} +{"eps_id":1237240122,"obs":[0.0222945977,-0.1451926976,-0.0532238893,0.1759146452],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0193907432,0.0506489649,-0.0497055948,-0.1330722868],"action_prob":0.67432338,"action_logp":-0.3940454721,"action_dist_inputs":[-0.3628552556,0.3649496734],"value_targets":85.3336486816} +{"eps_id":1237240122,"obs":[0.0193907432,0.0506489649,-0.0497055948,-0.1330722868],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0204037223,0.2464463711,-0.052367039,-0.4410130978],"action_prob":0.2178021371,"action_logp":-1.5241682529,"action_dist_inputs":[0.638243854,-0.6402768493],"value_targets":85.1855010986} 
+{"eps_id":1237240122,"obs":[0.0204037223,0.2464463711,-0.052367039,-0.4410130978],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0253326502,0.0521030948,-0.0611873008,-0.1652866751],"action_prob":0.9212679267,"action_logp":-0.0820043609,"action_dist_inputs":[1.2270300388,-1.232670784],"value_targets":85.0358581543} +{"eps_id":1237240122,"obs":[0.0253326502,0.0521030948,-0.0611873008,-0.1652866751],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0263747126,-0.1420920342,-0.0644930378,0.1074830592],"action_prob":0.826374352,"action_logp":-0.1907073855,"action_dist_inputs":[0.7788419724,-0.7813042998],"value_targets":84.8847045898} +{"eps_id":1237240122,"obs":[0.0263747126,-0.1420920342,-0.0644930378,0.1074830592],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0235328712,-0.3362333477,-0.0623433739,0.3791418076],"action_prob":0.4882336855,"action_logp":-0.7169611454,"action_dist_inputs":[-0.0228690729,0.0242049433],"value_targets":84.7320251465} +{"eps_id":1237240122,"obs":[0.0235328712,-0.3362333477,-0.0623433739,0.3791418076],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0168082044,-0.5304170847,-0.054760538,0.6515344977],"action_prob":0.1595569551,"action_logp":-1.8353543282,"action_dist_inputs":[-0.8285138607,0.833014369],"value_targets":84.5778045654} +{"eps_id":1237240122,"obs":[0.0168082044,-0.5304170847,-0.054760538,0.6515344977],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0061998628,-0.3345769644,-0.0417298488,0.3421230614],"action_prob":0.9293065667,"action_logp":-0.0733165741,"action_dist_inputs":[-1.284610033,1.2914761305],"value_targets":84.4220275879} 
+{"eps_id":1237240122,"obs":[0.0061998628,-0.3345769644,-0.0417298488,0.3421230614],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0004916763,-0.1388869286,-0.0348873883,0.036578536],"action_prob":0.8296061158,"action_logp":-0.186804235,"action_dist_inputs":[-0.7893247008,0.7935135961],"value_targets":84.2646713257} +{"eps_id":1237240122,"obs":[-0.0004916763,-0.1388869286,-0.0348873883,0.036578536],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0032694147,-0.3334916532,-0.0341558158,0.3180532157],"action_prob":0.566865027,"action_logp":-0.5676340461,"action_dist_inputs":[0.1347863376,-0.1342854351],"value_targets":84.1057281494} +{"eps_id":1237240122,"obs":[-0.0032694147,-0.3334916532,-0.0341558158,0.3180532157],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0099392477,-0.1379003078,-0.0277947523,0.0147974417],"action_prob":0.8154480457,"action_logp":-0.2040175498,"action_dist_inputs":[-0.7409231663,0.7448836565],"value_targets":83.9451828003} +{"eps_id":1237240122,"obs":[-0.0099392477,-0.1379003078,-0.0277947523,0.0147974417],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0126972543,0.0576089993,-0.027498804,-0.2865238488],"action_prob":0.4056364,"action_logp":-0.9022980928,"action_dist_inputs":[0.1911214888,-0.1909126341],"value_targets":83.7830123901} +{"eps_id":1237240122,"obs":[-0.0126972543,0.0576089993,-0.027498804,-0.2865238488],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0115450742,-0.1371102035,-0.0332292803,-0.0026390965],"action_prob":0.8784705997,"action_logp":-0.1295728385,"action_dist_inputs":[0.9870683551,-0.9909581542],"value_targets":83.6192016602} 
+{"eps_id":1237240122,"obs":[-0.0115450742,-0.1371102035,-0.0332292803,-0.0026390965],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0142872781,0.0584721528,-0.0332820639,-0.3056182861],"action_prob":0.3610798717,"action_logp":-1.0186561346,"action_dist_inputs":[0.2853049934,-0.2853752077],"value_targets":83.453742981} +{"eps_id":1237240122,"obs":[-0.0142872781,0.0584721528,-0.0332820639,-0.3056182861],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0131178349,-0.1361601055,-0.0393944271,-0.0236147139],"action_prob":0.8890048265,"action_logp":-0.1176525876,"action_dist_inputs":[1.0382256508,-1.0423909426],"value_targets":83.286605835} +{"eps_id":1237240122,"obs":[-0.0131178349,-0.1361601055,-0.0393944271,-0.0236147139],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.015841037,-0.3306955993,-0.0398667231,0.2563831806],"action_prob":0.6867742538,"action_logp":-0.3757496774,"action_dist_inputs":[0.3923347294,-0.3927465379],"value_targets":83.1177825928} +{"eps_id":1237240122,"obs":[-0.015841037,-0.3306955993,-0.0398667231,0.2563831806],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0224549491,-0.1350278109,-0.0347390585,-0.0486030169],"action_prob":0.7320048213,"action_logp":-0.3119682074,"action_dist_inputs":[-0.5007548928,0.5040631294],"value_targets":82.9472579956} +{"eps_id":1237240122,"obs":[-0.0224549491,-0.1350278109,-0.0347390585,-0.0486030169],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0251555052,-0.3296348453,-0.0357111208,0.2329201996],"action_prob":0.7170324326,"action_logp":-0.3326342404,"action_dist_inputs":[0.4645034969,-0.4652850926],"value_targets":82.7750091553} 
+{"eps_id":1237240122,"obs":[-0.0251555052,-0.3296348453,-0.0357111208,0.2329201996],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0317482017,-0.5242288113,-0.0310527161,0.5141282082],"action_prob":0.2967804074,"action_logp":-1.2147628069,"action_dist_inputs":[-0.4298166335,0.432860136],"value_targets":82.601020813} +{"eps_id":1237240122,"obs":[-0.0317482017,-0.5242288113,-0.0310527161,0.5141282082],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0422327779,-0.3286836147,-0.0207701512,0.2118237913],"action_prob":0.9003608227,"action_logp":-0.1049596518,"action_dist_inputs":[-1.0977321863,1.1035083532],"value_targets":82.4252700806} +{"eps_id":1237240122,"obs":[-0.0422327779,-0.3286836147,-0.0207701512,0.2118237913],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0488064513,-0.133270964,-0.0165336765,-0.0873379707],"action_prob":0.6971996427,"action_logp":-0.3606834412,"action_dist_inputs":[-0.415604651,0.4183935523],"value_targets":82.2477493286} +{"eps_id":1237240122,"obs":[-0.0488064513,-0.133270964,-0.0165336765,-0.0873379707],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0514718704,-0.3281520605,-0.0182804354,0.2000830472],"action_prob":0.7420170307,"action_logp":-0.2983830571,"action_dist_inputs":[0.5275625587,-0.5289158821],"value_targets":82.0684280396} +{"eps_id":1237240122,"obs":[-0.0514718704,-0.3281520605,-0.0182804354,0.2000830472],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0580349118,-0.5230078697,-0.0142787741,0.4869437516],"action_prob":0.3171909153,"action_logp":-1.1482514143,"action_dist_inputs":[-0.3820501864,0.3846612573],"value_targets":81.8873062134} 
+{"eps_id":1237240122,"obs":[-0.0580349118,-0.5230078697,-0.0142787741,0.4869437516],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0684950724,-0.3276873827,-0.0045398986,0.1897950768],"action_prob":0.8975996971,"action_logp":-0.1080310494,"action_dist_inputs":[-1.0826648474,1.0881696939],"value_targets":81.7043457031} +{"eps_id":1237240122,"obs":[-0.0684950724,-0.3276873827,-0.0045398986,0.1897950768],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0750488192,-0.1325007677,-0.0007439973,-0.1043165401],"action_prob":0.6927332878,"action_logp":-0.3671102226,"action_dist_inputs":[-0.4052414596,0.4076874256],"value_targets":81.5195465088} +{"eps_id":1237240122,"obs":[-0.0750488192,-0.1325007677,-0.0007439973,-0.1043165401],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0776988342,-0.3276120424,-0.002830328,0.1881315708],"action_prob":0.7400815487,"action_logp":-0.3009949327,"action_dist_inputs":[0.5223697424,-0.5240225196],"value_targets":81.3328704834} +{"eps_id":1237240122,"obs":[-0.0776988342,-0.3276120424,-0.002830328,0.1881315708],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.084251076,-0.1324497163,0.0009323033,-0.1054428741],"action_prob":0.6939861774,"action_logp":-0.3653032184,"action_dist_inputs":[-0.4082320035,0.4105898142],"value_targets":81.144317627} +{"eps_id":1237240122,"obs":[-0.084251076,-0.1324497163,0.0009323033,-0.1054428741],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0869000703,-0.3275850117,-0.0011765541,0.1875340492],"action_prob":0.7385205626,"action_logp":-0.303106308,"action_dist_inputs":[0.5182816386,-0.520011723],"value_targets":80.9538574219} 
+{"eps_id":1237240122,"obs":[-0.0869000703,-0.3275850117,-0.0011765541,0.1875340492],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0934517682,-0.1324462593,0.0025741269,-0.1055198014],"action_prob":0.6969000101,"action_logp":-0.3611133099,"action_dist_inputs":[-0.4151498377,0.417429477],"value_targets":80.76146698} +{"eps_id":1237240122,"obs":[-0.0934517682,-0.1324462593,0.0025741269,-0.1055198014],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0961006954,0.0626387149,0.0004637308,-0.3973895013],"action_prob":0.264490068,"action_logp":-1.3299515247,"action_dist_inputs":[0.5104834437,-0.5122768283],"value_targets":80.5671386719} +{"eps_id":1237240122,"obs":[-0.0961006954,0.0626387149,0.0004637308,-0.3973895013],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0948479176,-0.1324898154,-0.0074840588,-0.1045603976],"action_prob":0.9068510532,"action_logp":-0.0977770612,"action_dist_inputs":[1.1351617575,-1.1406164169],"value_targets":80.3708496094} +{"eps_id":1237240122,"obs":[-0.0948479176,-0.1324898154,-0.0074840588,-0.1045603976],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0974977165,-0.327503711,-0.0095752673,0.1857519746],"action_prob":0.7489694953,"action_logp":-0.2890570462,"action_dist_inputs":[0.5456189513,-0.5475050211],"value_targets":80.1725769043} +{"eps_id":1237240122,"obs":[-0.0974977165,-0.327503711,-0.0095752673,0.1857519746],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1040477902,-0.5224873424,-0.0058602276,0.4753989577],"action_prob":0.3208033442,"action_logp":-1.1369270086,"action_dist_inputs":[-0.3739838004,0.3760986924],"value_targets":79.9722976685} 
+{"eps_id":1237240122,"obs":[-0.1040477902,-0.5224873424,-0.0058602276,0.4753989577],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1144975349,-0.327283144,0.0036477516,0.1808747649],"action_prob":0.8968396187,"action_logp":-0.1088782549,"action_dist_inputs":[-1.0787221193,1.0838700533],"value_targets":79.7699966431} +{"eps_id":1237240122,"obs":[-0.1144975349,-0.327283144,0.0036477516,0.1808747649],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1210431978,-0.1322135925,0.0072652469,-0.110655196],"action_prob":0.697168529,"action_logp":-0.360728085,"action_dist_inputs":[-0.41592592,0.417924881],"value_targets":79.5656509399} +{"eps_id":1237240122,"obs":[-0.1210431978,-0.1322135925,0.0072652469,-0.110655196],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1236874685,-0.3274388909,0.0050521432,0.1843109876],"action_prob":0.7332718372,"action_logp":-0.3102387786,"action_dist_inputs":[0.5046208501,-0.5066655278],"value_targets":79.3592453003} +{"eps_id":1237240122,"obs":[-0.1236874685,-0.3274388909,0.0050521432,0.1843109876],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1302362531,-0.1323895901,0.0087383632,-0.106773898],"action_prob":0.7060962319,"action_logp":-0.3480037749,"action_dist_inputs":[-0.4372699559,0.4392290711],"value_targets":79.1507568359} +{"eps_id":1237240122,"obs":[-0.1302362531,-0.1323895901,0.0087383632,-0.106773898],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1328840405,-0.3276356757,0.0066028852,0.1886530817],"action_prob":0.724580884,"action_logp":-0.3221618533,"action_dist_inputs":[0.482621491,-0.4846780896],"value_targets":78.9401550293} 
+{"eps_id":1237240122,"obs":[-0.1328840405,-0.3276356757,0.0066028852,0.1886530817],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1394367516,-0.5228514671,0.0103759468,0.4834116101],"action_prob":0.2835981548,"action_logp":-1.2601970434,"action_dist_inputs":[-0.4623771906,0.4643059075],"value_targets":78.727432251} +{"eps_id":1237240122,"obs":[-0.1394367516,-0.5228514671,0.0103759468,0.4834116101],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1498937905,-0.3278774619,0.0200441796,0.1940168589],"action_prob":0.9060407281,"action_logp":-0.0986710191,"action_dist_inputs":[-1.1305749416,1.1356480122],"value_targets":78.5125579834} +{"eps_id":1237240122,"obs":[-0.1498937905,-0.3278774619,0.0200441796,0.1940168589],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1564513296,-0.1330478936,0.0239245165,-0.0922762007],"action_prob":0.747125566,"action_logp":-0.2915219963,"action_dist_inputs":[-0.5407136083,0.5426267385],"value_targets":78.2955093384} +{"eps_id":1237240122,"obs":[-0.1564513296,-0.1330478936,0.0239245165,-0.0922762007],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1591122895,-0.3285044432,0.0220789928,0.2078578621],"action_prob":0.6721425056,"action_logp":-0.3972848952,"action_dist_inputs":[0.357966125,-0.3599251509],"value_targets":78.0762710571} +{"eps_id":1237240122,"obs":[-0.1591122895,-0.3285044432,0.0220789928,0.2078578621],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1656823754,-0.52393502,0.0262361486,0.5074229836],"action_prob":0.2312936932,"action_logp":-1.4640669823,"action_dist_inputs":[-0.599522233,0.6014984846],"value_targets":77.8548202515} 
+{"eps_id":1237240122,"obs":[-0.1656823754,-0.52393502,0.0262361486,0.5074229836],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1761610806,-0.3291924,0.0363846086,0.223122105],"action_prob":0.9171800017,"action_logp":-0.0864515454,"action_dist_inputs":[-1.1997096539,1.204923749],"value_targets":77.6311340332} +{"eps_id":1237240122,"obs":[-0.1761610806,-0.3291924,0.0363846086,0.223122105],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1827449352,-0.5248149633,0.0408470519,0.5270565152],"action_prob":0.1958572716,"action_logp":-1.6303690672,"action_dist_inputs":[-0.7051618099,0.7072287798],"value_targets":77.4051818848} +{"eps_id":1237240122,"obs":[-0.1827449352,-0.5248149633,0.0408470519,0.5270565152],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1932412237,-0.330290854,0.0513881817,0.2475193888],"action_prob":0.9243685603,"action_logp":-0.07864438,"action_dist_inputs":[-1.2489243746,1.2543150187],"value_targets":77.1769561768} +{"eps_id":1237240122,"obs":[-0.1932412237,-0.330290854,0.0513881817,0.2475193888],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1998470426,-0.1359390467,0.0563385673,-0.0285220221],"action_prob":0.8400098681,"action_logp":-0.1743416637,"action_dist_inputs":[-0.8280177116,0.8302839398],"value_targets":76.9464187622} +{"eps_id":1237240122,"obs":[-0.1998470426,-0.1359390467,0.0563385673,-0.0285220221],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2025658339,0.0583316088,0.0557681285,-0.3029108942],"action_prob":0.5300230384,"action_logp":-0.6348347664,"action_dist_inputs":[-0.0607910044,0.0594458506],"value_targets":76.7135543823} 
+{"eps_id":1237240122,"obs":[-0.2025658339,0.0583316088,0.0557681285,-0.3029108942],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2013991922,-0.1375389695,0.0497099124,0.0068255463],"action_prob":0.8228437304,"action_logp":-0.194988966,"action_dist_inputs":[0.7654290199,-0.7703048587],"value_targets":76.4783401489} +{"eps_id":1237240122,"obs":[-0.2013991922,-0.1375389695,0.0497099124,0.0068255463],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2041499764,0.056836132,0.0498464219,-0.2697682381],"action_prob":0.587336421,"action_logp":-0.5321575403,"action_dist_inputs":[-0.1769935489,0.1759714186],"value_targets":76.2407455444} +{"eps_id":1237240122,"obs":[-0.2041499764,0.056836132,0.0498464219,-0.2697682381],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2030132562,-0.1389604062,0.0444510579,0.0382105447],"action_prob":0.7967566848,"action_logp":-0.227205947,"action_dist_inputs":[0.6807700992,-0.685375154],"value_targets":76.0007553101} +{"eps_id":1237240122,"obs":[-0.2030132562,-0.1389604062,0.0444510579,0.0382105447],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2057924569,0.055496864,0.0452152677,-0.2401229143],"action_prob":0.636457026,"action_logp":-0.4518383741,"action_dist_inputs":[-0.2803781927,0.2796411514],"value_targets":75.7583389282} +{"eps_id":1237240122,"obs":[-0.2057924569,0.055496864,0.0452152677,-0.2401229143],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.204682529,-0.1402408481,0.0404128097,0.0664723366],"action_prob":0.7681136727,"action_logp":-0.2638175189,"action_dist_inputs":[0.5966669917,-0.6010237336],"value_targets":75.5134735107} 
+{"eps_id":1237240122,"obs":[-0.204682529,-0.1402408481,0.0404128097,0.0664723366],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2074873447,0.0542790927,0.0417422578,-0.2131912112],"action_prob":0.6782428026,"action_logp":-0.3882499337,"action_dist_inputs":[-0.3730934262,0.3726146221],"value_targets":75.26612854} +{"eps_id":1237240122,"obs":[-0.2074873447,0.0542790927,0.0417422578,-0.2131912112],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2064017504,-0.1414140314,0.0374784321,0.0923614204],"action_prob":0.7368257642,"action_logp":-0.3054038286,"action_dist_inputs":[0.5127043724,-0.5168306231],"value_targets":75.0162963867} +{"eps_id":1237240122,"obs":[-0.2064017504,-0.1414140314,0.0374784321,0.0923614204],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2092300355,0.0531512573,0.039325662,-0.1882654727],"action_prob":0.7139165401,"action_logp":-0.3369891942,"action_dist_inputs":[-0.4573603868,0.4571222663],"value_targets":74.7639312744} +{"eps_id":1237240122,"obs":[-0.2092300355,0.0531512573,0.039325662,-0.1882654727],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2081670165,0.2476891577,0.0355603509,-0.4682879448],"action_prob":0.2973411679,"action_logp":-1.2128751278,"action_dist_inputs":[0.4280412495,-0.4319500029],"value_targets":74.5090255737} +{"eps_id":1237240122,"obs":[-0.2081670165,0.2476891577,0.0355603509,-0.4682879448],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2032132298,0.0520833507,0.0261945911,-0.1646121889],"action_prob":0.901162684,"action_logp":-0.1040694788,"action_dist_inputs":[1.1017007828,-1.1085101366],"value_targets":74.2515411377} 
+{"eps_id":1237240122,"obs":[-0.2032132298,0.0520833507,0.0261945911,-0.1646121889],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2021715641,-0.1434035897,0.0229023471,0.1362179518],"action_prob":0.6883001328,"action_logp":-0.3735303283,"action_dist_inputs":[0.3942364752,-0.3979474902],"value_targets":73.9914550781} +{"eps_id":1237240122,"obs":[-0.2021715641,-0.1434035897,0.0229023471,0.1362179518],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2050396353,0.0513829589,0.0256267078,-0.1491525471],"action_prob":0.7521482706,"action_logp":-0.2848218083,"action_dist_inputs":[-0.5549693108,0.555133462],"value_targets":73.7287445068} +{"eps_id":1237240122,"obs":[-0.2050396353,0.0513829589,0.0256267078,-0.1491525471],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2040119767,0.2461287528,0.0226436555,-0.4336418808],"action_prob":0.3376738727,"action_logp":-1.0856747627,"action_dist_inputs":[0.3350514472,-0.3386260569],"value_targets":73.4633712769} +{"eps_id":1237240122,"obs":[-0.2040119767,0.2461287528,0.0226436555,-0.4336418808],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1990894079,0.0506936498,0.0139708184,-0.1339077204],"action_prob":0.8936493397,"action_logp":-0.1124417856,"action_dist_inputs":[1.060985446,-1.0675865412],"value_targets":73.1953277588} +{"eps_id":1237240122,"obs":[-0.1990894079,0.0506936498,0.0139708184,-0.1339077204],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1980755329,-0.1446256042,0.0112926643,0.1631498486],"action_prob":0.6586766243,"action_logp":-0.4175225496,"action_dist_inputs":[0.3269771636,-0.3304253817],"value_targets":72.9245758057} 
+{"eps_id":1237240122,"obs":[-0.1980755329,-0.1446256042,0.0112926643,0.1631498486],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.200968042,0.0503328815,0.014555661,-0.1259492487],"action_prob":0.769308269,"action_logp":-0.2622635365,"action_dist_inputs":[-0.6020029783,0.6024060845],"value_targets":72.6510848999} +{"eps_id":1237240122,"obs":[-0.200968042,0.0503328815,0.014555661,-0.1259492487],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1999613792,0.2452433109,0.0120366756,-0.4140047133],"action_prob":0.3573229015,"action_logp":-1.0291154385,"action_dist_inputs":[0.2918116152,-0.2951910496],"value_targets":72.3748321533} +{"eps_id":1237240122,"obs":[-0.1999613792,0.2452433109,0.0120366756,-0.4140047133],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.195056513,0.0499528423,0.0037565816,-0.1175515354],"action_prob":0.8906626105,"action_logp":-0.1157895625,"action_dist_inputs":[1.0455195904,-1.0520080328],"value_targets":72.0957946777} +{"eps_id":1237240122,"obs":[-0.195056513,0.0499528423,0.0037565816,-0.1175515354],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1940574646,-0.1452227384,0.0014055509,0.17631419],"action_prob":0.6497628093,"action_logp":-0.4311479032,"action_dist_inputs":[0.3073406219,-0.3106559813],"value_targets":71.8139266968} +{"eps_id":1237240122,"obs":[-0.1940574646,-0.1452227384,0.0014055509,0.17631419],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1969619095,-0.340364784,0.0049318345,0.469440192],"action_prob":0.2280368358,"action_logp":-1.4782481194,"action_dist_inputs":[-0.6094626188,0.6099670529],"value_targets":71.5292205811} 
+{"eps_id":1237240122,"obs":[-0.1969619095,-0.340364784,0.0049318345,0.469440192],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.203769207,-0.1453128308,0.014320639,0.1783158034],"action_prob":0.9117663503,"action_logp":-0.0923714936,"action_dist_inputs":[-1.1656559706,1.1697391272],"value_targets":71.2416381836} +{"eps_id":1237240122,"obs":[-0.203769207,-0.1453128308,0.014320639,0.1783158034],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2066754699,0.0496012866,0.0178869553,-0.1098152772],"action_prob":0.7892875671,"action_logp":-0.2366245836,"action_dist_inputs":[-0.6600470543,0.6605889797],"value_targets":70.9511489868} +{"eps_id":1237240122,"obs":[-0.2066754699,0.0496012866,0.0178869553,-0.1098152772],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.20568344,0.2444624156,0.0156906489,-0.3968017399],"action_prob":0.3958358467,"action_logp":-0.9267556667,"action_dist_inputs":[0.2098021656,-0.2130442411],"value_targets":70.6577301025} +{"eps_id":1237240122,"obs":[-0.20568344,0.2444624156,0.0156906489,-0.3968017399],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2007941902,0.0491214022,0.0077546146,-0.0992134213],"action_prob":0.8798953295,"action_logp":-0.1279523224,"action_dist_inputs":[0.9925333261,-0.998906076],"value_targets":70.3613433838} +{"eps_id":1237240122,"obs":[-0.2007941902,0.0491214022,0.0077546146,-0.0992134213],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1998117566,-0.1461108178,0.0057703461,0.1959059536],"action_prob":0.605805099,"action_logp":-0.5011969209,"action_dist_inputs":[0.2132809311,-0.2164319605],"value_targets":70.061958313} 
+{"eps_id":1237240122,"obs":[-0.1998117566,-0.1461108178,0.0057703461,0.1959059536],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2027339786,0.0489281155,0.0096884649,-0.0949511155],"action_prob":0.7974733114,"action_logp":-0.2263069302,"action_dist_inputs":[-0.6849380136,0.6856385469],"value_targets":69.7595596313} +{"eps_id":1237240122,"obs":[-0.2027339786,0.0489281155,0.0096884649,-0.0949511155],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2017554194,-0.1463313401,0.0077894428,0.2007727027],"action_prob":0.5930017829,"action_logp":-0.5225578547,"action_dist_inputs":[0.1866384745,-0.1897500306],"value_targets":69.4540939331} +{"eps_id":1237240122,"obs":[-0.2017554194,-0.1463313401,0.0077894428,0.2007727027],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2046820521,0.0486783385,0.0118048964,-0.0894428864],"action_prob":0.8041050434,"action_logp":-0.2180253565,"action_dist_inputs":[-0.7056989074,0.7064523697],"value_targets":69.1455535889} +{"eps_id":1237240122,"obs":[-0.2046820521,0.0486783385,0.0118048964,-0.0894428864],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2037084848,0.2436291128,0.010016039,-0.3783780932],"action_prob":0.4228321314,"action_logp":-0.8607800007,"action_dist_inputs":[0.1540498137,-0.1571080387],"value_targets":68.8338928223} +{"eps_id":1237240122,"obs":[-0.2037084848,0.2436291128,0.010016039,-0.3783780932],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1988358945,0.0483663529,0.0024484769,-0.0825539976],"action_prob":0.8734280467,"action_logp":-0.1353295594,"action_dist_inputs":[0.9626817703,-0.9689329863],"value_targets":68.5190811157} 
+{"eps_id":1237240122,"obs":[-0.1988358945,0.0483663529,0.0024484769,-0.0825539976],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1978685707,-0.1467906088,0.0007973968,0.2109004259],"action_prob":0.5845123529,"action_logp":-0.5369773507,"action_dist_inputs":[0.1691628695,-0.1721621603],"value_targets":68.2010955811} +{"eps_id":1237240122,"obs":[-0.1978685707,-0.1467906088,0.0007973968,0.2109004259],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2008043826,-0.3419239521,0.0050154054,0.503834784],"action_prob":0.1936061233,"action_logp":-1.6419295073,"action_dist_inputs":[-0.7129526734,0.7137939334],"value_targets":67.8798904419} +{"eps_id":1237240122,"obs":[-0.2008043826,-0.3419239521,0.0050154054,0.503834784],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2076428533,-0.146873042,0.0150921009,0.2127366215],"action_prob":0.9182534814,"action_logp":-0.085281834,"action_dist_inputs":[-1.2072262764,1.2116241455],"value_targets":67.5554504395} +{"eps_id":1237240122,"obs":[-0.2076428533,-0.146873042,0.0150921009,0.2127366215],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2105803192,0.0480299145,0.0193468332,-0.0751476213],"action_prob":0.8210964799,"action_logp":-0.1971146911,"action_dist_inputs":[-0.7614485025,0.7623454928],"value_targets":67.227722168} +{"eps_id":1237240122,"obs":[-0.2105803192,0.0480299145,0.0193468332,-0.0751476213],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2096197158,0.242869243,0.0178438798,-0.3616642952],"action_prob":0.4691570401,"action_logp":-0.7568176985,"action_dist_inputs":[0.0603079572,-0.0632206872],"value_targets":66.8966903687} 
+{"eps_id":1237240122,"obs":[-0.2096197158,0.242869243,0.0178438798,-0.3616642952],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2047623396,0.437733084,0.0106105944,-0.6486676931],"action_prob":0.1433302462,"action_logp":-1.9426039457,"action_dist_inputs":[0.8908902407,-0.897010982],"value_targets":66.5623168945} +{"eps_id":1237240122,"obs":[-0.2047623396,0.437733084,0.0106105944,-0.6486676931],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.196007669,0.2424649298,-0.0023627596,-0.3526625037],"action_prob":0.9370869994,"action_logp":-0.0649791434,"action_dist_inputs":[1.3463369608,-1.3546868563],"value_targets":66.2245635986} +{"eps_id":1237240122,"obs":[-0.196007669,0.2424649298,-0.0023627596,-0.3526625037],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1911583841,0.4376204014,-0.0094160102,-0.6460895538],"action_prob":0.1330613345,"action_logp":-2.0169451237,"action_dist_inputs":[0.9340438843,-0.9401142001],"value_targets":65.883392334} +{"eps_id":1237240122,"obs":[-0.1911583841,0.4376204014,-0.0094160102,-0.6460895538],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1824059635,0.2426309139,-0.0223378018,-0.3563865423],"action_prob":0.9402633905,"action_logp":-0.0615952164,"action_dist_inputs":[1.3739160299,-1.3822989464],"value_targets":65.5387802124} +{"eps_id":1237240122,"obs":[-0.1824059635,0.2426309139,-0.0223378018,-0.3563865423],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1775533557,0.0478335544,-0.0294655319,-0.0708301738],"action_prob":0.8828698397,"action_logp":-0.1245775074,"action_dist_inputs":[1.0068844557,-1.0130078793],"value_targets":65.1906890869} 
+{"eps_id":1237240122,"obs":[-0.1775533557,0.0478335544,-0.0294655319,-0.0708301738],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1765966862,-0.1468538344,-0.030882135,0.2124125361],"action_prob":0.632445693,"action_logp":-0.458160907,"action_dist_inputs":[0.2699089944,-0.2728144228],"value_targets":64.8390808105} +{"eps_id":1237240122,"obs":[-0.1765966862,-0.1468538344,-0.030882135,0.2124125361],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1795337498,0.0486957207,-0.0266338848,-0.0898498446],"action_prob":0.7726117969,"action_logp":-0.2579785287,"action_dist_inputs":[-0.6111615896,0.6119567752],"value_targets":64.4839172363} +{"eps_id":1237240122,"obs":[-0.1795337498,0.0486957207,-0.0266338848,-0.0898498446],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1785598397,0.2441891134,-0.028430881,-0.3908153474],"action_prob":0.3381707668,"action_logp":-1.0842043161,"action_dist_inputs":[0.3341785669,-0.3372781575],"value_targets":64.1251678467} +{"eps_id":1237240122,"obs":[-0.1785598397,0.2441891134,-0.028430881,-0.3908153474],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1736760587,0.0494819544,-0.0362471901,-0.1072302163],"action_prob":0.9013716578,"action_logp":-0.1038376242,"action_dist_inputs":[1.1030724049,-1.1094864607],"value_targets":63.7627983093} +{"eps_id":1237240122,"obs":[-0.1736760587,0.0494819544,-0.0362471901,-0.1072302163],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1726864278,-0.1451023221,-0.0383917913,0.1738001406],"action_prob":0.7102916837,"action_logp":-0.3420795798,"action_dist_inputs":[0.4467530847,-0.450048089],"value_targets":63.3967666626} 
+{"eps_id":1237240122,"obs":[-0.1726864278,-0.1451023221,-0.0383917913,0.1738001406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1755884737,0.050547462,-0.03491579,-0.1307426989],"action_prob":0.7106396556,"action_logp":-0.3415897787,"action_dist_inputs":[-0.4490553737,0.4494374394],"value_targets":63.0270347595} +{"eps_id":1237240122,"obs":[-0.1755884737,0.050547462,-0.03491579,-0.1307426989],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1745775193,-0.1440573782,-0.0375306457,0.150723502],"action_prob":0.743542254,"action_logp":-0.2963296771,"action_dist_inputs":[0.5304570198,-0.5340043902],"value_targets":62.6535720825} +{"eps_id":1237240122,"obs":[-0.1745775193,-0.1440573782,-0.0375306457,0.150723502],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1774586588,-0.3386223912,-0.0345161743,0.4313340485],"action_prob":0.3245897293,"action_logp":-1.1251932383,"action_dist_inputs":[-0.3663098216,0.3664484322],"value_targets":62.2763366699} +{"eps_id":1237240122,"obs":[-0.1774586588,-0.3386223912,-0.0345161743,0.4313340485],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1842311174,-0.1430291235,-0.0258894935,0.127973184],"action_prob":0.8875276446,"action_logp":-0.1193155795,"action_dist_inputs":[-1.031113863,1.0346188545],"value_targets":61.8952865601} +{"eps_id":1237240122,"obs":[-0.1842311174,-0.1430291235,-0.0258894935,0.127973184],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1870916933,-0.3377708197,-0.0233300291,0.4123771489],"action_prob":0.3412262499,"action_logp":-1.0752094984,"action_dist_inputs":[-0.3289679289,0.3288665116],"value_targets":61.5103912354} 
+{"eps_id":1237240122,"obs":[-0.1870916933,-0.3377708197,-0.0233300291,0.4123771489],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1938471198,-0.1423260421,-0.015082486,0.1124313846],"action_prob":0.8848497868,"action_logp":-0.1223373562,"action_dist_inputs":[-1.0179182291,1.0212627649],"value_targets":61.1216087341} +{"eps_id":1237240122,"obs":[-0.1938471198,-0.1423260421,-0.015082486,0.1124313846],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.196693629,0.0530087389,-0.0128338588,-0.1849715561],"action_prob":0.6527858973,"action_logp":-0.4265061021,"action_dist_inputs":[-0.3157910109,0.3155165315],"value_targets":60.7288970947} +{"eps_id":1237240122,"obs":[-0.196693629,0.0530087389,-0.0128338588,-0.1849715561],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1956334561,-0.1419272572,-0.0165332891,0.1036353111],"action_prob":0.7804374695,"action_logp":-0.247900635,"action_dist_inputs":[0.6320483088,-0.6361693144],"value_targets":60.3322181702} +{"eps_id":1237240122,"obs":[-0.1956334561,-0.1419272572,-0.0165332891,0.1036353111],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1984720081,0.0534276851,-0.0144605832,-0.1942176223],"action_prob":0.63419801,"action_logp":-0.4553940594,"action_dist_inputs":[-0.275329113,0.2749398649],"value_targets":59.9315338135} +{"eps_id":1237240122,"obs":[-0.1984720081,0.0534276851,-0.0144605832,-0.1942176223],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1974034458,-0.1414844543,-0.018344935,0.0938687697],"action_prob":0.7930475473,"action_logp":-0.2318720967,"action_dist_inputs":[0.6695770025,-0.6738169789],"value_targets":59.526802063} 
+{"eps_id":1237240122,"obs":[-0.1974034458,-0.1414844543,-0.018344935,0.0938687697],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2002331465,0.0538955592,-0.0164675601,-0.2045450509],"action_prob":0.6123541594,"action_logp":-0.4904444516,"action_dist_inputs":[-0.2288672328,0.2283513844],"value_targets":59.117980957} +{"eps_id":1237240122,"obs":[-0.2002331465,0.0538955592,-0.0164675601,-0.2045450509],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1991552263,-0.1409870684,-0.0205584615,0.0828980133],"action_prob":0.8063514233,"action_logp":-0.2152355909,"action_dist_inputs":[0.7110511661,-0.715423584],"value_targets":58.7050323486} +{"eps_id":1237240122,"obs":[-0.1991552263,-0.1409870684,-0.0205584615,0.0828980133],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2019749731,-0.3358083963,-0.0189005006,0.3690244555],"action_prob":0.4134733975,"action_logp":-0.8831620812,"action_dist_inputs":[-0.1751409918,0.1744837463],"value_targets":58.2879104614} +{"eps_id":1237240122,"obs":[-0.2019749731,-0.3358083963,-0.0189005006,0.3690244555],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.208691135,-0.530656755,-0.0115200114,0.655688405],"action_prob":0.1336029172,"action_logp":-2.0128831863,"action_dist_inputs":[-0.933311522,0.9361599088],"value_targets":57.8665771484} +{"eps_id":1237240122,"obs":[-0.208691135,-0.530656755,-0.0115200114,0.655688405],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2193042785,-0.3353763223,0.0015937564,0.3594003916],"action_prob":0.9322078824,"action_logp":-0.0701994151,"action_dist_inputs":[-1.3076040745,1.3135068417],"value_targets":57.4409866333} 
+{"eps_id":1237240122,"obs":[-0.2193042785,-0.3353763223,0.0015937564,0.3594003916],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2260117978,-0.1402770728,0.0087817647,0.0672204494],"action_prob":0.8728659749,"action_logp":-0.13597323,"action_dist_inputs":[-0.9618790746,0.9646611214],"value_targets":57.0110969543} +{"eps_id":1237240122,"obs":[-0.2260117978,-0.1402770728,0.0087817647,0.0672204494],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2288173437,0.0547178797,0.0101261735,-0.2226788998],"action_prob":0.6200957298,"action_logp":-0.4778814018,"action_dist_inputs":[-0.2454091311,0.2445453405],"value_targets":56.5768661499} +{"eps_id":1237240122,"obs":[-0.2288173437,0.0547178797,0.0101261735,-0.2226788998],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2277229875,-0.1405473351,0.0056725955,0.0731809437],"action_prob":0.7910718322,"action_logp":-0.2343665063,"action_dist_inputs":[0.6634106636,-0.6679878831],"value_targets":56.1382484436} +{"eps_id":1237240122,"obs":[-0.2277229875,-0.1405473351,0.0056725955,0.0731809437],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2305339277,0.0544928387,0.0071362141,-0.217706874],"action_prob":0.6249927878,"action_logp":-0.470015198,"action_dist_inputs":[-0.2558156848,0.254979223],"value_targets":55.6952018738} +{"eps_id":1237240122,"obs":[-0.2305339277,0.0544928387,0.0071362141,-0.217706874],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2294440717,-0.1407303959,0.0027820768,0.0772185549],"action_prob":0.7890092731,"action_logp":-0.2369771749,"action_dist_inputs":[0.657202661,-0.6617615819],"value_targets":55.2476768494} 
+{"eps_id":1237240122,"obs":[-0.2294440717,-0.1407303959,0.0027820768,0.0772185549],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2322586775,-0.3358921111,0.0043264478,0.3707779348],"action_prob":0.3732474148,"action_logp":-0.9855138063,"action_dist_inputs":[-0.2595691085,0.2587414086],"value_targets":54.7956352234} +{"eps_id":1237240122,"obs":[-0.2322586775,-0.3358921111,0.0043264478,0.3707779348],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2389765233,-0.1408319026,0.011742007,0.0794623196],"action_prob":0.8789650798,"action_logp":-0.1290101111,"action_dist_inputs":[-0.9899238944,0.9927417636],"value_targets":54.3390235901} +{"eps_id":1237240122,"obs":[-0.2389765233,-0.1408319026,0.011742007,0.0794623196],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2417931557,-0.3361201882,0.0133312531,0.375826627],"action_prob":0.3511646986,"action_logp":-1.0464999676,"action_dist_inputs":[-0.3073762059,0.3065473735],"value_targets":53.8778038025} +{"eps_id":1237240122,"obs":[-0.2417931557,-0.3361201882,0.0133312531,0.375826627],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2485155612,-0.1411901116,0.0208477862,0.0873767585],"action_prob":0.8848928213,"action_logp":-0.1222887263,"action_dist_inputs":[-1.0183649063,1.0212384462],"value_targets":53.4119224548} +{"eps_id":1237240122,"obs":[-0.2485155612,-0.1411901116,0.0208477862,0.0873767585],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.251339376,0.0536269061,0.0225953218,-0.1986564547],"action_prob":0.6791672111,"action_logp":-0.3868879378,"action_dist_inputs":[-0.3753580451,0.3745892048],"value_targets":52.9413375854} 
+{"eps_id":1237240122,"obs":[-0.251339376,0.0536269061,0.0225953218,-0.1986564547],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2502668202,0.24841851,0.0186221916,-0.4841268063],"action_prob":0.2597570717,"action_logp":-1.3480083942,"action_dist_inputs":[0.5214051604,-0.525826335],"value_targets":52.4659957886} +{"eps_id":1237240122,"obs":[-0.2502668202,0.24841851,0.0186221916,-0.4841268063],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2452984601,0.0530387685,0.0089396555,-0.185633406],"action_prob":0.9118629098,"action_logp":-0.0922656283,"action_dist_inputs":[1.1646662951,-1.1719304323],"value_targets":51.9858551025} +{"eps_id":1237240122,"obs":[-0.2452984601,0.0530387685,0.0089396555,-0.185633406],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2442376763,-0.1422099471,0.0052269874,0.1098561808],"action_prob":0.7439357638,"action_logp":-0.2958005667,"action_dist_inputs":[0.5310954452,-0.535431087],"value_targets":51.5008621216} +{"eps_id":1237240122,"obs":[-0.2442376763,-0.1422099471,0.0052269874,0.1098561808],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2470818758,0.0528367199,0.007424111,-0.1811730862],"action_prob":0.6876911521,"action_logp":-0.3744154572,"action_dist_inputs":[-0.3949749768,0.3943722844],"value_targets":51.0109710693} +{"eps_id":1237240122,"obs":[-0.2470818758,0.0528367199,0.007424111,-0.1811730862],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2460251451,0.2478516549,0.0038006492,-0.4715047479],"action_prob":0.2602555156,"action_logp":-1.3460913897,"action_dist_inputs":[0.5201635361,-0.5244774222],"value_targets":50.5161323547} 
+{"eps_id":1237240122,"obs":[-0.2460251451,0.2478516549,0.0038006492,-0.4715047479],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2410681099,0.0526762269,-0.0056294459,-0.1776263416],"action_prob":0.9135292768,"action_logp":-0.0904398263,"action_dist_inputs":[1.1751372814,-1.1823722124],"value_targets":50.0162963867} +{"eps_id":1237240122,"obs":[-0.2410681099,0.0526762269,-0.0056294459,-0.1776263416],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2400145829,-0.1423647106,-0.0091819726,0.1132753789],"action_prob":0.7555251718,"action_logp":-0.2803421915,"action_dist_inputs":[0.5619921684,-0.5663084984],"value_targets":49.5114097595} +{"eps_id":1237240122,"obs":[-0.2400145829,-0.1423647106,-0.0091819726,0.1132753789],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2428618819,-0.337353915,-0.0069164652,0.4030473828],"action_prob":0.3323706388,"action_logp":-1.1015045643,"action_dist_inputs":[-0.3490535617,0.3484289944],"value_targets":49.0014266968} +{"eps_id":1237240122,"obs":[-0.2428618819,-0.337353915,-0.0069164652,0.4030473828],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2496089637,-0.1421345472,0.0011444823,0.1081919074],"action_prob":0.8861895204,"action_logp":-0.1208244413,"action_dist_inputs":[-1.0247153044,1.0276811123],"value_targets":48.486289978} +{"eps_id":1237240122,"obs":[-0.2496089637,-0.1421345472,0.0011444823,0.1081919074],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2524516582,0.0529709905,0.0033083204,-0.184129715],"action_prob":0.6781042814,"action_logp":-0.388454169,"action_dist_inputs":[-0.3728825748,0.3721909225],"value_targets":47.9659461975} 
+{"eps_id":1237240122,"obs":[-0.2524516582,0.0529709905,0.0033083204,-0.184129715],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2513922453,-0.1421981454,-0.000374274,0.1095950231],"action_prob":0.7494373918,"action_logp":-0.2884324789,"action_dist_inputs":[0.5456047654,-0.550009191],"value_targets":47.4403495789} +{"eps_id":1237240122,"obs":[-0.2513922453,-0.1421981454,-0.000374274,0.1095950231],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2542361915,-0.3373147249,0.0018176264,0.4021598399],"action_prob":0.322252661,"action_logp":-1.1324193478,"action_dist_inputs":[-0.3720703423,0.3713683188],"value_targets":46.9094467163} +{"eps_id":1237240122,"obs":[-0.2542361915,-0.3373147249,0.0018176264,0.4021598399],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2609824836,-0.5324624181,0.0098608229,0.6954152584],"action_prob":0.1109879389,"action_logp":-2.1983337402,"action_dist_inputs":[-1.0388703346,1.0418188572],"value_targets":46.3731765747} +{"eps_id":1237240122,"obs":[-0.2609824836,-0.5324624181,0.0098608229,0.6954152584],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2716317475,-0.3374786079,0.0237691291,0.4058528244],"action_prob":0.9366115928,"action_logp":-0.0654866174,"action_dist_inputs":[-1.3434212208,1.3495663404],"value_targets":45.8314933777} +{"eps_id":1237240122,"obs":[-0.2716317475,-0.3374786079,0.0237691291,0.4058528244],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2783813179,-0.1427016407,0.0318861865,0.1207573563],"action_prob":0.8975406885,"action_logp":-0.1080968529,"action_dist_inputs":[-1.083566308,1.0866258144],"value_targets":45.2843360901} 
+{"eps_id":1237240122,"obs":[-0.2783813179,-0.1427016407,0.0318861865,0.1207573563],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2812353373,0.0519493036,0.0343013331,-0.1616976857],"action_prob":0.7439782619,"action_logp":-0.2957434356,"action_dist_inputs":[-0.5336709619,0.5330786109],"value_targets":44.7316513062} +{"eps_id":1237240122,"obs":[-0.2812353373,0.0519493036,0.0343013331,-0.1616976857],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2801963687,-0.1436464787,0.0310673788,0.1416060179],"action_prob":0.6564078927,"action_logp":-0.4209728837,"action_dist_inputs":[0.3215563297,-0.3257708848],"value_targets":44.1733856201} +{"eps_id":1237240122,"obs":[-0.2801963687,-0.1436464787,0.0310673788,0.1416060179],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2830692828,0.0510170646,0.0338994972,-0.1411160529],"action_prob":0.7673034072,"action_logp":-0.264872998,"action_dist_inputs":[-0.5967668295,0.5963798165],"value_targets":43.6094818115} +{"eps_id":1237240122,"obs":[-0.2830692828,0.0510170646,0.0338994972,-0.1411160529],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2820489407,-0.1445735842,0.0310771782,0.1620658934],"action_prob":0.619717896,"action_logp":-0.4784909189,"action_dist_inputs":[0.2421566993,-0.246194303],"value_targets":43.0398788452} +{"eps_id":1237240122,"obs":[-0.2820489407,-0.1445735842,0.0310771782,0.1620658934],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2849404216,0.0500900075,0.0343184955,-0.1206532717],"action_prob":0.7883739471,"action_logp":-0.2377827317,"action_dist_inputs":[-0.6576635838,0.6574880481],"value_targets":42.4645233154} 
+{"eps_id":1237240122,"obs":[-0.2849404216,0.0500900075,0.0343184955,-0.1206532717],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2839386165,-0.1455063969,0.0319054313,0.1826562285],"action_prob":0.5798034072,"action_logp":-0.5450662374,"action_dist_inputs":[0.1590552032,-0.1629110277],"value_targets":41.8833580017} +{"eps_id":1237240122,"obs":[-0.2839386165,-0.1455063969,0.0319054313,0.1826562285],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2868487537,-0.3410699964,0.0355585553,0.4852309525],"action_prob":0.1923401058,"action_logp":-1.6484900713,"action_dist_inputs":[-0.7174136639,0.717462182],"value_targets":41.2963218689} +{"eps_id":1237240122,"obs":[-0.2868487537,-0.3410699964,0.0355585553,0.4852309525],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2936701477,-0.1464674175,0.0452631749,0.2039636075],"action_prob":0.9169763923,"action_logp":-0.0866735801,"action_dist_inputs":[-1.1990215778,1.2029350996],"value_targets":40.7033538818} +{"eps_id":1237240122,"obs":[-0.2936701477,-0.1464674175,0.0452631749,0.2039636075],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2965995073,0.0479789823,0.049342446,-0.0741044357],"action_prob":0.8339393735,"action_logp":-0.1815945506,"action_dist_inputs":[-0.8067371845,0.8070710301],"value_targets":40.1044006348} +{"eps_id":1237240122,"obs":[-0.2965995073,0.0479789823,0.049342446,-0.0741044357],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2956399024,0.2423601002,0.0478603579,-0.350820452],"action_prob":0.5434015393,"action_logp":-0.6099067926,"action_dist_inputs":[-0.0887206122,0.0853234082],"value_targets":39.4993934631} 
+{"eps_id":1237240122,"obs":[-0.2956399024,0.2423601002,0.0478603579,-0.350820452],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2907927036,0.0465913452,0.0408439487,-0.043437995],"action_prob":0.8106583357,"action_logp":-0.2099086195,"action_dist_inputs":[0.7239188552,-0.7303743958],"value_targets":38.8882751465} +{"eps_id":1237240122,"obs":[-0.2907927036,0.0465913452,0.0408439487,-0.043437995],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2898608744,0.2411045283,0.0399751887,-0.3229596615],"action_prob":0.5830085874,"action_logp":-0.5395534039,"action_dist_inputs":[-0.1691137105,0.1660225242],"value_targets":38.2709846497} +{"eps_id":1237240122,"obs":[-0.2898608744,0.2411045283,0.0399751887,-0.3229596615],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.285038799,0.4356351197,0.0335159972,-0.6027728319],"action_prob":0.2084825933,"action_logp":-1.567899704,"action_dist_inputs":[0.6639146805,-0.6701815128],"value_targets":37.6474609375} +{"eps_id":1237240122,"obs":[-0.285038799,0.4356351197,0.0335159972,-0.6027728319],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2763260901,0.2400608063,0.0214605387,-0.2997242808],"action_prob":0.9209988117,"action_logp":-0.0822965577,"action_dist_inputs":[1.2238208055,-1.2321741581],"value_targets":37.0176353455} +{"eps_id":1237240122,"obs":[-0.2763260901,0.2400608063,0.0214605387,-0.2997242808],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2715248764,0.0446396433,0.015466053,-0.0003511154],"action_prob":0.79033494,"action_logp":-0.2352984399,"action_dist_inputs":[0.6604229212,-0.6665226221],"value_targets":36.3814506531} 
+{"eps_id":1237240122,"obs":[-0.2715248764,0.0446396433,0.015466053,-0.0003511154],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2706320882,0.2395364046,0.0154590309,-0.2881144583],"action_prob":0.6103503108,"action_logp":-0.4937222302,"action_dist_inputs":[-0.2257192582,0.2230656445],"value_targets":35.7388381958} +{"eps_id":1237240122,"obs":[-0.2706320882,0.2395364046,0.0154590309,-0.2881144583],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2658413649,0.0441974588,0.0096967407,0.0094037252],"action_prob":0.7854389548,"action_logp":-0.2415125668,"action_dist_inputs":[0.6458208561,-0.6518275738],"value_targets":35.0897369385} +{"eps_id":1237240122,"obs":[-0.2658413649,0.0441974588,0.0096967407,0.0094037252],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2649573982,0.2391790152,0.0098848157,-0.2802040577],"action_prob":0.6163580418,"action_logp":-0.4839272797,"action_dist_inputs":[-0.2383303344,0.2357878834],"value_targets":34.4340782166} +{"eps_id":1237240122,"obs":[-0.2649573982,0.2391790152,0.0098848157,-0.2802040577],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2601738274,0.0439174585,0.0042807343,0.0155800479],"action_prob":0.7841616273,"action_logp":-0.2431401461,"action_dist_inputs":[0.6420741081,-0.6480110288],"value_targets":33.7717971802} +{"eps_id":1237240122,"obs":[-0.2601738274,0.0439174585,0.0042807343,0.0155800479],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2592954636,0.2389777601,0.0045923353,-0.2757491767],"action_prob":0.6167689562,"action_logp":-0.4832607806,"action_dist_inputs":[-0.2391620576,0.2366944253],"value_targets":33.1028251648} 
+{"eps_id":1237240122,"obs":[-0.2592954636,0.2389777601,0.0045923353,-0.2757491767],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2545159161,0.04379059,-0.0009226484,0.018378621],"action_prob":0.7865177989,"action_logp":-0.2401399165,"action_dist_inputs":[0.6490859389,-0.6549758315],"value_targets":32.4270935059} +{"eps_id":1237240122,"obs":[-0.2545159161,0.04379059,-0.0009226484,0.018378621],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2536401153,0.238925755,-0.000555076,-0.2745952606],"action_prob":0.6116257906,"action_logp":-0.4916346371,"action_dist_inputs":[-0.228289783,0.2258616686],"value_targets":31.7445411682} +{"eps_id":1237240122,"obs":[-0.2536401153,0.238925755,-0.000555076,-0.2745952606],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.248861596,0.0438117348,-0.0060469816,0.0179125331],"action_prob":0.7924104929,"action_logp":-0.2326757312,"action_dist_inputs":[0.6668255329,-0.6726914644],"value_targets":31.0550918579} +{"eps_id":1237240122,"obs":[-0.248861596,0.0438117348,-0.0060469816,0.0179125331],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.247985363,0.2390198857,-0.0056887306,-0.2766721249],"action_prob":0.6006563902,"action_logp":-0.5097322464,"action_dist_inputs":[-0.2053119689,0.2028888166],"value_targets":30.3586788177} +{"eps_id":1237240122,"obs":[-0.247985363,0.2390198857,-0.0056887306,-0.2766721249],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2432049662,0.0439795516,-0.0112221735,0.014211149],"action_prob":0.8016252518,"action_logp":-0.2211140841,"action_dist_inputs":[0.6953086853,-0.701174438],"value_targets":29.6552295685} 
+{"eps_id":1237240122,"obs":[-0.2432049662,0.0439795516,-0.0112221735,0.014211149],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2423253655,-0.1509796828,-0.0109379506,0.3033323288],"action_prob":0.4167193174,"action_logp":-0.8753423691,"action_dist_inputs":[-0.1693540365,0.1669016033],"value_targets":28.9446773529} +{"eps_id":1237240122,"obs":[-0.2423253655,-0.1509796828,-0.0109379506,0.3033323288],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2453449667,0.044296436,-0.0048713041,0.0072199847],"action_prob":0.858315289,"action_logp":-0.1527837813,"action_dist_inputs":[-0.899987638,0.9013795853],"value_targets":28.2269458771} +{"eps_id":1237240122,"obs":[-0.2453449667,0.044296436,-0.0048713041,0.0072199847],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.244459033,-0.150755316,-0.0047269044,0.2983619869],"action_prob":0.4168103039,"action_logp":-0.8751240969,"action_dist_inputs":[-0.1691911221,0.1666902006],"value_targets":27.5019664764} +{"eps_id":1237240122,"obs":[-0.244459033,-0.150755316,-0.0047269044,0.2983619869],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2474741489,0.0444336943,0.0012403354,0.0041920333],"action_prob":0.8592525721,"action_logp":-0.1516923606,"action_dist_inputs":[-0.9038592577,0.9052363634],"value_targets":26.7696628571} +{"eps_id":1237240122,"obs":[-0.2474741489,0.0444336943,0.0012403354,0.0041920333],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2465854734,0.2395378351,0.0013241761,-0.2880992889],"action_prob":0.5899647474,"action_logp":-0.5276924968,"action_dist_inputs":[-0.1831640005,0.1806557328],"value_targets":26.0299625397} 
+{"eps_id":1237240122,"obs":[-0.2465854734,0.2395378351,0.0013241761,-0.2880992889],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2417947054,0.0443970263,-0.0044378098,0.0050009708],"action_prob":0.8056212068,"action_logp":-0.2161416262,"action_dist_inputs":[0.7079486251,-0.7138560414],"value_targets":25.2827911377} +{"eps_id":1237240122,"obs":[-0.2417947054,0.0443970263,-0.0044378098,0.0050009708],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.240906775,-0.1506610066,-0.0043377904,0.2962804139],"action_prob":0.4202575684,"action_logp":-0.8668875098,"action_dist_inputs":[-0.1621041298,0.1596119553],"value_targets":24.5280704498} +{"eps_id":1237240122,"obs":[-0.240906775,-0.1506610066,-0.0043377904,0.2962804139],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2439199984,0.0445225202,0.0015878179,0.0022325846],"action_prob":0.8586679697,"action_logp":-0.1523729861,"action_dist_inputs":[-0.9014417529,0.9028282166],"value_targets":23.7657279968} +{"eps_id":1237240122,"obs":[-0.2439199984,0.0445225202,0.0015878179,0.0022325846],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2430295348,-0.1506221592,0.0016324696,0.2954160571],"action_prob":0.4131206274,"action_logp":-0.8840156794,"action_dist_inputs":[-0.1767885536,0.1742911339],"value_targets":22.9956855774} +{"eps_id":1237240122,"obs":[-0.2430295348,-0.1506221592,0.0016324696,0.2954160571],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2460419834,0.0444764756,0.0075407908,0.0032484396],"action_prob":0.8614946008,"action_logp":-0.1490865052,"action_dist_inputs":[-0.9131715894,0.9145880342],"value_targets":22.2178649902} 
+{"eps_id":1237240122,"obs":[-0.2460419834,0.0444764756,0.0075407908,0.0032484396],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2451524585,0.239489466,0.0076057599,-0.2870457768],"action_prob":0.6006916165,"action_logp":-0.5096735954,"action_dist_inputs":[-0.205404669,0.2029430866],"value_targets":21.4321861267} +{"eps_id":1237240122,"obs":[-0.2451524585,0.239489466,0.0076057599,-0.2870457768],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2403626591,0.4345021248,0.0018648444,-0.5773202181],"action_prob":0.2028182,"action_logp":-1.5954452753,"action_dist_inputs":[0.6814561486,-0.6873166561],"value_targets":20.6385707855} +{"eps_id":1237240122,"obs":[-0.2403626591,0.4345021248,0.0018648444,-0.5773202181],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2316726297,0.2393540889,-0.0096815601,-0.284050405],"action_prob":0.9256122112,"action_logp":-0.0772998855,"action_dist_inputs":[1.2565100193,-1.2646540403],"value_targets":19.8369407654} +{"eps_id":1237240122,"obs":[-0.2316726297,0.2393540889,-0.0096815601,-0.284050405],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2268855423,0.0443715602,-0.0153625682,0.0055633467],"action_prob":0.8156328797,"action_logp":-0.2037909329,"action_dist_inputs":[0.7406042218,-0.7464309931],"value_targets":19.0272140503} +{"eps_id":1237240122,"obs":[-0.2268855423,0.0443715602,-0.0153625682,0.0055633467],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2259981036,0.2397104204,-0.0152513022,-0.2919268012],"action_prob":0.5572743416,"action_logp":-0.5846976638,"action_dist_inputs":[-0.1162719503,0.1138352528],"value_targets":18.2093067169} 
+{"eps_id":1237240122,"obs":[-0.2259981036,0.2397104204,-0.0152513022,-0.2919268012],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2212039083,0.0448092148,-0.021089837,-0.0040926863],"action_prob":0.8294987679,"action_logp":-0.1869336814,"action_dist_inputs":[0.788102448,-0.7939764261],"value_targets":17.3831367493} +{"eps_id":1237240122,"obs":[-0.2212039083,0.0448092148,-0.021089837,-0.0040926863],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2203077227,-0.1500040293,-0.0211716909,0.2818622291],"action_prob":0.4745295048,"action_logp":-0.7454314828,"action_dist_inputs":[-0.0522494391,0.0497208089],"value_targets":16.5486240387} +{"eps_id":1237240122,"obs":[-0.2203077227,-0.1500040293,-0.0211716909,0.2818622291],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2233078033,0.0454134159,-0.0155344466,-0.017422244],"action_prob":0.8407580256,"action_logp":-0.1734513491,"action_dist_inputs":[-0.8313193321,0.8325596452],"value_targets":15.7056808472} +{"eps_id":1237240122,"obs":[-0.2233078033,0.0454134159,-0.0155344466,-0.017422244],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2223995328,0.2407546639,-0.0158828907,-0.3149656951],"action_prob":0.5109684467,"action_logp":-0.6714473963,"action_dist_inputs":[-0.0232646111,0.0206161961],"value_targets":14.8542232513} +{"eps_id":1237240122,"obs":[-0.2223995328,0.2407546639,-0.0158828907,-0.3149656951],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2175844312,0.0458625183,-0.0221822057,-0.0273337346],"action_prob":0.8494777679,"action_logp":-0.1631335318,"action_dist_inputs":[0.8622415066,-0.8682695031],"value_targets":13.9941644669} 
+{"eps_id":1237240122,"obs":[-0.2175844312,0.0458625183,-0.0221822057,-0.0273337346],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2166671902,-0.148934409,-0.0227288809,0.2582687438],"action_prob":0.5243812203,"action_logp":-0.6455363631,"action_dist_inputs":[0.0474285744,-0.0501736291],"value_targets":13.125418663} +{"eps_id":1237240122,"obs":[-0.2166671902,-0.148934409,-0.0227288809,0.2582687438],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2196458727,0.0465045236,-0.0175635051,-0.0414956547],"action_prob":0.8231242299,"action_logp":-0.1946481466,"action_dist_inputs":[-0.7683303952,0.7693289518],"value_targets":12.2478981018} +{"eps_id":1237240122,"obs":[-0.2196458727,0.0465045236,-0.0175635051,-0.0414956547],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2187157869,-0.148361221,-0.0183934178,0.2455944717],"action_prob":0.542770505,"action_logp":-0.6110687256,"action_dist_inputs":[0.0843118727,-0.0871891826],"value_targets":11.3615131378} +{"eps_id":1237240122,"obs":[-0.2187157869,-0.148361221,-0.0183934178,0.2455944717],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2216830105,0.0470185354,-0.0134815285,-0.0528329499],"action_prob":0.8168591857,"action_logp":-0.2022885531,"action_dist_inputs":[-0.7471660376,0.7480456233],"value_targets":10.4661741257} +{"eps_id":1237240122,"obs":[-0.2216830105,0.0470185354,-0.0134815285,-0.0528329499],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2207426429,0.242331177,-0.0145381875,-0.3497387469],"action_prob":0.443483144,"action_logp":-0.81309551,"action_dist_inputs":[0.1120271534,-0.1150105596],"value_targets":9.5617923737} 
+{"eps_id":1237240122,"obs":[-0.2207426429,0.242331177,-0.0145381875,-0.3497387469],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2158960104,0.0474189743,-0.0215329621,-0.0616754033],"action_prob":0.8720611334,"action_logp":-0.1368957609,"action_dist_inputs":[0.9565155506,-0.9627915025],"value_targets":8.6482753754} +{"eps_id":1237240122,"obs":[-0.2158960104,0.0474189743,-0.0215329621,-0.0616754033],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2149476409,0.2428429276,-0.0227664709,-0.3610735834],"action_prob":0.4075363576,"action_logp":-0.8976251483,"action_dist_inputs":[0.1855424345,-0.1886170357],"value_targets":7.7255306244} +{"eps_id":1237240122,"obs":[-0.2149476409,0.2428429276,-0.0227664709,-0.3610735834],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2100907713,0.0480518639,-0.0299879424,-0.0756554678],"action_prob":0.8838845491,"action_logp":-0.1234288216,"action_dist_inputs":[1.0116844177,-1.0180568695],"value_targets":6.7934651375} +{"eps_id":1237240122,"obs":[-0.2100907713,0.0480518639,-0.0299879424,-0.0756554678],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2091297358,-0.1466276348,-0.031501051,0.207417354],"action_prob":0.637992382,"action_logp":-0.4494289756,"action_dist_inputs":[0.2817204893,-0.2849403322],"value_targets":5.8519849777} +{"eps_id":1237240122,"obs":[-0.2091297358,-0.1466276348,-0.031501051,0.207417354],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2120622844,0.0489302725,-0.0273527056,-0.0950337574],"action_prob":0.7656220794,"action_logp":-0.2670665979,"action_dist_inputs":[-0.5916506052,0.5921034217],"value_targets":4.9009947777} 
+{"eps_id":1237240122,"obs":[-0.2120622844,0.0489302725,-0.0273527056,-0.0950337574],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2110836804,-0.1457891762,-0.0292533804,0.1888954788],"action_prob":0.6678400636,"action_logp":-0.4037065804,"action_dist_inputs":[0.3475091159,-0.3509230912],"value_targets":3.9403989315} +{"eps_id":1237240122,"obs":[-0.2110836804,-0.1457891762,-0.0292533804,0.1888954788],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2139994651,0.0497388095,-0.0254754703,-0.1128702909],"action_prob":0.7460917234,"action_logp":-0.2929067016,"action_dist_inputs":[-0.5388072729,0.5390684605],"value_targets":2.970099926} +{"eps_id":1237240122,"obs":[-0.2139994651,0.0497388095,-0.0254754703,-0.1128702909],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2130046934,-0.145009011,-0.0277328771,0.1716676801],"action_prob":0.6946983933,"action_logp":-0.3642775118,"action_dist_inputs":[0.4092903733,-0.4128870964],"value_targets":1.9900000095} +{"eps_id":1237240122,"obs":[-0.2130046934,-0.145009011,-0.0277328771,0.1716676801],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.2159048766,0.0504986681,-0.0242995229,-0.1296335012],"action_prob":0.7253143787,"action_logp":-0.3211501241,"action_dist_inputs":[-0.4854495227,0.4855284393],"value_targets":1.0} +{"eps_id":904116594,"obs":[0.0156429354,0.0093969591,-0.0189328082,-0.0421961322],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0158308744,0.2047852129,-0.0197767317,-0.3407919109],"action_prob":0.3992424309,"action_logp":-0.9181864262,"action_dist_inputs":[0.2039852291,-0.2046373338],"value_targets":86.6020355225} 
+{"eps_id":904116594,"obs":[0.0158308744,0.2047852129,-0.0197767317,-0.3407919109],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0199265778,0.0099501489,-0.0265925694,-0.0544104353],"action_prob":0.882645607,"action_logp":-0.1248315424,"action_dist_inputs":[1.0066689253,-1.011056304],"value_targets":86.4666976929} +{"eps_id":904116594,"obs":[0.0199265778,0.0099501489,-0.0265925694,-0.0544104353],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.020125581,-0.1847806126,-0.027680777,0.2297651321],"action_prob":0.6413950324,"action_logp":-0.4441097379,"action_dist_inputs":[0.2903073132,-0.2911166847],"value_targets":86.3300018311} +{"eps_id":904116594,"obs":[0.020125581,-0.1847806126,-0.027680777,0.2297651321],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0164299682,0.0107257394,-0.023085475,-0.0715191513],"action_prob":0.77941221,"action_logp":-0.2492152005,"action_dist_inputs":[-0.6296673417,0.6325771809],"value_targets":86.1919174194} +{"eps_id":904116594,"obs":[0.0164299682,0.0107257394,-0.023085475,-0.0715191513],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0166444834,-0.1840577722,-0.0245158579,0.213791728],"action_prob":0.6655384898,"action_logp":-0.4071588218,"action_dist_inputs":[0.3435314596,-0.3445430398],"value_targets":86.052444458} +{"eps_id":904116594,"obs":[0.0166444834,-0.1840577722,-0.0245158579,0.213791728],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0129633285,0.0114059448,-0.0202400237,-0.0865226761],"action_prob":0.7637816668,"action_logp":-0.2694733143,"action_dist_inputs":[-0.5853896141,0.5881358385],"value_targets":85.9115600586} 
+{"eps_id":904116594,"obs":[0.0129633285,0.0114059448,-0.0202400237,-0.0865226761],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0131914476,-0.1834201217,-0.021970477,0.1997064054],"action_prob":0.6860024929,"action_logp":-0.3768739998,"action_dist_inputs":[0.3901506066,-0.3913456798],"value_targets":85.7692489624} +{"eps_id":904116594,"obs":[0.0131914476,-0.1834201217,-0.021970477,0.1997064054],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0095230453,0.01200906,-0.0179763492,-0.0998254195],"action_prob":0.7485982776,"action_logp":-0.2895528078,"action_dist_inputs":[-0.5442774296,0.5468727946],"value_targets":85.62550354} +{"eps_id":904116594,"obs":[0.0095230453,0.01200906,-0.0179763492,-0.0998254195],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0097632259,-0.1828507036,-0.0199728571,0.1871322393],"action_prob":0.7036324143,"action_logp":-0.3514991701,"action_dist_inputs":[0.4316460192,-0.4330095947],"value_targets":85.4803085327} +{"eps_id":904116594,"obs":[0.0097632259,-0.1828507036,-0.0199728571,0.1871322393],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0061062118,0.012551222,-0.0162302125,-0.1117838174],"action_prob":0.7337396741,"action_logp":-0.3096009791,"action_dist_inputs":[-0.505612433,0.5080672503],"value_targets":85.3336486816} +{"eps_id":904116594,"obs":[0.0061062118,0.012551222,-0.0162302125,-0.1117838174],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0063572363,-0.1823344529,-0.0184658878,0.1757347137],"action_prob":0.7191264033,"action_logp":-0.3297181427,"action_dist_inputs":[0.4693058431,-0.4708265364],"value_targets":85.1855010986} 
+{"eps_id":904116594,"obs":[0.0063572363,-0.1823344529,-0.0184658878,0.1757347137],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0027105473,-0.3771873116,-0.0149511946,0.4625355303],"action_prob":0.2809870541,"action_logp":-1.2694467306,"action_dist_inputs":[-0.4686246216,0.4709462225],"value_targets":85.0358581543} +{"eps_id":904116594,"obs":[0.0027105473,-0.3771873116,-0.0149511946,0.4625355303],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0048331991,-0.1818573028,-0.0057004835,0.1651777625],"action_prob":0.9071794748,"action_logp":-0.0974149555,"action_dist_inputs":[-1.1371369362,1.1425356865],"value_targets":84.8847045898} +{"eps_id":904116594,"obs":[-0.0048331991,-0.1818573028,-0.0057004835,0.1651777625],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0084703453,0.0133457873,-0.0023969284,-0.1292980462],"action_prob":0.725333631,"action_logp":-0.3211235702,"action_dist_inputs":[-0.4844378531,0.486636728],"value_targets":84.7320251465} +{"eps_id":904116594,"obs":[-0.0084703453,0.0133457873,-0.0023969284,-0.1292980462],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0082034292,-0.1817417443,-0.0049828896,0.1626277119],"action_prob":0.7217715979,"action_logp":-0.3260464966,"action_dist_inputs":[0.4757600725,-0.477506429],"value_targets":84.5778045654} +{"eps_id":904116594,"obs":[-0.0082034292,-0.1817417443,-0.0049828896,0.1626277119],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0118382638,0.0134511832,-0.0017303352,-0.1316230148],"action_prob":0.7228599191,"action_logp":-0.3245398104,"action_dist_inputs":[-0.4782726169,0.4804197252],"value_targets":84.4220275879} 
+{"eps_id":904116594,"obs":[-0.0118382638,0.0134511832,-0.0017303352,-0.1316230148],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0115692401,-0.1816459447,-0.0043627955,0.1605135202],"action_prob":0.723947227,"action_logp":-0.3230367899,"action_dist_inputs":[0.48116377,-0.4829626679],"value_targets":84.2646713257} +{"eps_id":904116594,"obs":[-0.0115692401,-0.1816459447,-0.0043627955,0.1605135202],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0152021591,0.0135381985,-0.0011525251,-0.1335425675],"action_prob":0.7208806872,"action_logp":-0.3272816539,"action_dist_inputs":[-0.473367691,0.4754665792],"value_targets":84.1057281494} +{"eps_id":904116594,"obs":[-0.0152021591,0.0135381985,-0.0011525251,-0.1335425675],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0149313956,0.2086766362,-0.0038233763,-0.4265888631],"action_prob":0.2743542194,"action_logp":-1.2933351994,"action_dist_inputs":[0.4853973389,-0.487244606],"value_targets":83.9451828003} +{"eps_id":904116594,"obs":[-0.0149313956,0.2086766362,-0.0038233763,-0.4265888631],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0107578626,0.0136090508,-0.0123551534,-0.1351137012],"action_prob":0.907117188,"action_logp":-0.0974836275,"action_dist_inputs":[1.1368041039,-1.1421290636],"value_targets":83.7830123901} +{"eps_id":904116594,"obs":[-0.0107578626,0.0136090508,-0.0123551534,-0.1351137012],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0104856817,0.2089057714,-0.0150574278,-0.4316687286],"action_prob":0.2542451024,"action_logp":-1.3694565296,"action_dist_inputs":[0.5370906591,-0.5390076637],"value_targets":83.6192016602} 
+{"eps_id":904116594,"obs":[-0.0104856817,0.2089057714,-0.0150574278,-0.4316687286],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0063075661,0.0140002314,-0.023690803,-0.1437702924],"action_prob":0.9118766785,"action_logp":-0.0922505185,"action_dist_inputs":[1.1656686068,-1.1710991859],"value_targets":83.453742981} +{"eps_id":904116594,"obs":[-0.0063075661,0.0140002314,-0.023690803,-0.1437702924],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0060275616,0.2094533145,-0.0265662074,-0.4438320994],"action_prob":0.226945594,"action_logp":-1.483044982,"action_dist_inputs":[0.6117820144,-0.6138570905],"value_targets":83.286605835} +{"eps_id":904116594,"obs":[-0.0060275616,0.2094533145,-0.0265662074,-0.4438320994],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0018384951,0.01471713,-0.0354428515,-0.1596404314],"action_prob":0.9176011682,"action_logp":-0.0859924406,"action_dist_inputs":[1.2022930384,-1.2078982592],"value_targets":83.1177825928} +{"eps_id":904116594,"obs":[-0.0018384951,0.01471713,-0.0354428515,-0.1596404314],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0015441525,-0.1798799485,-0.03863566,0.1216538474],"action_prob":0.8044737577,"action_logp":-0.217566967,"action_dist_inputs":[0.7060823441,-0.7084109187],"value_targets":82.9472579956} +{"eps_id":904116594,"obs":[-0.0015441525,-0.1798799485,-0.03863566,0.1216538474],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0051417514,0.0157736298,-0.0362025797,-0.1829635501],"action_prob":0.5785945654,"action_logp":-0.5471532345,"action_dist_inputs":[-0.1577224731,0.1592843831],"value_targets":82.7750091553} 
+{"eps_id":904116594,"obs":[-0.0051417514,0.0157736298,-0.0362025797,-0.1829635501],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0048262789,-0.1788121015,-0.0398618542,0.0980825648],"action_prob":0.8261758685,"action_logp":-0.1909476221,"action_dist_inputs":[0.7780587673,-0.7807045579],"value_targets":82.601020813} +{"eps_id":904116594,"obs":[-0.0048262789,-0.1788121015,-0.0398618542,0.0980825648],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0084025208,0.0168578047,-0.037900202,-0.2069054395],"action_prob":0.5266512632,"action_logp":-0.6412166357,"action_dist_inputs":[-0.0527203381,0.0539858714],"value_targets":82.4252700806} +{"eps_id":904116594,"obs":[-0.0084025208,0.0168578047,-0.037900202,-0.2069054395],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0080653653,-0.177702263,-0.0420383103,0.0735851005],"action_prob":0.8455489874,"action_logp":-0.1677691489,"action_dist_inputs":[0.8485660553,-0.8515428901],"value_targets":82.2477493286} +{"eps_id":904116594,"obs":[-0.0080653653,-0.177702263,-0.0420383103,0.0735851005],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0116194105,-0.3721971214,-0.0405666083,0.3527140319],"action_prob":0.5298097134,"action_logp":-0.6352373362,"action_dist_inputs":[0.0601592287,-0.0592212714],"value_targets":82.0684280396} +{"eps_id":904116594,"obs":[-0.0116194105,-0.3721971214,-0.0405666083,0.3527140319],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0190633535,-0.5667194128,-0.0335123278,0.6323341131],"action_prob":0.1709945202,"action_logp":-1.7661237717,"action_dist_inputs":[-0.7871670127,0.791428268],"value_targets":81.8873062134} 
+{"eps_id":904116594,"obs":[-0.0190633535,-0.5667194128,-0.0335123278,0.6323341131],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0303977411,-0.3711463809,-0.0208656453,0.3292884529],"action_prob":0.9279791117,"action_logp":-0.0747460797,"action_dist_inputs":[-1.2746660709,1.2813870907],"value_targets":81.7043457031} +{"eps_id":904116594,"obs":[-0.0303977411,-0.3711463809,-0.0208656453,0.3292884529],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0378206708,-0.1757336855,-0.0142798759,0.0300991945],"action_prob":0.8287211657,"action_logp":-0.1878715456,"action_dist_inputs":[-0.7862784266,0.7903122902],"value_targets":81.5195465088} +{"eps_id":904116594,"obs":[-0.0378206708,-0.1757336855,-0.0142798759,0.0300991945],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0413353443,-0.3706479669,-0.0136778923,0.3182426691],"action_prob":0.5542267561,"action_logp":-0.5901813507,"action_dist_inputs":[0.1090675071,-0.1086961627],"value_targets":81.3328704834} +{"eps_id":904116594,"obs":[-0.0413353443,-0.3706479669,-0.0136778923,0.3182426691],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0487483032,-0.1753339171,-0.0073130387,0.0212778132],"action_prob":0.8264647126,"action_logp":-0.1905980259,"action_dist_inputs":[-0.7784407735,0.7823354006],"value_targets":81.144317627} +{"eps_id":904116594,"obs":[-0.0487483032,-0.1753339171,-0.0073130387,0.0212778132],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0522549823,-0.3703502417,-0.006887482,0.3116444647],"action_prob":0.5555292368,"action_logp":-0.5878340602,"action_dist_inputs":[0.1116293594,-0.1114075556],"value_targets":80.9538574219} 
+{"eps_id":904116594,"obs":[-0.0522549823,-0.3703502417,-0.006887482,0.3116444647],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0596619844,-0.1751308441,-0.0006545932,0.016797401],"action_prob":0.8277906775,"action_logp":-0.1889949739,"action_dist_inputs":[-0.7831306458,0.7869190574],"value_targets":80.76146698} +{"eps_id":904116594,"obs":[-0.0596619844,-0.1751308441,-0.0006545932,0.016797401],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0631645992,-0.3702434003,-0.0003186452,0.3092737198],"action_prob":0.5484887362,"action_logp":-0.6005885005,"action_dist_inputs":[0.0973443761,-0.0972220674],"value_targets":80.5671386719} +{"eps_id":904116594,"obs":[-0.0631645992,-0.3702434003,-0.0003186452,0.3092737198],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0705694705,-0.1751169115,0.0058668293,0.0164903216],"action_prob":0.8325489759,"action_logp":-0.1832631975,"action_dist_inputs":[-0.8000426292,0.8037586808],"value_targets":80.3708496094} +{"eps_id":904116594,"obs":[-0.0705694705,-0.1751169115,0.0058668293,0.0164903216],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0740718096,0.019920418,0.0061966358,-0.2743358016],"action_prob":0.4670735002,"action_logp":-0.7612686157,"action_dist_inputs":[0.0659841001,-0.0659127906],"value_targets":80.1725769043} +{"eps_id":904116594,"obs":[-0.0740718096,0.019920418,0.0061966358,-0.2743358016],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0736733973,-0.1752893925,0.0007099198,0.0202951059],"action_prob":0.8568461537,"action_logp":-0.1544968784,"action_dist_inputs":[0.8927128911,-0.8966257572],"value_targets":79.9722976685} 
+{"eps_id":904116594,"obs":[-0.0736733973,-0.1752893925,0.0007099198,0.0202951059],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.077179186,-0.3704215288,0.001115822,0.3132019341],"action_prob":0.5363126993,"action_logp":-0.6230379343,"action_dist_inputs":[0.0727773458,-0.0727295876],"value_targets":79.7699966431} +{"eps_id":904116594,"obs":[-0.077179186,-0.3704215288,0.001115822,0.3132019341],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0845876187,-0.1753154844,0.0073798606,0.020871114],"action_prob":0.8375521898,"action_logp":-0.177271679,"action_dist_inputs":[-0.818243742,0.8218830824],"value_targets":79.5656509399} +{"eps_id":904116594,"obs":[-0.0845876187,-0.1753154844,0.0073798606,0.020871114],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.088093929,0.0196998585,0.0077972831,-0.269474268],"action_prob":0.4816126525,"action_logp":-0.730615139,"action_dist_inputs":[0.0367950685,-0.0367874354],"value_targets":79.3592453003} +{"eps_id":904116594,"obs":[-0.088093929,0.0196998585,0.0077972831,-0.269474268],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0876999274,-0.17553249,0.0024077976,0.0256577265],"action_prob":0.8524856567,"action_logp":-0.1595989019,"action_dist_inputs":[0.8751360178,-0.8790952563],"value_targets":79.1507568359} +{"eps_id":904116594,"obs":[-0.0876999274,-0.17553249,0.0024077976,0.0256577265],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0912105814,-0.3706888855,0.0029209519,0.3190993667],"action_prob":0.519284606,"action_logp":-0.6553031206,"action_dist_inputs":[0.0385870971,-0.0385896303],"value_targets":78.9401550293} 
+{"eps_id":904116594,"obs":[-0.0912105814,-0.3706888855,0.0029209519,0.3190993667],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0986243561,-0.565852344,0.0093029393,0.6127020121],"action_prob":0.1559110731,"action_logp":-1.8584694862,"action_dist_inputs":[-0.8426944017,0.846277535],"value_targets":78.727432251} +{"eps_id":904116594,"obs":[-0.0986243561,-0.565852344,0.0093029393,0.6127020121],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.109941408,-0.3708616197,0.021556979,0.3229636252],"action_prob":0.9324567914,"action_logp":-0.0699324533,"action_dist_inputs":[-1.3093210459,1.3157352209],"value_targets":78.5125579834} +{"eps_id":904116594,"obs":[-0.109941408,-0.3708616197,0.021556979,0.3229636252],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1173586398,-0.1760531515,0.0280162524,0.0371560976],"action_prob":0.8616551757,"action_logp":-0.1489000916,"action_dist_inputs":[-0.9127598405,0.9163460732],"value_targets":78.2955093384} +{"eps_id":904116594,"obs":[-0.1173586398,-0.1760531515,0.0280162524,0.0371560976],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1208797023,0.0186560787,0.0287593752,-0.2465574294],"action_prob":0.5676494241,"action_logp":-0.5662512183,"action_dist_inputs":[-0.1361100376,0.1361574233],"value_targets":78.0762710571} +{"eps_id":904116594,"obs":[-0.1208797023,0.0186560787,0.0287593752,-0.2465574294],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1205065772,-0.1768645644,0.0238282252,0.0550563484],"action_prob":0.8148959279,"action_logp":-0.2046948671,"action_dist_inputs":[0.7391722202,-0.7429701686],"value_targets":77.8548202515} 
+{"eps_id":904116594,"obs":[-0.1205065772,-0.1768645644,0.0238282252,0.0550563484],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.124043867,0.0179077666,0.0249293521,-0.2300143689],"action_prob":0.5953341126,"action_logp":-0.5186324716,"action_dist_inputs":[-0.1929378808,0.1931231171],"value_targets":77.6311340332} +{"eps_id":904116594,"obs":[-0.124043867,0.0179077666,0.0249293521,-0.2300143689],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1236857176,0.2126647681,0.0203290656,-0.5147306323],"action_prob":0.1972191632,"action_logp":-1.6234396696,"action_dist_inputs":[0.7000466585,-0.7037195563],"value_targets":77.4051818848} +{"eps_id":904116594,"obs":[-0.1236857176,0.2126647681,0.0203290656,-0.5147306323],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1194324195,0.0172625184,0.0100344522,-0.2157114893],"action_prob":0.9222375154,"action_logp":-0.0809524655,"action_dist_inputs":[1.2332299948,-1.2399145365],"value_targets":77.1769561768} +{"eps_id":904116594,"obs":[-0.1194324195,0.0172625184,0.0100344522,-0.2157114893],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1190871671,-0.1780014336,0.0057202228,0.0801197812],"action_prob":0.8054471612,"action_logp":-0.2163576931,"action_dist_inputs":[0.7085487843,-0.7121447921],"value_targets":76.9464187622} +{"eps_id":904116594,"obs":[-0.1190871671,-0.1780014336,0.0057202228,0.0801197812],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1226471961,-0.3732049167,0.0073226183,0.3746019602],"action_prob":0.3940764368,"action_logp":-0.9312103987,"action_dist_inputs":[-0.2149327546,0.2152762711],"value_targets":76.7135543823} 
+{"eps_id":904116594,"obs":[-0.1226471961,-0.3732049167,0.0073226183,0.3746019602],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.130111292,-0.1781877428,0.0148146572,0.0842368603],"action_prob":0.8810766935,"action_logp":-0.126610592,"action_dist_inputs":[-0.9994104505,1.0032560825],"value_targets":76.4783401489} +{"eps_id":904116594,"obs":[-0.130111292,-0.1781877428,0.0148146572,0.0842368603],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1336750537,-0.3735188842,0.0164993946,0.3815567791],"action_prob":0.3656953275,"action_logp":-1.0059547424,"action_dist_inputs":[-0.2751898766,0.2755388916],"value_targets":76.2407455444} +{"eps_id":904116594,"obs":[-0.1336750537,-0.3735188842,0.0164993946,0.3815567791],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1411454231,-0.1786350608,0.0241305307,0.0941214189],"action_prob":0.8887331486,"action_logp":-0.1179582849,"action_dist_inputs":[-1.0369861126,1.0408787727],"value_targets":76.0007553101} +{"eps_id":904116594,"obs":[-0.1411454231,-0.1786350608,0.0241305307,0.0941214189],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1447181255,-0.3740943968,0.026012959,0.3943187892],"action_prob":0.3282222748,"action_logp":-1.1140642166,"action_dist_inputs":[-0.3579099774,0.3583265245],"value_targets":75.7583389282} +{"eps_id":904116594,"obs":[-0.1447181255,-0.3740943968,0.026012959,0.3943187892],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1522000134,-0.1793510467,0.0338993333,0.1099495143],"action_prob":0.8975988626,"action_logp":-0.1080320105,"action_dist_inputs":[-1.0834113359,1.0874142647],"value_targets":75.5134735107} 
+{"eps_id":904116594,"obs":[-0.1522000134,-0.1793510467,0.0338993333,0.1099495143],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1557870358,0.015269151,0.0360983238,-0.1718485653],"action_prob":0.7158146501,"action_logp":-0.3343340456,"action_dist_inputs":[-0.4616219103,0.462172538],"value_targets":75.26612854} +{"eps_id":904116594,"obs":[-0.1557870358,0.015269151,0.0360983238,-0.1718485653],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1554816514,-0.1803503633,0.0326613523,0.1320001483],"action_prob":0.7079858184,"action_logp":-0.3453312516,"action_dist_inputs":[0.4412037134,-0.4444179833],"value_targets":75.0162963867} +{"eps_id":904116594,"obs":[-0.1554816514,-0.1803503633,0.0326613523,0.1320001483],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1590886563,-0.3759245872,0.0353013575,0.4348057508],"action_prob":0.2542443871,"action_logp":-1.3694592714,"action_dist_inputs":[-0.537678659,0.5384232402],"value_targets":74.7639312744} +{"eps_id":904116594,"obs":[-0.1590886563,-0.3759245872,0.0353013575,0.4348057508],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1666071564,-0.1813197136,0.0439974703,0.1534567475],"action_prob":0.9121852517,"action_logp":-0.0919121727,"action_dist_inputs":[-1.1681249142,1.1724885702],"value_targets":74.5090255737} +{"eps_id":904116594,"obs":[-0.1666071564,-0.1813197136,0.0439974703,0.1534567475],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1702335477,0.0131455474,0.0470666066,-0.1250277758],"action_prob":0.7871268988,"action_logp":-0.2393658161,"action_dist_inputs":[-0.6533672214,0.6543260217],"value_targets":74.2515411377} 
+{"eps_id":904116594,"obs":[-0.1702335477,0.0131455474,0.0470666066,-0.1250277758],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1699706316,-0.1826179922,0.0445660502,0.1821248084],"action_prob":0.5997811556,"action_logp":-0.5111904144,"action_dist_inputs":[0.2008961141,-0.2036574185],"value_targets":73.9914550781} +{"eps_id":904116594,"obs":[-0.1699706316,-0.1826179922,0.0445660502,0.1821248084],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1736229956,0.0118388701,0.0482085459,-0.0961727798],"action_prob":0.8158886433,"action_logp":-0.2034773976,"action_dist_inputs":[-0.743745625,0.7449916005],"value_targets":73.7287445068} +{"eps_id":904116594,"obs":[-0.1736229956,0.0118388701,0.0482085459,-0.0961727798],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1733862162,0.2062379122,0.0462850928,-0.37326473],"action_prob":0.4619573951,"action_logp":-0.7722826004,"action_dist_inputs":[0.07499367,-0.0774713904],"value_targets":73.4633712769} +{"eps_id":904116594,"obs":[-0.1733862162,0.2062379122,0.0462850928,-0.37326473],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1692614555,0.0104900654,0.0388197973,-0.066354461],"action_prob":0.8532626033,"action_logp":-0.1586879343,"action_dist_inputs":[0.8773600459,-0.8830627203],"value_targets":73.1953277588} +{"eps_id":904116594,"obs":[-0.1692614555,0.0104900654,0.0388197973,-0.066354461],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1690516621,0.2050345689,0.0374927074,-0.3465412259],"action_prob":0.5022981167,"action_logp":-0.6885614395,"action_dist_inputs":[-0.005690224,0.0035023438],"value_targets":72.9245758057} 
+{"eps_id":904116594,"obs":[-0.1690516621,0.2050345689,0.0374927074,-0.3465412259],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1649509668,0.0093999254,0.030561883,-0.0422752053],"action_prob":0.8410146236,"action_logp":-0.1731462181,"action_dist_inputs":[0.8301456571,-0.8356516361],"value_targets":72.6510848999} +{"eps_id":904116594,"obs":[-0.1649509668,0.0093999254,0.030561883,-0.0422752053],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1647629738,0.2040705979,0.0297163781,-0.3251610696],"action_prob":0.5332254767,"action_logp":-0.6288109422,"action_dist_inputs":[-0.0675235614,0.0655745044],"value_targets":72.3748321533} +{"eps_id":904116594,"obs":[-0.1647629738,0.2040705979,0.0297163781,-0.3251610696],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1606815606,0.0085384184,0.0232131574,-0.0232569501],"action_prob":0.8308452368,"action_logp":-0.1853117645,"action_dist_inputs":[0.7931429744,-0.79848665],"value_targets":72.0957946777} +{"eps_id":904116594,"obs":[-0.1606815606,0.0085384184,0.0232131574,-0.0232569501],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1605107933,-0.1869086027,0.0227480195,0.2766586542],"action_prob":0.4445361495,"action_logp":-0.8107239008,"action_dist_inputs":[-0.1122645661,0.1105075553],"value_targets":71.8139266968} +{"eps_id":904116594,"obs":[-0.1605107933,-0.1869086027,0.0227480195,0.2766586542],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1642489582,-0.3823475838,0.0282811914,0.5764286518],"action_prob":0.1351321936,"action_logp":-2.0015017986,"action_dist_inputs":[-0.9270690084,0.9292542338],"value_targets":71.5292205811} 
+{"eps_id":904116594,"obs":[-0.1642489582,-0.3823475838,0.0282811914,0.5764286518],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1718959212,-0.1876332462,0.0398097634,0.2927874625],"action_prob":0.9329790473,"action_logp":-0.0693725049,"action_dist_inputs":[-1.3138841391,1.3194943666],"value_targets":71.2416381836} +{"eps_id":904116594,"obs":[-0.1718959212,-0.1876332462,0.0398097634,0.2927874625],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.175648585,-0.3832994998,0.0456655137,0.5977553129],"action_prob":0.119083412,"action_logp":-2.127931118,"action_dist_inputs":[-0.9993560314,1.0017827749],"value_targets":70.9511489868} +{"eps_id":904116594,"obs":[-0.175648585,-0.3832994998,0.0456655137,0.5977553129],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1833145767,-0.5790296793,0.0576206185,0.9044654965],"action_prob":0.0635528341,"action_logp":-2.7558836937,"action_dist_inputs":[-1.3421653509,1.3480561972],"value_targets":70.6577301025} +{"eps_id":904116594,"obs":[-0.1833145767,-0.5790296793,0.0576206185,0.9044654965],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1948951632,-0.3847334683,0.0757099316,0.6304359436],"action_prob":0.9510720968,"action_logp":-0.0501654148,"action_dist_inputs":[-1.4793561697,1.4878855944],"value_targets":70.3613433838} +{"eps_id":904116594,"obs":[-0.1948951632,-0.3847334683,0.0757099316,0.6304359436],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2025898397,-0.1907450557,0.088318646,0.3625243306],"action_prob":0.9404995441,"action_logp":-0.0613441058,"action_dist_inputs":[-1.3770468235,1.3833807707],"value_targets":70.061958313} 
+{"eps_id":904116594,"obs":[-0.2025898397,-0.1907450557,0.088318646,0.3625243306],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2064047307,0.0030178658,0.0955691338,0.0989447013],"action_prob":0.9138911963,"action_logp":-0.0900437832,"action_dist_inputs":[-1.1793267727,1.1827725172],"value_targets":69.7595596313} +{"eps_id":904116594,"obs":[-0.2064047307,0.0030178658,0.0955691338,0.0989447013],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.206344381,0.1966494471,0.0975480303,-0.1621227115],"action_prob":0.8277480602,"action_logp":-0.1890464723,"action_dist_inputs":[-0.7848966718,0.7848537564],"value_targets":69.4540939331} +{"eps_id":904116594,"obs":[-0.206344381,0.1966494471,0.0975480303,-0.1621227115],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2024113834,0.0002761639,0.0943055749,0.1596705168],"action_prob":0.4479058683,"action_logp":-0.8031721711,"action_dist_inputs":[-0.1064511687,0.1026842594],"value_targets":69.1455535889} +{"eps_id":904116594,"obs":[-0.2024113834,0.0002761639,0.0943055749,0.1596705168],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.20240587,0.1939303279,0.0974989831,-0.1018352285],"action_prob":0.8620004058,"action_logp":-0.1484995186,"action_dist_inputs":[-0.915612638,0.9163927436],"value_targets":68.8338928223} +{"eps_id":904116594,"obs":[-0.20240587,0.1939303279,0.0974989831,-0.1018352285],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1985272616,-0.0024439059,0.095462285,0.2199464887],"action_prob":0.3435184062,"action_logp":-1.0685145855,"action_dist_inputs":[-0.3253290355,0.3223249614],"value_targets":68.5190811157} 
+{"eps_id":904116594,"obs":[-0.1985272616,-0.0024439059,0.095462285,0.2199464887],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1985761374,0.1911930442,0.0998612121,-0.0411631763],"action_prob":0.8855349422,"action_logp":-0.1215633452,"action_dist_inputs":[-1.0221580267,1.0237644911],"value_targets":68.2010955811} +{"eps_id":904116594,"obs":[-0.1985761374,0.1911930442,0.0998612121,-0.0411631763],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1947522759,0.3847517371,0.0990379453,-0.3007443249],"action_prob":0.7417244315,"action_logp":-0.2987774909,"action_dist_inputs":[-0.528531909,0.5264189243],"value_targets":67.8798904419} +{"eps_id":904116594,"obs":[-0.1947522759,0.3847517371,0.0990379453,-0.3007443249],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1870572418,0.5783328414,0.0930230618,-0.5606235266],"action_prob":0.4036538899,"action_logp":-0.9071974754,"action_dist_inputs":[0.192302078,-0.1979612112],"value_targets":67.5554504395} +{"eps_id":904116594,"obs":[-0.1870572418,0.5783328414,0.0930230618,-0.5606235266],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1754905879,0.3820369244,0.0818105936,-0.2401432991],"action_prob":0.8476299644,"action_logp":-0.165311113,"action_dist_inputs":[0.8542113304,-0.8619205952],"value_targets":67.227722168} +{"eps_id":904116594,"obs":[-0.1754905879,0.3820369244,0.0818105936,-0.2401432991],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1678498387,0.5759006739,0.0770077258,-0.5059408545],"action_prob":0.4791647196,"action_logp":-0.7357108593,"action_dist_inputs":[0.0391691327,-0.044220224],"value_targets":66.8966903687} 
+{"eps_id":904116594,"obs":[-0.1678498387,0.5759006739,0.0770077258,-0.5059408545],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1563318372,0.7698578238,0.0668889061,-0.7733964324],"action_prob":0.1803245097,"action_logp":-1.7129971981,"action_dist_inputs":[0.7533606887,-0.7607896924],"value_targets":66.5623168945} +{"eps_id":904116594,"obs":[-0.1563318372,0.7698578238,0.0668889061,-0.7733964324],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1409346759,0.5738825202,0.0514209792,-0.460440129],"action_prob":0.9206051826,"action_logp":-0.0827239975,"action_dist_inputs":[1.2209039927,-1.2296949625],"value_targets":66.2245635986} +{"eps_id":904116594,"obs":[-0.1409346759,0.5738825202,0.0514209792,-0.460440129],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1294570267,0.3780728877,0.0422121771,-0.1520028263],"action_prob":0.8064604998,"action_logp":-0.2151003331,"action_dist_inputs":[0.710018456,-0.7171550393],"value_targets":65.883392334} +{"eps_id":904116594,"obs":[-0.1294570267,0.3780728877,0.0422121771,-0.1520028263],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1218955666,0.572565794,0.0391721204,-0.4310756028],"action_prob":0.5603272319,"action_logp":-0.5792343616,"action_dist_inputs":[-0.1232339814,0.1192560643],"value_targets":65.5387802124} +{"eps_id":904116594,"obs":[-0.1218955666,0.572565794,0.0391721204,-0.4310756028],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1104442477,0.37691167,0.0305506084,-0.126305908],"action_prob":0.7920210361,"action_logp":-0.2331672907,"action_dist_inputs":[0.6651227474,-0.672028482],"value_targets":65.1906890869} 
+{"eps_id":904116594,"obs":[-0.1104442477,0.37691167,0.0305506084,-0.126305908],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1029060185,0.1813656688,0.0280244891,0.1758566946],"action_prob":0.4169372022,"action_logp":-0.8748196959,"action_dist_inputs":[-0.1694733202,0.16588597],"value_targets":64.8390808105} +{"eps_id":904116594,"obs":[-0.1029060185,0.1813656688,0.0280244891,0.1758566946],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0992787033,0.3760755956,0.0315416232,-0.1078553721],"action_prob":0.8539350033,"action_logp":-0.1579002291,"action_dist_inputs":[-0.8823871613,0.8834158778],"value_targets":64.4839172363} +{"eps_id":904116594,"obs":[-0.0992787033,0.3760755956,0.0315416232,-0.1078553721],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0917571932,0.5707316995,0.0293845162,-0.3904225826],"action_prob":0.6161800623,"action_logp":-0.4842160046,"action_dist_inputs":[-0.2382905781,0.2350752056],"value_targets":64.1251678467} +{"eps_id":904116594,"obs":[-0.0917571932,0.5707316995,0.0293845162,-0.3904225826],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.080342561,0.7654245496,0.0215760656,-0.6736979485],"action_prob":0.240012601,"action_logp":-1.4270638227,"action_dist_inputs":[0.5730540752,-0.5795561671],"value_targets":63.7627983093} +{"eps_id":904116594,"obs":[-0.080342561,0.7654245496,0.0215760656,-0.6736979485],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0650340691,0.57000947,0.008102105,-0.3743007183],"action_prob":0.9101897478,"action_logp":-0.0941022038,"action_dist_inputs":[1.1538779736,-1.1620755196],"value_targets":63.3967666626} 
+{"eps_id":904116594,"obs":[-0.0650340691,0.57000947,0.008102105,-0.3743007183],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0536338799,0.3747733533,0.0006160912,-0.0790741816],"action_prob":0.7720806003,"action_logp":-0.2586663067,"action_dist_inputs":[0.6069049239,-0.6131919026],"value_targets":63.0270347595} +{"eps_id":904116594,"obs":[-0.0536338799,0.3747733533,0.0006160912,-0.0790741816],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0461384133,0.569886446,-0.0009653924,-0.3715626597],"action_prob":0.6075309515,"action_logp":-0.49835217,"action_dist_inputs":[-0.2198422253,0.2171031982],"value_targets":62.6535720825} +{"eps_id":904116594,"obs":[-0.0461384133,0.569886446,-0.0009653924,-0.3715626597],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0347406827,0.3747782409,-0.008396646,-0.0791842937],"action_prob":0.7826638222,"action_logp":-0.2450520247,"action_dist_inputs":[0.6375507712,-0.6437069774],"value_targets":62.2763366699} +{"eps_id":904116594,"obs":[-0.0347406827,0.3747782409,-0.008396646,-0.0791842937],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0272451192,0.5700195432,-0.0099803312,-0.3745045066],"action_prob":0.5895593762,"action_logp":-0.5283798575,"action_dist_inputs":[-0.1824057102,0.1797384471],"value_targets":61.8952865601} +{"eps_id":904116594,"obs":[-0.0272451192,0.5700195432,-0.0099803312,-0.3745045066],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0158447288,0.3750407696,-0.0174704213,-0.0849850923],"action_prob":0.7989748716,"action_logp":-0.2244258076,"action_dist_inputs":[0.6869208217,-0.6929789782],"value_targets":61.5103912354} 
+{"eps_id":904116594,"obs":[-0.0158447288,0.3750407696,-0.0174704213,-0.0849850923],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0083439136,0.1801735461,-0.0191701241,0.202135101],"action_prob":0.4401890337,"action_logp":-0.8205510378,"action_dist_inputs":[-0.121532388,0.1188625321],"value_targets":61.1216087341} +{"eps_id":904116594,"obs":[-0.0083439136,0.1801735461,-0.0191701241,0.202135101],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0047404421,0.3755643368,-0.0151274214,-0.0965329185],"action_prob":0.8457416892,"action_logp":-0.1675413251,"action_dist_inputs":[-0.8499422669,0.8516430259],"value_targets":60.7288970947} +{"eps_id":904116594,"obs":[-0.0047404421,0.3755643368,-0.0151274214,-0.0965329185],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0027708444,0.5708997846,-0.0170580801,-0.3939498663],"action_prob":0.5419385433,"action_logp":-0.6126027107,"action_dist_inputs":[-0.085423395,0.0827257261],"value_targets":60.3322181702} +{"eps_id":904116594,"obs":[0.0027708444,0.5708997846,-0.0170580801,-0.3939498663],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0141888401,0.3760240078,-0.0249370784,-0.1066935435],"action_prob":0.828330338,"action_logp":-0.1883432716,"action_dist_inputs":[0.7839395404,-0.7899004221],"value_targets":59.9315338135} +{"eps_id":904116594,"obs":[0.0141888401,0.3760240078,-0.0249370784,-0.1066935435],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0217093211,0.5714942813,-0.0270709489,-0.4071385264],"action_prob":0.4993661344,"action_logp":-0.6944156885,"action_dist_inputs":[-0.000106134,-0.0026415652],"value_targets":59.526802063} 
+{"eps_id":904116594,"obs":[0.0217093211,0.5714942813,-0.0270709489,-0.4071385264],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0331392065,0.3767664433,-0.0352137201,-0.1231116652],"action_prob":0.8497216105,"action_logp":-0.1628465056,"action_dist_inputs":[0.8632425666,-0.8691769838],"value_targets":59.117980957} +{"eps_id":904116594,"obs":[0.0331392065,0.3767664433,-0.0352137201,-0.1231116652],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0406745337,0.1821662039,-0.0376759507,0.1582570076],"action_prob":0.5581627488,"action_logp":-0.58310467,"action_dist_inputs":[0.115425393,-0.1182835773],"value_targets":58.7050323486} +{"eps_id":904116594,"obs":[0.0406745337,0.1821662039,-0.0376759507,0.1582570076],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0443178602,0.3778067529,-0.0345108137,-0.1460696459],"action_prob":0.800747633,"action_logp":-0.2222094685,"action_dist_inputs":[-0.6948152781,0.6961580515],"value_targets":58.2879104614} +{"eps_id":904116594,"obs":[0.0443178602,0.3778067529,-0.0345108137,-0.1460696459],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0518739931,0.1831955761,-0.0374322049,0.1355291307],"action_prob":0.6035535336,"action_logp":-0.5049205422,"action_dist_inputs":[0.2086449713,-0.211648792],"value_targets":57.8665771484} +{"eps_id":904116594,"obs":[0.0518739931,0.1831955761,-0.0374322049,0.1355291307],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0555379055,-0.0113707799,-0.0347216204,0.4161717296],"action_prob":0.2211954743,"action_logp":-1.508708477,"action_dist_inputs":[-0.6287891269,0.6299241781],"value_targets":57.4409866333} 
+{"eps_id":904116594,"obs":[0.0555379055,-0.0113707799,-0.0347216204,0.4161717296],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0553104915,0.1842256188,-0.0263981875,0.1127478555],"action_prob":0.9100135565,"action_logp":-0.094295755,"action_dist_inputs":[-1.1545598507,1.1592410803],"value_targets":57.0110969543} +{"eps_id":904116594,"obs":[0.0553104915,0.1842256188,-0.0263981875,0.1127478555],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.058995001,-0.0105083222,-0.0241432302,0.3969868422],"action_prob":0.2328069359,"action_logp":-1.4575457573,"action_dist_inputs":[-0.5957792997,0.5967496634],"value_targets":56.5768661499} +{"eps_id":904116594,"obs":[0.058995001,-0.0105083222,-0.0241432302,0.3969868422],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.058784835,0.1849476993,-0.0162034947,0.0967908204],"action_prob":0.9088231921,"action_logp":-0.0956047103,"action_dist_inputs":[-1.1473671198,1.1519825459],"value_targets":56.1382484436} +{"eps_id":904116594,"obs":[0.058784835,0.1849476993,-0.0162034947,0.0967908204],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0624837913,0.3802980781,-0.0142676774,-0.2009599209],"action_prob":0.7616939545,"action_logp":-0.2722104192,"action_dist_inputs":[-0.580550313,0.5814387798],"value_targets":55.6952018738} +{"eps_id":904116594,"obs":[0.0624837913,0.3802980781,-0.0142676774,-0.2009599209],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.07008975,0.1853830665,-0.0182868764,0.087188296],"action_prob":0.6669955254,"action_logp":-0.4049719274,"action_dist_inputs":[0.3456870615,-0.3489401937],"value_targets":55.2476768494} 
+{"eps_id":904116594,"obs":[0.07008975,0.1853830665,-0.0182868764,0.087188296],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0737974122,0.3807623088,-0.016543109,-0.2112075984],"action_prob":0.7467365265,"action_logp":-0.2920428813,"action_dist_inputs":[-0.5402242541,0.5410578847],"value_targets":54.7956352234} +{"eps_id":904116594,"obs":[0.0737974122,0.3807623088,-0.016543109,-0.2112075984],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0814126581,0.1858807504,-0.0207672622,0.0762112886],"action_prob":0.6902884841,"action_logp":-0.3706456721,"action_dist_inputs":[0.3991028666,-0.4023654759],"value_targets":54.3390235901} +{"eps_id":904116594,"obs":[0.0814126581,0.1858807504,-0.0207672622,0.0762112886],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0851302743,0.3812941611,-0.0192430355,-0.2229507565],"action_prob":0.7278869748,"action_logp":-0.3176094592,"action_dist_inputs":[-0.4915856719,0.4923427999],"value_targets":53.8778038025} +{"eps_id":904116594,"obs":[0.0851302743,0.3812941611,-0.0192430355,-0.2229507565],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0927561596,0.1864524633,-0.0237020515,0.0636005774],"action_prob":0.7153723836,"action_logp":-0.3349520862,"action_dist_inputs":[0.4591679573,-0.4624535441],"value_targets":53.4119224548} +{"eps_id":904116594,"obs":[0.0927561596,0.1864524633,-0.0237020515,0.0636005774],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.096485205,0.3819060922,-0.0224300399,-0.2364652604],"action_prob":0.7039556503,"action_logp":-0.3510399163,"action_dist_inputs":[-0.4327744842,0.4334316552],"value_targets":52.9413375854} 
+{"eps_id":904116594,"obs":[0.096485205,0.3819060922,-0.0224300399,-0.2364652604],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1041233316,0.1871116459,-0.0271593444,0.0490590148],"action_prob":0.7419181466,"action_logp":-0.2985163629,"action_dist_inputs":[0.5263189077,-0.5296432376],"value_targets":52.4659957886} +{"eps_id":904116594,"obs":[0.1041233316,0.1871116459,-0.0271593444,0.0490590148],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1078655645,-0.0076105455,-0.0261781644,0.3330507278],"action_prob":0.3265711069,"action_logp":-1.119107604,"action_dist_inputs":[-0.3616017103,0.3621330559],"value_targets":51.9858551025} +{"eps_id":904116594,"obs":[0.1078655645,-0.0076105455,-0.0261781644,0.3330507278],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1077133492,0.187874034,-0.0195171498,0.03222882],"action_prob":0.8908677101,"action_logp":-0.1155593544,"action_dist_inputs":[-1.0476548672,1.0519800186],"value_targets":51.5008621216} +{"eps_id":904116594,"obs":[0.1077133492,0.187874034,-0.0195171498,0.03222882],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1114708334,0.3832703531,-0.018872574,-0.2665475011],"action_prob":0.6584396958,"action_logp":-0.4178823233,"action_dist_inputs":[-0.3279672563,0.3283814192],"value_targets":51.0109710693} +{"eps_id":904116594,"obs":[0.1114708334,0.3832703531,-0.018872574,-0.2665475011],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1191362366,0.1884227693,-0.024203524,0.0201236084],"action_prob":0.7774939537,"action_logp":-0.2516793907,"action_dist_inputs":[0.6238443255,-0.6272773147],"value_targets":50.5161323547} 
+{"eps_id":904116594,"obs":[0.1191362366,0.1884227693,-0.024203524,0.0201236084],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1229046956,-0.0063438672,-0.0238010511,0.305072844],"action_prob":0.3742844462,"action_logp":-0.9827392101,"action_dist_inputs":[-0.2567810714,0.2570987344],"value_targets":50.0162963867} +{"eps_id":904116594,"obs":[0.1229046956,-0.0063438672,-0.0238010511,0.305072844],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1227778196,0.1891090423,-0.0176995937,0.0049796863],"action_prob":0.8806625009,"action_logp":-0.1270808131,"action_dist_inputs":[-0.9972766042,1.001442194],"value_targets":49.5114097595} +{"eps_id":904116594,"obs":[0.1227778196,0.1891090423,-0.0176995937,0.0049796863],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1265600026,-0.0057546641,-0.0176000018,0.2920260429],"action_prob":0.3901744783,"action_logp":-0.9411612749,"action_dist_inputs":[-0.2231815457,0.2233972996],"value_targets":49.0014266968} +{"eps_id":904116594,"obs":[0.1265600026,-0.0057546641,-0.0176000018,0.2920260429],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.126444906,0.1896137595,-0.0117594795,-0.0061553274],"action_prob":0.8779460788,"action_logp":-0.130170092,"action_dist_inputs":[-0.9845076799,0.9886143804],"value_targets":48.486289978} +{"eps_id":904116594,"obs":[0.126444906,0.1896137595,-0.0117594795,-0.0061553274],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.130237177,0.3849023581,-0.0118825864,-0.3025251925],"action_prob":0.6002277136,"action_logp":-0.5104461312,"action_dist_inputs":[-0.2031271756,0.2032869011],"value_targets":47.9659461975} 
+{"eps_id":904116594,"obs":[0.130237177,0.3849023581,-0.0118825864,-0.3025251925],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1379352212,0.1899517626,-0.0179330911,-0.0136133339],"action_prob":0.809352994,"action_logp":-0.2115201056,"action_dist_inputs":[0.7211381197,-0.7246736288],"value_targets":47.4403495789} +{"eps_id":904116594,"obs":[0.1379352212,0.1899517626,-0.0179330911,-0.0136133339],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1417342573,-0.0049084784,-0.0182053577,0.2733579874],"action_prob":0.4297953844,"action_logp":-0.844446063,"action_dist_inputs":[-0.1412834376,0.1414026022],"value_targets":46.9094467163} +{"eps_id":904116594,"obs":[0.1417342573,-0.0049084784,-0.0182053577,0.2733579874],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1416360885,-0.1997659951,-0.0127381971,0.5602437258],"action_prob":0.1321672499,"action_logp":-2.0236871243,"action_dist_inputs":[-0.9389582276,0.9429726601],"value_targets":46.3731765747} +{"eps_id":904116594,"obs":[0.1416360885,-0.1997659951,-0.0127381971,0.5602437258],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1376407743,-0.0044676014,-0.0015333226,0.2635749876],"action_prob":0.9366633296,"action_logp":-0.065431349,"action_dist_inputs":[-1.3434642553,1.350394845],"value_targets":45.8314933777} +{"eps_id":904116594,"obs":[0.1376407743,-0.0044676014,-0.0015333226,0.2635749876],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.137551412,0.1906761974,0.003738177,-0.0295911729],"action_prob":0.8729193211,"action_logp":-0.1359121501,"action_dist_inputs":[-0.9615014195,0.9655193686],"value_targets":45.2843360901} 
+{"eps_id":904116594,"obs":[0.137551412,0.1906761974,0.003738177,-0.0295911729],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.141364947,0.3857443333,0.0031463536,-0.3210923374],"action_prob":0.5856952667,"action_logp":-0.5349556208,"action_dist_inputs":[-0.1730508804,0.173147127],"value_targets":44.7316513062} +{"eps_id":904116594,"obs":[0.141364947,0.3857443333,0.0031463536,-0.3210923374],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1490798295,0.1905777305,-0.0032754927,-0.0274188221],"action_prob":0.8112999797,"action_logp":-0.2091174275,"action_dist_inputs":[0.7274863124,-0.7309927344],"value_targets":44.1733856201} +{"eps_id":904116594,"obs":[0.1490798295,0.1905777305,-0.0032754927,-0.0274188221],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1528913826,-0.0044970945,-0.0038238692,0.2642288506],"action_prob":0.4269078076,"action_logp":-0.8511872292,"action_dist_inputs":[-0.1471565664,0.1473219544],"value_targets":43.6094818115} +{"eps_id":904116594,"obs":[0.1528913826,-0.0044970945,-0.0038238692,0.2642288506],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1528014392,0.1906792223,0.0014607079,-0.0296577029],"action_prob":0.8716828823,"action_logp":-0.1373295784,"action_dist_inputs":[-0.9559011459,0.9600200653],"value_targets":43.0398788452} +{"eps_id":904116594,"obs":[0.1528014392,0.1906792223,0.0014607079,-0.0296577029],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1566150337,0.3857801855,0.0008675538,-0.3218794167],"action_prob":0.5785181522,"action_logp":-0.547285378,"action_dist_inputs":[-0.1582429409,0.1584502459],"value_targets":42.4645233154} 
+{"eps_id":904116594,"obs":[0.1566150337,0.3857801855,0.0008675538,-0.3218794167],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1643306315,0.1906459033,-0.005570034,-0.0289230086],"action_prob":0.8148264289,"action_logp":-0.2047801465,"action_dist_inputs":[0.7391417027,-0.7425397038],"value_targets":41.8833580017} +{"eps_id":904116594,"obs":[0.1643306315,0.1906459033,-0.005570034,-0.0289230086],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1681435555,0.3858473003,-0.0061484943,-0.3233581185],"action_prob":0.562790513,"action_logp":-0.5748477578,"action_dist_inputs":[-0.1261177063,0.1263774186],"value_targets":41.2963218689} +{"eps_id":904116594,"obs":[0.1681435555,0.3858473003,-0.0061484943,-0.3233581185],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1758604944,0.1908134222,-0.0126156565,-0.0326205082],"action_prob":0.8235075474,"action_logp":-0.19418253,"action_dist_inputs":[0.7684688568,-0.7718260288],"value_targets":40.7033538818} +{"eps_id":904116594,"obs":[0.1758604944,0.1908134222,-0.0126156565,-0.0326205082],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1796767712,0.3861140013,-0.0132680675,-0.3292569518],"action_prob":0.5375808477,"action_logp":-0.6206761599,"action_dist_inputs":[-0.0751723498,0.0754350573],"value_targets":40.1044006348} +{"eps_id":904116594,"obs":[0.1796767712,0.3861140013,-0.0132680675,-0.3292569518],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1873990446,0.191183418,-0.0198532064,-0.040787518],"action_prob":0.8351513743,"action_logp":-0.1801422685,"action_dist_inputs":[0.8096138239,-0.8129716516],"value_targets":39.4993934631} 
+{"eps_id":904116594,"obs":[0.1873990446,0.191183418,-0.0198532064,-0.040787518],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1912227124,0.3865843415,-0.0206689574,-0.3396676183],"action_prob":0.5022748113,"action_logp":-0.6886078715,"action_dist_inputs":[-0.0044406061,0.0046586171],"value_targets":38.8882751465} +{"eps_id":904116594,"obs":[0.1912227124,0.3865843415,-0.0206689574,-0.3396676183],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1989544034,0.191762507,-0.0274623092,-0.0535735004],"action_prob":0.8489066958,"action_logp":-0.1638060212,"action_dist_inputs":[0.8613233566,-0.8647280335],"value_targets":38.2709846497} +{"eps_id":904116594,"obs":[0.1989544034,0.191762507,-0.0274623092,-0.0535735004],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2027896494,-0.0029551343,-0.0285337791,0.2303199172],"action_prob":0.5433576107,"action_logp":-0.6099875569,"action_dist_inputs":[0.0869968832,-0.0868703872],"value_targets":37.6474609375} +{"eps_id":904116594,"obs":[0.2027896494,-0.0029551343,-0.0285337791,0.2303199172],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2027305514,0.1925626844,-0.0239273813,-0.0712252334],"action_prob":0.8266111612,"action_logp":-0.190420866,"action_dist_inputs":[-0.7789408565,0.7828569412],"value_targets":37.0176353455} +{"eps_id":904116594,"obs":[0.2027305514,0.1925626844,-0.0239273813,-0.0712252334],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2065818012,0.3880193532,-0.0253518857,-0.371360302],"action_prob":0.4278627932,"action_logp":-0.8489527106,"action_dist_inputs":[0.1452854574,-0.1452907622],"value_targets":36.3814506531} 
+{"eps_id":904116594,"obs":[0.2065818012,0.3880193532,-0.0253518857,-0.371360302],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2143421918,0.1932665855,-0.0327790901,-0.0867776573],"action_prob":0.8707227111,"action_logp":-0.1384316981,"action_dist_inputs":[0.9518802166,-0.9554841518],"value_targets":35.7388381958} +{"eps_id":904116594,"obs":[0.2143421918,0.1932665855,-0.0327790901,-0.0867776573],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2182075232,-0.0013705504,-0.0345146433,0.1953858733],"action_prob":0.622720778,"action_logp":-0.4736570716,"action_dist_inputs":[0.2504948378,-0.2506177425],"value_targets":35.0897369385} +{"eps_id":904116594,"obs":[0.2182075232,-0.0013705504,-0.0345146433,0.1953858733],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2181801051,-0.1959822327,-0.0306069274,0.4769842625],"action_prob":0.2173860967,"action_logp":-1.5260802507,"action_dist_inputs":[-0.6386759877,0.6422883272],"value_targets":34.4340782166} +{"eps_id":904116594,"obs":[0.2181801051,-0.1959822327,-0.0306069274,0.4769842625],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2142604589,-0.0004417991,-0.0210672412,0.1748139411],"action_prob":0.920309186,"action_logp":-0.0830456093,"action_dist_inputs":[-1.2199761868,1.2265793085],"value_targets":33.7717971802} +{"eps_id":904116594,"obs":[0.2142604589,-0.0004417991,-0.0210672412,0.1748139411],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2142516226,0.1949752271,-0.0175709631,-0.1244398504],"action_prob":0.7762016654,"action_logp":-0.2533428967,"action_dist_inputs":[-0.6201034188,0.6235636473],"value_targets":33.1028251648} 
+{"eps_id":904116594,"obs":[0.2142516226,0.1949752271,-0.0175709631,-0.1244398504],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2181511372,0.0001093509,-0.0200597588,0.1626482457],"action_prob":0.661024332,"action_logp":-0.4139645994,"action_dist_inputs":[0.333725363,-0.3341369927],"value_targets":32.4270935059} +{"eps_id":904116594,"obs":[0.2181511372,0.0001093509,-0.0200597588,0.1626482457],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2181533128,0.1955126375,-0.0168067943,-0.1362949014],"action_prob":0.7617185116,"action_logp":-0.2721782029,"action_dist_inputs":[-0.5793789029,0.5827457309],"value_targets":31.7445411682} +{"eps_id":904116594,"obs":[0.2181533128,0.1955126375,-0.0168067943,-0.1362949014],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2220635712,0.3908712268,-0.0195326935,-0.434232384],"action_prob":0.3200196624,"action_logp":-1.1393728256,"action_dist_inputs":[0.3765947521,-0.3770867884],"value_targets":31.0550918579} +{"eps_id":904116594,"obs":[0.2220635712,0.3908712268,-0.0195326935,-0.434232384],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2298810035,0.196031183,-0.0282173399,-0.1477703005],"action_prob":0.8938661814,"action_logp":-0.1121991798,"action_dist_inputs":[1.0634125471,-1.0674426556],"value_targets":30.3586788177} +{"eps_id":904116594,"obs":[0.2298810035,0.196031183,-0.0282173399,-0.1477703005],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2338016182,0.0013244409,-0.0311727468,0.1358787566],"action_prob":0.7177456021,"action_logp":-0.3316400945,"action_dist_inputs":[0.4663633108,-0.466943115],"value_targets":29.6552295685} 
+{"eps_id":904116594,"obs":[0.2338016182,0.0013244409,-0.0311727468,0.1358787566],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2338281125,-0.1933374554,-0.0284551717,0.4185664654],"action_prob":0.2977835536,"action_logp":-1.2113883495,"action_dist_inputs":[-0.4273717403,0.4305030406],"value_targets":28.9446773529} +{"eps_id":904116594,"obs":[0.2338281125,-0.1933374554,-0.0284551717,0.4185664654],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2299613655,-0.3880448639,-0.0200838428,0.7021446228],"action_prob":0.0962528959,"action_logp":-2.3407762051,"action_dist_inputs":[-1.1166625023,1.1229081154],"value_targets":28.2269458771} +{"eps_id":904116594,"obs":[0.2299613655,-0.3880448639,-0.0200838428,0.7021446228],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2222004682,-0.1926503927,-0.0060409489,0.4032077789],"action_prob":0.9477027655,"action_logp":-0.0537143759,"action_dist_inputs":[-1.4443109035,1.4527856112],"value_targets":27.5019664764} +{"eps_id":904116594,"obs":[0.2222004682,-0.1926503927,-0.0060409489,0.4032077789],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.21834746,0.0025567107,0.0020232066,0.1086264402],"action_prob":0.9079115391,"action_logp":-0.0966083035,"action_dist_inputs":[-1.1411024332,1.1472952366],"value_targets":26.7696628571} +{"eps_id":904116594,"obs":[0.21834746,0.0025567107,0.0020232066,0.1086264402],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2183985859,0.197649613,0.0041957353,-0.1834174842],"action_prob":0.7188330889,"action_logp":-0.3301261067,"action_dist_inputs":[-0.467867136,0.4708136022],"value_targets":26.0299625397} 
+{"eps_id":904116594,"obs":[0.2183985859,0.197649613,0.0041957353,-0.1834174842],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2223515809,0.0024678768,0.0005273857,0.110586077],"action_prob":0.7175414562,"action_logp":-0.3319245279,"action_dist_inputs":[0.4657369852,-0.4665618539],"value_targets":25.2827911377} +{"eps_id":904116594,"obs":[0.2223515809,0.0024678768,0.0005273857,0.110586077],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2224009335,0.1975822598,0.0027391072,-0.1819304079],"action_prob":0.7188030481,"action_logp":-0.3301678598,"action_dist_inputs":[-0.467772156,0.470759958],"value_targets":24.5280704498} +{"eps_id":904116594,"obs":[0.2224009335,0.1975822598,0.0027391072,-0.1819304079],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2263525873,0.0024212284,-0.0008995009,0.1116153449],"action_prob":0.7178087831,"action_logp":-0.3315520287,"action_dist_inputs":[0.4664170444,-0.4672014117],"value_targets":23.7657279968} +{"eps_id":904116594,"obs":[0.2263525873,0.0024212284,-0.0008995009,0.1116153449],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2264010012,0.1975560635,0.001332806,-0.1813512295],"action_prob":0.7174318433,"action_logp":-0.3320772946,"action_dist_inputs":[-0.464369148,0.4673889577],"value_targets":22.9956855774} +{"eps_id":904116594,"obs":[0.2264010012,0.1975560635,0.001332806,-0.1813512295],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2303521335,0.0024150601,-0.0022942189,0.111751847],"action_prob":0.7193052173,"action_logp":-0.329469502,"action_dist_inputs":[0.4701325595,-0.4708851576],"value_targets":22.2178649902} 
+{"eps_id":904116594,"obs":[0.2303521335,0.0024150601,-0.0022942189,0.111751847],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2304004282,0.1975698173,-0.0000591819,-0.1816540062],"action_prob":0.7147009373,"action_logp":-0.335891068,"action_dist_inputs":[-0.4576419294,0.4606844783],"value_targets":21.4321861267} +{"eps_id":904116594,"obs":[0.2304004282,0.1975698173,-0.0000591819,-0.1816540062],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2343518287,0.0024487062,-0.0036922621,0.111010246],"action_prob":0.7220218182,"action_logp":-0.3256999552,"action_dist_inputs":[0.476891458,-0.4776210189],"value_targets":20.6385707855} +{"eps_id":904116594,"obs":[0.2343518287,0.0024487062,-0.0036922621,0.111010246],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2344007939,0.1976233721,-0.0014720571,-0.1828352809],"action_prob":0.7105408907,"action_logp":-0.3417288065,"action_dist_inputs":[-0.4474781752,0.4505342245],"value_targets":19.8369407654} +{"eps_id":904116594,"obs":[0.2344007939,0.1976233721,-0.0014720571,-0.1828352809],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2383532673,0.0025225133,-0.0051287627,0.1093829051],"action_prob":0.725961566,"action_logp":-0.3202582002,"action_dist_inputs":[0.4867566526,-0.4874721169],"value_targets":19.0272140503} +{"eps_id":904116594,"obs":[0.2383532673,0.0025225133,-0.0051287627,0.1093829051],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2384037226,-0.1925255656,-0.0029411046,0.4004433453],"action_prob":0.2951719463,"action_logp":-1.2201972008,"action_dist_inputs":[-0.4336676598,0.4367280602],"value_targets":18.2093067169} 
+{"eps_id":904116594,"obs":[0.2384037226,-0.1925255656,-0.0029411046,0.4004433453],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.234553203,-0.3876056671,0.005067762,0.6921975613],"action_prob":0.0921942294,"action_logp":-2.3838577271,"action_dist_inputs":[-1.140422821,1.1467097998],"value_targets":17.3831367493} +{"eps_id":904116594,"obs":[0.234553203,-0.3876056671,0.005067762,0.6921975613],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2268010974,-0.1925543994,0.0189117137,0.4011143148],"action_prob":0.949641645,"action_logp":-0.0516706035,"action_dist_inputs":[-1.4641654491,1.472753644],"value_targets":16.5486240387} +{"eps_id":904116594,"obs":[0.2268010974,-0.1925543994,0.0189117137,0.4011143148],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2229500115,0.0022942617,0.0269339997,0.1144532859],"action_prob":0.9161543846,"action_logp":-0.0875704139,"action_dist_inputs":[-1.1924206018,1.1987861395],"value_targets":15.7056808472} +{"eps_id":904116594,"obs":[0.2229500115,0.0022942617,0.0269339997,0.1144532859],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2229958922,0.1970201433,0.0292230658,-0.1696119457],"action_prob":0.7656821012,"action_logp":-0.266988188,"action_dist_inputs":[-0.5904375315,0.5936509967],"value_targets":14.8542232513} +{"eps_id":904116594,"obs":[0.2229958922,0.1970201433,0.0292230658,-0.1696119457],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2269362956,0.3917118907,0.0258308258,-0.4529345036],"action_prob":0.3489925861,"action_logp":-1.0527045727,"action_dist_inputs":[0.3114890158,-0.3119813204],"value_targets":13.9941644669} 
+{"eps_id":904116594,"obs":[0.2269362956,0.3917118907,0.0258308258,-0.4529345036],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2347705364,0.1962343603,0.0167721361,-0.1522226185],"action_prob":0.8803359866,"action_logp":-0.1274516433,"action_dist_inputs":[0.9959113002,-0.999704659],"value_targets":13.125418663} +{"eps_id":904116594,"obs":[0.2347705364,0.1962343603,0.0167721361,-0.1522226185],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2386952192,0.0008763127,0.0137276836,0.1457040012],"action_prob":0.6451407671,"action_logp":-0.4382867515,"action_dist_inputs":[0.2987255156,-0.2990217507],"value_targets":12.2478981018} +{"eps_id":904116594,"obs":[0.2386952192,0.0008763127,0.0137276836,0.1457040012],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2387127429,0.195799008,0.016641764,-0.1426166892],"action_prob":0.7847023606,"action_logp":-0.2424507737,"action_dist_inputs":[-0.6448566914,0.6484265327],"value_targets":11.3615131378} +{"eps_id":904116594,"obs":[0.2387127429,0.195799008,0.016641764,-0.1426166892],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2426287234,0.3906787336,0.0137894293,-0.4300033152],"action_prob":0.3717148602,"action_logp":-0.9896282554,"action_dist_inputs":[0.2623490095,-0.2625180483],"value_targets":10.4661741257} +{"eps_id":904116594,"obs":[0.2426287234,0.3906787336,0.0137894293,-0.4300033152],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2504422963,0.1953642368,0.0051893634,-0.1330054849],"action_prob":0.8758787513,"action_logp":-0.1325275898,"action_dist_inputs":[0.975204289,-0.9787645936],"value_targets":9.5617923737} 
+{"eps_id":904116594,"obs":[0.2504422963,0.1953642368,0.0051893634,-0.1330054849],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2543495893,0.0001683389,0.0025292535,0.1613100916],"action_prob":0.6344639659,"action_logp":-0.4549747705,"action_dist_inputs":[0.2756865919,-0.2757290602],"value_targets":8.6482753754} +{"eps_id":904116594,"obs":[0.2543495893,0.0001683389,0.0025292535,0.1613100916],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.254352957,-0.1949897259,0.0057554552,0.4547898471],"action_prob":0.2132448405,"action_logp":-1.545314312,"action_dist_inputs":[-0.6508547068,0.6546213627],"value_targets":7.7255306244} +{"eps_id":904116594,"obs":[0.254352957,-0.1949897259,0.0057554552,0.4547898471],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2504531443,0.0000503703,0.0148512525,0.1639266461],"action_prob":0.9249731898,"action_logp":-0.0779904947,"action_dist_inputs":[-1.2525256872,1.2593938112],"value_targets":6.7934651375} +{"eps_id":904116594,"obs":[0.2504531443,0.0000503703,0.0148512525,0.1639266461],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2504541576,0.1949566156,0.0181297846,-0.1240343675],"action_prob":0.8051074147,"action_logp":-0.2167796046,"action_dist_inputs":[-0.7073372602,0.7111896276],"value_targets":5.8519849777} +{"eps_id":904116594,"obs":[0.2504541576,0.1949566156,0.0181297846,-0.1240343675],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2543532848,0.389814198,0.0156490989,-0.4109428525],"action_prob":0.4089399874,"action_logp":-0.8941868544,"action_dist_inputs":[0.1842357516,-0.1841134131],"value_targets":4.9009947777} 
+{"eps_id":904116594,"obs":[0.2543532848,0.389814198,0.0156490989,-0.4109428525],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2621495724,0.1944739223,0.007430241,-0.1133676767],"action_prob":0.865060389,"action_logp":-0.1449559927,"action_dist_inputs":[0.9273454547,-0.9306259155],"value_targets":3.9403989315} +{"eps_id":904116594,"obs":[0.2621495724,0.1944739223,0.007430241,-0.1133676767],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2660390437,0.3894886374,0.0051628877,-0.4036971927],"action_prob":0.4058764577,"action_logp":-0.9017064571,"action_dist_inputs":[0.190650925,-0.1903875619],"value_targets":2.970099926} +{"eps_id":904116594,"obs":[0.2660390437,0.3894886374,0.0051628877,-0.4036971927],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2738288343,0.5845369697,-0.0029110559,-0.6947479248],"action_prob":0.1328468621,"action_logp":-2.0185582638,"action_dist_inputs":[0.9364075065,-0.9396111369],"value_targets":1.9900000095} +{"eps_id":904116594,"obs":[0.2738288343,0.5845369697,-0.0029110559,-0.6947479248],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[0.2855195701,0.3894555271,-0.0168060139,-0.402982831],"action_prob":0.9308399558,"action_logp":-0.0716678947,"action_dist_inputs":[1.2967139482,-1.302950263],"value_targets":1.0} +{"eps_id":1277027834,"obs":[-0.0446956195,-0.0238093529,0.0492884554,0.0201165006],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0451718085,0.1705723703,0.049690783,-0.2566170394],"action_prob":0.66679883,"action_logp":-0.405266881,"action_dist_inputs":[-0.3468164504,0.3469256461],"value_targets":86.6020355225} 
+{"eps_id":1277027834,"obs":[-0.0451718085,0.1705723703,0.049690783,-0.2566170394],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.041760359,0.3649509549,0.0445584431,-0.5332219601],"action_prob":0.2494263649,"action_logp":-1.3885915279,"action_dist_inputs":[0.5490761995,-0.5525978804],"value_targets":86.4666976929} +{"eps_id":1277027834,"obs":[-0.041760359,0.3649509549,0.0445584431,-0.5332219601],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0344613418,0.1692315936,0.0338940024,-0.2268384546],"action_prob":0.909318924,"action_logp":-0.0950593948,"action_dist_inputs":[1.1495314837,-1.1558160782],"value_targets":86.3300018311} +{"eps_id":1277027834,"obs":[-0.0344613418,0.1692315936,0.0338940024,-0.2268384546],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0310767088,-0.0263579451,0.029357234,0.076340273],"action_prob":0.7339441776,"action_logp":-0.3093222678,"action_dist_inputs":[0.5057396889,-0.5089873075],"value_targets":86.1919174194} +{"eps_id":1277027834,"obs":[-0.0310767088,-0.0263579451,0.029357234,0.076340273],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0316038691,0.1683311164,0.0308840405,-0.2069377005],"action_prob":0.7230490446,"action_logp":-0.3242782652,"action_dist_inputs":[-0.4794550538,0.4801816642],"value_targets":86.052444458} +{"eps_id":1277027834,"obs":[-0.0316038691,0.1683311164,0.0308840405,-0.2069377005],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.028237246,-0.027218543,0.0267452858,0.0953253731],"action_prob":0.7085297704,"action_logp":-0.3445631862,"action_dist_inputs":[0.4426026642,-0.445651561],"value_targets":85.9115600586} 
+{"eps_id":1277027834,"obs":[-0.028237246,-0.027218543,0.0267452858,0.0953253731],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0287816171,0.1675100774,0.0286517944,-0.1888009012],"action_prob":0.7461782694,"action_logp":-0.2927907109,"action_dist_inputs":[-0.5386895537,0.5396431088],"value_targets":85.7692489624} +{"eps_id":1277027834,"obs":[-0.0287816171,0.1675100774,0.0286517944,-0.1888009012],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0254314151,-0.0280098263,0.024875775,0.1127810404],"action_prob":0.6824715734,"action_logp":-0.3820343614,"action_dist_inputs":[0.3811469376,-0.3840068281],"value_targets":85.62550354} +{"eps_id":1277027834,"obs":[-0.0254314151,-0.0280098263,0.024875775,0.1127810404],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0259916112,0.1667470038,0.0271313954,-0.1719511002],"action_prob":0.7662700415,"action_logp":-0.2662206292,"action_dist_inputs":[-0.5930994749,0.5942687392],"value_targets":85.4803085327} +{"eps_id":1277027834,"obs":[-0.0259916112,0.1667470038,0.0271313954,-0.1719511002],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0226566717,0.3614703417,0.0236923732,-0.4559529722],"action_prob":0.3445228636,"action_logp":-1.0655947924,"action_dist_inputs":[0.3202631176,-0.3229396939],"value_targets":85.3336486816} +{"eps_id":1277027834,"obs":[-0.0226566717,0.3614703417,0.0236923732,-0.4559529722],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0154272644,0.1660215706,0.0145733152,-0.1558971703],"action_prob":0.8907012939,"action_logp":-0.1157461405,"action_dist_inputs":[1.0461124182,-1.0518125296],"value_targets":85.1855010986} 
+{"eps_id":1277027834,"obs":[-0.0154272644,0.1660215706,0.0145733152,-0.1558971703],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.012106833,0.3609318733,0.011455372,-0.4439471662],"action_prob":0.3476651907,"action_logp":-1.056515336,"action_dist_inputs":[0.3134078979,-0.3159100413],"value_targets":85.0358581543} +{"eps_id":1277027834,"obs":[-0.012106833,0.3609318733,0.011455372,-0.4439471662],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.004888196,0.1656497121,0.0025764282,-0.1476753503],"action_prob":0.8918303251,"action_logp":-0.1144793555,"action_dist_inputs":[1.0519841909,-1.0575909615],"value_targets":84.8847045898} +{"eps_id":1277027834,"obs":[-0.004888196,0.1656497121,0.0025764282,-0.1476753503],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0015752016,0.3607346714,-0.0003770788,-0.4395443499],"action_prob":0.3374714851,"action_logp":-1.0862742662,"action_dist_inputs":[0.3360901773,-0.3384924531],"value_targets":84.7320251465} +{"eps_id":1277027834,"obs":[-0.0015752016,0.3607346714,-0.0003770788,-0.4395443499],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0056394921,0.165618062,-0.0091679664,-0.1469803154],"action_prob":0.8957862258,"action_logp":-0.1100534871,"action_dist_inputs":[1.0728428364,-1.0784140825],"value_targets":84.5778045654} +{"eps_id":1277027834,"obs":[0.0056394921,0.165618062,-0.0091679664,-0.1469803154],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.008951853,-0.029371405,-0.0121075725,0.1427962482],"action_prob":0.6852864027,"action_logp":-0.3779184222,"action_dist_inputs":[0.3878981471,-0.3902757466],"value_targets":84.4220275879} 
+{"eps_id":1277027834,"obs":[0.008951853,-0.029371405,-0.0121075725,0.1427962482],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.008364425,0.1659218371,-0.0092516476,-0.1536816806],"action_prob":0.7513327599,"action_logp":-0.2859066129,"action_dist_inputs":[-0.5521044731,0.5536286235],"value_targets":84.2646713257} +{"eps_id":1277027834,"obs":[0.008364425,0.1659218371,-0.0092516476,-0.1536816806],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0116828615,0.3611750305,-0.0123252813,-0.4492688775],"action_prob":0.3028481901,"action_logp":-1.1945235729,"action_dist_inputs":[0.4156750739,-0.4180963337],"value_targets":84.1057281494} +{"eps_id":1277027834,"obs":[0.0116828615,0.3611750305,-0.0123252813,-0.4492688775],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0189063624,0.166229561,-0.0213106591,-0.1604964286],"action_prob":0.9043658376,"action_logp":-0.1005213261,"action_dist_inputs":[1.1205347776,-1.1261686087],"value_targets":83.9451828003} +{"eps_id":1277027834,"obs":[0.0189063624,0.166229561,-0.0213106591,-0.1604964286],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.022230953,-0.0285809226,-0.0245205872,0.1253880858],"action_prob":0.7299906611,"action_logp":-0.3147235513,"action_dist_inputs":[0.4960492849,-0.4985257983],"value_targets":83.7830123901} +{"eps_id":1277027834,"obs":[0.022230953,-0.0285809226,-0.0245205872,0.1253880858],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0216593351,0.1668835729,-0.0220128261,-0.1749288589],"action_prob":0.7044057846,"action_logp":-0.3504006863,"action_dist_inputs":[-0.4335075319,0.4348594248],"value_targets":83.6192016602} 
+{"eps_id":1277027834,"obs":[0.0216593351,0.1668835729,-0.0220128261,-0.1749288589],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0249970071,-0.0279165339,-0.0255114026,0.1107292697],"action_prob":0.7521193027,"action_logp":-0.2848602831,"action_dist_inputs":[0.5536719561,-0.556275785],"value_targets":83.453742981} +{"eps_id":1277027834,"obs":[0.0249970071,-0.0279165339,-0.0255114026,0.1107292697],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0244386755,0.1675615162,-0.0232968163,-0.1898919344],"action_prob":0.6775780916,"action_logp":-0.3892304599,"action_dist_inputs":[-0.3707285225,0.3719353676],"value_targets":83.286605835} +{"eps_id":1277027834,"obs":[0.0244386755,0.1675615162,-0.0232968163,-0.1898919344],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0277899057,-0.0272195302,-0.0270946566,0.0953517109],"action_prob":0.7737005353,"action_logp":-0.2565703988,"action_dist_inputs":[0.6132924557,-0.6160332561],"value_targets":83.1177825928} +{"eps_id":1277027834,"obs":[0.0277899057,-0.0272195302,-0.0270946566,0.0953517109],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.027245516,0.1682800651,-0.0251876209,-0.2057549059],"action_prob":0.6461256742,"action_logp":-0.4367612898,"action_dist_inputs":[-0.3005006909,0.301551491],"value_targets":82.9472579956} +{"eps_id":1277027834,"obs":[0.027245516,0.1682800651,-0.0251876209,-0.2057549059],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0306111164,-0.0264728088,-0.02930272,0.0788773671],"action_prob":0.7947769165,"action_logp":-0.2296938151,"action_dist_inputs":[0.6755358577,-0.678427875],"value_targets":82.7750091553} 
+{"eps_id":1277027834,"obs":[0.0306111164,-0.0264728088,-0.02930272,0.0788773671],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0300816614,-0.2211627066,-0.0277251732,0.3621730804],"action_prob":0.3910367787,"action_logp":-0.9389536977,"action_dist_inputs":[-0.2210385054,0.2219178677],"value_targets":82.601020813} +{"eps_id":1277027834,"obs":[0.0300816614,-0.2211627066,-0.0277251732,0.3621730804],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0256584063,-0.0256578811,-0.0204817113,0.0608783886],"action_prob":0.8794653416,"action_logp":-0.1284410954,"action_dist_inputs":[-0.9915257096,0.9958509207],"value_targets":82.4252700806} +{"eps_id":1277027834,"obs":[0.0256584063,-0.0256578811,-0.0204817113,0.0608783886],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0251452494,0.169751659,-0.019264143,-0.2381956428],"action_prob":0.5924270153,"action_logp":-0.5235275626,"action_dist_inputs":[-0.1866545379,0.1873531193],"value_targets":82.2477493286} +{"eps_id":1277027834,"obs":[0.0251452494,0.169751659,-0.019264143,-0.2381956428],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0285402816,0.3651434481,-0.0240280554,-0.5368922353],"action_prob":0.1793907881,"action_logp":-1.7181886435,"action_dist_inputs":[0.7586402297,-0.7618401051],"value_targets":82.0684280396} +{"eps_id":1277027834,"obs":[0.0285402816,0.3651434481,-0.0240280554,-0.5368922353],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0358431526,0.1703674197,-0.0347658992,-0.2518761158],"action_prob":0.9276378751,"action_logp":-0.0751138404,"action_dist_inputs":[1.2723125219,-1.2786455154],"value_targets":81.8873062134} 
+{"eps_id":1277027834,"obs":[0.0358431526,0.1703674197,-0.0347658992,-0.2518761158],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0392505005,0.365968138,-0.039803423,-0.5553190112],"action_prob":0.1540483087,"action_logp":-1.8704890013,"action_dist_inputs":[0.8499140143,-0.8532819152],"value_targets":81.7043457031} +{"eps_id":1277027834,"obs":[0.0392505005,0.365968138,-0.039803423,-0.5553190112],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0465698615,0.1714269817,-0.0509098023,-0.2754375339],"action_prob":0.9327269793,"action_logp":-0.0696427301,"action_dist_inputs":[1.3114054203,-1.3179478645],"value_targets":81.5195465088} +{"eps_id":1277027834,"obs":[0.0465698615,0.1714269817,-0.0509098023,-0.2754375339],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0499984026,-0.0229330491,-0.056418553,0.0007637744],"action_prob":0.8716586232,"action_logp":-0.1373574287,"action_dist_inputs":[0.9560235739,-0.9596806169],"value_targets":81.3328704834} +{"eps_id":1277027834,"obs":[0.0499984026,-0.0229330491,-0.056418553,0.0007637744],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0495397411,0.1729507297,-0.0564032793,-0.3091726601],"action_prob":0.3814387023,"action_logp":-0.9638050795,"action_dist_inputs":[0.2417147756,-0.2417312711],"value_targets":81.144317627} +{"eps_id":1277027834,"obs":[0.0495397411,0.1729507297,-0.0564032793,-0.3091726601],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0529987551,-0.0213241056,-0.0625867322,-0.0347975791],"action_prob":0.8896611929,"action_logp":-0.1169146001,"action_dist_inputs":[1.04162395,-1.0456610918],"value_targets":80.9538574219} 
+{"eps_id":1277027834,"obs":[0.0529987551,-0.0213241056,-0.0625867322,-0.0347975791],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0525722727,-0.2154953033,-0.0632826835,0.2375004739],"action_prob":0.6944850087,"action_logp":-0.3645847142,"action_dist_inputs":[0.4103539586,-0.410817802],"value_targets":80.76146698} +{"eps_id":1277027834,"obs":[0.0525722727,-0.2154953033,-0.0632826835,0.2375004739],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0482623689,-0.0195290372,-0.0585326739,-0.0744532272],"action_prob":0.7182587385,"action_logp":-0.3309253752,"action_dist_inputs":[-0.4663412571,0.469499588],"value_targets":80.5671386719} +{"eps_id":1277027834,"obs":[0.0482623689,-0.0195290372,-0.0585326739,-0.0744532272],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0478717871,-0.2137651443,-0.0600217395,0.1992031932],"action_prob":0.7463663816,"action_logp":-0.2925386727,"action_dist_inputs":[0.5391823053,-0.5401435494],"value_targets":80.3708496094} +{"eps_id":1277027834,"obs":[0.0478717871,-0.2137651443,-0.0600217395,0.1992031932],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0435964838,-0.407979548,-0.0560376756,0.4723643959],"action_prob":0.3406020701,"action_logp":-1.0770404339,"action_dist_inputs":[-0.3289162219,0.3316962421],"value_targets":80.1725769043} +{"eps_id":1277027834,"obs":[0.0435964838,-0.407979548,-0.0560376756,0.4723643959],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.035436891,-0.2121127695,-0.0465903878,0.162558943],"action_prob":0.8875770569,"action_logp":-0.1192599311,"action_dist_inputs":[-1.0303225517,1.0359054804],"value_targets":79.9722976685} 
+{"eps_id":1277027834,"obs":[0.035436891,-0.2121127695,-0.0465903878,0.162558943],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0311946385,-0.4065378606,-0.0433392078,0.440187484],"action_prob":0.3810116053,"action_logp":-0.9649254084,"action_dist_inputs":[-0.2414307445,0.2438259125],"value_targets":79.7699966431} +{"eps_id":1277027834,"obs":[0.0311946385,-0.4065378606,-0.0433392078,0.440187484],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0230638813,-0.2108301967,-0.0345354564,0.134163931],"action_prob":0.8790721893,"action_logp":-0.1288882494,"action_dist_inputs":[-0.9891688228,0.9945047498],"value_targets":79.5656509399} +{"eps_id":1277027834,"obs":[0.0230638813,-0.2108301967,-0.0345354564,0.134163931],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0188472774,-0.4054408967,-0.0318521783,0.4157544971],"action_prob":0.4101459086,"action_logp":-0.8912423253,"action_dist_inputs":[-0.1806451678,0.1827169657],"value_targets":79.3592453003} +{"eps_id":1277027834,"obs":[0.0188472774,-0.4054408967,-0.0318521783,0.4157544971],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0107384585,-0.209882319,-0.02353709,0.113202475],"action_prob":0.8731239438,"action_logp":-0.1356777698,"action_dist_inputs":[-0.9618676901,0.9669989944],"value_targets":79.1507568359} +{"eps_id":1277027834,"obs":[0.0107384585,-0.209882319,-0.02353709,0.113202475],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0065408126,-0.0144311506,-0.0212730393,-0.1868123412],"action_prob":0.5728411078,"action_logp":-0.5571469069,"action_dist_inputs":[-0.1458199322,0.1476324648],"value_targets":78.9401550293} 
+{"eps_id":1277027834,"obs":[0.0065408126,-0.0144311506,-0.0212730393,-0.1868123412],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0062521896,-0.2092423886,-0.0250092857,0.0990845859],"action_prob":0.8220502734,"action_logp":-0.1959537119,"action_dist_inputs":[0.7639530301,-0.7663475275],"value_targets":78.727432251} +{"eps_id":1277027834,"obs":[0.0062521896,-0.2092423886,-0.0250092857,0.0990845859],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0020673419,-0.0137710944,-0.0230275951,-0.2013826221],"action_prob":0.5405396223,"action_logp":-0.6151872873,"action_dist_inputs":[-0.0804551393,0.0820603371],"value_targets":78.5125579834} +{"eps_id":1277027834,"obs":[0.0020673419,-0.0137710944,-0.0230275951,-0.2013826221],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0017919199,-0.2085562646,-0.0270552468,0.0839481652],"action_prob":0.834978044,"action_logp":-0.1803498268,"action_dist_inputs":[0.8093488216,-0.8119784594],"value_targets":78.2955093384} +{"eps_id":1277027834,"obs":[0.0017919199,-0.2085562646,-0.0270552468,0.0839481652],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0023792053,-0.0130571481,-0.0253762845,-0.2171464711],"action_prob":0.5044791102,"action_logp":-0.6842288375,"action_dist_inputs":[-0.0082704127,0.0096466308],"value_targets":78.0762710571} +{"eps_id":1277027834,"obs":[-0.0023792053,-0.0130571481,-0.0253762845,-0.2171464711],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0026403484,-0.2078073174,-0.029719213,0.0674249083],"action_prob":0.8476908803,"action_logp":-0.1652392596,"action_dist_inputs":[0.85686028,-0.8597431183],"value_targets":77.8548202515} 
+{"eps_id":1277027834,"obs":[-0.0026403484,-0.2078073174,-0.029719213,0.0674249083],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0067964946,-0.4024908543,-0.0283707157,0.350585103],"action_prob":0.5356363654,"action_logp":-0.6242998242,"action_dist_inputs":[0.071952939,-0.0708344281],"value_targets":77.6311340332} +{"eps_id":1277027834,"obs":[-0.0067964946,-0.4024908543,-0.0283707157,0.350585103],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0148463119,-0.2069771588,-0.0213590134,0.0490926988],"action_prob":0.8291860819,"action_logp":-0.1873106509,"action_dist_inputs":[-0.7877013683,0.7921687961],"value_targets":77.4051818848} +{"eps_id":1277027834,"obs":[-0.0148463119,-0.2069771588,-0.0213590134,0.0490926988],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0189858545,-0.0115555525,-0.0203771591,-0.250251919],"action_prob":0.4466199279,"action_logp":-0.8060473204,"action_dist_inputs":[0.1075952724,-0.1067418233],"value_targets":77.1769561768} +{"eps_id":1277027834,"obs":[-0.0189858545,-0.0115555525,-0.0203771591,-0.250251919],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0192169659,-0.2063806653,-0.0253821965,0.0359346196],"action_prob":0.8639784455,"action_logp":-0.1462074369,"action_dist_inputs":[0.9226803184,-0.9260544777],"value_targets":76.9464187622} +{"eps_id":1277027834,"obs":[-0.0192169659,-0.2063806653,-0.0253821965,0.0359346196],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0233445801,-0.0109041044,-0.0246635042,-0.2646473348],"action_prob":0.4118823707,"action_logp":-0.8870174885,"action_dist_inputs":[0.1784015894,-0.1777876914],"value_targets":76.7135543823} 
+{"eps_id":1277027834,"obs":[-0.0233445801,-0.0109041044,-0.0246635042,-0.2646473348],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0235626623,-0.2056655139,-0.0299564525,0.0201555714],"action_prob":0.8735698462,"action_logp":-0.1351672113,"action_dist_inputs":[0.9646391869,-0.9682587981],"value_targets":76.4783401489} +{"eps_id":1277027834,"obs":[-0.0235626623,-0.2056655139,-0.0299564525,0.0201555714],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0276759714,-0.0101270406,-0.0295533407,-0.2818264067],"action_prob":0.3727453649,"action_logp":-0.9868597388,"action_dist_inputs":[0.2603927255,-0.2600643039],"value_targets":76.2407455444} +{"eps_id":1277027834,"obs":[-0.0276759714,-0.0101270406,-0.0295533407,-0.2818264067],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0278785117,-0.2048152536,-0.035189867,0.001390966],"action_prob":0.8833293915,"action_logp":-0.1240571365,"action_dist_inputs":[1.0102219582,-1.0141209364],"value_targets":76.0007553101} +{"eps_id":1277027834,"obs":[-0.0278785117,-0.2048152536,-0.035189867,0.001390966],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0319748186,-0.3994153142,-0.0351620503,0.28276667],"action_prob":0.669827044,"action_logp":-0.4007357061,"action_dist_inputs":[0.3536953032,-0.3537076414],"value_targets":75.7583389282} +{"eps_id":1277027834,"obs":[-0.0319748186,-0.3994153142,-0.0351620503,0.28276667],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0399631225,-0.2038099468,-0.0295067169,-0.0207955539],"action_prob":0.7437144518,"action_logp":-0.2960981429,"action_dist_inputs":[-0.5308355689,0.5345292091],"value_targets":75.5134735107} 
+{"eps_id":1277027834,"obs":[-0.0399631225,-0.2038099468,-0.0295067169,-0.0207955539],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0440393239,-0.0082775299,-0.0299226269,-0.3226402104],"action_prob":0.3052488565,"action_logp":-1.1866278648,"action_dist_inputs":[0.4110297263,-0.4113964736],"value_targets":75.26612854} +{"eps_id":1277027834,"obs":[-0.0440393239,-0.0082775299,-0.0299226269,-0.3226402104],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0442048721,-0.2029608935,-0.0363754332,-0.0395418368],"action_prob":0.897716701,"action_logp":-0.1079007089,"action_dist_inputs":[1.0838106871,-1.0882977247],"value_targets":75.0162963867} +{"eps_id":1277027834,"obs":[-0.0442048721,-0.2029608935,-0.0363754332,-0.0395418368],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.04826409,-0.3975428641,-0.0371662676,0.2414460033],"action_prob":0.7323971987,"action_logp":-0.311432302,"action_dist_inputs":[0.5030412674,-0.5037779212],"value_targets":74.7639312744} +{"eps_id":1277027834,"obs":[-0.04826409,-0.3975428641,-0.0371662676,0.2414460033],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0562149473,-0.5921147466,-0.0323373489,0.522177875],"action_prob":0.3236585855,"action_logp":-1.1280660629,"action_dist_inputs":[-0.3669134378,0.370095253],"value_targets":74.5090255737} +{"eps_id":1277027834,"obs":[-0.0562149473,-0.5921147466,-0.0323373489,0.522177875],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0680572391,-0.3965528905,-0.02189379,0.2194829881],"action_prob":0.892048955,"action_logp":-0.1142342687,"action_dist_inputs":[-1.0529981852,1.0588451624],"value_targets":74.2515411377} 
+{"eps_id":1277027834,"obs":[-0.0680572391,-0.3965528905,-0.02189379,0.2194829881],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0759883001,-0.2011249363,-0.0175041296,-0.0800249502],"action_prob":0.6695001721,"action_logp":-0.4012238681,"action_dist_inputs":[-0.3515132368,0.3544120789],"value_targets":73.9914550781} +{"eps_id":1277027834,"obs":[-0.0759883001,-0.2011249363,-0.0175041296,-0.0800249502],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0800108016,-0.3959916532,-0.0191046298,0.2070843875],"action_prob":0.7562747598,"action_logp":-0.2793505192,"action_dist_inputs":[0.565492928,-0.566870451],"value_targets":73.7287445068} +{"eps_id":1277027834,"obs":[-0.0800108016,-0.3959916532,-0.0191046298,0.2070843875],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0879306346,-0.5908352733,-0.0149629414,0.4936800599],"action_prob":0.3458336592,"action_logp":-1.0617973804,"action_dist_inputs":[-0.3173560798,0.3200476766],"value_targets":73.4633712769} +{"eps_id":1277027834,"obs":[-0.0879306346,-0.5908352733,-0.0149629414,0.4936800599],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0997473374,-0.3955055177,-0.0050893403,0.196319297],"action_prob":0.8887491226,"action_logp":-0.1179402769,"action_dist_inputs":[-1.0362433195,1.0417836905],"value_targets":73.1953277588} +{"eps_id":1277027834,"obs":[-0.0997473374,-0.3955055177,-0.0050893403,0.196319297],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1076574475,-0.590554297,-0.0011629544,0.4873924255],"action_prob":0.3352738321,"action_logp":-1.0928076506,"action_dist_inputs":[-0.3409620821,0.3434655368],"value_targets":72.9245758057} 
+{"eps_id":1277027834,"obs":[-0.1076574475,-0.590554297,-0.0011629544,0.4873924255],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1194685325,-0.7856598496,0.0085848942,0.7797086239],"action_prob":0.1067130566,"action_logp":-2.2376117706,"action_dist_inputs":[-1.0596626997,1.065101862],"value_targets":72.6510848999} +{"eps_id":1277027834,"obs":[-0.1194685325,-0.7856598496,0.0085848942,0.7797086239],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.135181725,-0.5906569362,0.0241790656,0.4897390008],"action_prob":0.9432110786,"action_logp":-0.058465194,"action_dist_inputs":[-1.4011135101,1.4088356495],"value_targets":72.3748321533} +{"eps_id":1277027834,"obs":[-0.135181725,-0.5906569362,0.0241790656,0.4897390008],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1469948739,-0.3958843052,0.0339738466,0.2047733665],"action_prob":0.9054492712,"action_logp":-0.0993240401,"action_dist_inputs":[-1.1269356012,1.1323590279],"value_targets":72.0957946777} +{"eps_id":1277027834,"obs":[-0.1469948739,-0.3958843052,0.0339738466,0.2047733665],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1549125612,-0.2012642473,0.0380693153,-0.0770018548],"action_prob":0.750436902,"action_logp":-0.287099719,"action_dist_inputs":[-0.5492666364,0.5516770482],"value_targets":71.8139266968} +{"eps_id":1277027834,"obs":[-0.1549125612,-0.2012642473,0.0380693153,-0.0770018548],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1589378417,-0.0067081433,0.0365292765,-0.3574348986],"action_prob":0.3454259336,"action_logp":-1.0629770756,"action_dist_inputs":[0.3188765645,-0.3203299642],"value_targets":71.5292205811} 
+{"eps_id":1277027834,"obs":[-0.1589378417,-0.0067081433,0.0365292765,-0.3574348986],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1590719968,0.1878759563,0.0293805785,-0.638379097],"action_prob":0.116008386,"action_logp":-2.1540927887,"action_dist_inputs":[1.0128244162,-1.0179606676],"value_targets":71.2416381836} +{"eps_id":1277027834,"obs":[-0.1590719968,0.1878759563,0.0293805785,-0.638379097],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1553144902,-0.0076430915,0.0166129973,-0.3365904689],"action_prob":0.9379115701,"action_logp":-0.0640996024,"action_dist_inputs":[1.3536756039,-1.3614202738],"value_targets":70.9511489868} +{"eps_id":1277027834,"obs":[-0.1553144902,-0.0076430915,0.0166129973,-0.3365904689],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1554673463,-0.2029974759,0.0098811872,-0.038715329],"action_prob":0.8850925565,"action_logp":-0.1220630631,"action_dist_inputs":[1.0182517767,-1.0233137608],"value_targets":70.6577301025} +{"eps_id":1277027834,"obs":[-0.1554673463,-0.2029974759,0.0098811872,-0.038715329],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1595273018,-0.0080186017,0.009106881,-0.3282643259],"action_prob":0.3587907553,"action_logp":-1.0250159502,"action_dist_inputs":[0.2897023261,-0.2909141779],"value_targets":70.3613433838} +{"eps_id":1277027834,"obs":[-0.1595273018,-0.0080186017,0.009106881,-0.3282643259],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1596876681,-0.2032690048,0.0025415937,-0.0327234864],"action_prob":0.8852592707,"action_logp":-0.1218747124,"action_dist_inputs":[1.0190695524,-1.0241363049],"value_targets":70.061958313} 
+{"eps_id":1277027834,"obs":[-0.1596876681,-0.2032690048,0.0025415937,-0.0327234864],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1637530476,-0.3984273076,0.0018871241,0.2607602477],"action_prob":0.6443044543,"action_logp":-0.4395839274,"action_dist_inputs":[0.2964322269,-0.2976638973],"value_targets":69.7595596313} +{"eps_id":1277027834,"obs":[-0.1637530476,-0.3984273076,0.0018871241,0.2607602477],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1717215925,-0.2033323497,0.0071023293,-0.031326849],"action_prob":0.7776125669,"action_logp":-0.2515268922,"action_dist_inputs":[-0.6246134043,0.6271938086],"value_targets":69.4540939331} +{"eps_id":1277027834,"obs":[-0.1717215925,-0.2033323497,0.0071023293,-0.031326849],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1757882386,-0.3985554278,0.0064757923,0.2635884583],"action_prob":0.6309752464,"action_logp":-0.4604886174,"action_dist_inputs":[0.2675655782,-0.2688373923],"value_targets":69.1455535889} +{"eps_id":1277027834,"obs":[-0.1757882386,-0.3985554278,0.0064757923,0.2635884583],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1837593466,-0.2035265118,0.0117475614,-0.0270449314],"action_prob":0.787283957,"action_logp":-0.2391663045,"action_dist_inputs":[-0.653050065,0.6555806398],"value_targets":68.8338928223} +{"eps_id":1277027834,"obs":[-0.1837593466,-0.2035265118,0.0117475614,-0.0270449314],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1878298819,-0.3988149464,0.0112066623,0.2693212032],"action_prob":0.6115954518,"action_logp":-0.4916841984,"action_dist_inputs":[0.2263734192,-0.2276502401],"value_targets":68.5190811157} 
+{"eps_id":1277027834,"obs":[-0.1878298819,-0.3988149464,0.0112066623,0.2693212032],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1958061755,-0.203854695,0.0165930875,-0.0198061075],"action_prob":0.7995523214,"action_logp":-0.223703295,"action_dist_inputs":[-0.6904951334,0.6930036545],"value_targets":68.2010955811} +{"eps_id":1277027834,"obs":[-0.1958061755,-0.203854695,0.0165930875,-0.0198061075],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1998832673,-0.0089745903,0.0161969643,-0.3072078824],"action_prob":0.4146470726,"action_logp":-0.8803275824,"action_dist_inputs":[0.1717699319,-0.1730172634],"value_targets":67.8798904419} +{"eps_id":1277027834,"obs":[-0.1998832673,-0.0089745903,0.0161969643,-0.3072078824],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2000627667,-0.2043235451,0.0100528067,-0.0094611803],"action_prob":0.8706880808,"action_logp":-0.1384714544,"action_dist_inputs":[0.9509973526,-0.9560590386],"value_targets":67.5554504395} +{"eps_id":1277027834,"obs":[-0.2000627667,-0.2043235451,0.0100528067,-0.0094611803],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2041492313,-0.0093471995,0.0098635834,-0.2989554405],"action_prob":0.421707809,"action_logp":-0.8634425998,"action_dist_inputs":[0.1572798342,-0.1584867388],"value_targets":67.227722168} +{"eps_id":1277027834,"obs":[-0.2041492313,-0.0093471995,0.0098635834,-0.2989554405],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2043361813,-0.204608351,0.0038844743,-0.0031781252],"action_prob":0.8700292706,"action_logp":-0.1392284483,"action_dist_inputs":[0.9480826855,-0.9531348348],"value_targets":66.8966903687} 
+{"eps_id":1277027834,"obs":[-0.2043361813,-0.204608351,0.0038844743,-0.0031781252],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2084283531,-0.3997857869,0.003820912,0.2907278538],"action_prob":0.5784878135,"action_logp":-0.5473377705,"action_dist_inputs":[0.1576784253,-0.158890292],"value_targets":66.5623168945} +{"eps_id":1277027834,"obs":[-0.2084283531,-0.3997857869,0.003820912,0.2907278538],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2164240628,-0.2047185302,0.0096354689,-0.0007475454],"action_prob":0.8128626347,"action_logp":-0.2071931809,"action_dist_inputs":[-0.7331181169,0.7356007695],"value_targets":66.2245635986} +{"eps_id":1277027834,"obs":[-0.2164240628,-0.2047185302,0.0096354689,-0.0007475454],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.22051844,-0.0097360853,0.0096205184,-0.2903748453],"action_prob":0.4409101903,"action_logp":-0.8189140558,"action_dist_inputs":[0.1181159317,-0.1193529591],"value_targets":65.883392334} +{"eps_id":1277027834,"obs":[-0.22051844,-0.0097360853,0.0096205184,-0.2903748453],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2207131535,-0.2049938887,0.0038130211,0.0053266799],"action_prob":0.8647016287,"action_logp":-0.1453707665,"action_dist_inputs":[0.9249169827,-0.9299848676],"value_targets":65.5387802124} +{"eps_id":1277027834,"obs":[-0.2207131535,-0.2049938887,0.0038130211,0.0053266799],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2248130292,-0.0099268258,0.0039195549,-0.2861507535],"action_prob":0.4410600662,"action_logp":-0.8185741901,"action_dist_inputs":[0.1178097203,-0.1190513074],"value_targets":65.1906890869} 
+{"eps_id":1277027834,"obs":[-0.2248130292,-0.0099268258,0.0039195549,-0.2861507535],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2250115722,-0.2051044554,-0.0018034604,0.0077657849],"action_prob":0.8658356667,"action_logp":-0.1440601647,"action_dist_inputs":[0.9297682643,-0.9348615408],"value_targets":64.8390808105} +{"eps_id":1277027834,"obs":[-0.2250115722,-0.2051044554,-0.0018034604,0.0077657849],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2291136533,-0.4002004862,-0.0016481447,0.2998791635],"action_prob":0.5656577349,"action_logp":-0.5697661042,"action_dist_inputs":[0.1314350963,-0.1327212453],"value_targets":64.4839172363} +{"eps_id":1277027834,"obs":[-0.2291136533,-0.4002004862,-0.0016481447,0.2998791635],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.237117663,-0.2050550878,0.0043494385,0.0066768895],"action_prob":0.8157231212,"action_logp":-0.2036802769,"action_dist_inputs":[-0.7426375151,0.7449982762],"value_targets":64.1251678467} +{"eps_id":1277027834,"obs":[-0.237117663,-0.2050550878,0.0043494385,0.0066768895],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2412187755,-0.4002391398,0.0044829762,0.3007289469],"action_prob":0.552372992,"action_logp":-0.5935317278,"action_dist_inputs":[0.1044567674,-0.1058065072],"value_targets":63.7627983093} +{"eps_id":1277027834,"obs":[-0.2412187755,-0.4002391398,0.0044829762,0.3007289469],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2492235601,-0.2051813751,0.0104975551,0.0094632152],"action_prob":0.8224776983,"action_logp":-0.1954338998,"action_dist_inputs":[-0.7654580474,0.7677669525],"value_targets":63.3967666626} 
+{"eps_id":1277027834,"obs":[-0.2492235601,-0.2051813751,0.0104975551,0.0094632152],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2533271909,-0.400452286,0.0106868194,0.3054396808],"action_prob":0.5311620831,"action_logp":-0.6326880455,"action_dist_inputs":[0.061719574,-0.0630906373],"value_targets":63.0270347595} +{"eps_id":1277027834,"obs":[-0.2533271909,-0.400452286,0.0106868194,0.3054396808],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2613362372,-0.2054842561,0.0167956129,0.0161461756],"action_prob":0.8318775296,"action_logp":-0.1840700656,"action_dist_inputs":[-0.7983486652,0.8006438017],"value_targets":62.6535720825} +{"eps_id":1277027834,"obs":[-0.2613362372,-0.2054842561,0.0167956129,0.0161461756],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2654459178,-0.0106071495,0.0171185359,-0.2711905837],"action_prob":0.4983440638,"action_logp":-0.6964645386,"action_dist_inputs":[0.0026361248,-0.0039877435],"value_targets":62.2763366699} +{"eps_id":1277027834,"obs":[-0.2654459178,-0.0106071495,0.0171185359,-0.2711905837],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2656580508,-0.2059691399,0.0116947247,0.0268420801],"action_prob":0.8435752392,"action_logp":-0.1701062173,"action_dist_inputs":[0.8399941921,-0.845079422],"value_targets":61.8952865601} +{"eps_id":1277027834,"obs":[-0.2656580508,-0.2059691399,0.0116947247,0.0268420801],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.269777447,-0.4012568295,0.012231566,0.323191762],"action_prob":0.4905336499,"action_logp":-0.7122613788,"action_dist_inputs":[-0.0195860565,0.0182838589],"value_targets":61.5103912354} 
+{"eps_id":1277027834,"obs":[-0.269777447,-0.4012568295,0.012231566,0.323191762],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2778025866,-0.2063111663,0.018695401,0.0343911313],"action_prob":0.8456709981,"action_logp":-0.1676248908,"action_dist_inputs":[-0.8493511677,0.8516923785],"value_targets":61.1216087341} +{"eps_id":1277027834,"obs":[-0.2778025866,-0.2063111663,0.018695401,0.0343911313],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2819288075,-0.4016961753,0.0193832237,0.332913518],"action_prob":0.4582287967,"action_logp":-0.7803866863,"action_dist_inputs":[-0.0843753368,0.0830998719],"value_targets":60.7288970947} +{"eps_id":1277027834,"obs":[-0.2819288075,-0.4016961753,0.0193832237,0.332913518],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.289962709,-0.2068553865,0.0260414947,0.0464055724],"action_prob":0.8565577269,"action_logp":-0.154833585,"action_dist_inputs":[-0.8922989368,0.8946900368],"value_targets":60.3322181702} +{"eps_id":1277027834,"obs":[-0.289962709,-0.2068553865,0.0260414947,0.0464055724],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2940998375,-0.0121163437,0.0269696061,-0.2379486114],"action_prob":0.5824716687,"action_logp":-0.5404747725,"action_dist_inputs":[-0.1670663953,0.1658616066],"value_targets":59.9315338135} +{"eps_id":1277027834,"obs":[-0.2940998375,-0.0121163437,0.0269696061,-0.2379486114],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2943421602,-0.2076130062,0.0222106334,0.0631178319],"action_prob":0.8017200828,"action_logp":-0.2209957689,"action_dist_inputs":[0.6961154342,-0.700964272],"value_targets":59.526802063} 
+{"eps_id":1277027834,"obs":[-0.2943421602,-0.2076130062,0.0222106334,0.0631178319],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2984944284,-0.4030462503,0.0234729908,0.362724781],"action_prob":0.393427372,"action_logp":-0.9328588247,"action_dist_inputs":[-0.2170104533,0.2159175277],"value_targets":59.117980957} +{"eps_id":1277027834,"obs":[-0.2984944284,-0.4030462503,0.0234729908,0.362724781],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3065553308,-0.2082656473,0.0307274852,0.0775347948],"action_prob":0.8739268184,"action_logp":-0.1347586066,"action_dist_inputs":[-0.9667714238,0.9693632722],"value_targets":58.7050323486} +{"eps_id":1277027834,"obs":[-0.3065553308,-0.2082656473,0.0307274852,0.0775347948],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3107206523,-0.4038143158,0.0322781801,0.3797517717],"action_prob":0.3503781557,"action_logp":-1.0487422943,"action_dist_inputs":[-0.3091875315,0.3081899285],"value_targets":58.2879104614} +{"eps_id":1277027834,"obs":[-0.3107206523,-0.4038143158,0.0322781801,0.3797517717],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3187969327,-0.2091652453,0.0398732163,0.09741842],"action_prob":0.8847503066,"action_logp":-0.1224497929,"action_dist_inputs":[-1.0177280903,1.0204768181],"value_targets":57.8665771484} +{"eps_id":1277027834,"obs":[-0.3187969327,-0.2091652453,0.0398732163,0.09741842],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3229802549,-0.0146367745,0.0418215841,-0.1824227422],"action_prob":0.6976069808,"action_logp":-0.3600994051,"action_dist_inputs":[-0.4183856547,0.4175426364],"value_targets":57.4409866333} 
+{"eps_id":1277027834,"obs":[-0.3229802549,-0.0146367745,0.0418215841,-0.1824227422],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3232729733,-0.2103314251,0.0381731316,0.123154521],"action_prob":0.7050459385,"action_logp":-0.3494922817,"action_dist_inputs":[0.4335279167,-0.4379155636],"value_targets":57.0110969543} +{"eps_id":1277027834,"obs":[-0.3232729733,-0.2103314251,0.0381731316,0.123154521],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3274796009,-0.4059788883,0.0406362228,0.4276320934],"action_prob":0.2690984309,"action_logp":-1.3126780987,"action_dist_inputs":[-0.4999154806,0.4992862344],"value_targets":56.5768661499} +{"eps_id":1277027834,"obs":[-0.3274796009,-0.4059788883,0.0406362228,0.4276320934],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.335599184,-0.2114553154,0.0491888635,0.1480313838],"action_prob":0.9020530581,"action_logp":-0.1030819565,"action_dist_inputs":[-1.1085182428,1.1117284298],"value_targets":56.1382484436} +{"eps_id":1277027834,"obs":[-0.335599184,-0.2114553154,0.0491888635,0.1480313838],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3398282826,-0.017071005,0.0521494895,-0.1287367195],"action_prob":0.7739650607,"action_logp":-0.2562285364,"action_dist_inputs":[-0.6156160831,0.6152209044],"value_targets":55.6952018738} +{"eps_id":1277027834,"obs":[-0.3398282826,-0.017071005,0.0521494895,-0.1287367195],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.340169698,0.1772666126,0.0495747551,-0.4045218527],"action_prob":0.4101526439,"action_logp":-0.8912258744,"action_dist_inputs":[0.1797000766,-0.1836342067],"value_targets":55.2476768494} 
+{"eps_id":1277027834,"obs":[-0.340169698,0.1772666126,0.0495747551,-0.4045218527],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3366243839,-0.0185220633,0.0414843187,-0.0966306329],"action_prob":0.8649875522,"action_logp":-0.1450401396,"action_dist_inputs":[0.9252403378,-0.9321081042],"value_targets":54.7956352234} +{"eps_id":1277027834,"obs":[-0.3366243839,-0.0185220633,0.0414843187,-0.0966306329],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3369948268,0.1759815216,0.0395517088,-0.375942111],"action_prob":0.4487559199,"action_logp":-0.8012761474,"action_dist_inputs":[0.1010149568,-0.1046836078],"value_targets":54.3390235901} +{"eps_id":1277027834,"obs":[-0.3369948268,0.1759815216,0.0395517088,-0.375942111],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3334752023,-0.0196792278,0.032032866,-0.0710553452],"action_prob":0.8540594578,"action_logp":-0.157754451,"action_dist_inputs":[0.8800533414,-0.8867483139],"value_targets":53.8778038025} +{"eps_id":1277027834,"obs":[-0.3334752023,-0.0196792278,0.032032866,-0.0710553452],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3338687718,0.1749691814,0.0306117591,-0.3534621298],"action_prob":0.4780031443,"action_logp":-0.7381379604,"action_dist_inputs":[0.0422943868,-0.0457498729],"value_targets":53.4119224548} +{"eps_id":1277027834,"obs":[-0.3338687718,0.1749691814,0.0306117591,-0.3534621298],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3303693831,-0.0205743723,0.0235425159,-0.0512857549],"action_prob":0.8454370499,"action_logp":-0.1679015905,"action_dist_inputs":[0.8463470936,-0.8529052138],"value_targets":52.9413375854} 
+{"eps_id":1277027834,"obs":[-0.3303693831,-0.0205743723,0.0235425159,-0.0512857549],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3307808936,-0.2160258442,0.0225168001,0.2487310767],"action_prob":0.5018458366,"action_logp":-0.6894623041,"action_dist_inputs":[0.002046362,-0.0053369747],"value_targets":52.4659957886} +{"eps_id":1277027834,"obs":[-0.3307808936,-0.2160258442,0.0225168001,0.2487310767],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3351013958,-0.0212325789,0.0274914224,-0.0367653854],"action_prob":0.8339361548,"action_logp":-0.1815984249,"action_dist_inputs":[-0.8066224456,0.8071621656],"value_targets":51.9858551025} +{"eps_id":1277027834,"obs":[-0.3351013958,-0.0212325789,0.0274914224,-0.0367653854],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3355260491,-0.2167377472,0.0267561153,0.2644630373],"action_prob":0.4652901292,"action_logp":-0.765094161,"action_dist_inputs":[-0.0711131617,0.0679499879],"value_targets":51.5008621216} +{"eps_id":1277027834,"obs":[-0.3355260491,-0.2167377472,0.0267561153,0.2644630373],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3398607969,-0.0220077131,0.0320453756,-0.0196621269],"action_prob":0.8461739421,"action_logp":-0.1670303345,"action_dist_inputs":[-0.8520924449,0.8528100252],"value_targets":51.0109710693} +{"eps_id":1277027834,"obs":[-0.3398607969,-0.0220077131,0.0320453756,-0.0196621269],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3403009474,0.1726403683,0.0316521339,-0.3020646274],"action_prob":0.5759894252,"action_logp":-0.5516660213,"action_dist_inputs":[-0.154667452,0.1516631544],"value_targets":50.5161323547} 
+{"eps_id":1277027834,"obs":[-0.3403009474,0.1726403683,0.0316521339,-0.3020646274],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3368481398,-0.0229180716,0.0256108399,0.0004302842],"action_prob":0.797170639,"action_logp":-0.2266865522,"action_dist_inputs":[0.6812428236,-0.6874606609],"value_targets":50.0162963867} +{"eps_id":1277027834,"obs":[-0.3368481398,-0.0229180716,0.0256108399,0.0004302842],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3373064995,0.1718274057,0.0256194454,-0.284063369],"action_prob":0.5992946029,"action_logp":-0.5120019317,"action_dist_inputs":[-0.2026731968,0.1998536885],"value_targets":49.5114097595} +{"eps_id":1277027834,"obs":[-0.3373064995,0.1718274057,0.0256194454,-0.284063369],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3338699639,-0.0236504097,0.0199381784,0.0165884234],"action_prob":0.7852289081,"action_logp":-0.2417800277,"action_dist_inputs":[0.6451528072,-0.6512492895],"value_targets":49.0014266968} +{"eps_id":1277027834,"obs":[-0.3338699639,-0.0236504097,0.0199381784,0.0165884234],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3343429863,-0.2190525383,0.0202699471,0.3154948056],"action_prob":0.3834610283,"action_logp":-0.958517313,"action_dist_inputs":[-0.2387757748,0.2361077517],"value_targets":48.486289978} +{"eps_id":1277027834,"obs":[-0.3343429863,-0.2190525383,0.0202699471,0.3154948056],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3387240171,-0.0242250971,0.0265798438,0.0292726252],"action_prob":0.867851913,"action_logp":-0.1417341828,"action_dist_inputs":[-0.9404203296,0.9416777492],"value_targets":47.9659461975} 
+{"eps_id":1277027834,"obs":[-0.3387240171,-0.0242250971,0.0265798438,0.0292726252],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3392085135,-0.2197179347,0.0271652956,0.3302218616],"action_prob":0.3506881297,"action_logp":-1.0478579998,"action_dist_inputs":[-0.3092732728,0.3067426682],"value_targets":47.4403495789} +{"eps_id":1277027834,"obs":[-0.3392085135,-0.2197179347,0.0271652956,0.3302218616],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3436028957,-0.0249930061,0.0337697342,0.0462278463],"action_prob":0.8764458895,"action_logp":-0.131880343,"action_dist_inputs":[-0.9788678288,0.980327785],"value_targets":46.9094467163} +{"eps_id":1277027834,"obs":[-0.3436028957,-0.0249930061,0.0337697342,0.0462278463],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3441027403,-0.220582515,0.0346942879,0.3493712544],"action_prob":0.3136090636,"action_logp":-1.1596081257,"action_dist_inputs":[-0.3928183615,0.3904819191],"value_targets":46.3731765747} +{"eps_id":1277027834,"obs":[-0.3441027403,-0.220582515,0.0346942879,0.3493712544],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3485144079,-0.4161802828,0.0416817144,0.6527894139],"action_prob":0.1145502031,"action_logp":-2.1667420864,"action_dist_inputs":[-1.0216786861,1.023403883],"value_targets":45.8314933777} +{"eps_id":1277027834,"obs":[-0.3485144079,-0.4161802828,0.0416817144,0.6527894139],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3568379879,-0.2216628045,0.0547375046,0.3735174835],"action_prob":0.932232976,"action_logp":-0.0701725259,"action_dist_inputs":[-1.3080579042,1.3134496212],"value_targets":45.2843360901} 
+{"eps_id":1277027834,"obs":[-0.3568379879,-0.2216628045,0.0547375046,0.3735174835],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3612712622,-0.0273594093,0.0622078516,0.0985837132],"action_prob":0.8972386718,"action_logp":-0.1084334031,"action_dist_inputs":[-1.0823937654,1.0845187902],"value_targets":44.7316513062} +{"eps_id":1277027834,"obs":[-0.3612712622,-0.0273594093,0.0622078516,0.0985837132],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3618184328,0.1668183655,0.0641795248,-0.1738422364],"action_prob":0.7808024287,"action_logp":-0.247433126,"action_dist_inputs":[-0.6359906793,0.6343581676],"value_targets":44.1733856201} +{"eps_id":1277027834,"obs":[-0.3618184328,0.1668183655,0.0641795248,-0.1738422364],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3584820628,-0.0291606095,0.0607026815,0.138376832],"action_prob":0.5450543761,"action_logp":-0.6068697572,"action_dist_inputs":[0.0877449736,-0.0929626077],"value_targets":43.6094818115} +{"eps_id":1277027834,"obs":[-0.3584820628,-0.0291606095,0.0607026815,0.138376832],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3590652943,0.1650417447,0.0634702221,-0.1345545352],"action_prob":0.8114522696,"action_logp":-0.2089296877,"action_dist_inputs":[-0.7302989364,0.7291753292],"value_targets":43.0398788452} +{"eps_id":1277027834,"obs":[-0.3590652943,0.1650417447,0.0634702221,-0.1345545352],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3557644486,0.3591998518,0.0607791282,-0.40655756],"action_prob":0.5242971182,"action_logp":-0.6456967592,"action_dist_inputs":[-0.0510502532,0.0462147035],"value_targets":42.4645233154} 
+{"eps_id":1277027834,"obs":[-0.3557644486,0.3591998518,0.0607791282,-0.40655756],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3485804498,0.1632710695,0.0526479781,-0.0953489915],"action_prob":0.8034591079,"action_logp":-0.2188289911,"action_dist_inputs":[0.7002729774,-0.7077829242],"value_targets":41.8833580017} +{"eps_id":1277027834,"obs":[-0.3485804498,0.1632710695,0.0526479781,-0.0953489915],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3453150392,0.3576004505,0.0507409982,-0.3709678352],"action_prob":0.5720129013,"action_logp":-0.55859375,"action_dist_inputs":[-0.1472498029,0.1428185552],"value_targets":41.2963218689} +{"eps_id":1277027834,"obs":[-0.3453150392,0.3576004505,0.0507409982,-0.3709678352],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3381630182,0.1617957205,0.0433216393,-0.06272728],"action_prob":0.7791313529,"action_logp":-0.2495756447,"action_dist_inputs":[0.6266493797,-0.633961916],"value_targets":40.7033538818} +{"eps_id":1277027834,"obs":[-0.3381630182,0.1617957205,0.0433216393,-0.06272728],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3349271119,0.3562706411,0.0420670956,-0.3414333761],"action_prob":0.6094056368,"action_logp":-0.4952711165,"action_dist_inputs":[-0.2244388908,0.2203758657],"value_targets":40.1044006348} +{"eps_id":1277027834,"obs":[-0.3349271119,0.3562706411,0.0420670956,-0.3414333761],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3278017044,0.1605761945,0.0352384262,-0.0357873365],"action_prob":0.7570068836,"action_logp":-0.278382957,"action_dist_inputs":[0.5646052957,-0.5717337728],"value_targets":39.4993934631} 
+{"eps_id":1277027834,"obs":[-0.3278017044,0.1605761945,0.0352384262,-0.0357873365],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3245901763,0.3551755548,0.0345226824,-0.3171472251],"action_prob":0.6381350756,"action_logp":-0.4492052794,"action_dist_inputs":[-0.2855068743,0.2817721069],"value_targets":38.8882751465} +{"eps_id":1277027834,"obs":[-0.3245901763,0.3551755548,0.0345226824,-0.3171472251],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3174866736,0.1595793366,0.0281797368,-0.0137800481],"action_prob":0.7378572822,"action_logp":-0.304004848,"action_dist_inputs":[0.5139520764,-0.5209095478],"value_targets":38.2709846497} +{"eps_id":1277027834,"obs":[-0.3174866736,0.1595793366,0.0281797368,-0.0137800481],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3142950833,0.3542860746,0.0279041361,-0.2974404395],"action_prob":0.6598166227,"action_logp":-0.4157933593,"action_dist_inputs":[-0.3329624236,0.3295146525],"value_targets":37.6474609375} +{"eps_id":1277027834,"obs":[-0.3142950833,0.3542860746,0.0279041361,-0.2974404395],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3072093427,0.158777684,0.0219553262,0.0039107339],"action_prob":0.7221769691,"action_logp":-0.3254850209,"action_dist_inputs":[0.4742427766,-0.4810432196],"value_targets":37.0176353455} +{"eps_id":1277027834,"obs":[-0.3072093427,0.158777684,0.0219553262,0.0039107339],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3040338159,0.3535780013,0.0220335424,-0.2817649543],"action_prob":0.6757795811,"action_logp":-0.3918883204,"action_dist_inputs":[-0.368822366,0.3656210303],"value_targets":36.3814506531} 
+{"eps_id":1277027834,"obs":[-0.3040338159,0.3535780013,0.0220335424,-0.2817649543],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2969622314,0.1581487954,0.0163982417,0.0177850109],"action_prob":0.7102437615,"action_logp":-0.3421470523,"action_dist_inputs":[0.4449549317,-0.4516130984],"value_targets":35.7388381958} +{"eps_id":1277027834,"obs":[-0.2969622314,0.1581487954,0.0163982417,0.0177850109],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2937992513,0.3530317843,0.0167539418,-0.2696793079],"action_prob":0.687020421,"action_logp":-0.3753912449,"action_dist_inputs":[-0.3946100771,0.3916160166],"value_targets":35.0897369385} +{"eps_id":1277027834,"obs":[-0.2937992513,0.3530317843,0.0167539418,-0.2696793079],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2867386341,0.1576748043,0.0113603566,0.0282404721],"action_prob":0.7021908164,"action_logp":-0.3535501063,"action_dist_inputs":[0.4256104827,-0.4321417809],"value_targets":34.4340782166} +{"eps_id":1277027834,"obs":[-0.2867386341,0.1576748043,0.0113603566,0.0282404721],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2835851312,-0.0376082063,0.0119251655,0.3244859576],"action_prob":0.3057714999,"action_logp":-1.1849172115,"action_dist_inputs":[-0.411393702,0.4085694253],"value_targets":33.7717971802} +{"eps_id":1277027834,"obs":[-0.2835851312,-0.0376082063,0.0119251655,0.3244859576],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.284337312,0.1573419273,0.0184148848,0.0355874561],"action_prob":0.8802162409,"action_logp":-0.1275876462,"action_dist_inputs":[-0.9965717196,0.9979077578],"value_targets":33.1028251648} 
+{"eps_id":1277027834,"obs":[-0.284337312,0.1573419273,0.0184148848,0.0355874561],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.281190455,0.3521950245,0.0191266332,-0.2512289882],"action_prob":0.7134179473,"action_logp":-0.33768785,"action_dist_inputs":[-0.4573391676,0.4547032714],"value_targets":32.4270935059} +{"eps_id":1277027834,"obs":[-0.281190455,0.3521950245,0.0191266332,-0.2512289882],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2741465569,0.156805262,0.0141020548,0.0474249795],"action_prob":0.6709439754,"action_logp":-0.3990696371,"action_dist_inputs":[0.3530765474,-0.3593810499],"value_targets":31.7445411682} +{"eps_id":1277027834,"obs":[-0.2741465569,0.156805262,0.0141020548,0.0474249795],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2710104585,0.3517221808,0.0150505537,-0.2407754809],"action_prob":0.7227014303,"action_logp":-0.3247590959,"action_dist_inputs":[-0.4801689088,0.4777323008],"value_targets":31.0550918579} +{"eps_id":1277027834,"obs":[-0.2710104585,0.3517221808,0.0150505537,-0.2407754809],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2639760077,0.1563885063,0.0102350442,0.0566165484],"action_prob":0.6623330116,"action_logp":-0.411986798,"action_dist_inputs":[0.3337676525,-0.3399407268],"value_targets":30.3586788177} +{"eps_id":1277027834,"obs":[-0.2639760077,0.1563885063,0.0102350442,0.0566165484],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.260848254,0.3513622284,0.011367376,-0.2328196466],"action_prob":0.7292324901,"action_logp":-0.3157626688,"action_dist_inputs":[-0.4964995384,0.4942325652],"value_targets":29.6552295685} 
+{"eps_id":1277027834,"obs":[-0.260848254,0.3513622284,0.011367376,-0.2328196466],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2538209856,0.1560797095,0.0067109824,0.0634271353],"action_prob":0.6569433808,"action_logp":-0.4201574326,"action_dist_inputs":[0.3218235075,-0.327878803],"value_targets":28.9446773529} +{"eps_id":1277027834,"obs":[-0.2538209856,0.1560797095,0.0067109824,0.0634271353],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2506994009,0.3511047959,0.0079795253,-0.2271309048],"action_prob":0.7333573699,"action_logp":-0.3101221621,"action_dist_inputs":[-0.5069243908,0.5047992468],"value_targets":28.2269458771} +{"eps_id":1277027834,"obs":[-0.2506994009,0.3511047959,0.0079795253,-0.2271309048],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2436773032,0.5461118221,0.0034369072,-0.5172861814],"action_prob":0.345282644,"action_logp":-1.0633919239,"action_dist_inputs":[0.3169445097,-0.3228957653],"value_targets":27.5019664764} +{"eps_id":1277027834,"obs":[-0.2436773032,0.5461118221,0.0034369072,-0.5172861814],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.232755065,0.3509416282,-0.0069088158,-0.2235221714],"action_prob":0.8880056143,"action_logp":-0.1187772229,"action_dist_inputs":[1.0312223434,-1.0393072367],"value_targets":26.7696628571} +{"eps_id":1277027834,"obs":[-0.232755065,0.3509416282,-0.0069088158,-0.2235221714],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2257362306,0.1559191048,-0.0113792596,0.0669734627],"action_prob":0.6777232289,"action_logp":-0.3890163004,"action_dist_inputs":[0.3687340915,-0.374594152],"value_targets":26.0299625397} 
+{"eps_id":1277027834,"obs":[-0.2257362306,0.1559191048,-0.0113792596,0.0669734627],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2226178497,-0.0390378684,-0.0100397905,0.3560445607],"action_prob":0.284265995,"action_logp":-1.2578449249,"action_dist_inputs":[-0.4627113938,0.4606869519],"value_targets":25.2827911377} +{"eps_id":1277027834,"obs":[-0.2226178497,-0.0390378684,-0.0100397905,0.3560445607],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2233986109,0.1562253833,-0.0029188991,0.0602128021],"action_prob":0.88657552,"action_logp":-0.1203889996,"action_dist_inputs":[-1.0271277428,1.0291012526],"value_targets":24.5280704498} +{"eps_id":1277027834,"obs":[-0.2233986109,0.1562253833,-0.0029188991,0.0602128021],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2202741057,0.3513890505,-0.001714643,-0.2333896309],"action_prob":0.7189770937,"action_logp":-0.3299257457,"action_dist_inputs":[-0.4706998467,0.4686936736],"value_targets":23.7657279968} +{"eps_id":1277027834,"obs":[-0.2202741057,0.3513890505,-0.001714643,-0.2333896309],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2132463157,0.5465354919,-0.0063824356,-0.5266129375],"action_prob":0.3140417337,"action_logp":-1.158229351,"action_dist_inputs":[0.3877409697,-0.3935498893],"value_targets":22.9956855774} +{"eps_id":1277027834,"obs":[-0.2132463157,0.5465354919,-0.0063824356,-0.5266129375],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2023156136,0.3515039086,-0.0169146936,-0.2359479517],"action_prob":0.8992025256,"action_logp":-0.1062470153,"action_dist_inputs":[1.0902090073,-1.0981853008],"value_targets":22.2178649902} 
+{"eps_id":1277027834,"obs":[-0.2023156136,0.3515039086,-0.0169146936,-0.2359479517],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1952855289,0.1566276401,-0.021633653,0.0513519384],"action_prob":0.7181091309,"action_logp":-0.3311337531,"action_dist_inputs":[0.4646718502,-0.4704295993],"value_targets":21.4321861267} +{"eps_id":1277027834,"obs":[-0.1952855289,0.1566276401,-0.021633653,0.0513519384],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.192152977,0.3520530164,-0.0206066146,-0.2480773181],"action_prob":0.680544734,"action_logp":-0.3848617375,"action_dist_inputs":[-0.3791704774,0.3771059513],"value_targets":20.6385707855} +{"eps_id":1277027834,"obs":[-0.192152977,0.3520530164,-0.0206066146,-0.2480773181],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.185111925,0.157231316,-0.0255681593,0.0380352624],"action_prob":0.7428696752,"action_logp":-0.2972346246,"action_dist_inputs":[0.5275791883,-0.5333583951],"value_targets":19.8369407654} +{"eps_id":1277027834,"obs":[-0.185111925,0.157231316,-0.0255681593,0.0380352624],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1819673032,-0.0375148281,-0.024807455,0.3225427866],"action_prob":0.3462465107,"action_logp":-1.0606043339,"action_dist_inputs":[-0.3188806474,0.3166988492],"value_targets":19.0272140503} +{"eps_id":1277027834,"obs":[-0.1819673032,-0.0375148281,-0.024807455,0.3225427866],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1827175915,0.1579514295,-0.0183565989,0.0221409649],"action_prob":0.8728330135,"action_logp":-0.1360110044,"action_dist_inputs":[-0.9622367024,0.9640070796],"value_targets":18.2093067169} 
+{"eps_id":1277027834,"obs":[-0.1827175915,0.1579514295,-0.0183565989,0.0221409649],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1795585603,0.3533317745,-0.0179137792,-0.2762766778],"action_prob":0.6406608224,"action_logp":-0.4452550709,"action_dist_inputs":[-0.290263176,0.2879704535],"value_targets":17.3831367493} +{"eps_id":1277027834,"obs":[-0.1795585603,0.3533317745,-0.0179137792,-0.2762766778],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.172491923,0.5487046242,-0.0234393142,-0.5745553374],"action_prob":0.2225408107,"action_logp":-1.5026447773,"action_dist_inputs":[0.6225302815,-0.6283904314],"value_targets":16.5486240387} +{"eps_id":1277027834,"obs":[-0.172491923,0.5487046242,-0.0234393142,-0.5745553374],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1615178287,0.3539189994,-0.0349304192,-0.2893476784],"action_prob":0.9231029153,"action_logp":-0.0800145715,"action_dist_inputs":[1.2386120558,-1.2466601133],"value_targets":15.7056808472} +{"eps_id":1277027834,"obs":[-0.1615178287,0.3539189994,-0.0349304192,-0.2893476784],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1544394493,0.1593121141,-0.0407173745,-0.0078828763],"action_prob":0.8149595261,"action_logp":-0.2046168596,"action_dist_inputs":[0.7383409142,-0.7442230582],"value_targets":14.8542232513} +{"eps_id":1277027834,"obs":[-0.1544394493,0.1593121141,-0.0407173745,-0.0078828763],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1512532085,-0.0352029502,-0.0408750325,0.2716802955],"action_prob":0.4598164856,"action_logp":-0.7769278288,"action_dist_inputs":[-0.0818343386,0.0792472884],"value_targets":13.9941644669} 
+{"eps_id":1277027834,"obs":[-0.1512532085,-0.0352029502,-0.0408750325,0.2716802955],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1519572735,0.1604776978,-0.0354414247,-0.0336094312],"action_prob":0.8404560685,"action_logp":-0.1738106012,"action_dist_inputs":[-0.8301721215,0.8314530253],"value_targets":13.125418663} +{"eps_id":1277027834,"obs":[-0.1519572735,0.1604776978,-0.0354414247,-0.0336094312],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1487477124,-0.0341185518,-0.0361136161,0.2476840466],"action_prob":0.5002001524,"action_logp":-0.6927469373,"action_dist_inputs":[-0.0010051378,-0.0018058093],"value_targets":12.2478981018} +{"eps_id":1277027834,"obs":[-0.1487477124,-0.0341185518,-0.0361136161,0.2476840466],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1494300961,0.1615000516,-0.0311599337,-0.0561676286],"action_prob":0.8281047344,"action_logp":-0.1886156648,"action_dist_inputs":[-0.7855926156,0.7866616845],"value_targets":11.3615131378} +{"eps_id":1277027834,"obs":[-0.1494300961,0.1615000516,-0.0311599337,-0.0561676286],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1462000906,0.357054621,-0.0322832875,-0.3585166037],"action_prob":0.4629915953,"action_logp":-0.7700463533,"action_dist_inputs":[0.0726523474,-0.0756524131],"value_targets":10.4661741257} +{"eps_id":1277027834,"obs":[-0.1462000906,0.357054621,-0.0322832875,-0.3585166037],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1390589923,0.1624061167,-0.0394536182,-0.0761856437],"action_prob":0.8695323467,"action_logp":-0.1397997588,"action_dist_inputs":[0.9453015924,-0.9515284896],"value_targets":9.5617923737} 
+{"eps_id":1277027834,"obs":[-0.1390589923,0.1624061167,-0.0394536182,-0.0761856437],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.135810867,-0.0321286805,-0.0409773327,0.2037931085],"action_prob":0.596955657,"action_logp":-0.5159124732,"action_dist_inputs":[0.1948124915,-0.1979837716],"value_targets":8.6482753754} +{"eps_id":1277027834,"obs":[-0.135810867,-0.0321286805,-0.0409773327,0.2037931085],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1364534497,0.1635545939,-0.0369014703,-0.1015293673],"action_prob":0.7862030268,"action_logp":-0.2405401915,"action_dist_inputs":[-0.6507787704,0.6514096856],"value_targets":7.7255306244} +{"eps_id":1277027834,"obs":[-0.1364534497,0.1635545939,-0.0369014703,-0.1015293673],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1331823617,-0.0310196038,-0.0389320552,0.1792866737],"action_prob":0.6411325336,"action_logp":-0.444519043,"action_dist_inputs":[0.2884488702,-0.291834265],"value_targets":6.7934651375} +{"eps_id":1277027834,"obs":[-0.1331823617,-0.0310196038,-0.0389320552,0.1792866737],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1338027418,0.1646372229,-0.0353463218,-0.1254193038],"action_prob":0.7618478537,"action_logp":-0.2720084488,"action_dist_inputs":[-0.5812222958,0.5816144943],"value_targets":5.8519849777} +{"eps_id":1277027834,"obs":[-0.1338027418,0.1646372229,-0.0353463218,-0.1254193038],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1305100024,0.3602472544,-0.0378547087,-0.42904073],"action_prob":0.3181686103,"action_logp":-1.1451737881,"action_dist_inputs":[0.3793074489,-0.3828933537],"value_targets":4.9009947777} 
+{"eps_id":1277027834,"obs":[-0.1305100024,0.3602472544,-0.0378547087,-0.42904073],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.12330506,0.5558843017,-0.0464355238,-0.733412683],"action_prob":0.0932254717,"action_logp":-2.3727343082,"action_dist_inputs":[1.1341100931,-1.1407629251],"value_targets":3.9403989315} +{"eps_id":1277027834,"obs":[-0.12330506,0.5558843017,-0.0464355238,-0.733412683],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1121873707,0.3614336252,-0.0611037761,-0.4556982219],"action_prob":0.9495974183,"action_logp":-0.0517171323,"action_dist_inputs":[1.4635642767,-1.4724317789],"value_targets":2.970099926} +{"eps_id":1277027834,"obs":[-0.1121873707,0.3614336252,-0.0611037761,-0.4556982219],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1049586982,0.1672264189,-0.0702177435,-0.1828844249],"action_prob":0.9226163626,"action_logp":-0.0805417597,"action_dist_inputs":[1.2357803583,-1.2426584959],"value_targets":1.9900000095} +{"eps_id":1277027834,"obs":[-0.1049586982,0.1672264189,-0.0702177435,-0.1828844249],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.1016141698,-0.026824208,-0.0738754272,0.0868471786],"action_prob":0.8159505129,"action_logp":-0.2034015357,"action_dist_inputs":[0.7425119281,-0.7466370463],"value_targets":1.0} +{"eps_id":1834071864,"obs":[0.0168847647,0.0124382693,0.036739327,0.0435316823],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0171335302,-0.1831907183,0.0376099609,0.3475760818],"action_prob":0.3008689284,"action_logp":-1.2010805607,"action_dist_inputs":[-0.4212169051,0.4219466448],"value_targets":86.6020355225} 
+{"eps_id":1834071864,"obs":[0.0171335302,-0.1831907183,0.0376099609,0.3475760818],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0134697165,0.0113766575,0.044561483,0.0669861212],"action_prob":0.9069843888,"action_logp":-0.0976300314,"action_dist_inputs":[-1.1364084482,1.1409502029],"value_targets":86.4666976929} +{"eps_id":1834071864,"obs":[0.0134697165,0.0113766575,0.044561483,0.0669861212],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0136972498,0.2058323324,0.0459012054,-0.2113111019],"action_prob":0.7475781441,"action_logp":-0.2909164429,"action_dist_inputs":[-0.5423446894,0.5433924794],"value_targets":86.3300018311} +{"eps_id":1834071864,"obs":[0.0136972498,0.2058323324,0.0459012054,-0.2113111019],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0178138968,0.0100851757,0.0416749828,0.0954905897],"action_prob":0.6711040139,"action_logp":-0.3988311589,"action_dist_inputs":[0.3552331924,-0.3579494059],"value_targets":86.1919174194} +{"eps_id":1834071864,"obs":[0.0178138968,0.0100851757,0.0416749828,0.0954905897],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0180155989,0.2045858055,0.0435847938,-0.1837581396],"action_prob":0.7795708776,"action_logp":-0.2490116954,"action_dist_inputs":[-0.6308786869,0.6322885752],"value_targets":86.052444458} +{"eps_id":1834071864,"obs":[0.0180155989,0.2045858055,0.0435847938,-0.1837581396],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0221073162,0.0088681979,0.039909631,0.1223493516],"action_prob":0.624874115,"action_logp":-0.4702050984,"action_dist_inputs":[0.2539339662,-0.256354481],"value_targets":85.9115600586} 
+{"eps_id":1834071864,"obs":[0.0221073162,0.0088681979,0.039909631,0.1223493516],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0222846791,0.2033963203,0.0423566177,-0.1574801654],"action_prob":0.8062108159,"action_logp":-0.2154100388,"action_dist_inputs":[-0.711905539,0.7136684656],"value_targets":85.7692489624} +{"eps_id":1834071864,"obs":[0.0222846791,0.2033963203,0.0423566177,-0.1574801654],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0263526067,0.3978870511,0.0392070152,-0.4365054667],"action_prob":0.4247383177,"action_logp":-0.8562820554,"action_dist_inputs":[0.1506161392,-0.1527357548],"value_targets":85.62550354} +{"eps_id":1834071864,"obs":[0.0263526067,0.3978870511,0.0392070152,-0.4365054667],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0343103483,0.2022326738,0.0304769054,-0.1317255348],"action_prob":0.8650363088,"action_logp":-0.1449838281,"action_dist_inputs":[0.9262985587,-0.9314671159],"value_targets":85.4803085327} +{"eps_id":1834071864,"obs":[0.0343103483,0.2022326738,0.0304769054,-0.1317255348],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0383550003,0.0066877059,0.0278423931,0.1704145223],"action_prob":0.5499870181,"action_logp":-0.5978605747,"action_dist_inputs":[0.0993989781,-0.1012193933],"value_targets":85.3336486816} +{"eps_id":1834071864,"obs":[0.0383550003,0.0066877059,0.0278423931,0.1704145223],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0384887531,0.2014003098,0.0312506855,-0.1133565828],"action_prob":0.8363133669,"action_logp":-0.178751871,"action_dist_inputs":[-0.8143339753,0.8167160153],"value_targets":85.1855010986} 
+{"eps_id":1834071864,"obs":[0.0384887531,0.2014003098,0.0312506855,-0.1133565828],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0425167605,0.0058448315,0.028983552,0.1890196204],"action_prob":0.509878099,"action_logp":-0.6735835671,"action_dist_inputs":[0.0189722218,-0.0205453783],"value_targets":85.0358581543} +{"eps_id":1834071864,"obs":[0.0425167605,0.0058448315,0.028983552,0.1890196204],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0426336564,0.2005403936,0.0327639468,-0.0943811014],"action_prob":0.8504847288,"action_logp":-0.1619488448,"action_dist_inputs":[-0.8678770065,0.8705310225],"value_targets":84.8847045898} +{"eps_id":1834071864,"obs":[0.0426336564,0.2005403936,0.0327639468,-0.0943811014],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0466444641,0.0049645379,0.0308763236,0.2084561139],"action_prob":0.4668383896,"action_logp":-0.7617721558,"action_dist_inputs":[-0.067073226,0.0657683313],"value_targets":84.7320251465} +{"eps_id":1834071864,"obs":[0.0466444641,0.0049645379,0.0308763236,0.2084561139],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0467437543,0.199631691,0.035045445,-0.0743292421],"action_prob":0.8636972904,"action_logp":-0.1465329379,"action_dist_inputs":[-0.9217016697,0.9246420264],"value_targets":84.5778045654} +{"eps_id":1834071864,"obs":[0.0467437543,0.199631691,0.035045445,-0.0743292421],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0507363901,0.0040253103,0.0335588604,0.2292014211],"action_prob":0.4209465683,"action_logp":-0.8652493954,"action_dist_inputs":[-0.1599481106,0.1589407325],"value_targets":84.4220275879} 
+{"eps_id":1834071864,"obs":[0.0507363901,0.0040253103,0.0335588604,0.2292014211],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0508168973,0.198652029,0.0381428897,-0.0527098253],"action_prob":0.8760380745,"action_logp":-0.1323456913,"action_dist_inputs":[-0.9760949612,0.9793403745],"value_targets":84.2646713257} +{"eps_id":1834071864,"obs":[0.0508168973,0.198652029,0.0381428897,-0.0527098253],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.054789938,0.3932068944,0.0370886922,-0.3331185579],"action_prob":0.6272352934,"action_logp":-0.4664335847,"action_dist_inputs":[-0.2605225444,0.2598514855],"value_targets":84.1057281494} +{"eps_id":1834071864,"obs":[0.054789938,0.3932068944,0.0370886922,-0.3331185579],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.062654078,0.5877818465,0.0304263216,-0.613878727],"action_prob":0.2240364254,"action_logp":-1.4959466457,"action_dist_inputs":[0.6190615296,-0.6232354641],"value_targets":83.9451828003} +{"eps_id":1834071864,"obs":[0.062654078,0.5877818465,0.0304263216,-0.613878727],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0744097084,0.3922482431,0.0181487463,-0.3117701113],"action_prob":0.9146029353,"action_logp":-0.0892652869,"action_dist_inputs":[1.1822710037,-1.1889071465],"value_targets":83.7830123901} +{"eps_id":1834071864,"obs":[0.0744097084,0.3922482431,0.0181487463,-0.3117701113],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.082254678,0.1968725026,0.0119133443,-0.0134193059],"action_prob":0.7770548463,"action_logp":-0.2522443235,"action_dist_inputs":[0.6223208904,-0.6262643933],"value_targets":83.6192016602} 
+{"eps_id":1834071864,"obs":[0.082254678,0.1968725026,0.0119133443,-0.0134193059],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0861921236,0.391821593,0.0116449585,-0.3023197353],"action_prob":0.6442992091,"action_logp":-0.4395920634,"action_dist_inputs":[-0.2971011698,0.2969721854],"value_targets":83.453742981} +{"eps_id":1834071864,"obs":[0.0861921236,0.391821593,0.0116449585,-0.3023197353],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0940285549,0.1965356171,0.0055985632,-0.0059871217],"action_prob":0.7751124501,"action_logp":-0.2547471523,"action_dist_inputs":[0.616802454,-0.6206049919],"value_targets":83.286605835} +{"eps_id":1834071864,"obs":[0.0940285549,0.1965356171,0.0055985632,-0.0059871217],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0979592726,0.3915768266,0.0054788208,-0.2968983948],"action_prob":0.6441901326,"action_logp":-0.4397613406,"action_dist_inputs":[-0.2967927456,0.2968046367],"value_targets":83.1177825928} +{"eps_id":1834071864,"obs":[0.0979592726,0.3915768266,0.0054788208,-0.2968983948],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1057908088,0.1963772178,-0.0004591468,-0.0024925822],"action_prob":0.777597487,"action_logp":-0.2515462637,"action_dist_inputs":[0.6240142584,-0.6277058721],"value_targets":82.9472579956} +{"eps_id":1834071864,"obs":[0.1057908088,0.1963772178,-0.0004591468,-0.0024925822],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1097183526,0.391505748,-0.0005089985,-0.2953203321],"action_prob":0.6373081803,"action_logp":-0.4505019784,"action_dist_inputs":[-0.2817979157,0.281901896],"value_targets":82.7750091553} 
+{"eps_id":1834071864,"obs":[0.1097183526,0.391505748,-0.0005089985,-0.2953203321],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1175484657,0.1963910609,-0.0064154053,-0.0027979868],"action_prob":0.7843122482,"action_logp":-0.2429480255,"action_dist_inputs":[0.643682003,-0.6472935677],"value_targets":82.601020813} +{"eps_id":1834071864,"obs":[0.1175484657,0.1963910609,-0.0064154053,-0.0027979868],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1214762852,0.0013616952,-0.0064713648,0.2878539264],"action_prob":0.3767136931,"action_logp":-0.9762698412,"action_dist_inputs":[-0.2516857088,0.2518348396],"value_targets":82.4252700806} +{"eps_id":1834071864,"obs":[0.1214762852,0.0013616952,-0.0064713648,0.2878539264],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1215035245,0.1965753287,-0.0007142866,-0.0068629738],"action_prob":0.8826283813,"action_logp":-0.1248510033,"action_dist_inputs":[-1.0067249537,1.0108345747],"value_targets":82.2477493286} +{"eps_id":1834071864,"obs":[0.1215035245,0.1965753287,-0.0007142866,-0.0068629738],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1254350245,0.3917075098,-0.0008515461,-0.2997711897],"action_prob":0.6268932819,"action_logp":-0.4669789672,"action_dist_inputs":[-0.2593653798,0.2595464587],"value_targets":82.0684280396} +{"eps_id":1834071864,"obs":[0.1254350245,0.3917075098,-0.0008515461,-0.2997711897],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1332691759,0.5868415833,-0.0068469699,-0.5927225351],"action_prob":0.2094208151,"action_logp":-1.5634095669,"action_dist_inputs":[0.6624522805,-0.6659678817],"value_targets":81.8873062134} 
+{"eps_id":1834071864,"obs":[0.1332691759,0.5868415833,-0.0068469699,-0.5927225351],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1450060159,0.7820587158,-0.0187014211,-0.8875543475],"action_prob":0.08016821,"action_logp":-2.5236282349,"action_dist_inputs":[1.2168844938,-1.2231792212],"value_targets":81.7043457031} +{"eps_id":1834071864,"obs":[0.1450060159,0.7820587158,-0.0187014211,-0.8875543475],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1606471837,0.5871955156,-0.0364525095,-0.6008085608],"action_prob":0.9482873678,"action_logp":-0.0530976802,"action_dist_inputs":[1.4502171278,-1.4587386847],"value_targets":81.5195465088} +{"eps_id":1834071864,"obs":[0.1606471837,0.5871955156,-0.0364525095,-0.6008085608],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1723911017,0.7828079462,-0.0484686792,-0.9047469497],"action_prob":0.0721985027,"action_logp":-2.6283359528,"action_dist_inputs":[1.273496151,-1.2799025774],"value_targets":81.3328704834} +{"eps_id":1834071864,"obs":[0.1723911017,0.7828079462,-0.0484686792,-0.9047469497],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.18804726,0.9785516262,-0.0665636212,-1.2122619152],"action_prob":0.0494844951,"action_logp":-3.0060958862,"action_dist_inputs":[1.4733046293,-1.4820406437],"value_targets":81.144317627} +{"eps_id":1834071864,"obs":[0.18804726,0.9785516262,-0.0665636212,-1.2122619152],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2076182812,0.7843489647,-0.0908088535,-0.9411582351],"action_prob":0.9574307203,"action_logp":-0.0435018875,"action_dist_inputs":[1.5513333082,-1.5617882013],"value_targets":80.9538574219} 
+{"eps_id":1834071864,"obs":[0.2076182812,0.7843489647,-0.0908088535,-0.9411582351],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2233052701,0.5905604362,-0.1096320227,-0.6783342957],"action_prob":0.9528695345,"action_logp":-0.0482772738,"action_dist_inputs":[1.4987125397,-1.507846117],"value_targets":80.76146698} +{"eps_id":1834071864,"obs":[0.2233052701,0.5905604362,-0.1096320227,-0.6783342957],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2351164818,0.3971185386,-0.1231987104,-0.4220807254],"action_prob":0.9433848262,"action_logp":-0.0582810007,"action_dist_inputs":[1.4029655457,-1.410230875],"value_targets":80.5671386719} +{"eps_id":1834071864,"obs":[0.2351164818,0.3971185386,-0.1231987104,-0.4220807254],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2430588454,0.2039377093,-0.1316403151,-0.1706346273],"action_prob":0.9204140902,"action_logp":-0.0829316154,"action_dist_inputs":[1.2216278315,-1.2263592482],"value_targets":80.3708496094} +{"eps_id":1834071864,"obs":[0.2430588454,0.2039377093,-0.1316403151,-0.1706346273],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2471376061,0.0109215444,-0.1350530088,0.0777940676],"action_prob":0.8507450223,"action_logp":-0.1616428047,"action_dist_inputs":[0.8694486022,-0.8710075021],"value_targets":80.1725769043} +{"eps_id":1834071864,"obs":[0.2471376061,0.0109215444,-0.1350530088,0.0777940676],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2473560274,0.2076950818,-0.1334971339,-0.25426355],"action_prob":0.3762532175,"action_logp":-0.9774929285,"action_dist_inputs":[0.2536937594,-0.251788348],"value_targets":79.9722976685} 
+{"eps_id":1834071864,"obs":[0.2473560274,0.2076950818,-0.1334971339,-0.25426355],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2515099347,0.0147067737,-0.1385824084,-0.006488462],"action_prob":0.8894329071,"action_logp":-0.1171712279,"action_dist_inputs":[1.0412133932,-1.0437474251],"value_targets":79.7699966431} +{"eps_id":1834071864,"obs":[0.2515099347,0.0147067737,-0.1385824084,-0.006488462],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2518040538,0.2115160376,-0.1387121677,-0.3394850492],"action_prob":0.2460002899,"action_logp":-1.4024225473,"action_dist_inputs":[0.560493052,-0.5595661402],"value_targets":79.5656509399} +{"eps_id":1834071864,"obs":[0.2518040538,0.2115160376,-0.1387121677,-0.3394850492],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.256034404,0.0186123103,-0.1455018669,-0.0935594589],"action_prob":0.911583066,"action_logp":-0.0925725549,"action_dist_inputs":[1.164784193,-1.1683357954],"value_targets":79.3592453003} +{"eps_id":1834071864,"obs":[0.256034404,0.0186123103,-0.1455018669,-0.0935594589],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.256406635,-0.1741569787,-0.1473730654,0.1499122977],"action_prob":0.8357921243,"action_logp":-0.1793753803,"action_dist_inputs":[0.8135097027,-0.813736856],"value_targets":79.1507568359} +{"eps_id":1834071864,"obs":[0.256406635,-0.1741569787,-0.1473730654,0.1499122977],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2529234886,0.0227341894,-0.1443748176,-0.1853940934],"action_prob":0.3881077468,"action_logp":-0.9464722872,"action_dist_inputs":[0.2292969227,-0.2259762585],"value_targets":78.9401550293} 
+{"eps_id":1834071864,"obs":[0.2529234886,0.0227341894,-0.1443748176,-0.1853940934],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2533781826,-0.1700586826,-0.1480827034,0.0584876239],"action_prob":0.8780505657,"action_logp":-0.1300510913,"action_dist_inputs":[0.9863194227,-0.987778008],"value_targets":78.727432251} +{"eps_id":1834071864,"obs":[0.2533781826,-0.1700586826,-0.1480827034,0.0584876239],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2499770075,-0.3627813458,-0.1469129473,0.3010304272],"action_prob":0.7414741516,"action_logp":-0.2991149426,"action_dist_inputs":[0.5278668404,-0.5257778764],"value_targets":78.5125579834} +{"eps_id":1834071864,"obs":[0.2499770075,-0.3627813458,-0.1469129473,0.3010304272],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2427213788,-0.5555372238,-0.1408923417,0.5440124273],"action_prob":0.4386736751,"action_logp":-0.8239994645,"action_dist_inputs":[-0.1205439568,0.1260026246],"value_targets":78.2955093384} +{"eps_id":1834071864,"obs":[0.2427213788,-0.5555372238,-0.1408923417,0.5440124273],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2316106409,-0.3587458134,-0.130012095,0.210464865],"action_prob":0.8173651695,"action_logp":-0.2016693354,"action_dist_inputs":[-0.7455502748,0.7530469298],"value_targets":78.0762710571} +{"eps_id":1834071864,"obs":[0.2316106409,-0.3587458134,-0.130012095,0.210464865],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2244357169,-0.5517923236,-0.1258027852,0.459474802],"action_prob":0.5667242408,"action_logp":-0.5678824186,"action_dist_inputs":[0.1364937872,-0.1320047677],"value_targets":77.8548202515} 
+{"eps_id":1834071864,"obs":[0.2244357169,-0.5517923236,-0.1258027852,0.459474802],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2133998722,-0.7449324131,-0.1166132987,0.7100068927],"action_prob":0.2542560101,"action_logp":-1.3694136143,"action_dist_inputs":[-0.5344665647,0.5415741205],"value_targets":77.6311340332} +{"eps_id":1834071864,"obs":[0.2133998722,-0.7449324131,-0.1166132987,0.7100068927],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1985012293,-0.5484050512,-0.1024131551,0.3830113113],"action_prob":0.8917451501,"action_logp":-0.1145749241,"action_dist_inputs":[-1.0501304865,1.0585614443],"value_targets":77.4051818848} +{"eps_id":1834071864,"obs":[0.1985012293,-0.5484050512,-0.1024131551,0.3830113113],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1875331253,-0.3519894481,-0.0947529301,0.0598747507],"action_prob":0.6768912077,"action_logp":-0.3902446926,"action_dist_inputs":[-0.3664559424,0.3730656505],"value_targets":77.1769561768} +{"eps_id":1834071864,"obs":[0.1875331253,-0.3519894481,-0.0947529301,0.0598747507],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.180493325,-0.5456341505,-0.0935554355,0.32122311],"action_prob":0.7272676229,"action_logp":-0.3184607625,"action_dist_inputs":[0.4915626347,-0.489240855],"value_targets":76.9464187622} +{"eps_id":1834071864,"obs":[0.180493325,-0.5456341505,-0.0935554355,0.32122311],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1695806533,-0.73930794,-0.0871309713,0.5829981565],"action_prob":0.4070624113,"action_logp":-0.8987887502,"action_dist_inputs":[-0.185038656,0.1910839528],"value_targets":76.7135543823} 
+{"eps_id":1834071864,"obs":[0.1695806533,-0.73930794,-0.0871309713,0.5829981565],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1547944844,-0.5430802703,-0.0754710138,0.2641891837],"action_prob":0.8400312066,"action_logp":-0.1743162274,"action_dist_inputs":[-0.8252552152,0.8332054019],"value_targets":76.4783401489} +{"eps_id":1834071864,"obs":[0.1547944844,-0.5430802703,-0.0754710138,0.2641891837],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.143932879,-0.3469667733,-0.0701872259,-0.0513111129],"action_prob":0.5294644833,"action_logp":-0.6358891726,"action_dist_inputs":[-0.0563003644,0.0616943724],"value_targets":76.2407455444} +{"eps_id":1834071864,"obs":[0.143932879,-0.3469667733,-0.0701872259,-0.0513111129],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1369935572,-0.5410158038,-0.0712134466,0.2184276432],"action_prob":0.8037487268,"action_logp":-0.2184686065,"action_dist_inputs":[0.7051765919,-0.7047139406],"value_targets":76.0007553101} +{"eps_id":1834071864,"obs":[0.1369935572,-0.5410158038,-0.0712134466,0.2184276432],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.126173228,-0.3449519277,-0.0668448955,-0.0958417952],"action_prob":0.4591974616,"action_logp":-0.7782749534,"action_dist_inputs":[0.0841390863,-0.0794347748],"value_targets":75.7583389282} +{"eps_id":1834071864,"obs":[0.126173228,-0.3449519277,-0.0668448955,-0.0958417952],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.119274199,-0.5390552878,-0.0687617362,0.1750252843],"action_prob":0.8292778134,"action_logp":-0.1872000396,"action_dist_inputs":[0.79008919,-0.7904286385],"value_targets":75.5134735107} 
+{"eps_id":1834071864,"obs":[0.119274199,-0.5390552878,-0.0687617362,0.1750252843],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1084930897,-0.3430200815,-0.0652612299,-0.1385326833],"action_prob":0.3940424323,"action_logp":-0.9312966466,"action_dist_inputs":[0.2171451449,-0.2132061571],"value_targets":75.26612854} +{"eps_id":1834071864,"obs":[0.1084930897,-0.3430200815,-0.0652612299,-0.1385326833],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1016326845,-0.5371495485,-0.0680318773,0.1328684688],"action_prob":0.8494207859,"action_logp":-0.1632005721,"action_dist_inputs":[0.8644840717,-0.8655813932],"value_targets":75.0162963867} +{"eps_id":1834071864,"obs":[0.1016326845,-0.5371495485,-0.0680318773,0.1328684688],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0908896998,-0.7312343717,-0.0653745085,0.4033362865],"action_prob":0.6646649837,"action_logp":-0.4084721208,"action_dist_inputs":[0.3436306715,-0.3405225277],"value_targets":74.7639312744} +{"eps_id":1834071864,"obs":[0.0908896998,-0.7312343717,-0.0653745085,0.4033362865],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0762650073,-0.535249114,-0.0573077872,0.0907799378],"action_prob":0.6694805622,"action_logp":-0.4012531042,"action_dist_inputs":[-0.3495700061,0.3562669456],"value_targets":74.5090255737} +{"eps_id":1834071864,"obs":[0.0762650073,-0.535249114,-0.0573077872,0.0907799378],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.065560028,-0.7295048237,-0.0554921851,0.3648458719],"action_prob":0.7027746439,"action_logp":-0.3527190387,"action_dist_inputs":[0.4314323068,-0.4291131198],"value_targets":74.2515411377} 
+{"eps_id":1834071864,"obs":[0.065560028,-0.7295048237,-0.0554921851,0.3648458719],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0509699285,-0.5336399674,-0.048195269,0.0551944859],"action_prob":0.6285217404,"action_logp":-0.464384675,"action_dist_inputs":[-0.259819448,0.2660608888],"value_targets":73.9914550781} +{"eps_id":1834071864,"obs":[0.0509699285,-0.5336399674,-0.048195269,0.0551944859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.040297132,-0.7280389071,-0.0470913798,0.332290411],"action_prob":0.7314304709,"action_logp":-0.3127530813,"action_dist_inputs":[0.5017533898,-0.5001389384],"value_targets":73.7287445068} +{"eps_id":1834071864,"obs":[0.040297132,-0.7280389071,-0.0470913798,0.332290411],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0257363524,-0.5322794318,-0.0404455699,0.0251369346],"action_prob":0.5923479795,"action_logp":-0.5236610174,"action_dist_inputs":[-0.1839508563,0.1897295266],"value_targets":73.4633712769} +{"eps_id":1834071864,"obs":[0.0257363524,-0.5322794318,-0.0404455699,0.0251369346],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0150907654,-0.3366014659,-0.0399428345,-0.2800276875],"action_prob":0.2467935085,"action_logp":-1.3992033005,"action_dist_inputs":[0.5583894849,-0.557397902],"value_targets":73.1953277588} +{"eps_id":1834071864,"obs":[0.0150907654,-0.3366014659,-0.0399428345,-0.2800276875],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0083587356,-0.5311315656,-0.0455433875,-0.0002053708],"action_prob":0.8884947896,"action_logp":-0.1182264984,"action_dist_inputs":[1.0360325575,-1.039424777],"value_targets":72.9245758057} 
+{"eps_id":1834071864,"obs":[0.0083587356,-0.5311315656,-0.0455433875,-0.0002053708],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0022638952,-0.7255717516,-0.0455474928,0.27776739],"action_prob":0.7798060775,"action_logp":-0.2487099916,"action_dist_inputs":[0.6324426532,-0.6320941448],"value_targets":72.6510848999} +{"eps_id":1834071864,"obs":[-0.0022638952,-0.7255717516,-0.0455474928,0.27776739],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0167753305,-0.9200153351,-0.0399921462,0.5557439327],"action_prob":0.4935610294,"action_logp":-0.7061087489,"action_dist_inputs":[-0.0104943588,0.015262953],"value_targets":72.3748321533} +{"eps_id":1834071864,"obs":[-0.0167753305,-0.9200153351,-0.0399921462,0.5557439327],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0351756364,-0.72435534,-0.0288772676,0.2507340908],"action_prob":0.8028306365,"action_logp":-0.2196114957,"action_dist_inputs":[-0.6983270049,0.7057537436],"value_targets":72.0957946777} +{"eps_id":1834071864,"obs":[-0.0351756364,-0.72435534,-0.0288772676,0.2507340908],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0496627428,-0.5288332105,-0.0238625854,-0.0509156175],"action_prob":0.4939101636,"action_logp":-0.705401659,"action_dist_inputs":[0.0143363848,-0.0100242784],"value_targets":71.8139266968} +{"eps_id":1834071864,"obs":[-0.0496627428,-0.5288332105,-0.0238625854,-0.0509156175],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0602394082,-0.7236049771,-0.0248808991,0.2341439724],"action_prob":0.8014296293,"action_logp":-0.2213581055,"action_dist_inputs":[0.6973009706,-0.6979527473],"value_targets":71.5292205811} 
+{"eps_id":1834071864,"obs":[-0.0602394082,-0.7236049771,-0.0248808991,0.2341439724],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.074711509,-0.5281365514,-0.0201980192,-0.0662821755],"action_prob":0.4752360582,"action_logp":-0.7439436316,"action_dist_inputs":[0.0515238084,-0.0476129726],"value_targets":71.2416381836} +{"eps_id":1834071864,"obs":[-0.074711509,-0.5281365514,-0.0201980192,-0.0662821755],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0852742419,-0.7229632139,-0.0215236619,0.2199603468],"action_prob":0.8096572161,"action_logp":-0.211144343,"action_dist_inputs":[0.7233741283,-0.7244100571],"value_targets":70.9511489868} +{"eps_id":1834071864,"obs":[-0.0852742419,-0.7229632139,-0.0215236619,0.2199603468],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0997335017,-0.9177709818,-0.0171244554,0.5057768822],"action_prob":0.540340364,"action_logp":-0.6155560613,"action_dist_inputs":[0.0826238021,-0.0790891871],"value_targets":70.6577301025} +{"eps_id":1834071864,"obs":[-0.0997335017,-0.9177709818,-0.0171244554,0.5057768822],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1180889234,-1.1126474142,-0.0070089181,0.7930144072],"action_prob":0.2141073942,"action_logp":-1.5412775278,"action_dist_inputs":[-0.6468171477,0.6535252333],"value_targets":70.3613433838} +{"eps_id":1834071864,"obs":[-0.1180889234,-1.1126474142,-0.0070089181,0.7930144072],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1403418779,-0.9174299836,0.0088513698,0.4981348217],"action_prob":0.9118930101,"action_logp":-0.0922325775,"action_dist_inputs":[-1.1643662453,1.1726047993],"value_targets":70.061958313} 
+{"eps_id":1834071864,"obs":[-0.1403418779,-0.9174299836,0.0088513698,0.4981348217],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1586904675,-0.7224339843,0.0188140664,0.2082544714],"action_prob":0.8111034632,"action_logp":-0.2093596607,"action_dist_inputs":[-0.7253530622,0.7318431139],"value_targets":69.7595596313} +{"eps_id":1834071864,"obs":[-0.1586904675,-0.7224339843,0.0188140664,0.2082544714],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1731391549,-0.5275860429,0.0229791552,-0.0784347057],"action_prob":0.5231388807,"action_logp":-0.64790833,"action_dist_inputs":[-0.0446851887,0.047936473],"value_targets":69.4540939331} +{"eps_id":1834071864,"obs":[-0.1731391549,-0.5275860429,0.0229791552,-0.0784347057],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1836908758,-0.7230297327,0.0214104615,0.2214087248],"action_prob":0.7853196263,"action_logp":-0.2416645139,"action_dist_inputs":[0.6477689147,-0.6491715312],"value_targets":69.1455535889} +{"eps_id":1834071864,"obs":[-0.1836908758,-0.7230297327,0.0214104615,0.2214087248],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1981514692,-0.5282202363,0.0258386359,-0.0644443259],"action_prob":0.5548670292,"action_logp":-0.5890268087,"action_dist_inputs":[-0.1085478142,0.1118074879],"value_targets":68.8338928223} +{"eps_id":1834071864,"obs":[-0.1981514692,-0.5282202363,0.0258386359,-0.0644443259],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.208715871,-0.7237029672,0.0245497506,0.2362775654],"action_prob":0.76999861,"action_logp":-0.261366576,"action_dist_inputs":[0.6034901738,-0.6048129797],"value_targets":68.5190811157} 
+{"eps_id":1834071864,"obs":[-0.208715871,-0.7237029672,0.0245497506,0.2362775654],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2231899351,-0.919166863,0.0292753,0.5366020203],"action_prob":0.409014523,"action_logp":-0.894004643,"action_dist_inputs":[-0.1823786646,0.1856621504],"value_targets":68.2010955811} +{"eps_id":1834071864,"obs":[-0.2231899351,-0.919166863,0.0292753,0.5366020203],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2415732741,-0.7244685292,0.0400073417,0.2532853186],"action_prob":0.8634887934,"action_logp":-0.1467743516,"action_dist_inputs":[-0.919182539,0.9253915548],"value_targets":67.8798904419} +{"eps_id":1834071864,"obs":[-0.2415732741,-0.7244685292,0.0400073417,0.2532853186],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2560626268,-0.5299400091,0.0450730473,-0.0265149642],"action_prob":0.6542821527,"action_logp":-0.4242166281,"action_dist_inputs":[-0.3172816634,0.3206339777],"value_targets":67.5554504395} +{"eps_id":1834071864,"obs":[-0.2560626268,-0.5299400091,0.0450730473,-0.0265149642],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2666614354,-0.7256783843,0.0445427485,0.2800413966],"action_prob":0.7003751993,"action_logp":-0.3561390936,"action_dist_inputs":[0.4240937531,-0.4249911904],"value_targets":67.227722168} +{"eps_id":1834071864,"obs":[-0.2666614354,-0.7256783843,0.0445427485,0.2800413966],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2811750174,-0.5312191844,0.0501435772,0.0017331849],"action_prob":0.7073376179,"action_logp":-0.346247226,"action_dist_inputs":[-0.4395124912,0.4429759681],"value_targets":66.8966903687} 
+{"eps_id":1834071864,"obs":[-0.2811750174,-0.5312191844,0.0501435772,0.0017331849],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2917993963,-0.3368508816,0.050178241,-0.2747167647],"action_prob":0.3514116704,"action_logp":-1.0457968712,"action_dist_inputs":[0.3061298132,-0.3067099452],"value_targets":66.5623168945} +{"eps_id":1834071864,"obs":[-0.2917993963,-0.3368508816,0.050178241,-0.2747167647],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2985364199,-0.5326515436,0.0446839035,0.0333607346],"action_prob":0.8644759059,"action_logp":-0.1456318349,"action_dist_inputs":[0.9242120385,-0.9287622571],"value_targets":66.2245635986} +{"eps_id":1834071864,"obs":[-0.2985364199,-0.5326515436,0.0446839035,0.0333607346],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3091894388,-0.3381978869,0.0453511178,-0.2448958755],"action_prob":0.396040529,"action_logp":-0.9262387156,"action_dist_inputs":[0.2108516842,-0.2111388445],"value_targets":65.883392334} +{"eps_id":1834071864,"obs":[-0.3091894388,-0.3381978869,0.0453511178,-0.2448958755],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3159534037,-0.5339372754,0.040453203,0.0617399625],"action_prob":0.8521022201,"action_logp":-0.1600487977,"action_dist_inputs":[0.873426199,-0.877758801],"value_targets":65.5387802124} +{"eps_id":1834071864,"obs":[-0.3159534037,-0.5339372754,0.040453203,0.0617399625],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3266321421,-0.3394179642,0.0416880026,-0.2179102749],"action_prob":0.4411923289,"action_logp":-0.8182743788,"action_dist_inputs":[0.1181397364,-0.1181846634],"value_targets":65.1906890869} 
+{"eps_id":1834071864,"obs":[-0.3266321421,-0.3394179642,0.0416880026,-0.2179102749],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3334205151,-0.5351102948,0.0373297967,0.0876259059],"action_prob":0.8379122615,"action_logp":-0.176841855,"action_dist_inputs":[0.8193252683,-0.8234505057],"value_targets":64.8390808105} +{"eps_id":1834071864,"obs":[-0.3334205151,-0.5351102948,0.0373297967,0.0876259059],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3441227078,-0.7307469249,0.0390823148,0.3918487728],"action_prob":0.5137839317,"action_logp":-0.6659524441,"action_dist_inputs":[0.0276519861,-0.0274976827],"value_targets":64.4839172363} +{"eps_id":1834071864,"obs":[-0.3441227078,-0.7307469249,0.0390823148,0.3918487728],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3587376475,-0.5362007618,0.04691929,0.1117396653],"action_prob":0.8313272595,"action_logp":-0.1847317815,"action_dist_inputs":[-0.7956342697,0.7994284034],"value_targets":64.1251678467} +{"eps_id":1834071864,"obs":[-0.3587376475,-0.5362007618,0.04691929,0.1117396653],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3694616556,-0.731962502,0.0491540842,0.4188483953],"action_prob":0.4436417222,"action_logp":-0.8127379417,"action_dist_inputs":[-0.1130235195,0.1133716777],"value_targets":63.7627983093} +{"eps_id":1834071864,"obs":[-0.3694616556,-0.731962502,0.0491540842,0.4188483953],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.384100914,-0.9277452826,0.0575310513,0.7266132832],"action_prob":0.1417985409,"action_logp":-1.9533479214,"action_dist_inputs":[-0.8982766867,0.9021547437],"value_targets":63.3967666626} 
+{"eps_id":1834071864,"obs":[-0.384100914,-0.9277452826,0.0575310513,0.7266132832],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4026558101,-0.7334640026,0.0720633194,0.4525782764],"action_prob":0.9322711825,"action_logp":-0.0701315105,"action_dist_inputs":[-1.307715416,1.3143973351],"value_targets":63.0270347595} +{"eps_id":1834071864,"obs":[-0.4026558101,-0.7334640026,0.0720633194,0.4525782764],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4173251092,-0.5394311547,0.0811148807,0.1834527403],"action_prob":0.8873220682,"action_logp":-0.1195472628,"action_dist_inputs":[-1.0298036337,1.0338708162],"value_targets":62.6535720825} +{"eps_id":1834071864,"obs":[-0.4173251092,-0.5394311547,0.0811148807,0.1834527403],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4281137288,-0.3455578983,0.0847839341,-0.0825793445],"action_prob":0.7395635843,"action_logp":-0.301695019,"action_dist_inputs":[-0.5214167237,0.5222849846],"value_targets":62.2763366699} +{"eps_id":1834071864,"obs":[-0.4281137288,-0.3455578983,0.0847839341,-0.0825793445],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4350248873,-0.5417864323,0.083132349,0.2356027961],"action_prob":0.6040267944,"action_logp":-0.5041367412,"action_dist_inputs":[0.2098035812,-0.2124684006],"value_targets":61.8952865601} +{"eps_id":1834071864,"obs":[-0.4350248873,-0.5417864323,0.083132349,0.2356027961],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4458605945,-0.737991631,0.087844409,0.5533069968],"action_prob":0.200883761,"action_logp":-1.6050288677,"action_dist_inputs":[-0.6897696853,0.6910103559],"value_targets":61.5103912354} 
+{"eps_id":1834071864,"obs":[-0.4458605945,-0.737991631,0.087844409,0.5533069968],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4606204331,-0.5442059636,0.0989105478,0.2895423472],"action_prob":0.9185885191,"action_logp":-0.0849170312,"action_dist_inputs":[-1.2092419863,1.2140803337],"value_targets":61.1216087341} +{"eps_id":1834071864,"obs":[-0.4606204331,-0.5442059636,0.0989105478,0.2895423472],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4715045691,-0.3506233692,0.1047013924,0.0296199899],"action_prob":0.8512095213,"action_logp":-0.1610969454,"action_dist_inputs":[-0.8712229729,0.8728961349],"value_targets":60.7288970947} +{"eps_id":1834071864,"obs":[-0.4715045691,-0.3506233692,0.1047013924,0.0296199899],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4785170257,-0.5470788479,0.1052937955,0.3534154594],"action_prob":0.3574461341,"action_logp":-1.028770566,"action_dist_inputs":[-0.2940753996,0.2923904955],"value_targets":60.3322181702} +{"eps_id":1834071864,"obs":[-0.4785170257,-0.5470788479,0.1052937955,0.3534154594],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4894585907,-0.3535993695,0.1123621017,0.0957028046],"action_prob":0.8831616044,"action_logp":-0.1242470816,"action_dist_inputs":[-1.0102254152,1.0124912262],"value_targets":59.9315338135} +{"eps_id":1834071864,"obs":[-0.4894585907,-0.3535993695,0.1123621017,0.0957028046],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4965305924,-0.1602521986,0.1142761558,-0.1595241129],"action_prob":0.7447705269,"action_logp":-0.2946791351,"action_dist_inputs":[-0.5360295773,0.5348835588],"value_targets":59.526802063} 
+{"eps_id":1834071864,"obs":[-0.4965305924,-0.1602521986,0.1142761558,-0.1595241129],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4997356236,0.033064004,0.1110856757,-0.4140829742],"action_prob":0.4249121249,"action_logp":-0.8558728695,"action_dist_inputs":[0.1491820365,-0.1534584165],"value_targets":59.117980957} +{"eps_id":1834071864,"obs":[-0.4997356236,0.033064004,0.1110856757,-0.4140829742],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4990743399,0.2264505476,0.10280402,-0.6697827578],"action_prob":0.1638329327,"action_logp":-1.8089081049,"action_dist_inputs":[0.8115079999,-0.8184733391],"value_targets":58.7050323486} +{"eps_id":1834071864,"obs":[-0.4990743399,0.2264505476,0.10280402,-0.6697827578],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4945453405,0.0300608519,0.0894083604,-0.3465823829],"action_prob":0.9248770475,"action_logp":-0.078094475,"action_dist_inputs":[1.250819087,-1.2597157955],"value_targets":58.2879104614} +{"eps_id":1834071864,"obs":[-0.4945453405,0.0300608519,0.0894083604,-0.3465823829],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4939441383,-0.1662115604,0.0824767128,-0.0270986427],"action_prob":0.8004101515,"action_logp":-0.2226310074,"action_dist_inputs":[0.691154182,-0.6977053285],"value_targets":57.8665771484} +{"eps_id":1834071864,"obs":[-0.4939441383,-0.1662115604,0.0824767128,-0.0270986427],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4972683489,-0.3624134958,0.0819347426,0.2904245257],"action_prob":0.403849721,"action_logp":-0.9067124128,"action_dist_inputs":[-0.1963526756,0.193097204],"value_targets":57.4409866333} 
+{"eps_id":1834071864,"obs":[-0.4972683489,-0.3624134958,0.0819347426,0.2904245257],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5045166016,-0.168549642,0.0877432302,0.0246661566],"action_prob":0.8629422188,"action_logp":-0.1474075168,"action_dist_inputs":[-0.9196291566,0.9203162789],"value_targets":57.0110969543} +{"eps_id":1834071864,"obs":[-0.5045166016,-0.168549642,0.0877432302,0.0246661566],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5078876019,0.0252115726,0.0882365555,-0.239094153],"action_prob":0.6824882627,"action_logp":-0.3820099533,"action_dist_inputs":[-0.3840307295,0.3811998069],"value_targets":56.5768661499} +{"eps_id":1834071864,"obs":[-0.5078876019,0.0252115726,0.0882365555,-0.239094153],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5073834062,0.2189694643,0.0834546685,-0.5026924014],"action_prob":0.3347462416,"action_logp":-1.0943825245,"action_dist_inputs":[0.3404806852,-0.3463150859],"value_targets":56.1382484436} +{"eps_id":1834071864,"obs":[-0.5073834062,0.2189694643,0.0834546685,-0.5026924014],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5030040145,0.0227764975,0.0734008253,-0.1849199831],"action_prob":0.8749193549,"action_logp":-0.1336235404,"action_dist_inputs":[0.9685319662,-0.9766413569],"value_targets":55.6952018738} +{"eps_id":1834071864,"obs":[-0.5030040145,0.0227764975,0.0734008253,-0.1849199831],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5025484562,0.2167756557,0.0697024241,-0.453574717],"action_prob":0.3947782218,"action_logp":-0.9294311404,"action_dist_inputs":[0.2109028697,-0.2163679451],"value_targets":55.2476768494} 
+{"eps_id":1834071864,"obs":[-0.5025484562,0.2167756557,0.0697024241,-0.453574717],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4982129633,0.0207408611,0.0606309287,-0.1397606134],"action_prob":0.8561162353,"action_logp":-0.1553491056,"action_dist_inputs":[0.8877580762,-0.8956423402],"value_targets":54.7956352234} +{"eps_id":1834071864,"obs":[-0.4982129633,0.0207408611,0.0606309287,-0.1397606134],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4977981448,0.2149443924,0.0578357168,-0.4127160609],"action_prob":0.4473869503,"action_logp":-0.8043314219,"action_dist_inputs":[0.1030472443,-0.1081869304],"value_targets":54.3390235901} +{"eps_id":1834071864,"obs":[-0.4977981448,0.2149443924,0.0578357168,-0.4127160609],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4934992492,0.0190523323,0.0495813973,-0.1023750603],"action_prob":0.8376326561,"action_logp":-0.1771756262,"action_dist_inputs":[0.8165126443,-0.8242055178],"value_targets":53.8778038025} +{"eps_id":1834071864,"obs":[-0.4934992492,0.0190523323,0.0495813973,-0.1023750603],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4931181967,-0.1767438203,0.0475338958,0.205529213],"action_prob":0.5090569258,"action_logp":-0.6751954556,"action_dist_inputs":[0.0156881511,-0.0205435269],"value_targets":53.4119224548} +{"eps_id":1834071864,"obs":[-0.4931181967,-0.1767438203,0.0475338958,0.205529213],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.49665308,0.0176672861,0.051644478,-0.0717883632],"action_prob":0.8162174821,"action_logp":-0.2030744702,"action_dist_inputs":[-0.7460116148,0.7449159622],"value_targets":52.9413375854} 
+{"eps_id":1834071864,"obs":[-0.49665308,0.0176672861,0.051644478,-0.0717883632],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4962997437,-0.1781555712,0.0502087139,0.2367310077],"action_prob":0.452994734,"action_logp":-0.7918747663,"action_dist_inputs":[-0.0965907946,0.0919870883],"value_targets":52.4659957886} +{"eps_id":1834071864,"obs":[-0.4962997437,-0.1781555712,0.0502087139,0.2367310077],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4998628497,0.0162144508,0.0549433343,-0.0397014357],"action_prob":0.8359351754,"action_logp":-0.1792041808,"action_dist_inputs":[-0.8145056963,0.8137840033],"value_targets":51.9858551025} +{"eps_id":1834071864,"obs":[-0.4998628497,0.0162144508,0.0549433343,-0.0397014357],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4995385706,-0.1796505749,0.0541493036,0.2697979808],"action_prob":0.395875752,"action_logp":-0.9266548753,"action_dist_inputs":[-0.2134970427,0.2091823816],"value_targets":51.5008621216} +{"eps_id":1834071864,"obs":[-0.4995385706,-0.1796505749,0.0541493036,0.2697979808],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5031315684,0.0146585051,0.0595452636,-0.0053263246],"action_prob":0.853368938,"action_logp":-0.1585633159,"action_dist_inputs":[-0.880785346,0.8804869056],"value_targets":51.0109710693} +{"eps_id":1834071864,"obs":[-0.5031315684,0.0146585051,0.0595452636,-0.0053263246],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5028383732,0.2088781744,0.0594387352,-0.2786435485],"action_prob":0.6604831815,"action_logp":-0.4147836268,"action_dist_inputs":[-0.3347091973,0.3307389021],"value_targets":50.5161323547} 
+{"eps_id":1834071864,"obs":[-0.5028383732,0.2088781744,0.0594387352,-0.2786435485],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4986608326,0.0129608838,0.0538658649,0.0321781188],"action_prob":0.6898251772,"action_logp":-0.3713170886,"action_dist_inputs":[0.3961488605,-0.4031531215],"value_targets":50.0162963867} +{"eps_id":1834071864,"obs":[-0.4986608326,0.0129608838,0.0538658649,0.0321781188],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.498401612,0.2072706521,0.0545094274,-0.2430348843],"action_prob":0.700049758,"action_logp":-0.3566038609,"action_dist_inputs":[-0.4255580902,0.4219768047],"value_targets":49.5114097595} +{"eps_id":1834071864,"obs":[-0.498401612,0.2072706521,0.0545094274,-0.2430348843],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4942561984,0.0114141842,0.0496487319,0.0663313121],"action_prob":0.6458903551,"action_logp":-0.4371255338,"action_dist_inputs":[0.2971220016,-0.3039012551],"value_targets":49.0014266968} +{"eps_id":1834071864,"obs":[-0.4942561984,0.0114141842,0.0496487319,0.0663313121],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4940279126,0.2057904452,0.0509753563,-0.2102830857],"action_prob":0.7327781916,"action_logp":-0.3109122217,"action_dist_inputs":[-0.5059811473,0.5027829409],"value_targets":48.486289978} +{"eps_id":1834071864,"obs":[-0.4940279126,0.2057904452,0.0509753563,-0.2102830857],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4899121225,0.4001478851,0.0467696935,-0.4864602685],"action_prob":0.3991079032,"action_logp":-0.9185234308,"action_dist_inputs":[0.2013183534,-0.2078651637],"value_targets":47.9659461975} 
+{"eps_id":1834071864,"obs":[-0.4899121225,0.4001478851,0.0467696935,-0.4864602685],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4819091558,0.2043982595,0.0370404907,-0.1794116497],"action_prob":0.853469491,"action_logp":-0.1584455073,"action_dist_inputs":[0.8767171502,-0.885358572],"value_targets":47.4403495789} +{"eps_id":1834071864,"obs":[-0.4819091558,0.2043982595,0.0370404907,-0.1794116497],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4778212011,0.0087663801,0.0334522575,0.1247223169],"action_prob":0.5763368607,"action_logp":-0.5510630012,"action_dist_inputs":[0.1507214904,-0.157032162],"value_targets":46.9094467163} +{"eps_id":1834071864,"obs":[-0.4778212011,0.0087663801,0.0334522575,0.1247223169],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.477645874,0.2033935189,0.0359467044,-0.1572219133],"action_prob":0.7717733979,"action_logp":-0.2590643167,"action_dist_inputs":[-0.6104374528,0.6079143882],"value_targets":46.3731765747} +{"eps_id":1834071864,"obs":[-0.477645874,0.2033935189,0.0359467044,-0.1572219133],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4735779762,0.0077758306,0.0328022651,0.1465810984],"action_prob":0.5418050885,"action_logp":-0.6128489375,"action_dist_inputs":[0.0807489231,-0.08686281],"value_targets":45.8314933777} +{"eps_id":1834071864,"obs":[-0.4735779762,0.0077758306,0.0328022651,0.1465810984],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4734224677,0.2024130523,0.0357338861,-0.1355755478],"action_prob":0.7886776924,"action_logp":-0.2373975217,"action_dist_inputs":[-0.6596005559,0.6573728919],"value_targets":45.2843360901} 
+{"eps_id":1834071864,"obs":[-0.4734224677,0.2024130523,0.0357338861,-0.1355755478],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4693742096,0.3970054388,0.0330223739,-0.4167742431],"action_prob":0.4935744703,"action_logp":-0.7060815096,"action_dist_inputs":[0.0099000912,-0.0158033706],"value_targets":44.7316513062} +{"eps_id":1834071864,"obs":[-0.4693742096,0.3970054388,0.0330223739,-0.4167742431],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4614340961,0.2014314383,0.0246868916,-0.1138663664],"action_prob":0.8147617579,"action_logp":-0.2048595101,"action_dist_inputs":[0.7364424467,-0.7448104024],"value_targets":44.1733856201} +{"eps_id":1834071864,"obs":[-0.4614340961,0.2014314383,0.0246868916,-0.1138663664],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4574054778,0.0059646056,0.0224095639,0.1865017414],"action_prob":0.4906687737,"action_logp":-0.7119859457,"action_dist_inputs":[-0.0215130001,0.0158160888],"value_targets":43.6094818115} +{"eps_id":1834071864,"obs":[-0.4574054778,0.0059646056,0.0224095639,0.1865017414],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4572861791,0.2007588893,0.0261395983,-0.0990284458],"action_prob":0.8096200228,"action_logp":-0.2111902833,"action_dist_inputs":[-0.7246240973,0.7229189277],"value_targets":43.0398788452} +{"eps_id":1834071864,"obs":[-0.4572861791,0.2007588893,0.0261395983,-0.0990284458],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4532710016,0.3954966366,0.0241590291,-0.3833511174],"action_prob":0.5361455679,"action_logp":-0.623349607,"action_dist_inputs":[-0.0751772299,0.0696576387],"value_targets":42.4645233154} 
+{"eps_id":1834071864,"obs":[-0.4532710016,0.3954966366,0.0241590291,-0.3833511174],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4453610778,0.2000401616,0.0164920073,-0.0831498951],"action_prob":0.7949960828,"action_logp":-0.2294180691,"action_dist_inputs":[0.6735557914,-0.6817522049],"value_targets":41.8833580017} +{"eps_id":1834071864,"obs":[-0.4453610778,0.2000401616,0.0164920073,-0.0831498951],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.441360265,0.394921869,0.0148290088,-0.3705842793],"action_prob":0.5448971391,"action_logp":-0.6071581841,"action_dist_inputs":[-0.0927119926,0.0873617306],"value_targets":41.2963218689} +{"eps_id":1834071864,"obs":[-0.441360265,0.394921869,0.0148290088,-0.3705842793],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4334618449,0.1995924115,0.0074173231,-0.0732627288],"action_prob":0.794184804,"action_logp":-0.2304390967,"action_dist_inputs":[0.671114862,-0.6792225838],"value_targets":40.7033538818} +{"eps_id":1834071864,"obs":[-0.4334618449,0.1995924115,0.0074173231,-0.0732627288],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4294699728,0.3946072459,0.0059520686,-0.3635962605],"action_prob":0.5449621677,"action_logp":-0.6070389152,"action_dist_inputs":[-0.0927858353,0.0875499099],"value_targets":40.1044006348} +{"eps_id":1834071864,"obs":[-0.4294699728,0.3946072459,0.0059520686,-0.3635962605],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.421577841,0.5896440744,-0.0013198566,-0.6543964744],"action_prob":0.2014188915,"action_logp":-1.602368474,"action_dist_inputs":[0.6847065091,-0.6927432418],"value_targets":39.4993934631} 
+{"eps_id":1834071864,"obs":[-0.421577841,0.5896440744,-0.0013198566,-0.6543964744],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4097849429,0.3945405483,-0.0144077856,-0.36212942],"action_prob":0.9223439693,"action_logp":-0.0808370262,"action_dist_inputs":[1.2325900793,-1.242038846],"value_targets":38.8882751465} +{"eps_id":1834071864,"obs":[-0.4097849429,0.3945405483,-0.0144077856,-0.36212942],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4018941522,0.1996263117,-0.0216503739,-0.0740241706],"action_prob":0.8208504915,"action_logp":-0.1974142641,"action_dist_inputs":[0.7570784092,-0.7650420666],"value_targets":38.2709846497} +{"eps_id":1834071864,"obs":[-0.4018941522,0.1996263117,-0.0216503739,-0.0740241706],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3979016244,0.0048213233,-0.0231308583,0.2117501199],"action_prob":0.510409832,"action_logp":-0.6725412607,"action_dist_inputs":[0.0182086602,-0.0234367214],"value_targets":37.6474609375} +{"eps_id":1834071864,"obs":[-0.3979016244,0.0048213233,-0.0231308583,0.2117501199],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3978051841,0.2002662271,-0.0188958552,-0.0881386772],"action_prob":0.7986133695,"action_logp":-0.2248783112,"action_dist_inputs":[-0.6895237565,0.6881265044],"value_targets":37.0176353455} +{"eps_id":1834071864,"obs":[-0.3978051841,0.2002662271,-0.0188958552,-0.0881386772],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3937998712,0.0054201502,-0.020658629,0.1985232085],"action_prob":0.5301614404,"action_logp":-0.6345737576,"action_dist_inputs":[0.057736896,-0.0630554631],"value_targets":36.3814506531} 
+{"eps_id":1834071864,"obs":[-0.3937998712,0.0054201502,-0.020658629,0.1985232085],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3936914802,0.2008313984,-0.0166881643,-0.1006043255],"action_prob":0.7912105322,"action_logp":-0.2341911793,"action_dist_inputs":[-0.6668714881,0.665366292],"value_targets":35.7388381958} +{"eps_id":1834071864,"obs":[-0.3936914802,0.2008313984,-0.0166881643,-0.1006043255],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3896748424,0.0059525394,-0.0187002514,0.1867671609],"action_prob":0.5482202172,"action_logp":-0.6010782123,"action_dist_inputs":[0.0940443575,-0.0994378775],"value_targets":35.0897369385} +{"eps_id":1834071864,"obs":[-0.3896748424,0.0059525394,-0.0187002514,0.1867671609],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3895557821,0.2013369799,-0.0149649084,-0.1117558107],"action_prob":0.7840146422,"action_logp":-0.243327558,"action_dist_inputs":[-0.6454082131,0.6438089013],"value_targets":34.4340782166} +{"eps_id":1834071864,"obs":[-0.3895557821,0.2013369799,-0.0149649084,-0.1117558107],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3855290413,0.3966701329,-0.0172000248,-0.4091222584],"action_prob":0.4350389242,"action_logp":-0.8323197961,"action_dist_inputs":[0.1279323697,-0.1333889514],"value_targets":33.7717971802} +{"eps_id":1834071864,"obs":[-0.3855290413,0.3966701329,-0.0172000248,-0.4091222584],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3775956333,0.2017962188,-0.0253824685,-0.1219111681],"action_prob":0.8618414402,"action_logp":-0.1486839503,"action_dist_inputs":[0.911324203,-0.9193452001],"value_targets":33.1028251648} 
+{"eps_id":1834071864,"obs":[-0.3775956333,0.2017962188,-0.0253824685,-0.1219111681],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3735597134,0.0070469449,-0.0278206933,0.1626569927],"action_prob":0.6038240194,"action_logp":-0.5044724941,"action_dist_inputs":[0.207953006,-0.2134711891],"value_targets":32.4270935059} +{"eps_id":1834071864,"obs":[-0.3735597134,0.0070469449,-0.0278206933,0.1626569927],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3734187782,0.2025558949,-0.0245675519,-0.1386711597],"action_prob":0.7545286417,"action_logp":-0.2816620469,"action_dist_inputs":[-0.5623806119,0.5605323315],"value_targets":31.7445411682} +{"eps_id":1834071864,"obs":[-0.3734187782,0.2025558949,-0.0245675519,-0.1386711597],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3693676591,0.3980209529,-0.0273409765,-0.4390023947],"action_prob":0.3683141172,"action_logp":-0.9988191128,"action_dist_inputs":[0.2669161856,-0.2725399435],"value_targets":31.0550918579} +{"eps_id":1834071864,"obs":[-0.3693676591,0.3980209529,-0.0273409765,-0.4390023947],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3614072502,0.203296423,-0.0361210257,-0.1550620198],"action_prob":0.8869518638,"action_logp":-0.1199645698,"action_dist_inputs":[1.0259435177,-1.0340335369],"value_targets":30.3586788177} +{"eps_id":1834071864,"obs":[-0.3614072502,0.203296423,-0.0361210257,-0.1550620198],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3573413193,0.0087097641,-0.0392222628,0.1260102987],"action_prob":0.6809782982,"action_logp":-0.3842248321,"action_dist_inputs":[0.3762740493,-0.3819973767],"value_targets":29.6552295685} 
+{"eps_id":1834071864,"obs":[-0.3573413193,0.0087097641,-0.0392222628,0.1260102987],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3571671247,-0.1858289838,-0.0367020592,0.4060655832],"action_prob":0.3008545041,"action_logp":-1.2011284828,"action_dist_inputs":[-0.4227332473,0.4204988182],"value_targets":28.9446773529} +{"eps_id":1834071864,"obs":[-0.3571671247,-0.1858289838,-0.0367020592,0.4060655832],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3608837128,0.0097936848,-0.0285807475,0.1020412222],"action_prob":0.8773434758,"action_logp":-0.1308567375,"action_dist_inputs":[-0.9830203056,0.9844899178],"value_targets":28.2269458771} +{"eps_id":1834071864,"obs":[-0.3608837128,0.0097936848,-0.0285807475,0.1020412222],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3606878221,-0.1849072576,-0.0265399218,0.3855717778],"action_prob":0.3176135719,"action_logp":-1.1469198465,"action_dist_inputs":[-0.3836023808,0.3811583221],"value_targets":27.5019664764} +{"eps_id":1834071864,"obs":[-0.3606878221,-0.1849072576,-0.0265399218,0.3855717778],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3643859625,0.0105812158,-0.018828487,0.0846405029],"action_prob":0.8741896152,"action_logp":-0.1344579905,"action_dist_inputs":[-0.9686032534,0.9699180126],"value_targets":26.7696628571} +{"eps_id":1834071864,"obs":[-0.3643859625,0.0105812158,-0.018828487,0.0846405029],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3641743362,0.2059679329,-0.0171356779,-0.2139228731],"action_prob":0.672750473,"action_logp":-0.3963807523,"action_dist_inputs":[-0.3616200686,0.3590314686],"value_targets":26.0299625397} 
+{"eps_id":1834071864,"obs":[-0.3641743362,0.2059679329,-0.0171356779,-0.2139228731],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3600549996,0.0110951047,-0.0214141347,0.0733058155],"action_prob":0.7370087504,"action_logp":-0.3051554859,"action_dist_inputs":[0.5121939182,-0.5182853341],"value_targets":25.2827911377} +{"eps_id":1834071864,"obs":[-0.3600549996,0.0110951047,-0.0214141347,0.0733058155],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3598330915,0.2065173984,-0.0199480187,-0.2260556966],"action_prob":0.6520262957,"action_logp":-0.4276703596,"action_dist_inputs":[-0.3153265715,0.3126314878],"value_targets":24.5280704498} +{"eps_id":1834071864,"obs":[-0.3598330915,0.2065173984,-0.0199480187,-0.2260556966],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.355702728,0.0116861388,-0.0244691316,0.0602687225],"action_prob":0.7577374578,"action_logp":-0.2774183452,"action_dist_inputs":[0.5670776367,-0.5732371211],"value_targets":23.7657279968} +{"eps_id":1834071864,"obs":[-0.355702728,0.0116861388,-0.0244691316,0.0602687225],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3554690182,0.2071502209,-0.023263758,-0.2400328368],"action_prob":0.626380682,"action_logp":-0.4677969515,"action_dist_inputs":[-0.2597697973,0.2569512129],"value_targets":22.9956855774} +{"eps_id":1834071864,"obs":[-0.3554690182,0.2071502209,-0.023263758,-0.2400328368],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3513260186,0.0123681938,-0.0280644149,0.0452222154],"action_prob":0.779837966,"action_logp":-0.2486691028,"action_dist_inputs":[0.6292405128,-0.6354818344],"value_targets":22.2178649902} 
+{"eps_id":1834071864,"obs":[-0.3513260186,0.0123681938,-0.0280644149,0.0452222154],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3510786593,-0.1823403239,-0.0271599703,0.328920126],"action_prob":0.4054212272,"action_logp":-0.9028286934,"action_dist_inputs":[-0.1929437816,0.1899827123],"value_targets":21.4321861267} +{"eps_id":1834071864,"obs":[-0.3510786593,-0.1823403239,-0.0271599703,0.328920126],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3547254503,0.0131575353,-0.0205815677,0.0277974438],"action_prob":0.8508238792,"action_logp":-0.1615501046,"action_dist_inputs":[-0.8701716065,0.8709058166],"value_targets":20.6385707855} +{"eps_id":1834071864,"obs":[-0.3547254503,0.0131575353,-0.0205815677,0.0277974438],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3544622958,-0.1816633046,-0.0200256184,0.313916266],"action_prob":0.4215820134,"action_logp":-0.863740921,"action_dist_inputs":[-0.1596985161,0.1565838158],"value_targets":19.8369407654} +{"eps_id":1834071864,"obs":[-0.3544622958,-0.1816633046,-0.0200256184,0.313916266],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3580955565,0.013738106,-0.0137472935,0.0149857718],"action_prob":0.8470184207,"action_logp":-0.1660328209,"action_dist_inputs":[-0.8554001451,0.8560052514],"value_targets":19.0272140503} +{"eps_id":1834071864,"obs":[-0.3580955565,0.013738106,-0.0137472935,0.0149857718],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3578208089,0.209054485,-0.0134475781,-0.2820026875],"action_prob":0.568951726,"action_logp":-0.5639597178,"action_dist_inputs":[-0.1404005438,0.1371749789],"value_targets":18.2093067169} 
+{"eps_id":1834071864,"obs":[-0.3578208089,0.209054485,-0.0134475781,-0.2820026875],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3536397219,0.404365629,-0.0190876313,-0.5788964033],"action_prob":0.1873145252,"action_logp":-1.6749660969,"action_dist_inputs":[0.7305271029,-0.7370278835],"value_targets":17.3831367493} +{"eps_id":1834071864,"obs":[-0.3536397219,0.404365629,-0.0190876313,-0.5788964033],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3455524147,0.2095163167,-0.0306655597,-0.2922870815],"action_prob":0.9305208325,"action_logp":-0.0720108226,"action_dist_inputs":[1.2930152416,-1.3017021418],"value_targets":16.5486240387} +{"eps_id":1834071864,"obs":[-0.3455524147,0.2095163167,-0.0306655597,-0.2922870815],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3413620889,0.0148447212,-0.036511302,-0.0094312374],"action_prob":0.8403938413,"action_logp":-0.1738846302,"action_dist_inputs":[0.8272957206,-0.8338659406],"value_targets":15.7056808472} +{"eps_id":1834071864,"obs":[-0.3413620889,0.0148447212,-0.036511302,-0.0094312374],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3410651982,-0.1797350943,-0.0366999283,0.2715120316],"action_prob":0.5241225958,"action_logp":-0.6460297108,"action_dist_inputs":[0.0465522073,-0.0500130951],"value_targets":14.8542232513} +{"eps_id":1834071864,"obs":[-0.3410651982,-0.1797350943,-0.0366999283,0.2715120316],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3446598947,-0.3743146658,-0.0312696844,0.5523974895],"action_prob":0.1918755174,"action_logp":-1.6509084702,"action_dist_inputs":[-0.718885541,0.7189836502],"value_targets":13.9941644669} 
+{"eps_id":1834071864,"obs":[-0.3446598947,-0.3743146658,-0.0312696844,0.5523974895],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3521461785,-0.178767845,-0.0202217363,0.2500288785],"action_prob":0.9102656841,"action_logp":-0.0940187648,"action_dist_inputs":[-1.1566342115,1.1602489948],"value_targets":13.125418663} +{"eps_id":1834071864,"obs":[-0.3521461785,-0.178767845,-0.0202217363,0.2500288785],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3557215333,0.0166369546,-0.015221159,-0.0489631966],"action_prob":0.8049887419,"action_logp":-0.2169270068,"action_dist_inputs":[-0.7089288235,0.7088421583],"value_targets":12.2478981018} +{"eps_id":1834071864,"obs":[-0.3557215333,0.0166369546,-0.015221159,-0.0489631966],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3553887904,0.2119738162,-0.0162004232,-0.3464094102],"action_prob":0.4460864365,"action_logp":-0.8072425127,"action_dist_inputs":[0.1063382253,-0.1101575568],"value_targets":11.3615131378} +{"eps_id":1834071864,"obs":[-0.3553887904,0.2119738162,-0.0162004232,-0.3464094102],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3511493206,0.0170860104,-0.0231286101,-0.0588787608],"action_prob":0.8663936853,"action_logp":-0.1434158981,"action_dist_inputs":[0.9312555194,-0.9381864667],"value_targets":10.4661741257} +{"eps_id":1834071864,"obs":[-0.3511493206,0.0170860104,-0.0231286101,-0.0588787608],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3508076072,0.2125318199,-0.0243061855,-0.3587682545],"action_prob":0.4106036127,"action_logp":-0.8901270032,"action_dist_inputs":[0.1787768304,-0.1826938689],"value_targets":9.5617923737} 
+{"eps_id":1834071864,"obs":[-0.3508076072,0.2125318199,-0.0243061855,-0.3587682545],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3465569615,0.017763678,-0.0314815491,-0.0738476738],"action_prob":0.8794109821,"action_logp":-0.1285029501,"action_dist_inputs":[0.9899180532,-0.9969460368],"value_targets":8.6482753754} +{"eps_id":1834071864,"obs":[-0.3465569615,0.017763678,-0.0314815491,-0.0738476738],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3462016881,-0.1768931299,-0.0329585038,0.2087388188],"action_prob":0.6340967417,"action_logp":-0.4555537403,"action_dist_inputs":[0.2728850842,-0.2769475281],"value_targets":7.7255306244} +{"eps_id":1834071864,"obs":[-0.3462016881,-0.1768931299,-0.0329585038,0.2087388188],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3497395515,0.0186842065,-0.0287837274,-0.0941559151],"action_prob":0.7519274354,"action_logp":-0.2851154208,"action_dist_inputs":[-0.5547396541,0.554179132],"value_targets":6.7934651375} +{"eps_id":1834071864,"obs":[-0.3497395515,0.0186842065,-0.0287837274,-0.0941559151],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3493658602,0.214206636,-0.0306668468,-0.3957793117],"action_prob":0.3361322582,"action_logp":-1.0902506113,"action_dist_inputs":[0.3381620944,-0.3424162567],"value_targets":5.8519849777} +{"eps_id":1834071864,"obs":[-0.3493658602,0.214206636,-0.0306668468,-0.3957793117],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3450817466,0.0195329189,-0.038582433,-0.1129206866],"action_prob":0.9001560211,"action_logp":-0.1051871702,"action_dist_inputs":[1.0958254337,-1.10313344],"value_targets":4.9009947777} 
+{"eps_id":1834071864,"obs":[-0.3450817466,0.0195329189,-0.038582433,-0.1129206866],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3446910679,-0.1750155538,-0.0408408456,0.1673444211],"action_prob":0.7120876908,"action_logp":-0.3395542204,"action_dist_inputs":[0.4505507052,-0.4549944699],"value_targets":3.9403989315} +{"eps_id":1834071864,"obs":[-0.3446910679,-0.1750155538,-0.0408408456,0.1673444211],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3481913805,0.0206664801,-0.0374939591,-0.1379377395],"action_prob":0.6867117286,"action_logp":-0.3758406937,"action_dist_inputs":[-0.392893672,0.3918971419],"value_targets":2.970099926} +{"eps_id":1834071864,"obs":[-0.3481913805,0.0206664801,-0.0374939591,-0.1379377395],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3477780521,-0.1738989502,-0.0402527116,0.1426846534],"action_prob":0.7458788157,"action_logp":-0.2931921482,"action_dist_inputs":[0.5360322595,-0.5407195687],"value_targets":1.9900000095} +{"eps_id":1834071864,"obs":[-0.3477780521,-0.1738989502,-0.0402527116,0.1426846534],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.3512560427,0.0217756778,-0.03739902,-0.1624206305],"action_prob":0.6494523287,"action_logp":-0.4316258729,"action_dist_inputs":[-0.3089400828,0.3076924682],"value_targets":1.0} +{"eps_id":1462230943,"obs":[-0.0179679319,0.0256773084,0.0438232161,0.0373748355],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0174543858,0.2201443315,0.0445707142,-0.2411657274],"action_prob":0.7099567056,"action_logp":-0.3425512612,"action_dist_inputs":[-0.4474087656,0.4477650225],"value_targets":86.6020355225} 
+{"eps_id":1462230943,"obs":[-0.0174543858,0.2201443315,0.0445707142,-0.2411657274],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0130514996,0.0244149696,0.0397473983,0.065236181],"action_prob":0.7134503722,"action_logp":-0.3376424313,"action_dist_inputs":[0.4544297457,-0.4577713311],"value_targets":86.4666976929} +{"eps_id":1462230943,"obs":[-0.0130514996,0.0244149696,0.0397473983,0.065236181],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0125631997,0.2189451754,0.0410521217,-0.2146459967],"action_prob":0.7441474795,"action_logp":-0.2955160141,"action_dist_inputs":[-0.5334656835,0.5341723561],"value_targets":86.3300018311} +{"eps_id":1462230943,"obs":[-0.0125631997,0.2189451754,0.0410521217,-0.2146459967],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.008184297,0.0232610982,0.0367592014,0.0906987488],"action_prob":0.6759823561,"action_logp":-0.3915882707,"action_dist_inputs":[0.3661489785,-0.3692201078],"value_targets":86.1919174194} +{"eps_id":1462230943,"obs":[-0.008184297,0.0232610982,0.0367592014,0.0906987488],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0077190748,0.2178374082,0.0385731794,-0.1901637763],"action_prob":0.7724383473,"action_logp":-0.2582030594,"action_dist_inputs":[-0.610545814,0.6115854979],"value_targets":86.052444458} +{"eps_id":1462230943,"obs":[-0.0077190748,0.2178374082,0.0385731794,-0.1901637763],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0033623264,0.4123868942,0.0347699039,-0.4704332352],"action_prob":0.3634897172,"action_logp":-1.0120042562,"action_dist_inputs":[0.2787219584,-0.2815275788],"value_targets":85.9115600586} 
+{"eps_id":1462230943,"obs":[-0.0033623264,0.4123868942,0.0347699039,-0.4704332352],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0048854118,0.2167915106,0.025361238,-0.1669971794],"action_prob":0.8834188581,"action_logp":-0.1239558309,"action_dist_inputs":[1.0097507238,-1.0154612064],"value_targets":85.7692489624} +{"eps_id":1462230943,"obs":[0.0048854118,0.2167915106,0.025361238,-0.1669971794],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0092212418,0.4115414321,0.0220212936,-0.4515727162],"action_prob":0.3789343834,"action_logp":-0.9703922272,"action_dist_inputs":[0.2457629144,-0.2483108044],"value_targets":85.62550354} +{"eps_id":1462230943,"obs":[0.0092212418,0.4115414321,0.0220212936,-0.4515727162],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0174520705,0.2161150724,0.0129898395,-0.1520305127],"action_prob":0.881675005,"action_logp":-0.1259317398,"action_dist_inputs":[1.0014166832,-1.0069719553],"value_targets":85.4803085327} +{"eps_id":1462230943,"obs":[0.0174520705,0.2161150724,0.0129898395,-0.1520305127],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0217743721,0.020809548,0.0099492297,0.1447219551],"action_prob":0.6196388602,"action_logp":-0.4786184132,"action_dist_inputs":[0.2428257614,-0.2451899648],"value_targets":85.3336486816} +{"eps_id":1462230943,"obs":[0.0217743721,0.020809548,0.0099492297,0.1447219551],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0221905634,0.2157876194,0.0128436685,-0.1448056549],"action_prob":0.7990463972,"action_logp":-0.2243362665,"action_dist_inputs":[-0.6893072128,0.6910379529],"value_targets":85.1855010986} 
+{"eps_id":1462230943,"obs":[0.0221905634,0.2157876194,0.0128436685,-0.1448056549],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0265063141,0.0204841066,0.0099475551,0.1519013792],"action_prob":0.6061498523,"action_logp":-0.5006279945,"action_dist_inputs":[0.2144549936,-0.2167018354],"value_targets":85.0358581543} +{"eps_id":1462230943,"obs":[0.0265063141,0.0204841066,0.0099475551,0.1519013792],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0269159973,0.2154622227,0.0129855834,-0.1376267821],"action_prob":0.8061217666,"action_logp":-0.2155204564,"action_dist_inputs":[-0.7115718722,0.7134329081],"value_targets":84.8847045898} +{"eps_id":1462230943,"obs":[0.0269159973,0.2154622227,0.0129855834,-0.1376267821],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0312252417,0.4103958011,0.0102330474,-0.426184833],"action_prob":0.4081138968,"action_logp":-0.8962090015,"action_dist_inputs":[0.1848204434,-0.1869475394],"value_targets":84.7320251465} +{"eps_id":1462230943,"obs":[0.0312252417,0.4103958011,0.0102330474,-0.426184833],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0394331552,0.2151304036,0.001709351,-0.1302936226],"action_prob":0.8758958578,"action_logp":-0.1325080693,"action_dist_inputs":[0.9744291306,-0.9796971083],"value_targets":84.5778045654} +{"eps_id":1462230943,"obs":[0.0394331552,0.2151304036,0.001709351,-0.1302936226],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0437357649,0.0199840069,-0.0008965214,0.1629280895],"action_prob":0.6033836603,"action_logp":-0.5052019954,"action_dist_inputs":[0.2087853551,-0.210798651],"value_targets":84.4220275879} 
+{"eps_id":1462230943,"obs":[0.0437357649,0.0199840069,-0.0008965214,0.1629280895],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0441354439,0.2151187807,0.0023620406,-0.1300375313],"action_prob":0.804894805,"action_logp":-0.2170436829,"action_dist_inputs":[-0.7075622082,0.7096105218],"value_targets":84.2646713257} +{"eps_id":1462230943,"obs":[0.0441354439,0.2151187807,0.0023620406,-0.1300375313],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0484378226,0.4102068245,-0.0002387101,-0.4219743311],"action_prob":0.3980232477,"action_logp":-0.9212448597,"action_dist_inputs":[0.205872938,-0.2078355104],"value_targets":84.1057281494} +{"eps_id":1462230943,"obs":[0.0484378226,0.4102068245,-0.0002387101,-0.4219743311],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0566419587,0.2150882483,-0.0086781969,-0.1293666661],"action_prob":0.8802397251,"action_logp":-0.1275609881,"action_dist_inputs":[0.994767487,-0.9999346137],"value_targets":83.9451828003} +{"eps_id":1462230943,"obs":[0.0566419587,0.2150882483,-0.0086781969,-0.1293666661],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0609437227,0.4103334248,-0.0112655293,-0.424774766],"action_prob":0.3740382493,"action_logp":-0.9833972454,"action_dist_inputs":[0.2565077543,-0.2584235966],"value_targets":83.7830123901} +{"eps_id":1462230943,"obs":[0.0609437227,0.4103334248,-0.0112655293,-0.424774766],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0691503882,0.6056131124,-0.0197610259,-0.7209877372],"action_prob":0.1122498661,"action_logp":-2.1870279312,"action_dist_inputs":[1.0313991308,-1.0365637541],"value_targets":83.6192016602} 
+{"eps_id":1462230943,"obs":[0.0691503882,0.6056131124,-0.0197610259,-0.7209877372],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0812626556,0.4107700586,-0.034180779,-0.434589535],"action_prob":0.9415340424,"action_logp":-0.0602447614,"action_dist_inputs":[1.3856892586,-1.3933762312],"value_targets":83.453742981} +{"eps_id":1462230943,"obs":[0.0812626556,0.4107700586,-0.034180779,-0.434589535],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0894780532,0.2161482722,-0.0428725705,-0.1528745592],"action_prob":0.9020014405,"action_logp":-0.1031391621,"action_dist_inputs":[1.1072056293,-1.1124575138],"value_targets":83.286605835} +{"eps_id":1462230943,"obs":[0.0894780532,0.2161482722,-0.0428725705,-0.1528745592],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0938010216,0.4118570387,-0.0459300615,-0.458768636],"action_prob":0.2660489976,"action_logp":-1.3240747452,"action_dist_inputs":[0.5063394308,-0.5084222555],"value_targets":83.1177825928} +{"eps_id":1462230943,"obs":[0.0938010216,0.4118570387,-0.0459300615,-0.458768636],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.10203816,0.2174134254,-0.0551054366,-0.1809095144],"action_prob":0.9130951762,"action_logp":-0.0909151733,"action_dist_inputs":[1.1732853651,-1.1787413359],"value_targets":82.9472579956} +{"eps_id":1462230943,"obs":[0.10203816,0.2174134254,-0.0551054366,-0.1809095144],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1063864306,0.0231215376,-0.0587236248,0.0938931406],"action_prob":0.7883171439,"action_logp":-0.2378548086,"action_dist_inputs":[0.6562353969,-0.6585757136],"value_targets":82.7750091553} 
+{"eps_id":1462230943,"obs":[0.1063864306,0.0231215376,-0.0587236248,0.0938931406],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1068488583,-0.1711117178,-0.0568457618,0.3674860001],"action_prob":0.4019159377,"action_logp":-0.9115123153,"action_dist_inputs":[-0.1980731189,0.1994151622],"value_targets":82.601020813} +{"eps_id":1462230943,"obs":[0.1068488583,-0.1711117178,-0.0568457618,0.3674860001],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1034266278,0.0247699805,-0.0494960435,0.0574343987],"action_prob":0.8714959025,"action_logp":-0.1375441402,"action_dist_inputs":[-0.954787612,0.9594630003],"value_targets":82.4252700806} +{"eps_id":1462230943,"obs":[0.1034266278,0.0247699805,-0.0494960435,0.0574343987],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1039220244,0.2205653936,-0.0483473539,-0.2504449189],"action_prob":0.5434340239,"action_logp":-0.6098469496,"action_dist_inputs":[-0.0865973979,0.0875777155],"value_targets":82.2477493286} +{"eps_id":1462230943,"obs":[0.1039220244,0.2205653936,-0.0483473539,-0.2504449189],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1083333343,0.4163432121,-0.0533562526,-0.5579770803],"action_prob":0.1551340967,"action_logp":-1.8634654284,"action_dist_inputs":[0.8459492326,-0.848938942],"value_targets":82.0684280396} +{"eps_id":1462230943,"obs":[0.1083333343,0.4163432121,-0.0533562526,-0.5579770803],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1166602001,0.2220092714,-0.0645157918,-0.2825703025],"action_prob":0.9320892096,"action_logp":-0.0703267604,"action_dist_inputs":[1.3064762354,-1.3127570152],"value_targets":81.8873062134} 
+{"eps_id":1462230943,"obs":[0.1166602001,0.2220092714,-0.0645157918,-0.2825703025],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.121100381,0.027864024,-0.0701671988,-0.0109127946],"action_prob":0.8746547103,"action_logp":-0.1339260638,"action_dist_inputs":[0.9697031379,-0.9730541706],"value_targets":81.7043457031} +{"eps_id":1462230943,"obs":[0.121100381,0.027864024,-0.0701671988,-0.0109127946],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1216576621,0.2239184529,-0.0703854561,-0.324883014],"action_prob":0.3553231955,"action_logp":-1.0347274542,"action_dist_inputs":[0.2979745865,-0.2977466285],"value_targets":81.5195465088} +{"eps_id":1462230943,"obs":[0.1216576621,0.2239184529,-0.0703854561,-0.324883014],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1261360347,0.419968307,-0.0768831149,-0.6389059424],"action_prob":0.1054088473,"action_logp":-2.2499086857,"action_dist_inputs":[1.0673543215,-1.0711658001],"value_targets":81.3328704834} +{"eps_id":1462230943,"obs":[0.1261360347,0.419968307,-0.0768831149,-0.6389059424],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1345354021,0.2259977758,-0.0896612331,-0.3713909984],"action_prob":0.9412674904,"action_logp":-0.0605279021,"action_dist_inputs":[1.3835904598,-1.3906433582],"value_targets":81.144317627} +{"eps_id":1462230943,"obs":[0.1345354021,0.2259977758,-0.0896612331,-0.3713909984],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1390553564,0.0322564319,-0.0970890522,-0.10827142],"action_prob":0.9126083255,"action_logp":-0.0914485008,"action_dist_inputs":[1.1707646847,-1.1751419306],"value_targets":80.9538574219} 
+{"eps_id":1462230943,"obs":[0.1390553564,0.0322564319,-0.0970890522,-0.10827142],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1397004873,0.2286259234,-0.0992544815,-0.4299378395],"action_prob":0.1870899349,"action_logp":-1.6761658192,"action_dist_inputs":[0.7340044379,-0.735026598],"value_targets":80.76146698} +{"eps_id":1462230943,"obs":[0.1397004873,0.2286259234,-0.0992544815,-0.4299378395],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.144272998,0.0350393131,-0.1078532413,-0.1701200008],"action_prob":0.9240840673,"action_logp":-0.0789522082,"action_dist_inputs":[1.2470713854,-1.2521047592],"value_targets":80.5671386719} +{"eps_id":1462230943,"obs":[0.144272998,0.0350393131,-0.1078532413,-0.1701200008],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1449737847,0.2315264344,-0.1112556383,-0.4947855771],"action_prob":0.1381648779,"action_logp":-1.9793075323,"action_dist_inputs":[0.9143746495,-0.9162416458],"value_targets":80.3708496094} +{"eps_id":1462230943,"obs":[0.1449737847,0.2315264344,-0.1112556383,-0.4947855771],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1496043205,0.0381348394,-0.1211513504,-0.239133358],"action_prob":0.9321773648,"action_logp":-0.0702321976,"action_dist_inputs":[1.3074411154,-1.3131847382],"value_targets":80.1725769043} +{"eps_id":1462230943,"obs":[0.1496043205,0.0381348394,-0.1211513504,-0.239133358],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1503670067,-0.1550668329,-0.1259340197,0.0130135184],"action_prob":0.8934739232,"action_logp":-0.112638101,"action_dist_inputs":[1.0619566441,-1.0647706985],"value_targets":79.9722976685} 
+{"eps_id":1462230943,"obs":[0.1503670067,-0.1550668329,-0.1259340197,0.0130135184],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1472656727,0.0416150987,-0.125673756,-0.3165998459],"action_prob":0.2319051921,"action_logp":-1.4614266157,"action_dist_inputs":[0.5991538167,-0.5984306931],"value_targets":79.7699966431} +{"eps_id":1462230943,"obs":[0.1472656727,0.0416150987,-0.125673756,-0.3165998459],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.148097977,-0.1515136659,-0.1320057511,-0.0660422891],"action_prob":0.911418736,"action_logp":-0.0927528217,"action_dist_inputs":[1.163646698,-1.1674351692],"value_targets":79.5656509399} +{"eps_id":1462230943,"obs":[0.148097977,-0.1515136659,-0.1320057511,-0.0660422891],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1450677067,-0.3445202708,-0.1333265901,0.1822517812],"action_prob":0.8363751769,"action_logp":-0.178677991,"action_dist_inputs":[0.8154958487,-0.8160051107],"value_targets":79.3592453003} +{"eps_id":1462230943,"obs":[0.1450677067,-0.3445202708,-0.1333265901,0.1822517812],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1381773055,-0.147767514,-0.1296815574,-0.1493401378],"action_prob":0.3858141899,"action_logp":-0.9523993731,"action_dist_inputs":[0.2341075242,-0.2308340222],"value_targets":79.1507568359} +{"eps_id":1462230943,"obs":[0.1381773055,-0.147767514,-0.1296815574,-0.1493401378],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1352219433,-0.3408170044,-0.1326683611,0.0997829884],"action_prob":0.8737055659,"action_logp":-0.135011822,"action_dist_inputs":[0.9661996961,-0.9679279923],"value_targets":78.9401550293} 
+{"eps_id":1462230943,"obs":[0.1352219433,-0.3408170044,-0.1326683611,0.0997829884],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1284056157,-0.5338127613,-0.1306726933,0.3478424549],"action_prob":0.7287333608,"action_logp":-0.3164473474,"action_dist_inputs":[0.4950867891,-0.4931189418],"value_targets":78.727432251} +{"eps_id":1462230943,"obs":[0.1284056157,-0.5338127613,-0.1306726933,0.3478424549],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1177293584,-0.337097764,-0.1237158477,0.0169771295],"action_prob":0.5798556209,"action_logp":-0.5449761748,"action_dist_inputs":[-0.158341065,0.1638395935],"value_targets":78.5125579834} +{"eps_id":1462230943,"obs":[0.1177293584,-0.337097764,-0.1237158477,0.0169771295],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1109874025,-0.5302483439,-0.1233763099,0.2682088315],"action_prob":0.7977923751,"action_logp":-0.2259069085,"action_dist_inputs":[0.6865790486,-0.6859741807],"value_targets":78.2955093384} +{"eps_id":1462230943,"obs":[0.1109874025,-0.5302483439,-0.1233763099,0.2682088315],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1003824323,-0.3336014152,-0.1180121303,-0.0607000701],"action_prob":0.4547459483,"action_logp":-0.7880163789,"action_dist_inputs":[0.0930162072,-0.088496685],"value_targets":78.0762710571} +{"eps_id":1462230943,"obs":[0.1003824323,-0.3336014152,-0.1180121303,-0.0607000701],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0937104076,-0.5268509984,-0.1192261353,0.192543],"action_prob":0.8416672349,"action_logp":-0.1723705232,"action_dist_inputs":[0.835005641,-0.8356805444],"value_targets":77.8548202515} 
+{"eps_id":1462230943,"obs":[0.0937104076,-0.5268509984,-0.1192261353,0.192543],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0831733868,-0.7200834751,-0.1153752729,0.4453646243],"action_prob":0.654943347,"action_logp":-0.423206538,"action_dist_inputs":[0.3220710456,-0.3187690377],"value_targets":77.6311340332} +{"eps_id":1462230943,"obs":[0.0831733868,-0.7200834751,-0.1153752729,0.4453646243],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0687717125,-0.9134002328,-0.1064679772,0.6995667815],"action_prob":0.3425458074,"action_logp":-1.0713498592,"action_dist_inputs":[-0.3227023184,0.3292672932],"value_targets":77.4051818848} +{"eps_id":1462230943,"obs":[0.0687717125,-0.9134002328,-0.1064679772,0.6995667815],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0505037084,-0.7169759274,-0.0924766436,0.3753561676],"action_prob":0.8550729752,"action_logp":-0.1565684527,"action_dist_inputs":[-0.8834146857,0.8915417194],"value_targets":77.1769561768} +{"eps_id":1462230943,"obs":[0.0505037084,-0.7169759274,-0.0924766436,0.3753561676],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0361641906,-0.5206704736,-0.0849695206,0.0550067723],"action_prob":0.5881713629,"action_logp":-0.5307369828,"action_dist_inputs":[-0.1752707958,0.181140244],"value_targets":76.9464187622} +{"eps_id":1462230943,"obs":[0.0361641906,-0.5206704736,-0.0849695206,0.0550067723],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0257507805,-0.324439466,-0.0838693827,-0.2632288039],"action_prob":0.2330512404,"action_logp":-1.456496954,"action_dist_inputs":[0.5960648656,-0.5950968862],"value_targets":76.7135543823} 
+{"eps_id":1462230943,"obs":[0.0257507805,-0.324439466,-0.0838693827,-0.2632288039],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0192619916,-0.5182703733,-0.089133963,0.0018668768],"action_prob":0.8950684071,"action_logp":-0.1108551249,"action_dist_inputs":[1.0700243711,-1.0735670328],"value_targets":76.4783401489} +{"eps_id":1462230943,"obs":[0.0192619916,-0.5182703733,-0.089133963,0.0018668768],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0088965846,-0.7120084167,-0.0890966207,0.2651496232],"action_prob":0.8090465665,"action_logp":-0.2118988335,"action_dist_inputs":[0.7218617797,-0.7219651937],"value_targets":76.2407455444} +{"eps_id":1462230943,"obs":[0.0088965846,-0.7120084167,-0.0890966207,0.2651496232],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0053435843,-0.5157352686,-0.0837936327,-0.0542521179],"action_prob":0.4168017805,"action_logp":-0.8751444817,"action_dist_inputs":[0.1700187176,-0.1658975929],"value_targets":76.0007553101} +{"eps_id":1462230943,"obs":[-0.0053435843,-0.5157352686,-0.0837936327,-0.0542521179],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0156582892,-0.7095620036,-0.0848786756,0.2108613849],"action_prob":0.8366549015,"action_logp":-0.1783436239,"action_dist_inputs":[0.8162264824,-0.8173200488],"value_targets":75.7583389282} +{"eps_id":1462230943,"obs":[-0.0156582892,-0.7095620036,-0.0848786756,0.2108613849],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0298495293,-0.5133354664,-0.0806614459,-0.1073431522],"action_prob":0.3479067087,"action_logp":-1.0558209419,"action_dist_inputs":[0.3156706393,-0.312582612],"value_targets":75.5134735107} 
+{"eps_id":1462230943,"obs":[-0.0298495293,-0.5133354664,-0.0806614459,-0.1073431522],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0401162393,-0.7072144747,-0.0828083083,0.1588409841],"action_prob":0.8574143052,"action_logp":-0.1538340449,"action_dist_inputs":[0.895994246,-0.8979837298],"value_targets":75.26612854} +{"eps_id":1462230943,"obs":[-0.0401162393,-0.7072144747,-0.0828083083,0.1588409841],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0542605296,-0.5110105872,-0.0796314925,-0.1587741673],"action_prob":0.2900991142,"action_logp":-1.2375326157,"action_dist_inputs":[0.4484626949,-0.4464399219],"value_targets":75.0162963867} +{"eps_id":1462230943,"obs":[-0.0542605296,-0.5110105872,-0.0796314925,-0.1587741673],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0644807369,-0.7049074769,-0.0828069746,0.1077626273],"action_prob":0.8735517859,"action_logp":-0.1351878345,"action_dist_inputs":[0.9649625421,-0.9677722454],"value_targets":74.7639312744} +{"eps_id":1462230943,"obs":[-0.0644807369,-0.7049074769,-0.0828069746,0.1077626273],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0785788894,-0.8987511992,-0.0806517228,0.3732144237],"action_prob":0.757493794,"action_logp":-0.2777399719,"action_dist_inputs":[0.5699708462,-0.5690171719],"value_targets":74.5090255737} +{"eps_id":1462230943,"obs":[-0.0785788894,-0.8987511992,-0.0806517228,0.3732144237],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0965539142,-1.0926403999,-0.0731874332,0.6394168735],"action_prob":0.4977467358,"action_logp":-0.6976639032,"action_dist_inputs":[-0.001956143,0.0070569469],"value_targets":74.2515411377} 
+{"eps_id":1462230943,"obs":[-0.0965539142,-1.0926403999,-0.0731874332,0.6394168735],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1184067205,-0.8965784311,-0.0603990965,0.3246133327],"action_prob":0.7726215124,"action_logp":-0.2579660118,"action_dist_inputs":[-0.607762754,0.6154106259],"value_targets":73.9914550781} +{"eps_id":1462230943,"obs":[-0.1184067205,-0.8965784311,-0.0603990965,0.3246133327],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1363382936,-1.0907907486,-0.0539068282,0.597653985],"action_prob":0.535112381,"action_logp":-0.6252785325,"action_dist_inputs":[0.0725166127,-0.0681643635],"value_targets":73.7287445068} +{"eps_id":1462230943,"obs":[-0.1363382936,-1.0907907486,-0.0539068282,0.597653985],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1581541002,-0.8949574828,-0.0419537462,0.2884899974],"action_prob":0.755439043,"action_logp":-0.2804562151,"action_dist_inputs":[-0.5602791309,0.5675551295],"value_targets":73.4633712769} +{"eps_id":1462230943,"obs":[-0.1581541002,-0.8949574828,-0.0419537462,0.2884899974],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1760532558,-0.6992631555,-0.0361839496,-0.0171239804],"action_prob":0.4442155063,"action_logp":-0.8114454746,"action_dist_inputs":[0.1139073446,-0.1101633534],"value_targets":73.1953277588} +{"eps_id":1462230943,"obs":[-0.1760532558,-0.6992631555,-0.0361839496,-0.0171239804],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1900385171,-0.893848002,-0.0365264267,0.2639264762],"action_prob":0.810146451,"action_logp":-0.210540235,"action_dist_inputs":[0.7248714566,-0.7260907888],"value_targets":72.9245758057} 
+{"eps_id":1462230943,"obs":[-0.1900385171,-0.893848002,-0.0365264267,0.2639264762],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2079154849,-1.088430047,-0.0312478971,0.5448685288],"action_prob":0.5804645419,"action_logp":-0.543926537,"action_dist_inputs":[0.1639370769,-0.160743773],"value_targets":72.6510848999} +{"eps_id":1462230943,"obs":[-0.2079154849,-1.088430047,-0.0312478971,0.5448685288],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2296840847,-1.2830992937,-0.020350527,0.8275444508],"action_prob":0.2680307627,"action_logp":-1.3166534901,"action_dist_inputs":[-0.4990212917,0.5056154728],"value_targets":72.3748321533} +{"eps_id":1462230943,"obs":[-0.2296840847,-1.2830992937,-0.020350527,0.8275444508],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2553460598,-1.0877051353,-0.0037996385,0.5285311937],"action_prob":0.8871191144,"action_logp":-0.1197760254,"action_dist_inputs":[-1.0267227888,1.0349229574],"value_targets":72.0957946777} +{"eps_id":1462230943,"obs":[-0.2553460598,-1.0877051353,-0.0037996385,0.5285311937],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2771001756,-0.8925299644,0.0067709852,0.2346534133],"action_prob":0.7547555566,"action_logp":-0.2813613117,"action_dist_inputs":[-0.5589082241,0.5652304292],"value_targets":71.8139266968} +{"eps_id":1462230943,"obs":[-0.2771001756,-0.8925299644,0.0067709852,0.2346534133],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2949507833,-1.0877479315,0.0114640538,0.5294644237],"action_prob":0.5429249406,"action_logp":-0.6107842326,"action_dist_inputs":[0.0874075815,-0.084715873],"value_targets":71.5292205811} 
+{"eps_id":1462230943,"obs":[-0.2949507833,-1.0877479315,0.0114640538,0.5294644237],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3167057335,-0.892789185,0.0220533423,0.2404157519],"action_prob":0.7788149118,"action_logp":-0.2499818206,"action_dist_inputs":[-0.6263149381,0.6324588656],"value_targets":71.2416381836} +{"eps_id":1462230943,"obs":[-0.3167057335,-0.892789185,0.0220533423,0.2404157519],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.334561497,-1.0882190466,0.0268616565,0.5399725437],"action_prob":0.5005661845,"action_logp":-0.6920154691,"action_dist_inputs":[0.0024802624,0.0002155537],"value_targets":70.9511489868} +{"eps_id":1462230943,"obs":[-0.334561497,-1.0882190466,0.0268616565,0.5399725437],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3563258946,-0.8934848309,0.0376611054,0.2558728755],"action_prob":0.8093373179,"action_logp":-0.2115395069,"action_dist_inputs":[-0.7198444009,0.7258653641],"value_targets":70.6577301025} +{"eps_id":1462230943,"obs":[-0.3563258946,-0.8934848309,0.0376611054,0.2558728755],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3741955757,-0.6989202499,0.0427785628,-0.0246972758],"action_prob":0.5607551336,"action_logp":-0.5784709454,"action_dist_inputs":[-0.1207070276,0.1235202923],"value_targets":70.3613433838} +{"eps_id":1462230943,"obs":[-0.3741955757,-0.6989202499,0.0427785628,-0.0246972758],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3881739974,-0.8946287632,0.0422846191,0.281169951],"action_prob":0.7426037788,"action_logp":-0.2975926697,"action_dist_inputs":[0.5289933681,-0.5305523872],"value_targets":70.061958313} 
+{"eps_id":1462230943,"obs":[-0.3881739974,-0.8946287632,0.0422846191,0.281169951],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4060665667,-0.7001346946,0.0479080193,0.0021175875],"action_prob":0.6165756583,"action_logp":-0.4835742414,"action_dist_inputs":[-0.2360382527,0.2390005589],"value_targets":69.7595596313} +{"eps_id":1462230943,"obs":[-0.4060665667,-0.7001346946,0.0479080193,0.0021175875],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4200692773,-0.8959097862,0.0479503684,0.309522897],"action_prob":0.7030380368,"action_logp":-0.3523442447,"action_dist_inputs":[0.4302716255,-0.4315355122],"value_targets":69.4540939331} +{"eps_id":1462230943,"obs":[-0.4200692773,-0.8959097862,0.0479503684,0.309522897],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4379874468,-1.0916810036,0.0541408285,0.6169341803],"action_prob":0.3244143426,"action_logp":-1.1257337332,"action_dist_inputs":[-0.3652250767,0.3683333099],"value_targets":69.1455535889} +{"eps_id":1462230943,"obs":[-0.4379874468,-1.0916810036,0.0541408285,0.6169341803],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4598210752,-0.8973554969,0.0664795116,0.3417826891],"action_prob":0.8832247257,"action_logp":-0.1241755933,"action_dist_inputs":[-1.0086933374,1.0146355629],"value_targets":68.8338928223} +{"eps_id":1462230943,"obs":[-0.4598210752,-0.8973554969,0.0664795116,0.3417826891],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4777681828,-0.7032392621,0.0733151659,0.0707821101],"action_prob":0.7510935664,"action_logp":-0.2862250507,"action_dist_inputs":[-0.5505979061,0.5538551807],"value_targets":68.5190811157} 
+{"eps_id":1462230943,"obs":[-0.4777681828,-0.7032392621,0.0733151659,0.0707821101],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4918329716,-0.5092408061,0.0747308061,-0.1978985071],"action_prob":0.454046756,"action_logp":-0.7895550728,"action_dist_inputs":[0.0919630826,-0.092370145],"value_targets":68.2010955811} +{"eps_id":1462230943,"obs":[-0.4918329716,-0.5092408061,0.0747308061,-0.1978985071],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.502017796,-0.7053476572,0.0707728341,0.1173911989],"action_prob":0.8096151352,"action_logp":-0.2111962587,"action_dist_inputs":[0.7216701508,-0.7258415818],"value_targets":67.8798904419} +{"eps_id":1462230943,"obs":[-0.502017796,-0.7053476572,0.0707728341,0.1173911989],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5161247253,-0.9014085531,0.0731206611,0.4315361381],"action_prob":0.4648087323,"action_logp":-0.7661292553,"action_dist_inputs":[-0.0704800785,0.070518069],"value_targets":67.5554504395} +{"eps_id":1462230943,"obs":[-0.5161247253,-0.9014085531,0.0731206611,0.4315361381],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.534152925,-0.7073940039,0.0817513838,0.1627703011],"action_prob":0.8416308761,"action_logp":-0.1724137664,"action_dist_inputs":[-0.8333839774,0.8370291591],"value_targets":67.227722168} +{"eps_id":1462230943,"obs":[-0.534152925,-0.7073940039,0.0817513838,0.1627703011],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5483008027,-0.5135317445,0.0850067884,-0.1030437499],"action_prob":0.6373790503,"action_logp":-0.4503907561,"action_dist_inputs":[-0.2817889154,0.2822176516],"value_targets":66.8966903687} 
+{"eps_id":1462230943,"obs":[-0.5483008027,-0.5135317445,0.0850067884,-0.1030437499],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5585714579,-0.7097625732,0.0829459131,0.2152015418],"action_prob":0.6888185143,"action_logp":-0.372777462,"action_dist_inputs":[0.3957108557,-0.3988904357],"value_targets":66.5623168945} +{"eps_id":1462230943,"obs":[-0.5585714579,-0.7097625732,0.0829459131,0.2152015418],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5727666616,-0.5159183741,0.0872499421,-0.0502069741],"action_prob":0.716940701,"action_logp":-0.3327621222,"action_dist_inputs":[-0.4642673135,0.4650696516],"value_targets":66.2245635986} +{"eps_id":1462230943,"obs":[-0.5727666616,-0.5159183741,0.0872499421,-0.0502069741],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5830850601,-0.7121760249,0.086245805,0.2686786354],"action_prob":0.6000264287,"action_logp":-0.5107815862,"action_dist_inputs":[0.2014557719,-0.2041194141],"value_targets":65.883392334} +{"eps_id":1462230943,"obs":[-0.5830850601,-0.7121760249,0.086245805,0.2686786354],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5973285437,-0.5183839798,0.0916193798,0.0043958751],"action_prob":0.7814421654,"action_logp":-0.2466141582,"action_dist_inputs":[-0.6364740133,0.6376166344],"value_targets":65.5387802124} +{"eps_id":1462230943,"obs":[-0.5973285437,-0.5183839798,0.0916193798,0.0043958751],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6076962352,-0.3246872723,0.0917072967,-0.2580321729],"action_prob":0.5057755709,"action_logp":-0.6816622019,"action_dist_inputs":[-0.012641469,0.0104619535],"value_targets":65.1906890869} 
+{"eps_id":1462230943,"obs":[-0.6076962352,-0.3246872723,0.0917072967,-0.2580321729],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6141899824,-0.1309860945,0.086546652,-0.5204387903],"action_prob":0.2122534811,"action_logp":-1.5499740839,"action_dist_inputs":[0.6530381441,-0.6583570838],"value_targets":64.8390808105} +{"eps_id":1462230943,"obs":[-0.6141899824,-0.1309860945,0.086546652,-0.5204387903],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6168097258,-0.3272129595,0.076137878,-0.2017872483],"action_prob":0.9083757997,"action_logp":-0.0960971043,"action_dist_inputs":[1.1430653334,-1.1508975029],"value_targets":64.4839172363} +{"eps_id":1462230943,"obs":[-0.6168097258,-0.3272129595,0.076137878,-0.2017872483],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6233539581,-0.5233365893,0.0721021295,0.113908805],"action_prob":0.7456291318,"action_logp":-0.2935269475,"action_dist_inputs":[0.5352743864,-0.5401604176],"value_targets":64.1251678467} +{"eps_id":1462230943,"obs":[-0.6233539581,-0.5233365893,0.0721021295,0.113908805],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6338207126,-0.329317838,0.0743803084,-0.1551834494],"action_prob":0.651283741,"action_logp":-0.428809911,"action_dist_inputs":[-0.3130261004,0.3116605878],"value_targets":63.7627983093} +{"eps_id":1462230943,"obs":[-0.6338207126,-0.329317838,0.0743803084,-0.1551834494],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6404070854,-0.5254216194,0.0712766349,0.1600065231],"action_prob":0.6854256988,"action_logp":-0.3777151406,"action_dist_inputs":[0.3871484697,-0.3916714787],"value_targets":63.3967666626} 
+{"eps_id":1462230943,"obs":[-0.6404070854,-0.5254216194,0.0712766349,0.1600065231],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6509155035,-0.3313886225,0.0744767711,-0.109366335],"action_prob":0.7119243145,"action_logp":-0.3397836387,"action_dist_inputs":[-0.4529087245,0.4518396556],"value_targets":63.0270347595} +{"eps_id":1462230943,"obs":[-0.6509155035,-0.3313886225,0.0744767711,-0.109366335],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.657543242,-0.1374085546,0.0722894445,-0.3776539266],"action_prob":0.3861226141,"action_logp":-0.9516003132,"action_dist_inputs":[0.2297261655,-0.2339141369],"value_targets":62.6535720825} +{"eps_id":1462230943,"obs":[-0.657543242,-0.1374085546,0.0722894445,-0.3776539266],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6602914333,-0.3334787488,0.0647363663,-0.063082166],"action_prob":0.8517760038,"action_logp":-0.1604316682,"action_dist_inputs":[0.8708373308,-0.87776196],"value_targets":62.2763366699} +{"eps_id":1462230943,"obs":[-0.6602914333,-0.3334787488,0.0647363663,-0.063082166],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6669610143,-0.5294662714,0.0634747222,0.2493025064],"action_prob":0.5550299287,"action_logp":-0.5887332559,"action_dist_inputs":[0.1085721925,-0.1124428064],"value_targets":61.8952865601} +{"eps_id":1462230943,"obs":[-0.6669610143,-0.5294662714,0.0634747222,0.2493025064],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6775503159,-0.3353054821,0.06846077,-0.0227030814],"action_prob":0.7912483811,"action_logp":-0.2341433614,"action_dist_inputs":[-0.666472733,0.6659938097],"value_targets":61.5103912354} 
+{"eps_id":1462230943,"obs":[-0.6775503159,-0.3353054821,0.06846077,-0.0227030814],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6842564344,-0.1412287205,0.0680067092,-0.2930245399],"action_prob":0.5201830864,"action_logp":-0.6535744667,"action_dist_inputs":[-0.0421968773,0.0385793187],"value_targets":61.1216087341} +{"eps_id":1462230943,"obs":[-0.6842564344,-0.1412287205,0.0680067092,-0.2930245399],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.687081039,-0.3372510672,0.0621462166,0.0203078464],"action_prob":0.7857961655,"action_logp":-0.2410578728,"action_dist_inputs":[0.6466923952,-0.6530768871],"value_targets":60.7288970947} +{"eps_id":1462230943,"obs":[-0.687081039,-0.3372510672,0.0621462166,0.0203078464],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6938260198,-0.1430728585,0.0625523776,-0.2521379292],"action_prob":0.5779473186,"action_logp":-0.5482726097,"action_dist_inputs":[-0.1588487774,0.1555036455],"value_targets":60.3322181702} +{"eps_id":1462230943,"obs":[-0.6938260198,-0.1430728585,0.0625523776,-0.2521379292],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6966875196,-0.3390296698,0.057509616,0.0596007966],"action_prob":0.7480846643,"action_logp":-0.2902390957,"action_dist_inputs":[0.5411430597,-0.5472801328],"value_targets":59.9315338135} +{"eps_id":1462230943,"obs":[-0.6966875196,-0.3390296698,0.057509616,0.0596007966],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7034680843,-0.1447774321,0.0587016307,-0.214397341],"action_prob":0.6284720898,"action_logp":-0.4644636512,"action_dist_inputs":[-0.2643852234,0.261282593],"value_targets":59.526802063} 
+{"eps_id":1462230943,"obs":[-0.7034680843,-0.1447774321,0.0587016307,-0.214397341],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7063636184,-0.3406873345,0.0544136837,0.0962097347],"action_prob":0.7064070106,"action_logp":-0.3475637138,"action_dist_inputs":[0.436041683,-0.441955477],"value_targets":59.117980957} +{"eps_id":1462230943,"obs":[-0.7063636184,-0.3406873345,0.0544136837,0.0962097347],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7131773829,-0.1463857889,0.0563378781,-0.1788215041],"action_prob":0.6723698974,"action_logp":-0.3969466388,"action_dist_inputs":[-0.3609002233,0.3580232859],"value_targets":58.7050323486} +{"eps_id":1462230943,"obs":[-0.7131773829,-0.1463857889,0.0563378781,-0.1788215041],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7161051035,-0.342266798,0.0527614504,0.1310884207],"action_prob":0.6608139873,"action_logp":-0.4142828882,"action_dist_inputs":[0.3306082189,-0.3363154233],"value_targets":58.2879104614} +{"eps_id":1462230943,"obs":[-0.7161051035,-0.342266798,0.0527614504,0.1310884207],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7229504585,-0.5381032825,0.0553832166,0.4399390519],"action_prob":0.2894437909,"action_logp":-1.2397941351,"action_dist_inputs":[-0.4503709376,0.4477160275],"value_targets":57.8665771484} +{"eps_id":1462230943,"obs":[-0.7229504585,-0.5381032825,0.0553832166,0.4399390519],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7337124944,-0.3438071012,0.0641819984,0.1652159691],"action_prob":0.8774125576,"action_logp":-0.1307779849,"action_dist_inputs":[-0.9835321903,0.9846209288],"value_targets":57.4409866333} 
+{"eps_id":1462230943,"obs":[-0.7337124944,-0.3438071012,0.0641819984,0.1652159691],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.740588665,-0.1496598125,0.067486316,-0.1065487862],"action_prob":0.7551252246,"action_logp":-0.2808717191,"action_dist_inputs":[-0.5642805696,0.5618560314],"value_targets":57.0110969543} +{"eps_id":1462230943,"obs":[-0.740588665,-0.1496598125,0.067486316,-0.1065487862],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7435818315,-0.3456807137,0.0653553456,0.2066391557],"action_prob":0.5318938494,"action_logp":-0.6313112974,"action_dist_inputs":[0.0612255335,-0.06652347],"value_targets":56.5768661499} +{"eps_id":1462230943,"obs":[-0.7435818315,-0.3456807137,0.0653553456,0.2066391557],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7504954338,-0.1515512466,0.0694881231,-0.0647325665],"action_prob":0.787825644,"action_logp":-0.2384785116,"action_dist_inputs":[-0.6569796801,0.6548884511],"value_targets":56.1382484436} +{"eps_id":1462230943,"obs":[-0.7504954338,-0.1515512466,0.0694881231,-0.0647325665],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7535265088,-0.3475971222,0.0681934729,0.2490396351],"action_prob":0.4651609659,"action_logp":-0.7653717399,"action_dist_inputs":[-0.0723161995,0.0672659576],"value_targets":55.6952018738} +{"eps_id":1462230943,"obs":[-0.7535265088,-0.3475971222,0.0681934729,0.2490396351],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7604784369,-0.1535119414,0.073174268,-0.0213781185],"action_prob":0.8154492378,"action_logp":-0.2040160894,"action_dist_inputs":[-0.7437594533,0.7420552969],"value_targets":55.2476768494} 
+{"eps_id":1462230943,"obs":[-0.7604784369,-0.1535119414,0.073174268,-0.0213781185],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7635486722,-0.3496028483,0.0727467015,0.2934651673],"action_prob":0.3983234167,"action_logp":-0.9204909801,"action_dist_inputs":[-0.2086183727,0.2038373649],"value_targets":54.7956352234} +{"eps_id":1462230943,"obs":[-0.7635486722,-0.3496028483,0.0727467015,0.2934651673],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7705407143,-0.1555894166,0.0786160082,0.0245833844],"action_prob":0.8387551904,"action_logp":-0.1758363843,"action_dist_inputs":[-0.8251193166,0.8238758445],"value_targets":54.3390235901} +{"eps_id":1462230943,"obs":[-0.7705407143,-0.1555894166,0.0786160082,0.0245833844],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.773652494,0.0383222438,0.0791076794,-0.2422964126],"action_prob":0.6656650305,"action_logp":-0.4069687128,"action_dist_inputs":[-0.3465547264,0.3420882523],"value_targets":53.8778038025} +{"eps_id":1462230943,"obs":[-0.773652494,0.0383222438,0.0791076794,-0.2422964126],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7728860378,-0.1578353345,0.0742617473,0.0742531866],"action_prob":0.6387723088,"action_logp":-0.4482071698,"action_dist_inputs":[0.2814941406,-0.2885454893],"value_targets":53.4119224548} +{"eps_id":1462230943,"obs":[-0.7728860378,-0.1578353345,0.0742617473,0.0742531866],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7760427594,0.0361477695,0.0757468119,-0.1941066235],"action_prob":0.7125296593,"action_logp":-0.3389337659,"action_dist_inputs":[-0.4558901191,0.4518117011],"value_targets":52.9413375854} 
+{"eps_id":1462230943,"obs":[-0.7760427594,0.0361477695,0.0757468119,-0.1941066235],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7753198147,-0.1599713862,0.0718646795,0.1214768663],"action_prob":0.577226758,"action_logp":-0.5495200753,"action_dist_inputs":[0.1523096412,-0.1590896994],"value_targets":52.4659957886} +{"eps_id":1462230943,"obs":[-0.7753198147,-0.1599713862,0.0718646795,0.1214768663],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7785192728,0.0340512991,0.0742942169,-0.1476959288],"action_prob":0.751173079,"action_logp":-0.2861192226,"action_dist_inputs":[-0.5542719364,0.550606668],"value_targets":51.9858551025} +{"eps_id":1462230943,"obs":[-0.7785192728,0.0340512991,0.0742942169,-0.1476959288],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7778382301,0.2280350626,0.0713403001,-0.4160465002],"action_prob":0.4863941669,"action_logp":-0.7207359672,"action_dist_inputs":[0.0239711534,-0.030465629],"value_targets":51.5008621216} +{"eps_id":1462230943,"obs":[-0.7778382301,0.2280350626,0.0713403001,-0.4160465002],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7732775211,0.0319783725,0.0630193651,-0.1017532498],"action_prob":0.7838340998,"action_logp":-0.2435579151,"action_dist_inputs":[0.6397333741,-0.648417592],"value_targets":51.0109710693} +{"eps_id":1462230943,"obs":[-0.7732775211,0.0319783725,0.0630193651,-0.1017532498],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7726379633,0.2261432111,0.0609843023,-0.3739069402],"action_prob":0.5332459211,"action_logp":-0.6287726164,"action_dist_inputs":[-0.0696819723,0.0634981021],"value_targets":50.5161323547} 
+{"eps_id":1462230943,"obs":[-0.7726379633,0.2261432111,0.0609843023,-0.3739069402],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7681151032,0.0302103907,0.0535061657,-0.0626357719],"action_prob":0.7561476231,"action_logp":-0.2795186639,"action_dist_inputs":[0.5615683794,-0.5701053143],"value_targets":50.0162963867} +{"eps_id":1462230943,"obs":[-0.7681151032,0.0302103907,0.0535061657,-0.0626357719],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.767510891,0.2245259881,0.0522534475,-0.3379687369],"action_prob":0.5719891191,"action_logp":-0.558635354,"action_dist_inputs":[-0.1479349434,0.1420363635],"value_targets":49.5114097595} +{"eps_id":1462230943,"obs":[-0.767510891,0.2245259881,0.0522534475,-0.3379687369],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7630203366,0.4188669324,0.0454940759,-0.6137267351],"action_prob":0.2701670527,"action_logp":-1.3087147474,"action_dist_inputs":[0.4926902354,-0.5010848641],"value_targets":49.0014266968} +{"eps_id":1462230943,"obs":[-0.7630203366,0.4188669324,0.0454940759,-0.6137267351],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.754643023,0.2231397629,0.0332195386,-0.3070691228],"action_prob":0.8873782754,"action_logp":-0.1194838956,"action_dist_inputs":[1.0272467136,-1.0369900465],"value_targets":48.486289978} +{"eps_id":1462230943,"obs":[-0.754643023,0.2231397629,0.0332195386,-0.3070691228],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7501802444,0.4177730083,0.0270781573,-0.5890931487],"action_prob":0.2792574465,"action_logp":-1.2756211758,"action_dist_inputs":[0.4699487388,-0.4781990647],"value_targets":47.9659461975} 
+{"eps_id":1462230943,"obs":[-0.7501802444,0.4177730083,0.0270781573,-0.5890931487],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7418247461,0.2222825438,0.0152962944,-0.2880049646],"action_prob":0.8873615861,"action_logp":-0.1195027232,"action_dist_inputs":[1.0271948576,-1.0368750095],"value_targets":47.4403495789} +{"eps_id":1462230943,"obs":[-0.7418247461,0.2222825438,0.0152962944,-0.2880049646],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7373791337,0.4171830714,0.0095361946,-0.5758246183],"action_prob":0.2767648995,"action_logp":-1.2845869064,"action_dist_inputs":[0.4762075543,-0.4843584299],"value_targets":46.9094467163} +{"eps_id":1462230943,"obs":[-0.7373791337,0.4171830714,0.0095361946,-0.5758246183],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7290354371,0.2219287455,-0.0019802973,-0.2801528275],"action_prob":0.8914793134,"action_logp":-0.1148730665,"action_dist_inputs":[1.0481569767,-1.0577844381],"value_targets":46.3731765747} +{"eps_id":1462230943,"obs":[-0.7290354371,0.2219287455,-0.0019802973,-0.2801528275],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.724596858,0.4170788825,-0.0075833541,-0.5734596848],"action_prob":0.2626561224,"action_logp":-1.3369096518,"action_dist_inputs":[0.512054801,-0.5201539993],"value_targets":45.8314933777} +{"eps_id":1462230943,"obs":[-0.724596858,0.4170788825,-0.0075833541,-0.5734596848],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7162553072,0.2220640779,-0.0190525483,-0.283175379],"action_prob":0.8992846608,"action_logp":-0.1061556861,"action_dist_inputs":[1.0898560286,-1.0994448662],"value_targets":45.2843360901} 
+{"eps_id":1462230943,"obs":[-0.7162553072,0.2220640779,-0.0190525483,-0.283175379],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7118140459,0.0272189844,-0.024716055,0.0034381184],"action_prob":0.7621359825,"action_logp":-0.271630317,"action_dist_inputs":[0.5781664848,-0.5862594843],"value_targets":44.7316513062} +{"eps_id":1462230943,"obs":[-0.7118140459,0.0272189844,-0.024716055,0.0034381184],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7112696171,0.2226865143,-0.0246472917,-0.2969394028],"action_prob":0.546307385,"action_logp":-0.6045734882,"action_dist_inputs":[-0.0955813751,0.090180479],"value_targets":44.1733856201} +{"eps_id":1462230943,"obs":[-0.7112696171,0.2226865143,-0.0246472917,-0.2969394028],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7068158984,0.0279244389,-0.0305860806,-0.0121305957],"action_prob":0.7823394537,"action_logp":-0.245466575,"action_dist_inputs":[0.6356058717,-0.643746078],"value_targets":43.6094818115} +{"eps_id":1462230943,"obs":[-0.7068158984,0.0279244389,-0.0305860806,-0.0121305957],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7062574029,-0.1667458266,-0.030828692,0.270747304],"action_prob":0.4854420424,"action_logp":-0.7226953506,"action_dist_inputs":[-0.0318832174,0.0263649449],"value_targets":43.0398788452} +{"eps_id":1462230943,"obs":[-0.7062574029,-0.1667458266,-0.030828692,0.270747304],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7095923424,0.0288021807,-0.025413746,-0.0314975157],"action_prob":0.7833918929,"action_logp":-0.2441222221,"action_dist_inputs":[-0.6439552307,0.6415880919],"value_targets":42.4645233154} 
+{"eps_id":1462230943,"obs":[-0.7095923424,0.0288021807,-0.025413746,-0.0314975157],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7090163231,-0.165946275,-0.0260436963,0.2530600131],"action_prob":0.5055968761,"action_logp":-0.6820156574,"action_dist_inputs":[0.0083638821,-0.0140243387],"value_targets":41.8833580017} +{"eps_id":1462230943,"obs":[-0.7090163231,-0.165946275,-0.0260436963,0.2530600131],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7123352289,0.0295376815,-0.0209824964,-0.0477224737],"action_prob":0.7746343613,"action_logp":-0.25536412,"action_dist_inputs":[-0.6185998321,0.6160672903],"value_targets":41.2963218689} +{"eps_id":1462230943,"obs":[-0.7123352289,0.0295376815,-0.0209824964,-0.0477224737],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7117444873,-0.1652772129,-0.0219369456,0.2382671386],"action_prob":0.522390008,"action_logp":-0.649340868,"action_dist_inputs":[0.0419205464,-0.0476993471],"value_targets":40.7033538818} +{"eps_id":1462230943,"obs":[-0.7117444873,-0.1652772129,-0.0219369456,0.2382671386],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7150499821,0.030151153,-0.0171716027,-0.0612538196],"action_prob":0.7669470906,"action_logp":-0.2653374672,"action_dist_inputs":[-0.5969115496,0.5942404866],"value_targets":40.1044006348} +{"eps_id":1462230943,"obs":[-0.7150499821,0.030151153,-0.0171716027,-0.0612538196],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7144469619,-0.1647204459,-0.0183966793,0.2259622663],"action_prob":0.5361850858,"action_logp":-0.6232758164,"action_dist_inputs":[0.0695582554,-0.0754356459],"value_targets":39.4993934631} 
+{"eps_id":1462230943,"obs":[-0.7144469619,-0.1647204459,-0.0183966793,0.2259622663],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7177413702,0.0306595322,-0.0138774347,-0.0724664181],"action_prob":0.7603339553,"action_logp":-0.2739975154,"action_dist_inputs":[-0.5786491632,0.5758621693],"value_targets":38.8882751465} +{"eps_id":1462230943,"obs":[-0.7177413702,0.0306595322,-0.0138774347,-0.0724664181],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7171282172,-0.164260745,-0.0153267626,0.2158060074],"action_prob":0.5473320484,"action_logp":-0.6026995778,"action_dist_inputs":[0.0919687673,-0.0979281366],"value_targets":38.2709846497} +{"eps_id":1462230943,"obs":[-0.7171282172,-0.164260745,-0.0153267626,0.2158060074],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7204134464,0.0310769249,-0.0110106422,-0.0816719681],"action_prob":0.7547745705,"action_logp":-0.2813361883,"action_dist_inputs":[-0.5635621548,0.5606787205],"value_targets":37.6474609375} +{"eps_id":1462230943,"obs":[-0.7204134464,0.0310769249,-0.0110106422,-0.0816719681],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7197918892,-0.1638854742,-0.0126440823,0.2075168341],"action_prob":0.5561440587,"action_logp":-0.5867279172,"action_dist_inputs":[0.1097499952,-0.115777351],"value_targets":37.0176353455} +{"eps_id":1462230943,"obs":[-0.7197918892,-0.1638854742,-0.0126440823,0.2075168341],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7230696082,0.0314149782,-0.0084937457,-0.0891276821],"action_prob":0.7502334714,"action_logp":-0.2873708308,"action_dist_inputs":[-0.551410675,0.5484473705],"value_targets":36.3814506531} 
+{"eps_id":1462230943,"obs":[-0.7230696082,0.0314149782,-0.0084937457,-0.0891276821],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7224413157,0.2266576439,-0.010276299,-0.3844782412],"action_prob":0.4371124208,"action_logp":-0.8275648952,"action_dist_inputs":[0.1234031245,-0.1294864416],"value_targets":35.7388381958} +{"eps_id":1462230943,"obs":[-0.7224413157,0.2266576439,-0.010276299,-0.3844782412],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.717908144,0.0316830799,-0.0179658644,-0.0950530246],"action_prob":0.8383616209,"action_logp":-0.1763057262,"action_dist_inputs":[0.8188058138,-0.8272824287],"value_targets":35.0897369385} +{"eps_id":1462230943,"obs":[-0.717908144,0.0316830799,-0.0179658644,-0.0950530246],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.717274487,-0.1631768197,-0.0198669247,0.191907987],"action_prob":0.5872704387,"action_logp":-0.5322698355,"action_dist_inputs":[0.1732833385,-0.1794095784],"value_targets":34.4340782166} +{"eps_id":1462230943,"obs":[-0.717274487,-0.1631768197,-0.0198669247,0.191907987],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7205380201,0.0322236158,-0.0160287656,-0.1069753319],"action_prob":0.729988277,"action_logp":-0.3147267699,"action_dist_inputs":[-0.4988464713,0.4957167208],"value_targets":33.7717971802} +{"eps_id":1462230943,"obs":[-0.7205380201,0.0322236158,-0.0160287656,-0.1069753319],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7198935151,-0.1626650095,-0.0181682706,0.1806078106],"action_prob":0.6013037562,"action_logp":-0.5086550117,"action_dist_inputs":[0.2023440003,-0.2085564882],"value_targets":33.1028251648} 
+{"eps_id":1462230943,"obs":[-0.7198935151,-0.1626650095,-0.0181682706,0.1806078106],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7231468558,0.0327121429,-0.0145561155,-0.1177507341],"action_prob":0.7209996581,"action_logp":-0.3271166384,"action_dist_inputs":[-0.4763315022,0.4730940461],"value_targets":32.4270935059} +{"eps_id":1462230943,"obs":[-0.7231468558,0.0327121429,-0.0145561155,-0.1177507341],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7224925756,-0.1621982604,-0.0169111304,0.1703045368],"action_prob":0.6142205,"action_logp":-0.4874013364,"action_dist_inputs":[0.229398489,-0.2356893718],"value_targets":31.7445411682} +{"eps_id":1462230943,"obs":[-0.7224925756,-0.1621982604,-0.0169111304,0.1703045368],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7257365584,0.0331616104,-0.0135050388,-0.1276649982],"action_prob":0.7121697664,"action_logp":-0.3394389451,"action_dist_inputs":[-0.4546409547,0.4513044357],"value_targets":31.0550918579} +{"eps_id":1462230943,"obs":[-0.7257365584,0.0331616104,-0.0135050388,-0.1276649982],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7250733376,-0.1617642939,-0.0160583388,0.1607268453],"action_prob":0.626370132,"action_logp":-0.4678138196,"action_dist_inputs":[0.255156219,-0.2615195811],"value_targets":30.3586788177} +{"eps_id":1462230943,"obs":[-0.7250733376,-0.1617642939,-0.0160583388,0.1607268453],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7283086181,0.0335838273,-0.0128438026,-0.136978507],"action_prob":0.7033182979,"action_logp":-0.3519457281,"action_dist_inputs":[-0.4332893193,0.4298602045],"value_targets":29.6552295685} 
+{"eps_id":1462230943,"obs":[-0.7283086181,0.0335838273,-0.0128438026,-0.136978507],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7276369333,0.2288873643,-0.0155833727,-0.4336856306],"action_prob":0.3619270623,"action_logp":-1.0163125992,"action_dist_inputs":[0.2802889943,-0.2867208421],"value_targets":28.9446773529} +{"eps_id":1462230943,"obs":[-0.7276369333,0.2288873643,-0.0155833727,-0.4336856306],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7230591774,0.0339894667,-0.0242570844,-0.1459556073],"action_prob":0.8705501556,"action_logp":-0.1386299282,"action_dist_inputs":[0.9485931993,-0.9572380781],"value_targets":28.2269458771} +{"eps_id":1462230943,"obs":[-0.7230591774,0.0339894667,-0.0242570844,-0.1459556073],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7223793864,-0.1607768536,-0.0271761976,0.1389770508],"action_prob":0.6680653691,"action_logp":-0.4033692777,"action_dist_inputs":[0.3464771509,-0.3529706299],"value_targets":27.5019664764} +{"eps_id":1462230943,"obs":[-0.7223793864,-0.1607768536,-0.0271761976,0.1389770508],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7255949378,-0.3554992378,-0.0243966561,0.4229639769],"action_prob":0.333719641,"action_logp":-1.097454071,"action_dist_inputs":[-0.3475213051,0.3438880444],"value_targets":26.7696628571} +{"eps_id":1462230943,"obs":[-0.7255949378,-0.3554992378,-0.0243966561,0.4229639769],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7327049375,-0.160040319,-0.0159373768,0.1226909161],"action_prob":0.8494122028,"action_logp":-0.1632107049,"action_dist_inputs":[-0.8651415706,0.8648566008],"value_targets":26.0299625397} 
+{"eps_id":1462230943,"obs":[-0.7327049375,-0.160040319,-0.0159373768,0.1226909161],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7359057069,-0.3549303412,-0.0134835588,0.4103034437],"action_prob":0.3387570679,"action_logp":-1.082472086,"action_dist_inputs":[-0.3363089859,0.3325290084],"value_targets":25.2827911377} +{"eps_id":1462230943,"obs":[-0.7359057069,-0.3549303412,-0.0134835588,0.4103034437],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7430043221,-0.1596198678,-0.0052774893,0.1134003028],"action_prob":0.8485112786,"action_logp":-0.1642719209,"action_dist_inputs":[-0.8616868854,0.8612853885],"value_targets":24.5280704498} +{"eps_id":1462230943,"obs":[-0.7430043221,-0.1596198678,-0.0052774893,0.1134003028],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7461967468,-0.3546658158,-0.003009483,0.4044135511],"action_prob":0.3363523781,"action_logp":-1.0895959139,"action_dist_inputs":[-0.341732204,0.33785972],"value_targets":23.7657279968} +{"eps_id":1462230943,"obs":[-0.7461967468,-0.3546658158,-0.003009483,0.4044135511],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7532900572,-0.1595012993,0.0050787879,0.110783346],"action_prob":0.8499444723,"action_logp":-0.1625842303,"action_dist_inputs":[-0.8673064113,0.8668592572],"value_targets":22.9956855774} +{"eps_id":1462230943,"obs":[-0.7532900572,-0.1595012993,0.0050787879,0.110783346],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7564800978,0.0355475098,0.0072944551,-0.1802929342],"action_prob":0.6731566787,"action_logp":-0.3957771659,"action_dist_inputs":[-0.3632048666,0.3592924774],"value_targets":22.2178649902} 
+{"eps_id":1462230943,"obs":[-0.7564800978,0.0355475098,0.0072944551,-0.1802929342],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7557691336,-0.1596780568,0.0036885962,0.1146821752],"action_prob":0.6629226208,"action_logp":-0.4110970497,"action_dist_inputs":[0.3347916305,-0.3415541947],"value_targets":21.4321861267} +{"eps_id":1462230943,"obs":[-0.7557691336,-0.1596780568,0.0036885962,0.1146821752],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7589626908,0.0353908427,0.0059822397,-0.1768347472],"action_prob":0.6756941676,"action_logp":-0.3920147121,"action_dist_inputs":[-0.3689753115,0.3650781214],"value_targets":20.6385707855} +{"eps_id":1462230943,"obs":[-0.7589626908,0.0353908427,0.0059822397,-0.1768347472],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7582548857,0.2304266691,0.002445545,-0.4676244855],"action_prob":0.3401149809,"action_logp":-1.0784715414,"action_dist_inputs":[0.3280148506,-0.3347670138],"value_targets":19.8369407654} +{"eps_id":1462230943,"obs":[-0.7582548857,0.2304266691,0.002445545,-0.4676244855],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7536463141,0.0352702588,-0.0069069448,-0.1741717458],"action_prob":0.8735577464,"action_logp":-0.1351810694,"action_dist_inputs":[0.9619885087,-0.9707996249],"value_targets":19.0272140503} +{"eps_id":1462230943,"obs":[-0.7536463141,0.0352702588,-0.0069069448,-0.1741717458],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7529409528,0.2304903716,-0.0103903795,-0.4690255523],"action_prob":0.3244469166,"action_logp":-1.125633359,"action_dist_inputs":[0.3633340299,-0.3700758219],"value_targets":18.2093067169} 
+{"eps_id":1462230943,"obs":[-0.7529409528,0.2304903716,-0.0103903795,-0.4690255523],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7483311296,0.0355167389,-0.0197708905,-0.1796356142],"action_prob":0.8817494512,"action_logp":-0.1258473396,"action_dist_inputs":[1.0001484156,-1.0089538097],"value_targets":17.3831367493} +{"eps_id":1462230943,"obs":[-0.7483311296,0.0355167389,-0.0197708905,-0.1796356142],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.747620821,-0.1593167931,-0.0233636033,0.1067453027],"action_prob":0.7009917498,"action_logp":-0.3552591205,"action_dist_inputs":[0.422622472,-0.4294026792],"value_targets":16.5486240387} +{"eps_id":1462230943,"obs":[-0.747620821,-0.1593167931,-0.0233636033,0.1067453027],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7508071065,0.0361320376,-0.0212286972,-0.1932162046],"action_prob":0.6315658689,"action_logp":-0.4595530629,"action_dist_inputs":[-0.2714673579,0.2674729228],"value_targets":15.7056808472} +{"eps_id":1462230943,"obs":[-0.7508071065,0.0361320376,-0.0212286972,-0.1932162046],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7500844598,0.2315511405,-0.0250930209,-0.4925195575],"action_prob":0.2811747789,"action_logp":-1.268778801,"action_dist_inputs":[0.4658848941,-0.4727567434],"value_targets":14.8542232513} +{"eps_id":1462230943,"obs":[-0.7500844598,0.2315511405,-0.0250930209,-0.4925195575],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7454534769,0.0367919356,-0.034943413,-0.2078493088],"action_prob":0.8984979987,"action_logp":-0.1070308238,"action_dist_inputs":[1.0858869553,-1.094758749],"value_targets":13.9941644669} 
+{"eps_id":1462230943,"obs":[-0.7454534769,0.0367919356,-0.034943413,-0.2078493088],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.744717598,-0.1578133702,-0.0391003974,0.0736091807],"action_prob":0.752964735,"action_logp":-0.2837368548,"action_dist_inputs":[0.5537628531,-0.5607246161],"value_targets":13.125418663} +{"eps_id":1462230943,"obs":[-0.744717598,-0.1578133702,-0.0391003974,0.0736091807],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7478739023,-0.3523536026,-0.0376282148,0.3537038863],"action_prob":0.4362115264,"action_logp":-0.8296279907,"action_dist_inputs":[-0.1304040998,0.1261477917],"value_targets":12.2478981018} +{"eps_id":1462230943,"obs":[-0.7478739023,-0.3523536026,-0.0376282148,0.3537038863],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7549209595,-0.15671736,-0.0305541381,0.0493971743],"action_prob":0.8076874614,"action_logp":-0.2135801017,"action_dist_inputs":[-0.7181463242,0.7169071436],"value_targets":11.3615131378} +{"eps_id":1462230943,"obs":[-0.7549209595,-0.15671736,-0.0305541381,0.0493971743],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7580553293,-0.3513881862,-0.0295661949,0.332285583],"action_prob":0.4571788907,"action_logp":-0.7826805115,"action_dist_inputs":[-0.0880810246,0.0836240575],"value_targets":10.4661741257} +{"eps_id":1462230943,"obs":[-0.7580553293,-0.3513881862,-0.0295661949,0.332285583],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7650830746,-0.1558581293,-0.0229204819,0.0304275136],"action_prob":0.7995065451,"action_logp":-0.2237605751,"action_dist_inputs":[-0.6923360229,0.6908767819],"value_targets":9.5617923737} 
+{"eps_id":1462230943,"obs":[-0.7650830746,-0.1558581293,-0.0229204819,0.0304275136],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7682002187,0.0395848826,-0.0223119315,-0.2693980336],"action_prob":0.5279124975,"action_logp":-0.6388247013,"action_dist_inputs":[-0.0581932589,0.0535730757],"value_targets":8.6482753754} +{"eps_id":1462230943,"obs":[-0.7682002187,0.0395848826,-0.0223119315,-0.2693980336],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7674085498,-0.1552116722,-0.0276998933,0.0161649045],"action_prob":0.7951871157,"action_logp":-0.2291778028,"action_dist_inputs":[0.6745656133,-0.6819149852],"value_targets":7.7255306244} +{"eps_id":1462230943,"obs":[-0.7674085498,-0.1552116722,-0.0276998933,0.0161649045],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7705127597,0.0402963497,-0.027376594,-0.2851273715],"action_prob":0.4994258583,"action_logp":-0.6942961216,"action_dist_inputs":[-0.0012199902,-0.0035165409],"value_targets":6.7934651375} +{"eps_id":1462230943,"obs":[-0.7705127597,0.0402963497,-0.027376594,-0.2851273715],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7697068453,0.2357978374,-0.0330791436,-0.58631742],"action_prob":0.1873264164,"action_logp":-1.6749026775,"action_dist_inputs":[0.7300173044,-0.7374596],"value_targets":5.8519849777} +{"eps_id":1462230943,"obs":[-0.7697068453,0.2357978374,-0.0330791436,-0.58631742],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7649908662,0.0411544293,-0.0448054895,-0.304235518],"action_prob":0.9260463715,"action_logp":-0.0768309385,"action_dist_inputs":[1.2591404915,-1.2683463097],"value_targets":4.9009947777} 
+{"eps_id":1462230943,"obs":[-0.7649908662,0.0411544293,-0.0448054895,-0.304235518],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7641677856,-0.1533013135,-0.0508901998,-0.0260130614],"action_prob":0.8416929841,"action_logp":-0.1723399311,"action_dist_inputs":[0.8316690326,-0.8392103314],"value_targets":3.9403989315} +{"eps_id":1462230943,"obs":[-0.7641677856,-0.1533013135,-0.0508901998,-0.0260130614],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7672338486,0.0425121188,-0.0514104627,-0.334308207],"action_prob":0.3987295032,"action_logp":-0.9194720387,"action_dist_inputs":[0.2028550655,-0.2079066932],"value_targets":2.970099926} +{"eps_id":1462230943,"obs":[-0.7672338486,0.0425121188,-0.0514104627,-0.334308207],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7663835883,-0.1518418789,-0.0580966249,-0.0582704023],"action_prob":0.8640652299,"action_logp":-0.1461070031,"action_dist_inputs":[0.9208863378,-0.9285865426],"value_targets":1.9900000095} +{"eps_id":1462230943,"obs":[-0.7663835883,-0.1518418789,-0.0580966249,-0.0582704023],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[-0.769420445,-0.3460848033,-0.0592620336,0.2155315876],"action_prob":0.6577224731,"action_logp":-0.418972224,"action_dist_inputs":[0.3239327073,-0.3292284906],"value_targets":1.0} +{"eps_id":989577397,"obs":[0.0135382917,0.0214369353,-0.0469299294,-0.0164287537],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0139670298,0.2171994001,-0.0472585037,-0.3235414922],"action_prob":0.3986051977,"action_logp":-0.9197838306,"action_dist_inputs":[0.2053202391,-0.205959931],"value_targets":86.6020355225} 
+{"eps_id":989577397,"obs":[0.0139670298,0.2171994001,-0.0472585037,-0.3235414922],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0183110181,0.0227811262,-0.053729333,-0.0461284928],"action_prob":0.8875278831,"action_logp":-0.1193153709,"action_dist_inputs":[1.0306273699,-1.0351067781],"value_targets":86.4666976929} +{"eps_id":989577397,"obs":[0.0183110181,0.0227811262,-0.053729333,-0.0461284928],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0187666416,-0.1715308577,-0.0546519011,0.2291300446],"action_prob":0.6724970937,"action_logp":-0.3967574537,"action_dist_inputs":[0.3592567742,-0.3602440953],"value_targets":86.3300018311} +{"eps_id":989577397,"obs":[0.0187666416,-0.1715308577,-0.0546519011,0.2291300446],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0153360236,-0.3658309579,-0.0500693023,0.504085362],"action_prob":0.255125314,"action_logp":-1.3660004139,"action_dist_inputs":[-0.534399271,0.5370618105],"value_targets":86.1919174194} +{"eps_id":989577397,"obs":[0.0153360236,-0.3658309579,-0.0500693023,0.504085362],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0080194045,-0.1700404286,-0.0399875939,0.1960534155],"action_prob":0.9083291888,"action_logp":-0.0961484313,"action_dist_inputs":[-1.1439298391,1.1494731903],"value_targets":86.052444458} +{"eps_id":989577397,"obs":[0.0080194045,-0.1700404286,-0.0399875939,0.1960534155],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0046185958,0.0256300122,-0.0360665247,-0.1089708358],"action_prob":0.720482111,"action_logp":-0.3278346658,"action_dist_inputs":[-0.4722561538,0.4745983183],"value_targets":85.9115600586} 
+{"eps_id":989577397,"obs":[0.0046185958,0.0256300122,-0.0360665247,-0.1089708358],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0051311962,-0.1689570397,-0.0382459424,0.1721186787],"action_prob":0.7408721447,"action_logp":-0.2999272048,"action_dist_inputs":[0.5244011283,-0.5261053443],"value_targets":85.7692489624} +{"eps_id":989577397,"obs":[0.0051311962,-0.1689570397,-0.0382459424,0.1721186787],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0017520554,0.0266908389,-0.0348035693,-0.1323800981],"action_prob":0.6843152046,"action_logp":-0.3793366551,"action_dist_inputs":[-0.3857915699,0.3878828287],"value_targets":85.62550354} +{"eps_id":989577397,"obs":[0.0017520554,0.0266908389,-0.0348035693,-0.1323800981],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0022858721,-0.1679157317,-0.0374511704,0.1491227895],"action_prob":0.7697002888,"action_logp":-0.2617540658,"action_dist_inputs":[0.602312088,-0.6043078899],"value_targets":85.4803085327} +{"eps_id":989577397,"obs":[0.0022858721,-0.1679157317,-0.0374511704,0.1491227895],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0010724426,-0.3624819517,-0.0344687141,0.4297593832],"action_prob":0.3557595611,"action_logp":-1.0335001945,"action_dist_inputs":[-0.2959890962,0.2978279293],"value_targets":85.3336486816} +{"eps_id":989577397,"obs":[-0.0010724426,-0.3624819517,-0.0344687141,0.4297593832],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0083220815,-0.1668892503,-0.0258735269,0.1264128238],"action_prob":0.8871909976,"action_logp":-0.1196950227,"action_dist_inputs":[-1.0287117958,1.0336524248],"value_targets":85.1855010986} 
+{"eps_id":989577397,"obs":[-0.0083220815,-0.1668892503,-0.0258735269,0.1264128238],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0116598662,-0.3616311848,-0.0233452711,0.4108219743],"action_prob":0.3740680516,"action_logp":-0.983317554,"action_dist_inputs":[-0.2566094697,0.2581945658],"value_targets":85.0358581543} +{"eps_id":989577397,"obs":[-0.0116598662,-0.3616311848,-0.0233452711,0.4108219743],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0188924894,-0.1661861688,-0.0151288314,0.110871464],"action_prob":0.8844128251,"action_logp":-0.1228313148,"action_dist_inputs":[-1.0150593519,1.0198395252],"value_targets":84.8847045898} +{"eps_id":989577397,"obs":[-0.0188924894,-0.1661861688,-0.0151288314,0.110871464],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0222162139,0.0291492622,-0.0129114017,-0.1865458637],"action_prob":0.6195577979,"action_logp":-0.478749305,"action_dist_inputs":[-0.2431369871,0.2445346564],"value_targets":84.7320251465} +{"eps_id":989577397,"obs":[-0.0222162139,0.0291492622,-0.0129114017,-0.1865458637],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0216332283,-0.1657855958,-0.016642319,0.1020362154],"action_prob":0.8022888303,"action_logp":-0.220286563,"action_dist_inputs":[0.6990010142,-0.7016606331],"value_targets":84.5778045654} +{"eps_id":989577397,"obs":[-0.0216332283,-0.1657855958,-0.016642319,0.1020362154],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0249489397,0.0295708533,-0.0146015948,-0.1958505064],"action_prob":0.5987974405,"action_logp":-0.5128319263,"action_dist_inputs":[-0.1995944381,0.2008624673],"value_targets":84.4220275879} 
+{"eps_id":989577397,"obs":[-0.0249489397,0.0295708533,-0.0146015948,-0.1958505064],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0243575238,-0.1653392315,-0.0185186043,0.092190735],"action_prob":0.8130351305,"action_logp":-0.2069809437,"action_dist_inputs":[0.733523488,-0.7363300323],"value_targets":84.2646713257} +{"eps_id":989577397,"obs":[-0.0243575238,-0.1653392315,-0.0185186043,0.092190735],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0276643075,0.0300431922,-0.0166747905,-0.2062768042],"action_prob":0.5746251941,"action_logp":-0.5540373325,"action_dist_inputs":[-0.1498117,0.1509354413],"value_targets":84.1057281494} +{"eps_id":989577397,"obs":[-0.0276643075,0.0300431922,-0.0166747905,-0.2062768042],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0270634443,-0.1648363918,-0.020800326,0.081099771],"action_prob":0.8243029714,"action_logp":-0.1932171583,"action_dist_inputs":[0.7714036703,-0.7743733525],"value_targets":83.9451828003} +{"eps_id":989577397,"obs":[-0.0270634443,-0.1648363918,-0.020800326,0.081099771],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0303601716,-0.3596540987,-0.0191783309,0.3671482503],"action_prob":0.453582108,"action_logp":-0.7905789614,"action_dist_inputs":[-0.0926233307,0.093584314],"value_targets":83.7830123901} +{"eps_id":989577397,"obs":[-0.0303601716,-0.3596540987,-0.0191783309,0.3671482503],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0375532545,-0.1642649323,-0.0118353656,0.0684803352],"action_prob":0.8627559543,"action_logp":-0.1476234198,"action_dist_inputs":[-0.9170342088,0.9213365316],"value_targets":83.6192016602} 
+{"eps_id":989577397,"obs":[-0.0375532545,-0.1642649323,-0.0118353656,0.0684803352],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0408385508,0.0310246795,-0.0104657589,-0.2279130816],"action_prob":0.541608274,"action_logp":-0.6132122874,"action_dist_inputs":[-0.0830149502,0.0838038772],"value_targets":83.453742981} +{"eps_id":989577397,"obs":[-0.0408385508,0.0310246795,-0.0104657589,-0.2279130816],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.040218059,-0.1639461517,-0.0150240213,0.0614502728],"action_prob":0.8359592557,"action_logp":-0.1791753918,"action_dist_inputs":[0.8125930429,-0.8158718944],"value_targets":83.286605835} +{"eps_id":989577397,"obs":[-0.040218059,-0.1639461517,-0.0150240213,0.0614502728],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0434969813,0.0313879475,-0.0137950154,-0.2359347045],"action_prob":0.519587338,"action_logp":-0.6547203064,"action_dist_inputs":[-0.0388637371,0.0395257659],"value_targets":83.1177825928} +{"eps_id":989577397,"obs":[-0.0434969813,0.0313879475,-0.0137950154,-0.2359347045],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0428692214,0.2267042398,-0.0185137093,-0.5329368711],"action_prob":0.1554273218,"action_logp":-1.861577034,"action_dist_inputs":[0.8446149826,-0.8480374217],"value_targets":82.9472579956} +{"eps_id":989577397,"obs":[-0.0428692214,0.2267042398,-0.0185137093,-0.5329368711],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0383351371,0.031847503,-0.0291724466,-0.2461445779],"action_prob":0.9308697581,"action_logp":-0.0716359317,"action_dist_inputs":[1.2967611551,-1.3033661842],"value_targets":82.7750091553} 
+{"eps_id":989577397,"obs":[-0.0383351371,0.031847503,-0.0291724466,-0.2461445779],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0376981869,0.2273737043,-0.0340953395,-0.5478845835],"action_prob":0.1378828436,"action_logp":-1.9813508987,"action_dist_inputs":[0.9146772623,-0.9183095098],"value_targets":82.601020813} +{"eps_id":989577397,"obs":[-0.0376981869,0.2273737043,-0.0340953395,-0.5478845835],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0331507139,0.032746911,-0.0450530313,-0.2661361396],"action_prob":0.9346705079,"action_logp":-0.0675612092,"action_dist_inputs":[1.3269637823,-1.3337860107],"value_targets":82.4252700806} +{"eps_id":989577397,"obs":[-0.0331507139,0.032746911,-0.0450530313,-0.2661361396],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0324957743,-0.1617040485,-0.0503757522,0.0120032001],"action_prob":0.8810256124,"action_logp":-0.1266685575,"action_dist_inputs":[0.9991095662,-1.0030690432],"value_targets":82.2477493286} +{"eps_id":989577397,"obs":[-0.0324957743,-0.1617040485,-0.0503757522,0.0120032001],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0357298553,-0.3560687006,-0.0501356907,0.2883761227],"action_prob":0.6573919058,"action_logp":-0.4194749296,"action_dist_inputs":[0.3257651329,-0.3259280026],"value_targets":82.0684280396} +{"eps_id":989577397,"obs":[-0.0357298553,-0.3560687006,-0.0501356907,0.2883761227],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.042851232,-0.5504412055,-0.0443681665,0.5648347735],"action_prob":0.2490330338,"action_logp":-1.3901697397,"action_dist_inputs":[-0.5501681566,0.5536080599],"value_targets":81.8873062134} 
+{"eps_id":989577397,"obs":[-0.042851232,-0.5504412055,-0.0443681665,0.5648347735],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0538600534,-0.3547257483,-0.0330714695,0.2585105598],"action_prob":0.9091736674,"action_logp":-0.0952191651,"action_dist_inputs":[-1.1487834454,1.1548037529],"value_targets":81.7043457031} +{"eps_id":989577397,"obs":[-0.0538600534,-0.3547257483,-0.0330714695,0.2585105598],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.060954567,-0.5493603349,-0.0279012602,0.540581584],"action_prob":0.2631706297,"action_logp":-1.3349527121,"action_dist_inputs":[-0.5132133365,0.5163404346],"value_targets":81.5195465088} +{"eps_id":989577397,"obs":[-0.060954567,-0.5493603349,-0.0279012602,0.540581584],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0719417781,-0.3538575172,-0.0170896295,0.2392395884],"action_prob":0.907740593,"action_logp":-0.0967966095,"action_dist_inputs":[-1.1402584314,1.1460961103],"value_targets":81.3328704834} +{"eps_id":989577397,"obs":[-0.0719417781,-0.3538575172,-0.0170896295,0.2392395884],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0790189281,-0.1584956646,-0.0123048369,-0.0587845221],"action_prob":0.7359625101,"action_logp":-0.306576103,"action_dist_inputs":[-0.5110976696,0.5139901638],"value_targets":81.144317627} +{"eps_id":989577397,"obs":[-0.0790189281,-0.1584956646,-0.0123048369,-0.0587845221],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0821888372,-0.353439033,-0.0134805273,0.2299908847],"action_prob":0.7021852732,"action_logp":-0.3535579741,"action_dist_inputs":[0.4282905459,-0.4294352531],"value_targets":80.9538574219} 
+{"eps_id":989577397,"obs":[-0.0821888372,-0.353439033,-0.0134805273,0.2299908847],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0892576203,-0.1581270695,-0.0088807102,-0.0669136047],"action_prob":0.7289592028,"action_logp":-0.3161375225,"action_dist_inputs":[-0.493303895,0.4960445166],"value_targets":80.76146698} +{"eps_id":989577397,"obs":[-0.0892576203,-0.1581270695,-0.0088807102,-0.0669136047],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0924201608,-0.3531205952,-0.0102189817,0.2229542285],"action_prob":0.708088994,"action_logp":-0.3451854587,"action_dist_inputs":[0.4424069226,-0.4437139034],"value_targets":80.5671386719} +{"eps_id":989577397,"obs":[-0.0924201608,-0.3531205952,-0.0102189817,0.2229542285],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0994825736,-0.1578540802,-0.0057598976,-0.0729345679],"action_prob":0.7245881557,"action_logp":-0.3221518099,"action_dist_inputs":[-0.4823649824,0.4849707782],"value_targets":80.3708496094} +{"eps_id":989577397,"obs":[-0.0994825736,-0.1578540802,-0.0057598976,-0.0729345679],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1026396528,-0.3528929949,-0.0072185886,0.2179255337],"action_prob":0.7111772299,"action_logp":-0.3408335745,"action_dist_inputs":[0.4498327076,-0.4512757957],"value_targets":80.1725769043} +{"eps_id":989577397,"obs":[-0.1026396528,-0.3528929949,-0.0072185886,0.2179255337],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1096975133,-0.1576685905,-0.0028600781,-0.0770256743],"action_prob":0.7227967978,"action_logp":-0.3246271908,"action_dist_inputs":[-0.4779449105,0.4804323316],"value_targets":79.9722976685} 
+{"eps_id":989577397,"obs":[-0.1096975133,-0.1576685905,-0.0028600781,-0.0770256743],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1128508896,-0.3527494073,-0.0044005914,0.2147535235],"action_prob":0.7116590738,"action_logp":-0.3401563168,"action_dist_inputs":[0.4509503543,-0.4525051415],"value_targets":79.7699966431} +{"eps_id":989577397,"obs":[-0.1128508896,-0.3527494073,-0.0044005914,0.2147535235],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1199058741,-0.1575648338,-0.0001055211,-0.0793142915],"action_prob":0.7235206366,"action_logp":-0.32362625,"action_dist_inputs":[-0.4798038006,0.4821888506],"value_targets":79.5656509399} +{"eps_id":989577397,"obs":[-0.1199058741,-0.1575648338,-0.0001055211,-0.0793142915],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1230571717,-0.3526852727,-0.0016918069,0.2133353353],"action_prob":0.7096037269,"action_logp":-0.3430486023,"action_dist_inputs":[0.4459083676,-0.4475518465],"value_targets":79.3592453003} +{"eps_id":989577397,"obs":[-0.1230571717,-0.3526852727,-0.0016918069,0.2133353353],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1301108748,-0.5477830172,0.0025748997,0.5054841042],"action_prob":0.273309201,"action_logp":-1.2971515656,"action_dist_inputs":[-0.4877995551,0.4900979102],"value_targets":79.1507568359} +{"eps_id":989577397,"obs":[-0.1301108748,-0.5477830172,0.0025748997,0.5054841042],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1410665363,-0.352697432,0.0126845818,0.2136137486],"action_prob":0.9075426459,"action_logp":-0.0970147401,"action_dist_inputs":[-1.1393376589,1.1446551085],"value_targets":78.9401550293} 
+{"eps_id":989577397,"obs":[-0.1410665363,-0.352697432,0.0126845818,0.2136137486],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1481204778,-0.5479984283,0.0169568565,0.510270834],"action_prob":0.2489356995,"action_logp":-1.390560627,"action_dist_inputs":[-0.551033318,0.553263247],"value_targets":78.727432251} +{"eps_id":989577397,"obs":[-0.1481204778,-0.5479984283,0.0169568565,0.510270834],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1590804458,-0.3531193733,0.0271622725,0.2229793966],"action_prob":0.9135763645,"action_logp":-0.0903883129,"action_dist_inputs":[-1.1763845682,1.1817215681],"value_targets":78.5125579834} +{"eps_id":989577397,"obs":[-0.1590804458,-0.3531193733,0.0271622725,0.2229793966],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1661428362,-0.1583959907,0.0316218622,-0.0610132441],"action_prob":0.7838611603,"action_logp":-0.2435233444,"action_dist_inputs":[-0.6430310011,0.6452803016],"value_targets":78.2955093384} +{"eps_id":989577397,"obs":[-0.1661428362,-0.1583959907,0.0316218622,-0.0610132441],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1693107635,-0.3539567292,0.0304015968,0.2414764911],"action_prob":0.6117226481,"action_logp":-0.4914763272,"action_dist_inputs":[0.2265162915,-0.2280426919],"value_targets":78.0762710571} +{"eps_id":989577397,"obs":[-0.1693107635,-0.3539567292,0.0304015968,0.2414764911],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1763898879,-0.1592819393,0.0352311246,-0.0414640605],"action_prob":0.8078272939,"action_logp":-0.21340698,"action_dist_inputs":[-0.7167996168,0.7191540599],"value_targets":77.8548202515} 
+{"eps_id":989577397,"obs":[-0.1763898879,-0.1592819393,0.0352311246,-0.0414640605],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1795755327,-0.3548909128,0.0344018452,0.2621231377],"action_prob":0.5634656549,"action_logp":-0.5736489296,"action_dist_inputs":[0.1269429624,-0.1282962263],"value_targets":77.6311340332} +{"eps_id":989577397,"obs":[-0.1795755327,-0.3548909128,0.0344018452,0.2621231377],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1866733581,-0.1602764875,0.0396443084,-0.0195136257],"action_prob":0.8306838274,"action_logp":-0.1855060607,"action_dist_inputs":[-0.7939962745,0.7964850664],"value_targets":77.4051818848} +{"eps_id":989577397,"obs":[-0.1866733581,-0.1602764875,0.0396443084,-0.0195136257],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.189878881,-0.3559438884,0.0392540358,0.2854092419],"action_prob":0.5065085292,"action_logp":-0.6802141666,"action_dist_inputs":[0.0124389008,-0.0135965711],"value_targets":77.1769561768} +{"eps_id":989577397,"obs":[-0.189878881,-0.3559438884,0.0392540358,0.2854092419],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1969977617,-0.1614031345,0.0449622199,0.0053606159],"action_prob":0.8519831896,"action_logp":-0.1601884514,"action_dist_inputs":[-0.8737900257,0.8764509559],"value_targets":76.9464187622} +{"eps_id":989577397,"obs":[-0.1969977617,-0.1614031345,0.0449622199,0.0053606159],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2002258152,0.0330461226,0.0450694337,-0.2728040516],"action_prob":0.5584244132,"action_logp":-0.5826360583,"action_dist_inputs":[-0.1178525612,0.1169173494],"value_targets":76.7135543823} 
+{"eps_id":989577397,"obs":[-0.2002258152,0.0330461226,0.0450694337,-0.2728040516],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.199564904,-0.1626889706,0.0396133512,0.0337463804],"action_prob":0.8130375743,"action_logp":-0.2069779336,"action_dist_inputs":[0.7326450944,-0.7372248173],"value_targets":76.4783401489} +{"eps_id":989577397,"obs":[-0.199564904,-0.1626889706,0.0396133512,0.0337463804],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2028186768,-0.3583559394,0.0402882807,0.3386598527],"action_prob":0.3962672055,"action_logp":-0.9256665111,"action_dist_inputs":[-0.2108657956,0.2101771235],"value_targets":76.2407455444} +{"eps_id":989577397,"obs":[-0.2028186768,-0.3583559394,0.0402882807,0.3386598527],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2099857926,-0.1638297439,0.0470614769,0.0589488894],"action_prob":0.8814991117,"action_logp":-0.1261313111,"action_dist_inputs":[-1.0018061399,1.0048972368],"value_targets":76.0007553101} +{"eps_id":989577397,"obs":[-0.2099857926,-0.1638297439,0.0470614769,0.0589488894],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2132623941,0.0305869468,0.048240453,-0.2185222954],"action_prob":0.6656298041,"action_logp":-0.4070216417,"action_dist_inputs":[-0.3444736004,0.3440111578],"value_targets":75.7583389282} +{"eps_id":989577397,"obs":[-0.2132623941,0.0305869468,0.048240453,-0.2185222954],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2126506567,-0.165190205,0.0438700095,0.0889789984],"action_prob":0.7441147566,"action_logp":-0.2955600321,"action_dist_inputs":[0.5316818357,-0.535784483],"value_targets":75.5134735107} 
+{"eps_id":989577397,"obs":[-0.2126506567,-0.165190205,0.0438700095,0.0889789984],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2159544528,-0.360912621,0.0456495881,0.3951738179],"action_prob":0.2913286984,"action_logp":-1.2333030701,"action_dist_inputs":[-0.4445649087,0.4443748593],"value_targets":75.26612854} +{"eps_id":989577397,"obs":[-0.2159544528,-0.360912621,0.0456495881,0.3951738179],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2231727093,-0.1664671302,0.0535530634,0.1172257215],"action_prob":0.9033531547,"action_logp":-0.1016416922,"action_dist_inputs":[-1.1157178879,1.1193317175],"value_targets":75.0162963867} +{"eps_id":989577397,"obs":[-0.2231727093,-0.1664671302,0.0535530634,0.1172257215],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2265020609,-0.3623138666,0.0558975786,0.4263117611],"action_prob":0.2385998666,"action_logp":-1.4329673052,"action_dist_inputs":[-0.5801396966,0.5802314281],"value_targets":74.7639312744} +{"eps_id":989577397,"obs":[-0.2265020609,-0.3623138666,0.0558975786,0.4263117611],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2337483317,-0.1680263728,0.0644238144,0.1517610103],"action_prob":0.9134588838,"action_logp":-0.0905169249,"action_dist_inputs":[-1.1763322353,1.1802860498],"value_targets":74.5090255737} +{"eps_id":989577397,"obs":[-0.2337483317,-0.1680263728,0.0644238144,0.1517610103],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2371088564,0.0261167753,0.0674590319,-0.1199220791],"action_prob":0.809340179,"action_logp":-0.2115359306,"action_dist_inputs":[-0.722628653,0.7230998278],"value_targets":74.2515411377} 
+{"eps_id":989577397,"obs":[-0.2371088564,0.0261167753,0.0674590319,-0.1199220791],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.236586526,0.2202106118,0.0650605932,-0.3905827105],"action_prob":0.4722552896,"action_logp":-0.7502355576,"action_dist_inputs":[0.0539600812,-0.0571328141],"value_targets":73.9914550781} +{"eps_id":989577397,"obs":[-0.236586526,0.2202106118,0.0650605932,-0.3905827105],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.232182309,0.0242285188,0.0572489388,-0.0781172141],"action_prob":0.842045784,"action_logp":-0.1719209254,"action_dist_inputs":[0.8336648941,-0.8398641348],"value_targets":73.7287445068} +{"eps_id":989577397,"obs":[-0.232182309,0.0242285188,0.0572489388,-0.0781172141],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2316977382,-0.1716654003,0.0556865931,0.2320642769],"action_prob":0.4663972557,"action_logp":-0.762717545,"action_dist_inputs":[-0.0686924309,0.0659214854],"value_targets":73.4633712769} +{"eps_id":989577397,"obs":[-0.2316977382,-0.1716654003,0.0556865931,0.2320642769],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2351310551,0.0226184539,0.0603278801,-0.0425463803],"action_prob":0.858666122,"action_logp":-0.1523751318,"action_dist_inputs":[-0.9014564753,0.9027987719],"value_targets":73.1953277588} +{"eps_id":989577397,"obs":[-0.2351310551,0.0226184539,0.0603278801,-0.0425463803],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2346786857,0.2168257684,0.059476953,-0.3156017661],"action_prob":0.6083294153,"action_logp":-0.4970387816,"action_dist_inputs":[-0.2213436812,0.2189516425],"value_targets":72.9245758057} 
+{"eps_id":989577397,"obs":[-0.2346786857,0.2168257684,0.059476953,-0.3156017661],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2303421646,0.0209092647,0.053164918,-0.0047707325],"action_prob":0.774199307,"action_logp":-0.2559259534,"action_dist_inputs":[0.6132595539,-0.6189169884],"value_targets":72.6510848999} +{"eps_id":989577397,"obs":[-0.2303421646,0.0209092647,0.053164918,-0.0047707325],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2299239784,0.2152300328,0.0530695021,-0.2802169323],"action_prob":0.6601991653,"action_logp":-0.4152137339,"action_dist_inputs":[-0.3330822289,0.3310997486],"value_targets":72.3748321533} +{"eps_id":989577397,"obs":[-0.2299239784,0.2152300328,0.0530695021,-0.2802169323],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2256193757,0.0193928052,0.0474651642,0.0287204646],"action_prob":0.7375721335,"action_logp":-0.3043913543,"action_dist_inputs":[0.5140013695,-0.5193864107],"value_targets":72.0957946777} +{"eps_id":989577397,"obs":[-0.2256193757,0.0193928052,0.0474651642,0.0287204646],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2252315283,0.2138030678,0.0480395705,-0.2486169785],"action_prob":0.7022201419,"action_logp":-0.3535083234,"action_dist_inputs":[-0.4297447801,0.4281476736],"value_targets":71.8139266968} +{"eps_id":989577397,"obs":[-0.2252315283,0.2138030678,0.0480395705,-0.2486169785],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2209554613,0.0180291515,0.0430672318,0.0588233098],"action_prob":0.6988913417,"action_logp":-0.3582600355,"action_dist_inputs":[0.4184491932,-0.4235746264],"value_targets":71.5292205811} 
+{"eps_id":989577397,"obs":[-0.2209554613,0.0180291515,0.0430672318,0.0588233098],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.220594883,-0.1776829958,0.0442436971,0.3647771776],"action_prob":0.2635475397,"action_logp":-1.3335214853,"action_dist_inputs":[-0.5144209862,0.5131899118],"value_targets":71.2416381836} +{"eps_id":989577397,"obs":[-0.220594883,-0.1776829958,0.0442436971,0.3647771776],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2241485417,0.0167831741,0.0515392423,0.0863667801],"action_prob":0.902608335,"action_logp":-0.1024665684,"action_dist_inputs":[-1.11183846,1.1147093773],"value_targets":70.9511489868} +{"eps_id":989577397,"obs":[-0.2241485417,0.0167831741,0.0515392423,0.0863667801],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2238128781,-0.1790382117,0.0532665774,0.3948546648],"action_prob":0.2223065495,"action_logp":-1.5036979914,"action_dist_inputs":[-0.6265550256,0.6257201433],"value_targets":70.6577301025} +{"eps_id":989577397,"obs":[-0.2238128781,-0.1790382117,0.0532665774,0.3948546648],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2273936421,0.0152890505,0.0611636713,0.119430162],"action_prob":0.9107849598,"action_logp":-0.0934484825,"action_dist_inputs":[-1.159989953,1.1632672548],"value_targets":70.3613433838} +{"eps_id":989577397,"obs":[-0.2273936421,0.0152890505,0.0611636713,0.119430162],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2270878553,0.2094837725,0.0635522753,-0.153346315],"action_prob":0.8155123591,"action_logp":-0.2039387077,"action_dist_inputs":[-0.7432867289,0.7429475784],"value_targets":70.061958313} 
+{"eps_id":989577397,"obs":[-0.2270878553,0.2094837725,0.0635522753,-0.153346315],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2228981853,0.4036408961,0.0604853481,-0.4253222942],"action_prob":0.5074715018,"action_logp":-0.6783147454,"action_dist_inputs":[-0.0170287509,0.0128595149],"value_targets":69.7595596313} +{"eps_id":989577397,"obs":[-0.2228981853,0.4036408961,0.0604853481,-0.4253222942],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.214825362,0.2077166587,0.051978901,-0.114201501],"action_prob":0.8207390308,"action_logp":-0.1975500733,"action_dist_inputs":[0.7572258711,-0.76413697],"value_targets":69.4540939331} +{"eps_id":989577397,"obs":[-0.214825362,0.2077166587,0.051978901,-0.114201501],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2106710374,0.0118899411,0.0496948734,0.1944167763],"action_prob":0.4427823722,"action_logp":-0.8146768808,"action_dist_inputs":[-0.116802685,0.113074787],"value_targets":69.1455535889} +{"eps_id":989577397,"obs":[-0.2106710374,0.0118899411,0.0496948734,0.1944167763],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2104332298,-0.1839063466,0.0535832085,0.5023528934],"action_prob":0.1450650245,"action_logp":-1.930573225,"action_dist_inputs":[-0.8865875006,0.8872558475],"value_targets":68.8338928223} +{"eps_id":989577397,"obs":[-0.2104332298,-0.1839063466,0.0535832085,0.5023528934],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2141113579,0.0104210079,0.063630268,0.2270261645],"action_prob":0.9255762696,"action_logp":-0.0773387253,"action_dist_inputs":[-1.2580590248,1.2625826597],"value_targets":68.5190811157} 
+{"eps_id":989577397,"obs":[-0.2141113579,0.0104210079,0.063630268,0.2270261645],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2139029354,0.2045786232,0.0681707859,-0.0449262075],"action_prob":0.8749770522,"action_logp":-0.1335576177,"action_dist_inputs":[-0.972245574,0.9734548926],"value_targets":68.2010955811} +{"eps_id":989577397,"obs":[-0.2139029354,0.2045786232,0.0681707859,-0.0449262075],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2098113596,0.0085487198,0.0672722682,0.2684623599],"action_prob":0.3023405373,"action_logp":-1.1962013245,"action_dist_inputs":[-0.4194279313,0.4167492986],"value_targets":67.8798904419} +{"eps_id":989577397,"obs":[-0.2098113596,0.0085487198,0.0672722682,0.2684623599],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2096403986,-0.1874655634,0.0726415142,0.5815818906],"action_prob":0.1106700897,"action_logp":-2.2012016773,"action_dist_inputs":[-1.0410538912,1.0428609848],"value_targets":67.5554504395} +{"eps_id":989577397,"obs":[-0.2096403986,-0.1874655634,0.0726415142,0.5815818906],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2133897096,0.0065674214,0.0842731521,0.3126377761],"action_prob":0.9330884814,"action_logp":-0.0692552701,"action_dist_inputs":[-1.3148287535,1.320299983],"value_targets":67.227722168} +{"eps_id":989577397,"obs":[-0.2133897096,0.0065674214,0.0842731521,0.3126377761],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2132583559,0.2003940344,0.0905259028,0.0476766527],"action_prob":0.9031128287,"action_logp":-0.1019077599,"action_dist_inputs":[-1.1149053574,1.1173952818],"value_targets":66.8966903687} 
+{"eps_id":989577397,"obs":[-0.2132583559,0.2003940344,0.0905259028,0.0476766527],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2092504799,0.3941091597,0.0914794356,-0.2151279449],"action_prob":0.8127965331,"action_logp":-0.2072744668,"action_dist_inputs":[-0.7346732616,0.7336114645],"value_targets":66.5623168945} +{"eps_id":989577397,"obs":[-0.2092504799,0.3941091597,0.0914794356,-0.2151279449],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2013682872,0.1978065521,0.0871768817,0.1049522087],"action_prob":0.4488197863,"action_logp":-0.8011338115,"action_dist_inputs":[-0.1052210182,0.1002191976],"value_targets":66.2245635986} +{"eps_id":989577397,"obs":[-0.2013682872,0.1978065521,0.0871768817,0.1049522087],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.197412163,0.3915780187,0.0892759264,-0.1590037048],"action_prob":0.8445227146,"action_logp":-0.1689836234,"action_dist_inputs":[-0.8462159038,0.8460559845],"value_targets":65.883392334} +{"eps_id":989577397,"obs":[-0.197412163,0.3915780187,0.0892759264,-0.1590037048],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1895806044,0.5853159428,0.0860958472,-0.4222409725],"action_prob":0.637530148,"action_logp":-0.4501536787,"action_dist_inputs":[-0.2844145298,0.2802458405],"value_targets":65.5387802124} +{"eps_id":989577397,"obs":[-0.1895806044,0.5853159428,0.0860958472,-0.4222409725],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.177874282,0.7791193724,0.0776510313,-0.6865884066],"action_prob":0.299323231,"action_logp":-1.2062312365,"action_dist_inputs":[0.4216341972,-0.4288885295],"value_targets":65.1906890869} 
+{"eps_id":989577397,"obs":[-0.177874282,0.7791193724,0.0776510313,-0.6865884066],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1622918993,0.5830102563,0.0639192611,-0.3705047071],"action_prob":0.880698204,"action_logp":-0.1270402819,"action_dist_inputs":[0.9952024817,-1.0038565397],"value_targets":64.8390808105} +{"eps_id":989577397,"obs":[-0.1622918993,0.5830102563,0.0639192611,-0.3705047071],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1506316811,0.3870411813,0.056509167,-0.0583719015],"action_prob":0.6621413827,"action_logp":-0.4122762084,"action_dist_inputs":[0.3330067992,-0.3398447335],"value_targets":64.4839172363} +{"eps_id":989577397,"obs":[-0.1506316811,0.3870411813,0.056509167,-0.0583719015],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1428908706,0.5813092589,0.055341728,-0.3327040374],"action_prob":0.7243588567,"action_logp":-0.3224683404,"action_dist_inputs":[-0.4843993187,0.4817878604],"value_targets":64.1251678467} +{"eps_id":989577397,"obs":[-0.1428908706,0.5813092589,0.055341728,-0.3327040374],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1312646866,0.3854450881,0.048687648,-0.023095388],"action_prob":0.6174982786,"action_logp":-0.4820789993,"action_dist_inputs":[0.2362495363,-0.2426935732],"value_targets":63.7627983093} +{"eps_id":989577397,"obs":[-0.1312646866,0.3854450881,0.048687648,-0.023095388],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1235557795,0.1896599382,0.0482257418,0.2845428288],"action_prob":0.2466142029,"action_logp":-1.3999301195,"action_dist_inputs":[-0.5593597293,0.5573926568],"value_targets":63.3967666626} 
+{"eps_id":989577397,"obs":[-0.1235557795,0.1896599382,0.0482257418,0.2845428288],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1197625771,0.3840620816,0.0539165959,0.0074515105],"action_prob":0.893612504,"action_logp":-0.1124830097,"action_dist_inputs":[-1.0628788471,1.0653057098],"value_targets":63.0270347595} +{"eps_id":989577397,"obs":[-0.1197625771,0.3840620816,0.0539165959,0.0074515105],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1120813414,0.5783709884,0.0540656261,-0.2677445412],"action_prob":0.785726428,"action_logp":-0.2411465794,"action_dist_inputs":[-0.650323987,0.6490312219],"value_targets":62.6535720825} +{"eps_id":989577397,"obs":[-0.1120813414,0.5783709884,0.0540656261,-0.2677445412],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1005139202,0.7726813555,0.0487107374,-0.5428966284],"action_prob":0.4905538261,"action_logp":-0.7122202516,"action_dist_inputs":[0.0161328521,-0.0216561966],"value_targets":62.2763366699} +{"eps_id":989577397,"obs":[-0.1005139202,0.7726813555,0.0487107374,-0.5428966284],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.085060291,0.9670860767,0.0378528051,-0.8198425174],"action_prob":0.1908934712,"action_logp":-1.6560397148,"action_dist_inputs":[0.7181568742,-0.7260581255],"value_targets":61.8952865601} +{"eps_id":989577397,"obs":[-0.085060291,0.9670860767,0.0378528051,-0.8198425174],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0657185689,0.7714670897,0.0214559548,-0.5154982209],"action_prob":0.9170049429,"action_logp":-0.0866424292,"action_dist_inputs":[1.1966525316,-1.2056792974],"value_targets":61.5103912354} 
+{"eps_id":989577397,"obs":[-0.0657185689,0.7714670897,0.0214559548,-0.5154982209],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0502892286,0.5760496855,0.0111459903,-0.2161319703],"action_prob":0.8153515458,"action_logp":-0.2041359395,"action_dist_inputs":[0.7387597561,-0.7464056015],"value_targets":61.1216087341} +{"eps_id":989577397,"obs":[-0.0502892286,0.5760496855,0.0111459903,-0.2161319703],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0387682319,0.7710105181,0.0068233511,-0.5052782297],"action_prob":0.4963595569,"action_logp":-0.7004547119,"action_dist_inputs":[0.0048881401,-0.0096737919],"value_targets":60.7288970947} +{"eps_id":989577397,"obs":[-0.0387682319,0.7710105181,0.0068233511,-0.5052782297],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0233480223,0.5757930875,-0.0032822134,-0.2104528248],"action_prob":0.8232760429,"action_logp":-0.1944637448,"action_dist_inputs":[0.7656208277,-0.7730816603],"value_targets":60.3322181702} +{"eps_id":989577397,"obs":[-0.0233480223,0.5757930875,-0.0032822134,-0.2104528248],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0118321609,0.7709618211,-0.0074912701,-0.5041692853],"action_prob":0.4766907096,"action_logp":-0.7408874035,"action_dist_inputs":[0.0443394706,-0.0489652567],"value_targets":59.9315338135} +{"eps_id":989577397,"obs":[-0.0118321609,0.7709618211,-0.0074912701,-0.5041692853],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0035870753,0.5759462714,-0.0175746568,-0.213856563],"action_prob":0.8380568624,"action_logp":-0.1766693443,"action_dist_inputs":[0.8182662725,-0.8255742788],"value_targets":59.526802063} 
+{"eps_id":989577397,"obs":[0.0035870753,0.5759462714,-0.0175746568,-0.213856563],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.015106,0.3810799122,-0.0218517873,0.0732311159],"action_prob":0.5602007508,"action_logp":-0.5794600844,"action_dist_inputs":[0.1187040508,-0.123272799],"value_targets":59.117980957} +{"eps_id":989577397,"obs":[0.015106,0.3810799122,-0.0218517873,0.0732311159],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0227275975,0.576508224,-0.0203871652,-0.2262652516],"action_prob":0.7789078951,"action_logp":-0.249862507,"action_dist_inputs":[-0.6296814084,0.6296319962],"value_targets":58.7050323486} +{"eps_id":989577397,"obs":[0.0227275975,0.576508224,-0.0203871652,-0.2262652516],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0342577621,0.7719154954,-0.024912471,-0.5253086686],"action_prob":0.4096118808,"action_logp":-0.8925452232,"action_dist_inputs":[0.1805018634,-0.1850681156],"value_targets":58.2879104614} +{"eps_id":989577397,"obs":[0.0342577621,0.7719154954,-0.024912471,-0.5253086686],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0496960729,0.5771527886,-0.0354186445,-0.2405788302],"action_prob":0.8695619106,"action_logp":-0.1397657543,"action_dist_inputs":[0.9449806213,-0.9521096945],"value_targets":57.8665771484} +{"eps_id":989577397,"obs":[0.0496960729,0.5771527886,-0.0354186445,-0.2405788302],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0612391271,0.7727623582,-0.040230222,-0.5442200303],"action_prob":0.3515608311,"action_logp":-1.0453724861,"action_dist_inputs":[0.3037941158,-0.308391422],"value_targets":57.4409866333} 
+{"eps_id":989577397,"obs":[0.0612391271,0.7727623582,-0.040230222,-0.5442200303],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0766943768,0.578228116,-0.0511146225,-0.2644793093],"action_prob":0.8901592493,"action_logp":-0.1163548976,"action_dist_inputs":[1.0426381826,-1.0497301817],"value_targets":57.0110969543} +{"eps_id":989577397,"obs":[0.0766943768,0.578228116,-0.0511146225,-0.2644793093],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.088258937,0.3838716149,-0.0564042069,0.011653577],"action_prob":0.7185741663,"action_logp":-0.330486387,"action_dist_inputs":[0.4663591385,-0.4710406661],"value_targets":56.5768661499} +{"eps_id":989577397,"obs":[0.088258937,0.3838716149,-0.0564042069,0.011653577],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0959363729,0.579755187,-0.0561711341,-0.2982786],"action_prob":0.6556454301,"action_logp":-0.4221351743,"action_dist_inputs":[-0.3223736882,0.3215746582],"value_targets":56.1382484436} +{"eps_id":989577397,"obs":[0.0959363729,0.579755187,-0.0561711341,-0.2982786],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.107531473,0.7756310105,-0.062136706,-0.6081345081],"action_prob":0.2256338447,"action_logp":-1.4888417721,"action_dist_inputs":[0.614159286,-0.6189720035],"value_targets":55.6952018738} +{"eps_id":989577397,"obs":[0.107531473,0.7756310105,-0.062136706,-0.6081345081],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1230440959,0.5814302564,-0.0742993951,-0.3356520236],"action_prob":0.9215709567,"action_logp":-0.0816754922,"action_dist_inputs":[1.2283178568,-1.2355679274],"value_targets":55.2476768494} 
+{"eps_id":989577397,"obs":[0.1230440959,0.5814302564,-0.0742993951,-0.3356520236],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1346727014,0.3874400258,-0.0810124353,-0.0672933981],"action_prob":0.8355522752,"action_logp":-0.1796623766,"action_dist_inputs":[0.8102733493,-0.8152266741],"value_targets":54.7956352234} +{"eps_id":989577397,"obs":[0.1346727014,0.3874400258,-0.0810124353,-0.0672933981],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1424214989,0.5836243629,-0.0823583081,-0.3843963742],"action_prob":0.4503554702,"action_logp":-0.7977180481,"action_dist_inputs":[0.0987660885,-0.1004684493],"value_targets":54.3390235901} +{"eps_id":989577397,"obs":[0.1424214989,0.5836243629,-0.0823583081,-0.3843963742],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1540939808,0.389762342,-0.0900462344,-0.1187750027],"action_prob":0.8760170341,"action_logp":-0.132369712,"action_dist_inputs":[0.9750216603,-0.9802196622],"value_targets":53.8778038025} +{"eps_id":989577397,"obs":[0.1540939808,0.389762342,-0.0900462344,-0.1187750027],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1618892401,0.1960382164,-0.0924217328,0.1441960782],"action_prob":0.6699009538,"action_logp":-0.4006254375,"action_dist_inputs":[0.3527850509,-0.3549520671],"value_targets":53.4119224548} +{"eps_id":989577397,"obs":[0.1618892401,0.1960382164,-0.0924217328,0.1441960782],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1658100039,0.3923538625,-0.0895378143,-0.1761539578],"action_prob":0.71140486,"action_logp":-0.340513587,"action_dist_inputs":[-0.4503010511,0.4519158304],"value_targets":52.9413375854} 
+{"eps_id":989577397,"obs":[0.1658100039,0.3923538625,-0.0895378143,-0.1761539578],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1736570746,0.1986198127,-0.0930608884,0.0869940817],"action_prob":0.7587760091,"action_logp":-0.2760486603,"action_dist_inputs":[0.5716781616,-0.5743025541],"value_targets":52.4659957886} +{"eps_id":989577397,"obs":[0.1736570746,0.1986198127,-0.0930608884,0.0869940817],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1776294708,0.0049464833,-0.0913210064,0.3489266038],"action_prob":0.3892714381,"action_logp":-0.9434784055,"action_dist_inputs":[-0.2246923745,0.2256832868],"value_targets":51.9858551025} +{"eps_id":989577397,"obs":[0.1776294708,0.0049464833,-0.0913210064,0.3489266038],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1777283996,0.2012405694,-0.0843424797,0.0289012976],"action_prob":0.8639212251,"action_logp":-0.1462736577,"action_dist_inputs":[-0.9218990207,0.9263489246],"value_targets":51.5008621216} +{"eps_id":989577397,"obs":[0.1777283996,0.2012405694,-0.0843424797,0.0289012976],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1817532182,0.3974644244,-0.0837644488,-0.289155513],"action_prob":0.5115933418,"action_logp":-0.6702252626,"action_dist_inputs":[-0.0229872093,0.0233943723],"value_targets":51.0109710693} +{"eps_id":989577397,"obs":[0.1817532182,0.3974644244,-0.0837644488,-0.289155513],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1897024959,0.5936747193,-0.0895475596,-0.6070370078],"action_prob":0.14012371,"action_logp":-1.9652296305,"action_dist_inputs":[0.9053795338,-0.9088834524],"value_targets":50.5161323547} 
+{"eps_id":989577397,"obs":[0.1897024959,0.5936747193,-0.0895475596,-0.6070370078],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2015759945,0.3999114037,-0.1016883031,-0.3438483775],"action_prob":0.9366233945,"action_logp":-0.0654740036,"action_dist_inputs":[1.3432395458,-1.3499462605],"value_targets":50.0162963867} +{"eps_id":989577397,"obs":[0.2015759945,0.3999114037,-0.1016883031,-0.3438483775],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2095742226,0.2063720077,-0.1085652709,-0.0848838091],"action_prob":0.8953983784,"action_logp":-0.1104865149,"action_dist_inputs":[1.0715491772,-1.0755614042],"value_targets":49.5114097595} +{"eps_id":989577397,"obs":[0.2095742226,0.2063720077,-0.1085652709,-0.0848838091],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2137016654,0.4028691649,-0.1102629453,-0.4097498059],"action_prob":0.2535935938,"action_logp":-1.3720222712,"action_dist_inputs":[0.5394077301,-0.5401295424],"value_targets":49.0014266968} +{"eps_id":989577397,"obs":[0.2137016654,0.4028691649,-0.1102629453,-0.4097498059],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2217590511,0.2094690949,-0.1184579432,-0.1537630558],"action_prob":0.9158778787,"action_logp":-0.087872237,"action_dist_inputs":[1.1914792061,-1.1961342096],"value_targets":48.486289978} +{"eps_id":989577397,"obs":[0.2217590511,0.2094690949,-0.1184579432,-0.1537630558],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2259484231,0.0162249245,-0.1215332001,0.0993265957],"action_prob":0.8289902806,"action_logp":-0.1875468642,"action_dist_inputs":[0.7885173559,-0.7899705768],"value_targets":47.9659461975} 
+{"eps_id":989577397,"obs":[0.2259484231,0.0162249245,-0.1215332001,0.0993265957],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2262729257,-0.176964581,-0.1195466742,0.3513305187],"action_prob":0.5501317978,"action_logp":-0.5975973606,"action_dist_inputs":[0.1016175225,-0.0995858088],"value_targets":47.4403495789} +{"eps_id":989577397,"obs":[0.2262729257,-0.176964581,-0.1195466742,0.3513305187],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2227336317,0.0196365863,-0.1125200614,0.0234704725],"action_prob":0.7896748185,"action_logp":-0.2361340672,"action_dist_inputs":[-0.6589413285,0.6640248895],"value_targets":46.9094467163} +{"eps_id":989577397,"obs":[0.2227336317,0.0196365863,-0.1125200614,0.0234704725],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2231263667,-0.1737069488,-0.1120506525,0.2786406577],"action_prob":0.6715234518,"action_logp":-0.3982063234,"action_dist_inputs":[0.3581598401,-0.3569238186],"value_targets":46.3731765747} +{"eps_id":989577397,"obs":[0.2231263667,-0.1737069488,-0.1120506525,0.2786406577],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2196522355,0.022820292,-0.1064778343,-0.0471755266],"action_prob":0.7023271322,"action_logp":-0.353355974,"action_dist_inputs":[-0.4269419312,0.4314622581],"value_targets":45.8314933777} +{"eps_id":989577397,"obs":[0.2196522355,0.022820292,-0.1064778343,-0.0471755266],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2201086283,-0.1706264913,-0.107421346,0.2101059258],"action_prob":0.760412097,"action_logp":-0.2738947272,"action_dist_inputs":[0.577677846,-0.5772624612],"value_targets":45.2843360901} 
+{"eps_id":989577397,"obs":[0.2201086283,-0.1706264913,-0.107421346,0.2101059258],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2166960984,0.0258543454,-0.1032192335,-0.1144376844],"action_prob":0.589342773,"action_logp":-0.5287472606,"action_dist_inputs":[-0.1786605418,0.1825886369],"value_targets":44.7316513062} +{"eps_id":989577397,"obs":[0.2166960984,0.0258543454,-0.1032192335,-0.1144376844],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2172131985,-0.1676487774,-0.1055079848,0.1439795643],"action_prob":0.8203327656,"action_logp":-0.1980452091,"action_dist_inputs":[0.7590848207,-0.7595188022],"value_targets":44.1733856201} +{"eps_id":989577397,"obs":[0.2172131985,-0.1676487774,-0.1055079848,0.1439795643],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2138602138,-0.3611139357,-0.102628395,0.4016017318],"action_prob":0.5362722874,"action_logp":-0.6231132746,"action_dist_inputs":[0.0742982477,-0.0710462704],"value_targets":43.6094818115} +{"eps_id":989577397,"obs":[0.2138602138,-0.3611139357,-0.102628395,0.4016017318],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.2066379339,-0.1646974087,-0.0945963562,0.0784073621],"action_prob":0.7960773706,"action_logp":-0.2280588895,"action_dist_inputs":[-0.6779434681,0.6840121746],"value_targets":43.0398788452} +{"eps_id":989577397,"obs":[0.2066379339,-0.1646974087,-0.0945963562,0.0784073621],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.2033439875,-0.3583449423,-0.0930282101,0.339810133],"action_prob":0.6337878704,"action_logp":-0.4560410082,"action_dist_inputs":[0.2754917741,-0.2730096579],"value_targets":42.4645233154} 
+{"eps_id":989577397,"obs":[0.2033439875,-0.3583449423,-0.0930282101,0.339810133],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1961770952,-0.1620309353,-0.0862320065,0.0193015411],"action_prob":0.7329775095,"action_logp":-0.3106402755,"action_dist_inputs":[-0.5020641088,0.5077181458],"value_targets":41.8833580017} +{"eps_id":989577397,"obs":[0.1961770952,-0.1620309353,-0.0862320065,0.0193015411],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1929364651,0.0342150405,-0.0858459771,-0.2992944419],"action_prob":0.2905759215,"action_logp":-1.2358903885,"action_dist_inputs":[0.4471396804,-0.4454489052],"value_targets":41.2963218689} +{"eps_id":989577397,"obs":[0.1929364651,0.0342150405,-0.0858459771,-0.2992944419],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1936207712,-0.1595850438,-0.0918318629,-0.0348725654],"action_prob":0.8954697847,"action_logp":-0.1104067788,"action_dist_inputs":[1.0725367069,-1.0753362179],"value_targets":40.7033538818} +{"eps_id":989577397,"obs":[0.1936207712,-0.1595850438,-0.0918318629,-0.0348725654],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1904290766,-0.3532783091,-0.0925293192,0.2274824828],"action_prob":0.7774682641,"action_logp":-0.2517124712,"action_dist_inputs":[0.6259048581,-0.6250681877],"value_targets":40.1044006348} +{"eps_id":989577397,"obs":[0.1904290766,-0.3532783091,-0.0925293192,0.2274824828],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1833635122,-0.5469644666,-0.0879796669,0.4896041155],"action_prob":0.4576689899,"action_logp":-0.7816090584,"action_dist_inputs":[-0.0825454891,0.087184839],"value_targets":39.4993934631} 
+{"eps_id":989577397,"obs":[0.1833635122,-0.5469644666,-0.0879796669,0.4896041155],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1724242121,-0.7407422662,-0.0781875849,0.753313899],"action_prob":0.1697700471,"action_logp":-1.7733104229,"action_dist_inputs":[-0.7901175022,0.7971402407],"value_targets":38.8882751465} +{"eps_id":989577397,"obs":[0.1724242121,-0.7407422662,-0.0781875849,0.753313899],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1576093733,-0.9347041249,-0.0631213114,1.0204041004],"action_prob":0.0756825432,"action_logp":-2.5812077522,"action_dist_inputs":[-1.2470248938,1.2554833889],"value_targets":38.2709846497} +{"eps_id":989577397,"obs":[0.1576093733,-0.9347041249,-0.0631213114,1.0204041004],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1389152855,-0.7388004065,-0.0427132249,0.7085884213],"action_prob":0.9515047073,"action_logp":-0.0497106723,"action_dist_inputs":[-1.4834661484,1.4931106567],"value_targets":37.6474609375} +{"eps_id":989577397,"obs":[0.1389152855,-0.7388004065,-0.0427132249,0.7085884213],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1241392791,-0.5431136489,-0.0285414588,0.4027721286],"action_prob":0.9250136018,"action_logp":-0.0779468343,"action_dist_inputs":[-1.252125144,1.2603771687],"value_targets":37.0176353455} +{"eps_id":989577397,"obs":[0.1241392791,-0.5431136489,-0.0285414588,0.4027721286],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1132770106,-0.3475987911,-0.0204860158,0.1012291983],"action_prob":0.8174613118,"action_logp":-0.2015517205,"action_dist_inputs":[-0.7464218736,0.7528194785],"value_targets":36.3814506531} 
+{"eps_id":989577397,"obs":[0.1132770106,-0.3475987911,-0.0204860158,0.1012291983],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1063250303,-0.5424212217,-0.0184614323,0.3873790801],"action_prob":0.5443945527,"action_logp":-0.6080810428,"action_dist_inputs":[0.0905817747,-0.0874652714],"value_targets":35.7388381958} +{"eps_id":989577397,"obs":[0.1063250303,-0.5424212217,-0.0184614323,0.3873790801],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.095476605,-0.3470421731,-0.0107138501,0.0889330581],"action_prob":0.8150024414,"action_logp":-0.2045641989,"action_dist_inputs":[-0.7383158207,0.7445327044],"value_targets":35.0897369385} +{"eps_id":989577397,"obs":[0.095476605,-0.3470421731,-0.0107138501,0.0889330581],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0885357633,-0.5420089364,-0.0089351889,0.3782165945],"action_prob":0.5467100739,"action_logp":-0.6038366556,"action_dist_inputs":[0.0951366648,-0.0922500566],"value_targets":34.4340782166} +{"eps_id":989577397,"obs":[0.0885357633,-0.5420089364,-0.0089351889,0.3782165945],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0776955858,-0.3467612267,-0.0013708567,0.0827298313],"action_prob":0.8179995418,"action_logp":-0.2008935213,"action_dist_inputs":[-0.7483947873,0.7544576526],"value_targets":33.7717971802} +{"eps_id":989577397,"obs":[0.0776955858,-0.3467612267,-0.0013708567,0.0827298313],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0707603619,-0.5418635011,0.0002837399,0.374979943],"action_prob":0.5382524133,"action_logp":-0.6194276214,"action_dist_inputs":[0.0780199096,-0.0752892718],"value_targets":33.1028251648} 
+{"eps_id":989577397,"obs":[0.0707603619,-0.5418635011,0.0002837399,0.374979943],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.05992309,-0.3467455804,0.0077833384,0.0823864862],"action_prob":0.8261924386,"action_logp":-0.1909275353,"action_dist_inputs":[-0.7764700055,0.7824090123],"value_targets":32.4270935059} +{"eps_id":989577397,"obs":[0.05992309,-0.3467455804,0.0077833384,0.0823864862],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.052988179,-0.54197824,0.0094310679,0.377514869],"action_prob":0.5184732676,"action_logp":-0.6568668485,"action_dist_inputs":[0.0382866003,-0.0356401019],"value_targets":31.7445411682} +{"eps_id":989577397,"obs":[0.052988179,-0.54197824,0.0094310679,0.377514869],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0421486162,-0.3469914794,0.0169813652,0.0878204554],"action_prob":0.8388671875,"action_logp":-0.1757028997,"action_dist_inputs":[-0.8219876885,0.8278357983],"value_targets":31.0550918579} +{"eps_id":989577397,"obs":[0.0421486162,-0.3469914794,0.0169813652,0.0878204554],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0352087878,-0.152116999,0.0187377743,-0.1994568408],"action_prob":0.5135850906,"action_logp":-0.6663395762,"action_dist_inputs":[-0.0258617476,0.0284918807],"value_targets":30.3586788177} +{"eps_id":989577397,"obs":[0.0352087878,-0.152116999,0.0187377743,-0.1994568408],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0321664475,-0.347501874,0.0147486385,0.0990776047],"action_prob":0.8209714293,"action_logp":-0.197266981,"action_dist_inputs":[0.7606217861,-0.7623211145],"value_targets":29.6552295685} 
+{"eps_id":989577397,"obs":[0.0321664475,-0.347501874,0.0147486385,0.0990776047],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0252164081,-0.5428320765,0.0167301893,0.3963769674],"action_prob":0.4677388072,"action_logp":-0.7598452568,"action_dist_inputs":[-0.0632866547,0.0659377128],"value_targets":28.9446773529} +{"eps_id":989577397,"obs":[0.0252164081,-0.5428320765,0.0167301893,0.3963769674],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0143597675,-0.3479514122,0.02465773,0.1090153679],"action_prob":0.8613122106,"action_logp":-0.1492982507,"action_dist_inputs":[-0.9102301002,0.9160016179],"value_targets":28.2269458771} +{"eps_id":989577397,"obs":[0.0143597675,-0.3479514122,0.02465773,0.1090153679],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.007400739,-0.543417871,0.0268380363,0.4093745649],"action_prob":0.4241479933,"action_logp":-0.8576728106,"action_dist_inputs":[-0.1515447497,0.1542234123],"value_targets":27.5019664764} +{"eps_id":989577397,"obs":[0.007400739,-0.543417871,0.0268380363,0.4093745649],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0034676185,-0.3486865163,0.0350255296,0.1252721399],"action_prob":0.8775535226,"action_logp":-0.1306173056,"action_dist_inputs":[-0.9818507433,0.98761338],"value_targets":26.7696628571} +{"eps_id":989577397,"obs":[-0.0034676185,-0.3486865163,0.0350255296,0.1252721399],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0104413489,-0.5442922711,0.0375309698,0.4287962317],"action_prob":0.3680640161,"action_logp":-0.9994983673,"action_dist_inputs":[-0.268883884,0.2716472745],"value_targets":26.0299625397} 
+{"eps_id":989577397,"obs":[-0.0104413489,-0.5442922711,0.0375309698,0.4287962317],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0213271938,-0.3497214019,0.0461068936,0.1481769383],"action_prob":0.8942070007,"action_logp":-0.111817956,"action_dist_inputs":[-1.0643209219,1.0701320171],"value_targets":25.2827911377} +{"eps_id":989577397,"obs":[-0.0213271938,-0.3497214019,0.0461068936,0.1481769383],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0283216219,-0.1552889794,0.0490704328,-0.1296111196],"action_prob":0.6970490813,"action_logp":-0.3608994484,"action_dist_inputs":[-0.4151923656,0.4180925488],"value_targets":24.5280704498} +{"eps_id":989577397,"obs":[-0.0283216219,-0.1552889794,0.0490704328,-0.1296111196],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.031427402,0.039096944,0.0464782119,-0.4064181149],"action_prob":0.2893229425,"action_logp":-1.2402117252,"action_dist_inputs":[0.4488178194,-0.4498566389],"value_targets":23.7657279968} +{"eps_id":989577397,"obs":[-0.031427402,0.039096944,0.0464782119,-0.4064181149],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0306454636,-0.1566522121,0.0383498482,-0.0994517878],"action_prob":0.8929720521,"action_logp":-0.1132000014,"action_dist_inputs":[1.0584298372,-1.0630358458],"value_targets":22.9956855774} +{"eps_id":989577397,"obs":[-0.0306454636,-0.1566522121,0.0383498482,-0.0994517878],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0337785073,-0.3523021936,0.0363608152,0.2050795108],"action_prob":0.6822091341,"action_logp":-0.3824190199,"action_dist_inputs":[0.3815862834,-0.3823564053],"value_targets":22.2178649902} 
+{"eps_id":989577397,"obs":[-0.0337785073,-0.3523021936,0.0363608152,0.2050795108],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0408245511,-0.1577185839,0.0404624045,-0.0759152696],"action_prob":0.7676685452,"action_logp":-0.2643972337,"action_dist_inputs":[-0.5959715843,0.5992213488],"value_targets":21.4321861267} +{"eps_id":989577397,"obs":[-0.0408245511,-0.1577185839,0.0404624045,-0.0759152696],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0439789221,-0.3533965349,0.0389440991,0.2292541116],"action_prob":0.6367768049,"action_logp":-0.451336056,"action_dist_inputs":[0.2804296017,-0.2809720933],"value_targets":20.6385707855} +{"eps_id":989577397,"obs":[-0.0439789221,-0.3533965349,0.0389440991,0.2292541116],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.051046852,-0.1588521153,0.0435291827,-0.0508945771],"action_prob":0.8009516001,"action_logp":-0.2219547629,"action_dist_inputs":[-0.694432497,0.697819829],"value_targets":19.8369407654} +{"eps_id":989577397,"obs":[-0.051046852,-0.1588521153,0.0435291827,-0.0508945771],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0542238951,0.0356195197,0.0425112881,-0.3295321763],"action_prob":0.4188816249,"action_logp":-0.8701668978,"action_dist_inputs":[0.1635335088,-0.1638326347],"value_targets":19.0272140503} +{"eps_id":989577397,"obs":[-0.0542238951,0.0356195197,0.0425112881,-0.3295321763],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0535115041,-0.1600809991,0.035920646,-0.0237520393],"action_prob":0.8627787828,"action_logp":-0.1475969851,"action_dist_inputs":[0.9172575474,-0.9213059545],"value_targets":18.2093067169} 
+{"eps_id":989577397,"obs":[-0.0535115041,-0.1600809991,0.035920646,-0.0237520393],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0567131266,-0.3556991816,0.0354456045,0.280044347],"action_prob":0.5420780778,"action_logp":-0.6123452783,"action_dist_inputs":[0.0843270347,-0.0843841955],"value_targets":17.3831367493} +{"eps_id":989577397,"obs":[-0.0567131266,-0.3556991816,0.0354456045,0.280044347],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0638271049,-0.161100328,0.0410464928,-0.00125177],"action_prob":0.8452538848,"action_logp":-0.1681182534,"action_dist_inputs":[-0.8470761776,0.8507748246],"value_targets":16.5486240387} +{"eps_id":989577397,"obs":[-0.0638271049,-0.161100328,0.0410464928,-0.00125177],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0670491159,-0.3567861617,0.0410214588,0.3040940464],"action_prob":0.4812448323,"action_logp":-0.7313791513,"action_dist_inputs":[-0.0374530256,0.0376029909],"value_targets":15.7056808472} +{"eps_id":989577397,"obs":[-0.0670491159,-0.3567861617,0.0410214588,0.3040940464],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.074184835,-0.5524680018,0.0471033379,0.6094267964],"action_prob":0.1334884167,"action_logp":-2.0137405396,"action_dist_inputs":[-0.9332992435,0.9371616244],"value_targets":14.8542232513} +{"eps_id":989577397,"obs":[-0.074184835,-0.5524680018,0.0471033379,0.6094267964],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.085234195,-0.3580350578,0.0592918731,0.331944108],"action_prob":0.9393812418,"action_logp":-0.062533848,"action_dist_inputs":[-1.3669118881,1.3737059832],"value_targets":13.9941644669} 
+{"eps_id":989577397,"obs":[-0.085234195,-0.3580350578,0.0592918731,0.331944108],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0923948959,-0.5539486408,0.0659307539,0.642719686],"action_prob":0.1085418239,"action_logp":-2.2206196785,"action_dist_inputs":[-1.0508098602,1.054913044],"value_targets":13.125418663} +{"eps_id":989577397,"obs":[-0.0923948959,-0.5539486408,0.0659307539,0.642719686],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1034738719,-0.3598046005,0.0787851512,0.3715059459],"action_prob":0.9442953467,"action_logp":-0.0573162995,"action_dist_inputs":[-1.4116390944,1.4187364578],"value_targets":12.2478981018} +{"eps_id":989577397,"obs":[-0.1034738719,-0.3598046005,0.0787851512,0.3715059459],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1106699631,-0.555952251,0.0862152651,0.687953651],"action_prob":0.0878900439,"action_logp":-2.4316687584,"action_dist_inputs":[-1.1675889492,1.1720849276],"value_targets":11.3615131378} +{"eps_id":989577397,"obs":[-0.1106699631,-0.555952251,0.0862152651,0.687953651],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1217890084,-0.3621260524,0.0999743417,0.423610121],"action_prob":0.9483645558,"action_logp":-0.0530162863,"action_dist_inputs":[-1.4515098333,1.459020257],"value_targets":10.4661741257} +{"eps_id":989577397,"obs":[-0.1217890084,-0.3621260524,0.0999743417,0.423610121],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1290315241,-0.1685519665,0.1084465459,0.1640423685],"action_prob":0.927415967,"action_logp":-0.0753531158,"action_dist_inputs":[-1.2713029385,1.2763543129],"value_targets":9.5617923737} 
+{"eps_id":989577397,"obs":[-0.1290315241,-0.1685519665,0.1084465459,0.1640423685],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1324025691,0.0248639137,0.1117273942,-0.0925562903],"action_prob":0.8587825894,"action_logp":-0.1522395015,"action_dist_inputs":[-0.9016469717,0.9035683274],"value_targets":8.6482753754} +{"eps_id":989577397,"obs":[-0.1324025691,0.0248639137,0.1117273942,-0.0925562903],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1319052875,-0.1716673672,0.1098762676,0.2331821322],"action_prob":0.3865110278,"action_logp":-0.950594902,"action_dist_inputs":[-0.2318199128,0.2301819175],"value_targets":7.7255306244} +{"eps_id":989577397,"obs":[-0.1319052875,-0.1716673672,0.1098762676,0.2331821322],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1353386343,-0.3681737483,0.1145399064,0.5584028959],"action_prob":0.1080571264,"action_logp":-2.2250952721,"action_dist_inputs":[-1.0540140867,1.0567280054],"value_targets":6.7934651375} +{"eps_id":989577397,"obs":[-0.1353386343,-0.3681737483,0.1145399064,0.5584028959],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1427021176,-0.564701438,0.1257079691,0.8848639131],"action_prob":0.0579611324,"action_logp":-2.847982645,"action_dist_inputs":[-1.3909578323,1.3973158598],"value_targets":5.8519849777} +{"eps_id":989577397,"obs":[-0.1427021176,-0.564701438,0.1257079691,0.8848639131],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1539961398,-0.3714898527,0.1434052438,0.6341943145],"action_prob":0.9542283416,"action_logp":-0.0468522832,"action_dist_inputs":[-1.5140963793,1.5231422186],"value_targets":4.9009947777} 
+{"eps_id":989577397,"obs":[-0.1539961398,-0.3714898527,0.1434052438,0.6341943145],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1614259332,-0.1786286086,0.1560891271,0.3898887932],"action_prob":0.9465833902,"action_logp":-0.054896187,"action_dist_inputs":[-1.4338014126,1.4409371614],"value_targets":3.9403989315} +{"eps_id":989577397,"obs":[-0.1614259332,-0.1786286086,0.1560891271,0.3898887932],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1649985164,-0.3755815923,0.1638869047,0.7274307609],"action_prob":0.0694309697,"action_logp":-2.6674222946,"action_dist_inputs":[-1.295396924,1.3000663519],"value_targets":2.970099926} +{"eps_id":989577397,"obs":[-0.1649985164,-0.3755815923,0.1638869047,0.7274307609],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1725101471,-0.1830589175,0.1784355193,0.4904836416],"action_prob":0.9492054582,"action_logp":-0.0521300025,"action_dist_inputs":[-1.4599362612,1.4678992033],"value_targets":1.9900000095} +{"eps_id":989577397,"obs":[-0.1725101471,-0.1830589175,0.1784355193,0.4904836416],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.1761713177,0.0091565605,0.1882451922,0.2589203715],"action_prob":0.9385487437,"action_logp":-0.0634204745,"action_dist_inputs":[-1.360150218,1.3659405708],"value_targets":1.0} +{"eps_id":1559257844,"obs":[0.0102226203,0.0125670079,-0.046466358,-0.0373003259],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0104739601,0.208323434,-0.047212366,-0.3442742229],"action_prob":0.3530667126,"action_logp":-1.041098237,"action_dist_inputs":[0.3023582101,-0.3032279313],"value_targets":74.7639312744} 
+{"eps_id":1559257844,"obs":[0.0104739601,0.208323434,-0.047212366,-0.3442742229],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0146404291,0.0139037948,-0.0540978499,-0.0668449998],"action_prob":0.8966846466,"action_logp":-0.1090510115,"action_dist_inputs":[1.0781111717,-1.0828074217],"value_targets":74.5090255737} +{"eps_id":1559257844,"obs":[0.0146404291,0.0139037948,-0.0540978499,-0.0668449998],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0149185043,0.209757939,-0.0554347485,-0.3760934472],"action_prob":0.2874993384,"action_logp":-1.2465347052,"action_dist_inputs":[0.4531623721,-0.4543978274],"value_targets":74.2515411377} +{"eps_id":1559257844,"obs":[0.0149185043,0.209757939,-0.0554347485,-0.3760934472],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0191136636,0.0154653741,-0.0629566163,-0.101391755],"action_prob":0.9093406796,"action_logp":-0.0950354412,"action_dist_inputs":[1.1502836943,-1.1553275585],"value_targets":73.9914550781} +{"eps_id":1559257844,"obs":[0.0191136636,0.0154653741,-0.0629566163,-0.101391755],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0194229707,0.2114304453,-0.0649844557,-0.4132540226],"action_prob":0.2264564186,"action_logp":-1.4852027893,"action_dist_inputs":[0.6133713722,-0.6150580645],"value_targets":73.7287445068} +{"eps_id":1559257844,"obs":[0.0194229707,0.2114304453,-0.0649844557,-0.4132540226],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0236515794,0.0172869284,-0.0732495338,-0.1417454332],"action_prob":0.9200137258,"action_logp":-0.0833666772,"action_dist_inputs":[1.2185405493,-1.2239927053],"value_targets":73.4633712769} 
+{"eps_id":1559257844,"obs":[0.0236515794,0.0172869284,-0.0732495338,-0.1417454332],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.023997318,-0.1767136753,-0.0760844424,0.1269590408],"action_prob":0.825286746,"action_logp":-0.1920243949,"action_dist_inputs":[0.7751738429,-0.7774110436],"value_targets":73.1953277588} +{"eps_id":1559257844,"obs":[0.023997318,-0.1767136753,-0.0760844424,0.1269590408],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0204630438,0.0194111206,-0.0735452622,-0.1887243241],"action_prob":0.5035290122,"action_logp":-0.6861139536,"action_dist_inputs":[-0.0062757274,0.0078405682],"value_targets":72.9245758057} +{"eps_id":1559257844,"obs":[0.0204630438,0.0194111206,-0.0735452622,-0.1887243241],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0208512675,-0.1745857894,-0.0773197487,0.0798816085],"action_prob":0.8582937717,"action_logp":-0.152808845,"action_dist_inputs":[0.899172008,-0.9020181894],"value_targets":72.6510848999} +{"eps_id":1559257844,"obs":[0.0208512675,-0.1745857894,-0.0773197487,0.0798816085],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.017359551,-0.3685190976,-0.0757221133,0.3472030759],"action_prob":0.5953786373,"action_logp":-0.5185576677,"action_dist_inputs":[0.1936070472,-0.1926387846],"value_targets":72.3748321533} +{"eps_id":1559257844,"obs":[0.017359551,-0.3685190976,-0.0757221133,0.3472030759],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0099891694,-0.562486887,-0.0687780529,0.615079999],"action_prob":0.2185421735,"action_logp":-1.5207762718,"action_dist_inputs":[-0.6349614263,0.639220655],"value_targets":72.0957946777} 
+{"eps_id":1559257844,"obs":[0.0099891694,-0.562486887,-0.0687780529,0.615079999],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0012605689,-0.3664747775,-0.0564764552,0.3015521765],"action_prob":0.9140805602,"action_logp":-0.0898365453,"action_dist_inputs":[-1.1789727211,1.1855359077],"value_targets":71.8139266968} +{"eps_id":1559257844,"obs":[-0.0012605689,-0.3664747775,-0.0564764552,0.3015521765],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0085900649,-0.1705952734,-0.0504454114,-0.0083938884],"action_prob":0.7516930103,"action_logp":-0.2854273021,"action_dist_inputs":[-0.5519038439,0.5557581186],"value_targets":71.5292205811} +{"eps_id":1559257844,"obs":[-0.0085900649,-0.1705952734,-0.0504454114,-0.0083938884],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0120019699,0.0252124779,-0.0506132878,-0.3165565431],"action_prob":0.3025054038,"action_logp":-1.1956561804,"action_dist_inputs":[0.417598635,-0.4177969098],"value_targets":71.2416381836} +{"eps_id":1559257844,"obs":[-0.0120019699,0.0252124779,-0.0506132878,-0.3165565431],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0114977201,-0.1691533923,-0.0569444187,-0.0402550548],"action_prob":0.900654912,"action_logp":-0.1046331003,"action_dist_inputs":[1.1000682116,-1.1044543982],"value_targets":70.9511489868} +{"eps_id":1559257844,"obs":[-0.0114977201,-0.1691533923,-0.0569444187,-0.0402550548],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0148807885,-0.3634144962,-0.057749521,0.2339316607],"action_prob":0.7514404655,"action_logp":-0.2857632935,"action_dist_inputs":[0.552788496,-0.5535210371],"value_targets":70.6577301025} 
+{"eps_id":1559257844,"obs":[-0.0148807885,-0.3634144962,-0.057749521,0.2339316607],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0221490785,-0.5576658249,-0.0530708879,0.5078539848],"action_prob":0.3597835004,"action_logp":-1.0222527981,"action_dist_inputs":[-0.2865814567,0.2897225022],"value_targets":70.3613433838} +{"eps_id":1559257844,"obs":[-0.0221490785,-0.5576658249,-0.0530708879,0.5078539848],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0333023928,-0.3618378341,-0.0429138094,0.1989300698],"action_prob":0.8793626428,"action_logp":-0.1285578758,"action_dist_inputs":[-0.9903116822,0.9960968494],"value_targets":70.061958313} +{"eps_id":1559257844,"obs":[-0.0333023928,-0.3618378341,-0.0429138094,0.1989300698],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0405391492,-0.1661292017,-0.0389352068,-0.106975399],"action_prob":0.6066423655,"action_logp":-0.4998158514,"action_dist_inputs":[-0.2152439058,0.217976287],"value_targets":69.7595596313} +{"eps_id":1559257844,"obs":[-0.0405391492,-0.1661292017,-0.0389352068,-0.106975399],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0438617356,0.0295284521,-0.0410747156,-0.4116834998],"action_prob":0.1986594498,"action_logp":-1.6161632538,"action_dist_inputs":[0.6964840293,-0.698210001],"value_targets":69.4540939331} +{"eps_id":1559257844,"obs":[-0.0438617356,0.0295284521,-0.0410747156,-0.4116834998],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0432711653,0.2252078801,-0.0493083857,-0.7170276642],"action_prob":0.0807921812,"action_logp":-2.5158751011,"action_dist_inputs":[1.2130534649,-1.218578577],"value_targets":69.1455535889} 
+{"eps_id":1559257844,"obs":[-0.0432711653,0.2252078801,-0.0493083857,-0.7170276642],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0387670062,0.0308017731,-0.0636489391,-0.4402636886],"action_prob":0.9457862973,"action_logp":-0.0557386428,"action_dist_inputs":[1.4254008532,-1.4336811304],"value_targets":68.8338928223} +{"eps_id":1559257844,"obs":[-0.0387670062,0.0308017731,-0.0636489391,-0.4402636886],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0381509736,-0.1633643657,-0.0724542141,-0.1683043987],"action_prob":0.926574707,"action_logp":-0.0762605891,"action_dist_inputs":[1.2646377087,-1.270588398],"value_targets":68.5190811157} +{"eps_id":1559257844,"obs":[-0.0381509736,-0.1633643657,-0.0724542141,-0.1683043987],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0414182581,0.032715857,-0.075820297,-0.4829359651],"action_prob":0.133537218,"action_logp":-2.0133750439,"action_dist_inputs":[0.9335753322,-0.9364635348],"value_targets":68.2010955811} +{"eps_id":1559257844,"obs":[-0.0414182581,0.032715857,-0.075820297,-0.4829359651],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0407639407,-0.1612586826,-0.0854790211,-0.2150799185],"action_prob":0.9321774244,"action_logp":-0.0702320859,"action_dist_inputs":[1.3070864677,-1.313542366],"value_targets":67.8798904419} +{"eps_id":1559257844,"obs":[-0.0407639407,-0.1612586826,-0.0854790211,-0.2150799185],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0439891145,-0.3550612032,-0.0897806138,0.0494622253],"action_prob":0.8894617558,"action_logp":-0.1171387807,"action_dist_inputs":[1.0408152342,-1.044439435],"value_targets":67.5554504395} 
+{"eps_id":1559257844,"obs":[-0.0439891145,-0.3550612032,-0.0897806138,0.0494622253],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0510903411,-0.5487887263,-0.0887913704,0.3125230968],"action_prob":0.7426701784,"action_logp":-0.2975032628,"action_dist_inputs":[0.5300201774,-0.5298731327],"value_targets":67.227722168} +{"eps_id":1559257844,"obs":[-0.0510903411,-0.5487887263,-0.0887913704,0.3125230968],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0620661154,-0.7425409555,-0.0825409144,0.5759361386],"action_prob":0.3957784772,"action_logp":-0.9269006252,"action_dist_inputs":[-0.2095424831,0.2135437131],"value_targets":66.8966903687} +{"eps_id":1559257844,"obs":[-0.0620661154,-0.7425409555,-0.0825409144,0.5759361386],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0769169331,-0.5463648438,-0.0710221902,0.2584343851],"action_prob":0.8522065878,"action_logp":-0.1599262804,"action_dist_inputs":[-0.8728197217,0.8791937232],"value_targets":66.5623168945} +{"eps_id":1559257844,"obs":[-0.0769169331,-0.5463648438,-0.0710221902,0.2584343851],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0878442302,-0.7404047847,-0.0658534989,0.5278975368],"action_prob":0.4572960734,"action_logp":-0.7824242115,"action_dist_inputs":[-0.0839442685,0.087288551],"value_targets":66.2245635986} +{"eps_id":1559257844,"obs":[-0.0878442302,-0.7404047847,-0.0658534989,0.5278975368],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1026523262,-0.5444211364,-0.0552955493,0.2152123153],"action_prob":0.8315685391,"action_logp":-0.1844415665,"action_dist_inputs":[-0.7953733206,0.8014114499],"value_targets":65.883392334} 
+{"eps_id":1559257844,"obs":[-0.1026523262,-0.5444211364,-0.0552955493,0.2152123153],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1135407463,-0.348554045,-0.0509913042,-0.094387874],"action_prob":0.4965696633,"action_logp":-0.7000315189,"action_dist_inputs":[0.0082321744,-0.0054893699],"value_targets":65.5387802124} +{"eps_id":1559257844,"obs":[-0.1135407463,-0.348554045,-0.0509913042,-0.094387874],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1205118299,-0.5429094434,-0.0528790615,0.1817813963],"action_prob":0.8313530087,"action_logp":-0.1847007573,"action_dist_inputs":[0.7965645194,-0.7986824512],"value_targets":65.1906890869} +{"eps_id":1559257844,"obs":[-0.1205118299,-0.5429094434,-0.0528790615,0.1817813963],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1313700229,-0.3470723033,-0.0492434353,-0.1271029562],"action_prob":0.4417944551,"action_logp":-0.8169105649,"action_dist_inputs":[0.1180228963,-0.115859665],"value_targets":64.8390808105} +{"eps_id":1559257844,"obs":[-0.1313700229,-0.3470723033,-0.0492434353,-0.1271029562],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1383114606,-0.5414555073,-0.0517854914,0.1496466547],"action_prob":0.8490839601,"action_logp":-0.1635972261,"action_dist_inputs":[0.8623759151,-0.8650582433],"value_targets":64.4839172363} +{"eps_id":1559257844,"obs":[-0.1383114606,-0.5414555073,-0.0517854914,0.1496466547],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1491405666,-0.7357991338,-0.0487925597,0.4255531728],"action_prob":0.6100150347,"action_logp":-0.494271636,"action_dist_inputs":[0.2244675756,-0.2229079604],"value_targets":64.1251678467} 
+{"eps_id":1559257844,"obs":[-0.1491405666,-0.7357991338,-0.0487925597,0.4255531728],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1638565511,-0.9301971793,-0.0402814969,0.7024639249],"action_prob":0.2512072027,"action_logp":-1.3814771175,"action_dist_inputs":[-0.5435593724,0.548624754],"value_targets":63.7627983093} +{"eps_id":1559257844,"obs":[-0.1638565511,-0.9301971793,-0.0402814969,0.7024639249],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1824605018,-0.7345407605,-0.0262322184,0.3973778188],"action_prob":0.9010475874,"action_logp":-0.1041972041,"action_dist_inputs":[-1.1009175777,1.1080009937],"value_targets":63.3967666626} +{"eps_id":1559257844,"obs":[-0.1824605018,-0.7345407605,-0.0262322184,0.3973778188],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1971513182,-0.9292809367,-0.0182846617,0.6816761494],"action_prob":0.2514595389,"action_logp":-1.3804731369,"action_dist_inputs":[-0.5430573821,0.5477858782],"value_targets":63.0270347595} +{"eps_id":1559257844,"obs":[-0.1971513182,-0.9292809367,-0.0182846617,0.6816761494],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2157369405,-0.733909905,-0.0046511381,0.3832931817],"action_prob":0.9035817385,"action_logp":-0.1013887003,"action_dist_inputs":[-1.1153841019,1.1222869158],"value_targets":62.6535720825} +{"eps_id":1559257844,"obs":[-0.2157369405,-0.733909905,-0.0046511381,0.3832931817],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2304151356,-0.9289655089,0.0030147256,0.6745060086],"action_prob":0.2354698032,"action_logp":-1.446172595,"action_dist_inputs":[-0.5865975618,0.591081202],"value_targets":62.2763366699} 
+{"eps_id":1559257844,"obs":[-0.2304151356,-0.9289655089,0.0030147256,0.6745060086],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2489944398,-0.7338855863,0.0165048447,0.382773757],"action_prob":0.9098451734,"action_logp":-0.0944808125,"action_dist_inputs":[-1.1524797678,1.1592669487],"value_targets":61.8952865601} +{"eps_id":1559257844,"obs":[-0.2489944398,-0.7338855863,0.0165048447,0.382773757],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2636721432,-0.5390017629,0.0241603199,0.0953401402],"action_prob":0.7936323285,"action_logp":-0.2311349809,"action_dist_inputs":[-0.6713127494,0.6756480932],"value_targets":61.5103912354} +{"eps_id":1559257844,"obs":[-0.2636721432,-0.5390017629,0.0241603199,0.0953401402],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2744521797,-0.7344615459,0.0260671228,0.3955466449],"action_prob":0.5416160226,"action_logp":-0.6131979227,"action_dist_inputs":[0.0837481618,-0.0831021518],"value_targets":61.1216087341} +{"eps_id":1559257844,"obs":[-0.2744521797,-0.7344615459,0.0260671228,0.3955466449],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2891414165,-0.5397189856,0.0339780562,0.1111948788],"action_prob":0.8180938959,"action_logp":-0.2007781565,"action_dist_inputs":[-0.7495998144,0.7538864613],"value_targets":60.7288970947} +{"eps_id":1559257844,"obs":[-0.2891414165,-0.5397189856,0.0339780562,0.1111948788],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2999357879,-0.7353109121,0.0362019539,0.4144010842],"action_prob":0.4867501855,"action_logp":-0.7200042605,"action_dist_inputs":[-0.0261258427,0.0268858895],"value_targets":60.3322181702} 
+{"eps_id":1559257844,"obs":[-0.2999357879,-0.7353109121,0.0362019539,0.4144010842],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3146420121,-0.540720284,0.044489976,0.1333474368],"action_prob":0.8436025381,"action_logp":-0.170073837,"action_dist_inputs":[-0.8404957652,0.8447850347],"value_targets":59.9315338135} +{"eps_id":1559257844,"obs":[-0.3146420121,-0.540720284,0.044489976,0.1333474368],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3254564106,-0.346262902,0.0471569225,-0.1449742466],"action_prob":0.5815815926,"action_logp":-0.5420040488,"action_dist_inputs":[-0.1641723812,0.1650969833],"value_targets":59.526802063} +{"eps_id":1559257844,"obs":[-0.3254564106,-0.346262902,0.0471569225,-0.1449742466],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3323816955,-0.5420273542,0.0442574397,0.1622049809],"action_prob":0.7727811933,"action_logp":-0.2577593029,"action_dist_inputs":[0.6104691625,-0.6136131883],"value_targets":59.117980957} +{"eps_id":1559257844,"obs":[-0.3323816955,-0.5420273542,0.0442574397,0.1622049809],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3432222307,-0.7377540469,0.0475015379,0.4685149491],"action_prob":0.3640862703,"action_logp":-1.0103644133,"action_dist_inputs":[-0.2782795429,0.2793923914],"value_targets":58.7050323486} +{"eps_id":1559257844,"obs":[-0.3432222307,-0.7377540469,0.0475015379,0.4685149491],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3579773009,-0.5433341861,0.0568718389,0.1911743581],"action_prob":0.8825029731,"action_logp":-0.1249931455,"action_dist_inputs":[-1.0059438944,1.0104050636],"value_targets":58.2879104614} 
+{"eps_id":1559257844,"obs":[-0.3579773009,-0.5433341861,0.0568718389,0.1911743581],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3688440025,-0.3490700126,0.0606953241,-0.0830394924],"action_prob":0.7076456547,"action_logp":-0.3458117843,"action_dist_inputs":[-0.4413446784,0.4426322281],"value_targets":57.8665771484} +{"eps_id":1559257844,"obs":[-0.3688440025,-0.3490700126,0.0606953241,-0.0830394924],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3758254051,-0.5450071096,0.0590345338,0.2281584591],"action_prob":0.6633994579,"action_logp":-0.4103779495,"action_dist_inputs":[0.3379996419,-0.3404808342],"value_targets":57.4409866333} +{"eps_id":1559257844,"obs":[-0.3758254051,-0.5450071096,0.0590345338,0.2281584591],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3867255449,-0.3507763445,0.0635977015,-0.0453341305],"action_prob":0.7608048916,"action_logp":-0.2733783424,"action_dist_inputs":[-0.5777916908,0.5793056488],"value_targets":57.0110969543} +{"eps_id":1559257844,"obs":[-0.3867255449,-0.3507763445,0.0635977015,-0.0453341305],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3937410712,-0.1566212773,0.062691018,-0.3172928393],"action_prob":0.4105557501,"action_logp":-0.8902435303,"action_dist_inputs":[0.179777205,-0.1818911731],"value_targets":56.5768661499} +{"eps_id":1559257844,"obs":[-0.3937410712,-0.1566212773,0.062691018,-0.3172928393],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3968734741,-0.3525775075,0.0563451648,-0.0055167964],"action_prob":0.8548525572,"action_logp":-0.1568263024,"action_dist_inputs":[0.8837859035,-0.8893928528],"value_targets":56.1382484436} 
+{"eps_id":1559257844,"obs":[-0.3968734741,-0.3525775075,0.0563451648,-0.0055167964],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4039250314,-0.5484603643,0.0562348291,0.3043979108],"action_prob":0.5278657079,"action_logp":-0.6389133334,"action_dist_inputs":[0.0549026057,-0.0566758923],"value_targets":55.6952018738} +{"eps_id":1559257844,"obs":[-0.4039250314,-0.5484603643,0.0562348291,0.3043979108],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.414894253,-0.354183048,0.0623227879,0.0299668983],"action_prob":0.8304408193,"action_logp":-0.185798645,"action_dist_inputs":[-0.7933831811,0.7953714132],"value_targets":55.2476768494} +{"eps_id":1559257844,"obs":[-0.414894253,-0.354183048,0.0623227879,0.0299668983],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4219779074,-0.1600076407,0.0629221275,-0.2424197048],"action_prob":0.5547865033,"action_logp":-0.589171946,"action_dist_inputs":[-0.1107569858,0.1092723757],"value_targets":54.7956352234} +{"eps_id":1559257844,"obs":[-0.4219779074,-0.1600076407,0.0629221275,-0.2424197048],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.425178051,-0.3559693098,0.0580737293,0.0694280043],"action_prob":0.7912526131,"action_logp":-0.2341379821,"action_dist_inputs":[0.6637659669,-0.6687265038],"value_targets":54.3390235901} +{"eps_id":1559257844,"obs":[-0.425178051,-0.3559693098,0.0580737293,0.0694280043],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4322974384,-0.1617259532,0.0594622903,-0.2043817639],"action_prob":0.6185056567,"action_logp":-0.4804489315,"action_dist_inputs":[-0.242198348,0.2410120219],"value_targets":53.8778038025} 
+{"eps_id":1559257844,"obs":[-0.4322974384,-0.1617259532,0.0594622903,-0.2043817639],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4355319738,-0.3576456308,0.0553746559,0.1064499691],"action_prob":0.7509325147,"action_logp":-0.2864394784,"action_dist_inputs":[0.5494759083,-0.5541161299],"value_targets":53.4119224548} +{"eps_id":1559257844,"obs":[-0.4355319738,-0.3576456308,0.0553746559,0.1064499691],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4426848888,-0.1633591354,0.0575036556,-0.16826123],"action_prob":0.674382329,"action_logp":-0.3939580917,"action_dist_inputs":[-0.3644967973,0.3635764122],"value_targets":52.9413375854} +{"eps_id":1559257844,"obs":[-0.4426848888,-0.1633591354,0.0575036556,-0.16826123],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4459520578,-0.3592550159,0.0541384295,0.1419936717],"action_prob":0.7030014992,"action_logp":-0.3523962796,"action_dist_inputs":[0.4286453724,-0.4329864681],"value_targets":52.4659957886} +{"eps_id":1559257844,"obs":[-0.4459520578,-0.3592550159,0.0541384295,0.1419936717],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4531371593,-0.1649485379,0.0569783039,-0.1331300884],"action_prob":0.722535491,"action_logp":-0.3249887526,"action_dist_inputs":[-0.4788706899,0.4782027602],"value_targets":51.9858551025} +{"eps_id":1559257844,"obs":[-0.4531371593,-0.1649485379,0.0569783039,-0.1331300884],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4564361274,-0.3608384132,0.0543157011,0.1769704223],"action_prob":0.6468990445,"action_logp":-0.4355649948,"action_dist_inputs":[0.3006887138,-0.3047477603],"value_targets":51.5008621216} 
+{"eps_id":1559257844,"obs":[-0.4564361274,-0.3608384132,0.0543157011,0.1769704223],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4636529088,-0.1665341407,0.0578551106,-0.0980951935],"action_prob":0.7636057734,"action_logp":-0.2697036266,"action_dist_inputs":[-0.5864806771,0.5860700607],"value_targets":51.0109710693} +{"eps_id":1559257844,"obs":[-0.4636529088,-0.1665341407,0.0578551106,-0.0980951935],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4669835865,-0.3624354899,0.0558932088,0.2122651041],"action_prob":0.5826850533,"action_logp":-0.5401084423,"action_dist_inputs":[0.1650086492,-0.1687969118],"value_targets":50.5161323547} +{"eps_id":1559257844,"obs":[-0.4669835865,-0.3624354899,0.0558932088,0.2122651041],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4742322862,-0.1681553721,0.0601385087,-0.0622759424],"action_prob":0.7983873487,"action_logp":-0.2251614034,"action_dist_inputs":[-0.6881880164,0.6880576611],"value_targets":50.0162963867} +{"eps_id":1559257844,"obs":[-0.4742322862,-0.1681553721,0.0601385087,-0.0622759424],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4775953889,-0.3640857041,0.0588929914,0.2487583309],"action_prob":0.5115473866,"action_logp":-0.6703150272,"action_dist_inputs":[0.0213388372,-0.0248589441],"value_targets":49.5114097595} +{"eps_id":1559257844,"obs":[-0.4775953889,-0.3640857041,0.0588929914,0.2487583309],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4848771095,-0.1698521078,0.0638681576,-0.0247824434],"action_prob":0.8276479244,"action_logp":-0.1891674399,"action_dist_inputs":[-0.7844283581,0.7846197486],"value_targets":49.0014266968} 
+{"eps_id":1559257844,"obs":[-0.4848771095,-0.1698521078,0.0638681576,-0.0247824434],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.488274157,-0.3658290803,0.0633725077,0.2873481214],"action_prob":0.4362418354,"action_logp":-0.8295584917,"action_dist_inputs":[-0.1298350692,0.1265933812],"value_targets":48.486289978} +{"eps_id":1559257844,"obs":[-0.488274157,-0.3658290803,0.0633725077,0.2873481214],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4955907464,-0.1716654301,0.0691194683,0.0153063415],"action_prob":0.8520700336,"action_logp":-0.1600865722,"action_dist_inputs":[-0.8751781583,0.8757511973],"value_targets":47.9659461975} +{"eps_id":1559257844,"obs":[-0.4955907464,-0.1716654301,0.0691194683,0.0153063415],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4990240335,-0.3677070737,0.0694255978,0.3289715648],"action_prob":0.3610729277,"action_logp":-1.0186753273,"action_dist_inputs":[-0.2868215144,0.2838886678],"value_targets":47.4403495789} +{"eps_id":1559257844,"obs":[-0.4990240335,-0.3677070737,0.0694255978,0.3289715648],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5063781738,-0.563745141,0.0760050267,0.6427150369],"action_prob":0.1277467906,"action_logp":-2.057705164,"action_dist_inputs":[-0.9599992633,0.9610305429],"value_targets":46.9094467163} +{"eps_id":1559257844,"obs":[-0.5063781738,-0.563745141,0.0760050267,0.6427150369],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5176531076,-0.3697602451,0.0888593271,0.3749017119],"action_prob":0.928458333,"action_logp":-0.0742297769,"action_dist_inputs":[-1.2791217566,1.2841234207],"value_targets":46.3731765747} 
+{"eps_id":1559257844,"obs":[-0.5176531076,-0.3697602451,0.0888593271,0.3749017119],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5250483155,-0.1760054529,0.0963573605,0.1115064695],"action_prob":0.8917622566,"action_logp":-0.1145556867,"action_dist_inputs":[-1.0536136627,1.0552556515],"value_targets":45.8314933777} +{"eps_id":1559257844,"obs":[-0.5250483155,-0.1760054529,0.0963573605,0.1115064695],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.528568387,0.0176131688,0.0985874906,-0.1492889822],"action_prob":0.7835362554,"action_logp":-0.2439379543,"action_dist_inputs":[-0.6442061067,0.6421880722],"value_targets":45.2843360901} +{"eps_id":1559257844,"obs":[-0.528568387,0.0176131688,0.0985874906,-0.1492889822],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5282161236,-0.1787721664,0.0956017151,0.1727964133],"action_prob":0.4975875914,"action_logp":-0.6979836822,"action_dist_inputs":[-0.00743081,0.0022188872],"value_targets":44.7316513062} +{"eps_id":1559257844,"obs":[-0.5282161236,-0.1787721664,0.0956017151,0.1727964133],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5317915678,-0.375123173,0.0990576446,0.4940420985],"action_prob":0.1747435331,"action_logp":-1.7444359064,"action_dist_inputs":[-0.7768514752,0.7755233049],"value_targets":44.1733856201} +{"eps_id":1559257844,"obs":[-0.5317915678,-0.375123173,0.0990576446,0.4940420985],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.539294064,-0.5714923739,0.1089384854,0.8162273169],"action_prob":0.0859590992,"action_logp":-2.4538836479,"action_dist_inputs":[-1.1804490089,1.1835546494],"value_targets":43.6094818115} 
+{"eps_id":1559257844,"obs":[-0.539294064,-0.5714923739,0.1089384854,0.8162273169],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5507239103,-0.3780171275,0.1252630353,0.5596993566],"action_prob":0.9383023381,"action_logp":-0.063683033,"action_dist_inputs":[-1.357555747,1.3642711639],"value_targets":43.0398788452} +{"eps_id":1559257844,"obs":[-0.5507239103,-0.3780171275,0.1252630353,0.5596993566],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5582842231,-0.184855327,0.1364570111,0.3089579642],"action_prob":0.9226039052,"action_logp":-0.08055529,"action_dist_inputs":[-1.2371331453,1.2411296368],"value_targets":42.4645233154} +{"eps_id":1559257844,"obs":[-0.5582842231,-0.184855327,0.1364570111,0.3089579642],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5619813204,0.0080852583,0.1426361799,0.0622313656],"action_prob":0.8884295821,"action_logp":-0.1182999015,"action_dist_inputs":[-1.0370804071,1.0377190113],"value_targets":41.8833580017} +{"eps_id":1559257844,"obs":[-0.5619813204,0.0080852583,0.1426361799,0.0622313656],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.561819613,0.2009046674,0.1438807994,-0.1822675467],"action_prob":0.8031656742,"action_logp":-0.2191942334,"action_dist_inputs":[-0.7045448422,0.7016539574],"value_targets":41.2963218689} +{"eps_id":1559257844,"obs":[-0.561819613,0.2009046674,0.1438807994,-0.1822675467],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5578015447,0.3937061131,0.1402354538,-0.4263262451],"action_prob":0.5986379981,"action_logp":-0.5130982399,"action_dist_inputs":[-0.2029741853,0.1968190372],"value_targets":40.7033538818} 
+{"eps_id":1559257844,"obs":[-0.5578015447,0.3937061131,0.1402354538,-0.4263262451],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5499274135,0.5865921974,0.13170892,-0.6717208624],"action_prob":0.3152961135,"action_logp":-1.1542429924,"action_dist_inputs":[0.3834297955,-0.3920443654],"value_targets":40.1044006348} +{"eps_id":1559257844,"obs":[-0.5499274135,0.5865921974,0.13170892,-0.6717208624],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5381955504,0.3899091482,0.1182745099,-0.3406409323],"action_prob":0.8566714525,"action_logp":-0.1547008306,"action_dist_inputs":[0.8890672326,-0.8988473415],"value_targets":39.4993934631} +{"eps_id":1559257844,"obs":[-0.5381955504,0.3899091482,0.1182745099,-0.3406409323],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5303974152,0.1933203787,0.1114616916,-0.0131262233],"action_prob":0.6011522412,"action_logp":-0.5089070797,"action_dist_inputs":[0.2010432631,-0.2092250735],"value_targets":38.8882751465} +{"eps_id":1559257844,"obs":[-0.5303974152,0.1933203787,0.1114616916,-0.0131262233],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5265309811,0.3866819739,0.1111991704,-0.2686668634],"action_prob":0.7487359047,"action_logp":-0.2893689573,"action_dist_inputs":[-0.5480332375,0.5438488126],"value_targets":38.2709846497} +{"eps_id":1559257844,"obs":[-0.5265309811,0.3866819739,0.1111991704,-0.2686668634],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.518797338,0.1901632845,0.105825834,0.0569157898],"action_prob":0.5062096715,"action_logp":-0.6808043122,"action_dist_inputs":[0.0086105857,-0.0162294582],"value_targets":37.6474609375} 
+{"eps_id":1559257844,"obs":[-0.518797338,0.1901632845,0.105825834,0.0569157898],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5149940848,0.3836212456,0.1069641486,-0.2005929947],"action_prob":0.7948507071,"action_logp":-0.229600966,"action_dist_inputs":[-0.6787974238,0.6756191254],"value_targets":37.0176353455} +{"eps_id":1559257844,"obs":[-0.5149940848,0.3836212456,0.1069641486,-0.2005929947],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5073216558,0.1871449798,0.1029522866,0.1238245592],"action_prob":0.4158573747,"action_logp":-0.8774129152,"action_dist_inputs":[-0.1733358204,0.1664669812],"value_targets":36.3814506531} +{"eps_id":1559257844,"obs":[-0.5073216558,0.1871449798,0.1029522866,0.1238245592],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5035787821,-0.0092896987,0.1054287776,0.4471309483],"action_prob":0.1718025357,"action_logp":-1.7614095211,"action_dist_inputs":[-0.7875403166,0.7853655219],"value_targets":35.7388381958} +{"eps_id":1559257844,"obs":[-0.5035787821,-0.0092896987,0.1054287776,0.4471309483],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5037645698,0.1841951758,0.1143713966,0.1894537956],"action_prob":0.9032884836,"action_logp":-0.1017133072,"action_dist_inputs":[-1.1161004305,1.1182086468],"value_targets":35.0897369385} +{"eps_id":1559257844,"obs":[-0.5037645698,0.1841951758,0.1143713966,0.1894537956],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5000806451,0.3775109053,0.1181604713,-0.0650733933],"action_prob":0.8557625413,"action_logp":-0.1557623446,"action_dist_inputs":[-0.8908217549,0.8897100687],"value_targets":34.4340782166} 
+{"eps_id":1559257844,"obs":[-0.5000806451,0.3775109053,0.1181604713,-0.0650733933],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4925304353,0.5707579851,0.1168590039,-0.318264395],"action_prob":0.7415862083,"action_logp":-0.2989638746,"action_dist_inputs":[-0.5294683576,0.5247609019],"value_targets":33.7717971802} +{"eps_id":1559257844,"obs":[-0.4925304353,0.5707579851,0.1168590039,-0.318264395],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4811152816,0.7640385032,0.1104937196,-0.5719293952],"action_prob":0.5101208687,"action_logp":-0.6731075644,"action_dist_inputs":[-0.0244268719,0.0160622448],"value_targets":33.1028251648} +{"eps_id":1559257844,"obs":[-0.4811152816,0.7640385032,0.1104937196,-0.5719293952],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4658344984,0.5675547719,0.0990551263,-0.2465820312],"action_prob":0.7400563955,"action_logp":-0.3010289073,"action_dist_inputs":[0.5179732442,-0.5282883644],"value_targets":32.4270935059} +{"eps_id":1559257844,"obs":[-0.4658344984,0.5675547719,0.0990551263,-0.2465820312],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4544834197,0.3711678982,0.0941234902,0.0756288692],"action_prob":0.4193090796,"action_logp":-0.8691469431,"action_dist_inputs":[-0.1665569991,0.1590532511],"value_targets":31.7445411682} +{"eps_id":1559257844,"obs":[-0.4544834197,0.3711678982,0.0941234902,0.0756288692],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4470600486,0.5648232698,0.0956360623,-0.1859369874],"action_prob":0.8145847917,"action_logp":-0.2050767541,"action_dist_inputs":[-0.7412962317,0.7387846708],"value_targets":31.0550918579} 
+{"eps_id":1559257844,"obs":[-0.4470600486,0.5648232698,0.0956360623,-0.1859369874],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4357635975,0.3684723675,0.0919173285,0.1353176981],"action_prob":0.3509712517,"action_logp":-1.0470509529,"action_dist_inputs":[-0.3106438816,0.3041287661],"value_targets":30.3586788177} +{"eps_id":1559257844,"obs":[-0.4357635975,0.3684723675,0.0919173285,0.1353176981],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4283941388,0.5621657372,0.0946236774,-0.1270102113],"action_prob":0.8381854892,"action_logp":-0.1765158772,"action_dist_inputs":[-0.8231651187,0.8216235042],"value_targets":29.6552295685} +{"eps_id":1559257844,"obs":[-0.4283941388,0.5621657372,0.0946236774,-0.1270102113],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4171508253,0.7558137178,0.0920834765,-0.3884049058],"action_prob":0.7070099115,"action_logp":-0.3467106223,"action_dist_inputs":[-0.4431605339,0.4377452433],"value_targets":28.9446773529} +{"eps_id":1559257844,"obs":[-0.4171508253,0.7558137178,0.0920834765,-0.3884049058],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4020345509,0.5595135689,0.0843153745,-0.0681679174],"action_prob":0.5386478305,"action_logp":-0.6186933517,"action_dist_inputs":[0.0728793815,-0.0820208713],"value_targets":28.2269458771} +{"eps_id":1559257844,"obs":[-0.4020345509,0.5595135689,0.0843153745,-0.0681679174],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3908442855,0.75333184,0.0829520226,-0.3331031799],"action_prob":0.747162044,"action_logp":-0.2914731801,"action_dist_inputs":[-0.5439452529,0.5395879745],"value_targets":27.5019664764} 
+{"eps_id":1559257844,"obs":[-0.3908442855,0.75333184,0.0829520226,-0.3331031799],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.375777632,0.9471812248,0.0762899593,-0.5985174179],"action_prob":0.5227472782,"action_logp":-0.6486571431,"action_dist_inputs":[-0.0497392677,0.0413125642],"value_targets":26.7696628571} +{"eps_id":1559257844,"obs":[-0.375777632,0.9471812248,0.0762899593,-0.5985174179],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3568340242,1.1411575079,0.0643196106,-0.8662279844],"action_prob":0.2678516805,"action_logp":-1.3173218966,"action_dist_inputs":[0.4975041747,-0.508045435],"value_targets":26.0299625397} +{"eps_id":1559257844,"obs":[-0.3568340242,1.1411575079,0.0643196106,-0.8662279844],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3340108693,1.3353478909,0.0469950475,-1.1380143166],"action_prob":0.1271299422,"action_logp":-2.0625455379,"action_dist_inputs":[0.9578863978,-0.9686906934],"value_targets":25.2827911377} +{"eps_id":1559257844,"obs":[-0.3340108693,1.3353478909,0.0469950475,-1.1380143166],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3073039055,1.1396439075,0.0242347606,-0.8309710622],"action_prob":0.9285924435,"action_logp":-0.0740853325,"action_dist_inputs":[1.2772297859,-1.2880367041],"value_targets":24.5280704498} +{"eps_id":1559257844,"obs":[-0.3073039055,1.1396439075,0.0242347606,-0.8309710622],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2845110297,0.944199264,0.0076153399,-0.5307658315],"action_prob":0.8814697266,"action_logp":-0.1261646301,"action_dist_inputs":[0.997913897,-1.0085080862],"value_targets":23.7657279968} 
+{"eps_id":1559257844,"obs":[-0.2845110297,0.944199264,0.0076153399,-0.5307658315],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2656270564,0.7489709854,-0.0029999763,-0.2356930524],"action_prob":0.748621881,"action_logp":-0.2895212173,"action_dist_inputs":[0.5406506062,-0.5506253839],"value_targets":22.9956855774} +{"eps_id":1559257844,"obs":[-0.2656270564,0.7489709854,-0.0029999763,-0.2356930524],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2506476343,0.5538920164,-0.0077138375,0.0560420752],"action_prob":0.4674364924,"action_logp":-0.7604917884,"action_dist_inputs":[-0.0688201934,0.0616185628],"value_targets":22.2178649902} +{"eps_id":1559257844,"obs":[-0.2506476343,0.5538920164,-0.0077138375,0.0560420752],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2395697832,0.7491237521,-0.006592996,-0.239064604],"action_prob":0.779163897,"action_logp":-0.249533847,"action_dist_inputs":[-0.6315920353,0.6292088628],"value_targets":21.4321861267} +{"eps_id":1559257844,"obs":[-0.2395697832,0.7491237521,-0.006592996,-0.239064604],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2245873064,0.9443392754,-0.0113742882,-0.5338198543],"action_prob":0.5231173635,"action_logp":-0.6479493976,"action_dist_inputs":[-0.049810119,0.0427255295],"value_targets":20.6385707855} +{"eps_id":1559257844,"obs":[-0.2245873064,0.9443392754,-0.0113742882,-0.5338198543],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2057005167,0.7493790984,-0.0220506843,-0.2447424829],"action_prob":0.7781695127,"action_logp":-0.2508109212,"action_dist_inputs":[0.6227154136,-0.6323155761],"value_targets":19.8369407654} 
+{"eps_id":1559257844,"obs":[-0.2057005167,0.7493790984,-0.0220506843,-0.2447424829],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1907129437,0.5545789003,-0.0269455351,0.040904358],"action_prob":0.5095507503,"action_logp":-0.6742258072,"action_dist_inputs":[0.0155677665,-0.022640001],"value_targets":19.0272140503} +{"eps_id":1559257844,"obs":[-0.1907129437,0.5545789003,-0.0269455351,0.040904358],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1796213686,0.7500767112,-0.0261274464,-0.2601568699],"action_prob":0.7606548071,"action_logp":-0.273575604,"action_dist_inputs":[-0.5793362856,0.576936841],"value_targets":18.2093067169} +{"eps_id":1559257844,"obs":[-0.1796213686,0.7500767112,-0.0261274464,-0.2601568699],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1646198332,0.5553372502,-0.0313305855,0.0241719503],"action_prob":0.5400820374,"action_logp":-0.6160342097,"action_dist_inputs":[0.0767884701,-0.0838845447],"value_targets":17.3831367493} +{"eps_id":1559257844,"obs":[-0.1646198332,0.5553372502,-0.0313305855,0.0241719503],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1535130888,0.3606783152,-0.0308471452,0.3068073988],"action_prob":0.2538689971,"action_logp":-1.3709368706,"action_dist_inputs":[-0.5403013229,0.5377815366],"value_targets":16.5486240387} +{"eps_id":1559257844,"obs":[-0.1535130888,0.3606783152,-0.0308471452,0.3068073988],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1462995112,0.5562259555,-0.0247109979,0.0045578978],"action_prob":0.8726476431,"action_logp":-0.1362234503,"action_dist_inputs":[-0.9615164995,0.96305722],"value_targets":15.7056808472} 
+{"eps_id":1559257844,"obs":[-0.1462995112,0.5562259555,-0.0247109979,0.0045578978],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1351750046,0.7516934276,-0.0246198401,-0.2958180904],"action_prob":0.7371344566,"action_logp":-0.304984957,"action_dist_inputs":[-0.5168605447,0.5142670274],"value_targets":14.8542232513} +{"eps_id":1559257844,"obs":[-0.1351750046,0.7516934276,-0.0246198401,-0.2958180904],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1201411337,0.9471575022,-0.0305362009,-0.5961629152],"action_prob":0.4050963819,"action_logp":-0.9036302567,"action_dist_inputs":[0.1885846704,-0.1956897229],"value_targets":13.9941644669} +{"eps_id":1559257844,"obs":[-0.1201411337,0.9471575022,-0.0305362009,-0.5961629152],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1011979803,0.7524759173,-0.0424594581,-0.3132528961],"action_prob":0.8550870419,"action_logp":-0.1565520465,"action_dist_inputs":[0.8830489516,-0.892020762],"value_targets":13.125418663} +{"eps_id":1559257844,"obs":[-0.1011979803,0.7524759173,-0.0424594581,-0.3132528961],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0861484632,0.5579837561,-0.0487245172,-0.034256909],"action_prob":0.6550130844,"action_logp":-0.4231000543,"action_dist_inputs":[0.3170130849,-0.3241356909],"value_targets":12.2478981018} +{"eps_id":1559257844,"obs":[-0.0861484632,0.5579837561,-0.0487245172,-0.034256909],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0749887899,0.7537693381,-0.0494096577,-0.3419061005],"action_prob":0.6717347503,"action_logp":-0.3978917599,"action_dist_inputs":[-0.3595619798,0.3564794958],"value_targets":11.3615131378} 
+{"eps_id":1559257844,"obs":[-0.0749887899,0.7537693381,-0.0494096577,-0.3419061005],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0599134006,0.9495581985,-0.0562477782,-0.6497514248],"action_prob":0.289396733,"action_logp":-1.2399567366,"action_dist_inputs":[0.4455785453,-0.452737242],"value_targets":10.4661741257} +{"eps_id":1559257844,"obs":[-0.0599134006,0.9495581985,-0.0562477782,-0.6497514248],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0409222357,1.1454166174,-0.0692428052,-0.959602654],"action_prob":0.0986777991,"action_logp":-2.315895319,"action_dist_inputs":[1.1016379595,-1.1103650331],"value_targets":9.5617923737} +{"eps_id":1559257844,"obs":[-0.0409222357,1.1454166174,-0.0692428052,-0.959602654],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.018013902,0.9512904286,-0.0884348601,-0.6894522309],"action_prob":0.9477491975,"action_logp":-0.0536653437,"action_dist_inputs":[1.4440970421,-1.4539378881],"value_targets":8.6482753754} +{"eps_id":1559257844,"obs":[-0.018013902,0.9512904286,-0.0884348601,-0.6894522309],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0010119054,0.757499814,-0.1022239029,-0.4258682132],"action_prob":0.9264470339,"action_logp":-0.0763984323,"action_dist_inputs":[1.2623194456,-1.2710316181],"value_targets":7.7255306244} +{"eps_id":1559257844,"obs":[0.0010119054,0.757499814,-0.1022239029,-0.4258682132],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0161619019,0.953909874,-0.1107412651,-0.7489468455],"action_prob":0.1359838545,"action_logp":-1.9952191114,"action_dist_inputs":[0.9209483862,-0.9281069636],"value_targets":6.7934651375} 
+{"eps_id":1559257844,"obs":[0.0161619019,0.953909874,-0.1107412651,-0.7489468455],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0352400988,1.1503710747,-0.1257202029,-1.0743248463],"action_prob":0.0577585958,"action_logp":-2.8514831066,"action_dist_inputs":[1.3915505409,-1.4004387856],"value_targets":5.8519849777} +{"eps_id":1559257844,"obs":[0.0352400988,1.1503710747,-0.1257202029,-1.0743248463],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0582475178,0.9571142793,-0.1472067088,-0.8235920668],"action_prob":0.9593116641,"action_logp":-0.0415392444,"action_dist_inputs":[1.5749708414,-1.5853054523],"value_targets":4.9009947777} +{"eps_id":1559257844,"obs":[0.0582475178,0.9571142793,-0.1472067088,-0.8235920668],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0773898065,1.1539101601,-0.1636785418,-1.1587159634],"action_prob":0.0466832817,"action_logp":-3.0643692017,"action_dist_inputs":[1.5036433935,-1.512917757],"value_targets":3.9403989315} +{"eps_id":1559257844,"obs":[0.0773898065,1.1539101601,-0.1636785418,-1.1587159634],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1004680097,0.9612547755,-0.1868528575,-0.9215021133],"action_prob":0.9621682167,"action_logp":-0.0385659561,"action_dist_inputs":[1.6126295328,-1.6234101057],"value_targets":2.970099926} +{"eps_id":1559257844,"obs":[0.1004680097,0.9612547755,-0.1868528575,-0.9215021133],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1196931005,0.7690819502,-0.2052829117,-0.6928760409],"action_prob":0.9590550661,"action_logp":-0.0418068059,"action_dist_inputs":[1.5719259977,-1.5817935467],"value_targets":1.9900000095} 
+{"eps_id":1559257844,"obs":[0.1196931005,0.7690819502,-0.2052829117,-0.6928760409],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":true,"new_obs":[0.1350747496,0.5773096681,-0.2191404253,-0.4711876214],"action_prob":0.953283608,"action_logp":-0.0478428081,"action_dist_inputs":[1.503621459,-1.51219666],"value_targets":1.0} +{"eps_id":428038203,"obs":[0.0167293884,0.031687621,-0.0048278347,0.0053721904],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0173631404,0.2268784791,-0.0047203912,-0.2888300717],"action_prob":0.5495176911,"action_logp":-0.5987143517,"action_dist_inputs":[-0.0994105414,0.0993115753],"value_targets":86.6020355225} +{"eps_id":428038203,"obs":[0.0173631404,0.2268784791,-0.0047203912,-0.2888300717],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0219007097,0.0318241566,-0.0104969926,0.002360374],"action_prob":0.8352686763,"action_logp":-0.1800018698,"action_dist_inputs":[0.8098112941,-0.8136260509],"value_targets":86.4666976929} +{"eps_id":428038203,"obs":[0.0219007097,0.0318241566,-0.0104969926,0.002360374],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0225371942,0.2270950675,-0.0104497848,-0.2936159074],"action_prob":0.5292859077,"action_logp":-0.6362265348,"action_dist_inputs":[-0.0587019213,0.0585758947],"value_targets":86.3300018311} +{"eps_id":428038203,"obs":[0.0225371942,0.2270950675,-0.0104497848,-0.2936159074],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0270790942,0.0321236476,-0.0163221024,-0.0042469315],"action_prob":0.8441901207,"action_logp":-0.1693775803,"action_dist_inputs":[0.8429422379,-0.8467988372],"value_targets":86.1919174194} 
+{"eps_id":428038203,"obs":[0.0270790942,0.0321236476,-0.0163221024,-0.0042469315],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0277215671,0.2274758369,-0.0164070409,-0.3020346761],"action_prob":0.5007499456,"action_logp":-0.6916484237,"action_dist_inputs":[-0.0015967912,0.0014030091],"value_targets":86.052444458} +{"eps_id":428038203,"obs":[0.0277215671,0.2274758369,-0.0164070409,-0.3020346761],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0322710834,0.032591518,-0.0224477351,-0.0145709887],"action_prob":0.8550007939,"action_logp":-0.1566528529,"action_dist_inputs":[0.8852187395,-0.8891552687],"value_targets":85.9115600586} +{"eps_id":428038203,"obs":[0.0322710834,0.032591518,-0.0224477351,-0.0145709887],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0329229161,0.2280280888,-0.0227391552,-0.3142510653],"action_prob":0.4635228813,"action_logp":-0.76889956,"action_dist_inputs":[0.0729318559,-0.0732364506],"value_targets":85.7692489624} +{"eps_id":428038203,"obs":[0.0329229161,0.2280280888,-0.0227391552,-0.3142510653],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0374834761,0.033237312,-0.0290241763,-0.028825175],"action_prob":0.8670765758,"action_logp":-0.1426279545,"action_dist_inputs":[0.9356479645,-0.9397065639],"value_targets":85.62550354} +{"eps_id":428038203,"obs":[0.0374834761,0.033237312,-0.0290241763,-0.028825175],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0381482244,0.2287632078,-0.0296006799,-0.3305223882],"action_prob":0.4177643955,"action_logp":-0.8728376627,"action_dist_inputs":[0.1657474339,-0.1662101746],"value_targets":85.4803085327} 
+{"eps_id":428038203,"obs":[0.0381482244,0.2287632078,-0.0296006799,-0.3305223882],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0427234881,0.0340748467,-0.0362111256,-0.0473191217],"action_prob":0.8797263503,"action_logp":-0.1281443536,"action_dist_inputs":[0.9928069115,-0.9970347285],"value_targets":85.3336486816} +{"eps_id":428038203,"obs":[0.0427234881,0.0340748467,-0.0362111256,-0.0473191217],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0434049852,0.2296968251,-0.0371575095,-0.3512034714],"action_prob":0.3647578061,"action_logp":-1.0085216761,"action_dist_inputs":[0.277048707,-0.2777240574],"value_targets":85.1855010986} +{"eps_id":428038203,"obs":[0.0434049852,0.2296968251,-0.0371575095,-0.3512034714],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0479989201,0.0351224542,-0.0441815779,-0.0704651698],"action_prob":0.8922565579,"action_logp":-0.1140015721,"action_dist_inputs":[1.0547754765,-1.0592250824],"value_targets":85.0358581543} +{"eps_id":428038203,"obs":[0.0479989201,0.0351224542,-0.0441815779,-0.0704651698],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0487013683,-0.1593391448,-0.0455908813,0.2079574317],"action_prob":0.6925981641,"action_logp":-0.3673053384,"action_dist_inputs":[0.4056705832,-0.4066235423],"value_targets":84.8847045898} +{"eps_id":428038203,"obs":[0.0487013683,-0.1593391448,-0.0455908813,0.2079574317],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0455145873,-0.3537805378,-0.0414317325,0.4859173596],"action_prob":0.2684159577,"action_logp":-1.3152173758,"action_dist_inputs":[-0.499960959,0.502713263],"value_targets":84.7320251465} 
+{"eps_id":428038203,"obs":[0.0455145873,-0.3537805378,-0.0414317325,0.4859173596],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0384389758,-0.1580991745,-0.0317133851,0.1804697812],"action_prob":0.907194972,"action_logp":-0.0973978713,"action_dist_inputs":[-1.1370960474,1.142760396],"value_targets":84.5778045654} +{"eps_id":428038203,"obs":[0.0384389758,-0.1580991745,-0.0317133851,0.1804697812],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0352769904,0.037461888,-0.0281039905,-0.122046493],"action_prob":0.7134701014,"action_logp":-0.3376147747,"action_dist_inputs":[-0.4549075365,0.45739007],"value_targets":84.4220275879} +{"eps_id":428038203,"obs":[0.0352769904,0.037461888,-0.0281039905,-0.122046493],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0360262282,0.2329749763,-0.0305449218,-0.4234617949],"action_prob":0.2563526332,"action_logp":-1.3612012863,"action_dist_inputs":[0.5317364931,-0.5332763791],"value_targets":84.2646713257} +{"eps_id":428038203,"obs":[0.0360262282,0.2329749763,-0.0305449218,-0.4234617949],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0406857282,0.0382987522,-0.0390141569,-0.1405624896],"action_prob":0.9119913578,"action_logp":-0.0921247378,"action_dist_inputs":[1.1665196419,-1.1716755629],"value_targets":84.1057281494} +{"eps_id":428038203,"obs":[0.0406857282,0.0382987522,-0.0390141569,-0.1405624896],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0414517038,-0.1562433392,-0.0418254063,0.1395613402],"action_prob":0.7819439173,"action_logp":-0.2459722906,"action_dist_inputs":[0.6376148462,-0.6394158006],"value_targets":83.9451828003} 
+{"eps_id":428038203,"obs":[0.0414517038,-0.1562433392,-0.0418254063,0.1395613402],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0383268371,0.0394519195,-0.0390341803,-0.1660179198],"action_prob":0.6192266941,"action_logp":-0.4792838693,"action_dist_inputs":[-0.2421175689,0.2441496104],"value_targets":83.7830123901} +{"eps_id":428038203,"obs":[0.0383268371,0.0394519195,-0.0390341803,-0.1660179198],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.039115876,-0.1550901681,-0.042354539,0.1140997931],"action_prob":0.8082747459,"action_logp":-0.2128532827,"action_dist_inputs":[0.7183563709,-0.720482409],"value_targets":83.6192016602} +{"eps_id":428038203,"obs":[0.039115876,-0.1550901681,-0.042354539,0.1140997931],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0360140726,-0.3495804369,-0.0400725417,0.3931250572],"action_prob":0.4336212575,"action_logp":-0.835583806,"action_dist_inputs":[-0.1326760203,0.1344156265],"value_targets":83.453742981} +{"eps_id":428038203,"obs":[0.0360140726,-0.3495804369,-0.0400725417,0.3931250572],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0290224645,-0.1539134234,-0.0322100408,0.0880819857],"action_prob":0.8652347326,"action_logp":-0.1447544694,"action_dist_inputs":[-0.9272907376,0.9321751595],"value_targets":83.286605835} +{"eps_id":428038203,"obs":[0.0290224645,-0.1539134234,-0.0322100408,0.0880819857],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0259441957,-0.3485592306,-0.0304484013,0.3704311252],"action_prob":0.4640192688,"action_logp":-0.7678291798,"action_dist_inputs":[-0.0713678524,0.0728042275],"value_targets":83.1177825928} 
+{"eps_id":428038203,"obs":[0.0259441957,-0.3485592306,-0.0304484013,0.3704311252],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0189730115,-0.1530182064,-0.0230397787,0.068305105],"action_prob":0.8577739596,"action_logp":-0.1534146816,"action_dist_inputs":[-0.8961185813,0.9008044004],"value_targets":82.9472579956} +{"eps_id":428038203,"obs":[0.0189730115,-0.1530182064,-0.0230397787,0.068305105],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0159126464,-0.3478024006,-0.0216736775,0.3536307514],"action_prob":0.4837197363,"action_logp":-0.7262495756,"action_dist_inputs":[-0.0319768824,0.0331671052],"value_targets":82.7750091553} +{"eps_id":428038203,"obs":[0.0159126464,-0.3478024006,-0.0216736775,0.3536307514],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.008956599,-0.1523790658,-0.0146010621,0.0541930422],"action_prob":0.8532576561,"action_logp":-0.1586937308,"action_dist_inputs":[-0.8779292107,0.8824537992],"value_targets":82.601020813} +{"eps_id":428038203,"obs":[0.008956599,-0.1523790658,-0.0146010621,0.0541930422],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0059090178,0.0429491661,-0.0135172009,-0.2430606335],"action_prob":0.5069037676,"action_logp":-0.6794340611,"action_dist_inputs":[-0.0133083062,0.0143087273],"value_targets":82.4252700806} +{"eps_id":428038203,"obs":[0.0059090178,0.0429491661,-0.0135172009,-0.2430606335],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0067680008,-0.1519771218,-0.0183784142,0.0453281514],"action_prob":0.8471598029,"action_logp":-0.1658658981,"action_dist_inputs":[0.8547025919,-0.857794106],"value_targets":82.2477493286} 
+{"eps_id":428038203,"obs":[0.0067680008,-0.1519771218,-0.0183784142,0.0453281514],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0037284584,-0.3468307853,-0.0174718499,0.3321563303],"action_prob":0.5204008222,"action_logp":-0.6531559825,"action_dist_inputs":[0.0412486307,-0.0403998978],"value_targets":82.0684280396} +{"eps_id":428038203,"obs":[0.0037284584,-0.3468307853,-0.0174718499,0.3321563303],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0032081574,-0.1514645517,-0.0108287241,0.0340152532],"action_prob":0.840765059,"action_logp":-0.1734430343,"action_dist_inputs":[-0.8298268914,0.8341044784],"value_targets":81.8873062134} +{"eps_id":428038203,"obs":[-0.0032081574,-0.1514645517,-0.0108287241,0.0340152532],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0062374482,-0.3464295566,-0.0101484191,0.3232620358],"action_prob":0.525854528,"action_logp":-0.6427306533,"action_dist_inputs":[0.0520965941,-0.0514137968],"value_targets":81.7043457031} +{"eps_id":428038203,"obs":[-0.0062374482,-0.3464295566,-0.0101484191,0.3232620358],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0131660393,-0.1511645764,-0.0036831785,0.0273960605],"action_prob":0.8406094313,"action_logp":-0.1736281365,"action_dist_inputs":[-0.8293017745,0.8334676623],"value_targets":81.5195465088} +{"eps_id":428038203,"obs":[-0.0131660393,-0.1511645764,-0.0036831785,0.0273960605],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0161893312,-0.3462335169,-0.0031352572,0.3189146221],"action_prob":0.52228719,"action_logp":-0.6495376825,"action_dist_inputs":[0.0448881164,-0.0443197377],"value_targets":81.3328704834} 
+{"eps_id":428038203,"obs":[-0.0161893312,-0.3462335169,-0.0031352572,0.3189146221],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0231140014,-0.1510670483,0.0032430354,0.0252446104],"action_prob":0.8438029289,"action_logp":-0.1698362827,"action_dist_inputs":[-0.8413559794,0.8454448581],"value_targets":81.144317627} +{"eps_id":428038203,"obs":[-0.0231140014,-0.1510670483,0.0032430354,0.0252446104],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0261353422,-0.3462353647,0.0037479275,0.3189489841],"action_prob":0.5096859336,"action_logp":-0.6739605665,"action_dist_inputs":[0.0196263622,-0.0191222392],"value_targets":80.9538574219} +{"eps_id":428038203,"obs":[-0.0261353422,-0.3462353647,0.0037479275,0.3189489841],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0330600515,-0.1511669904,0.0101269074,0.0274503697],"action_prob":0.8500725627,"action_logp":-0.1624335498,"action_dist_inputs":[-0.8655610681,0.8696095943],"value_targets":80.76146698} +{"eps_id":428038203,"obs":[-0.0330600515,-0.1511669904,0.0101269074,0.0274503697],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0360833891,0.0438082814,0.0106759146,-0.2620202899],"action_prob":0.5122156739,"action_logp":-0.6690094471,"action_dist_inputs":[-0.0241918415,0.0246807374],"value_targets":80.5671386719} +{"eps_id":428038203,"obs":[-0.0360833891,0.0438082814,0.0106759146,-0.2620202899],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0352072231,-0.1514644176,0.0054355091,0.0340107419],"action_prob":0.8413841128,"action_logp":-0.1727069914,"action_dist_inputs":[0.8325517774,-0.8360108733],"value_targets":80.3708496094} 
+{"eps_id":428038203,"obs":[-0.0352072231,-0.1514644176,0.0054355091,0.0340107419],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0382365137,-0.3466638923,0.0061157239,0.3284036517],"action_prob":0.4843858778,"action_logp":-0.7248734236,"action_dist_inputs":[-0.0309842657,0.0314925313],"value_targets":80.1725769043} +{"eps_id":428038203,"obs":[-0.0382365137,-0.3466638923,0.0061157239,0.3284036517],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0451697893,-0.1516295373,0.0126837976,0.0376556106],"action_prob":0.8589049578,"action_logp":-0.1520969868,"action_dist_inputs":[-0.9010911584,0.9051336646],"value_targets":79.9722976685} +{"eps_id":428038203,"obs":[-0.0451697893,-0.1516295373,0.0126837976,0.0376556106],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0482023805,0.0433082432,0.0134369098,-0.2509986162],"action_prob":0.5414395928,"action_logp":-0.6135237217,"action_dist_inputs":[-0.0828147456,0.083324872],"value_targets":79.7699966431} +{"eps_id":428038203,"obs":[-0.0482023805,0.0433082432,0.0134369098,-0.2509986162],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.047336217,0.2382357717,0.0084169367,-0.5394131541],"action_prob":0.1695770472,"action_logp":-1.7744479179,"action_dist_inputs":[0.7926046848,-0.7960230708],"value_targets":79.5656509399} +{"eps_id":428038203,"obs":[-0.047336217,0.2382357717,0.0084169367,-0.5394131541],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0425714999,0.0429965109,-0.0023713263,-0.2440901101],"action_prob":0.9269122481,"action_logp":-0.0758963525,"action_dist_inputs":[1.2668585777,-1.2733407021],"value_targets":79.3592453003} 
+{"eps_id":428038203,"obs":[-0.0425714999,0.0429965109,-0.0023713263,-0.2440901101],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0417115726,-0.1520914882,-0.0072531286,0.0478438996],"action_prob":0.8390029073,"action_logp":-0.175541088,"action_dist_inputs":[0.8237000108,-0.8271277547],"value_targets":79.1507568359} +{"eps_id":428038203,"obs":[-0.0417115726,-0.1520914882,-0.0072531286,0.0478438996],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0447534025,0.0431337096,-0.0062962505,-0.2471185923],"action_prob":0.5165293813,"action_logp":-0.6606231332,"action_dist_inputs":[-0.0328158401,0.0333257467],"value_targets":78.9401550293} +{"eps_id":428038203,"obs":[-0.0447534025,0.0431337096,-0.0062962505,-0.2471185923],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0438907258,-0.1518977582,-0.0112386225,0.043571718],"action_prob":0.8444030285,"action_logp":-0.1691253632,"action_dist_inputs":[0.8439231515,-0.847437501],"value_targets":78.727432251} +{"eps_id":428038203,"obs":[-0.0438907258,-0.1518977582,-0.0112386225,0.043571718],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0469286814,-0.3468567729,-0.0103671877,0.332687676],"action_prob":0.501588583,"action_logp":-0.6899750233,"action_dist_inputs":[0.0033829329,-0.0029716073],"value_targets":78.5125579834} +{"eps_id":428038203,"obs":[-0.0469286814,-0.3468567729,-0.0103671877,0.332687676],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0538658164,-0.1515887976,-0.0037134346,0.0367535651],"action_prob":0.8487865329,"action_logp":-0.1639475822,"action_dist_inputs":[-0.8606080413,0.8645070791],"value_targets":78.2955093384} 
+{"eps_id":428038203,"obs":[-0.0538658164,-0.1515887976,-0.0037134346,0.0367535651],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0568975918,0.0435862094,-0.0029783633,-0.2570986748],"action_prob":0.5025637746,"action_logp":-0.6880326867,"action_dist_inputs":[-0.0049786726,0.0052765836],"value_targets":78.0762710571} +{"eps_id":428038203,"obs":[-0.0568975918,0.0435862094,-0.0029783633,-0.2570986748],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0560258701,-0.1514930874,-0.0081203366,0.0346433446],"action_prob":0.8485993147,"action_logp":-0.1641681343,"action_dist_inputs":[0.8599789739,-0.8636786938],"value_targets":77.8548202515} +{"eps_id":428038203,"obs":[-0.0560258701,-0.1514930874,-0.0081203366,0.0346433446],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0590557307,0.0437443592,-0.0074274698,-0.2605905235],"action_prob":0.4881110489,"action_logp":-0.7172123194,"action_dist_inputs":[0.0238926541,-0.0236719809],"value_targets":77.6311340332} +{"eps_id":428038203,"obs":[-0.0590557307,0.0437443592,-0.0074274698,-0.2605905235],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0581808425,-0.151270777,-0.0126392804,0.0297404546],"action_prob":0.8542125225,"action_logp":-0.1575752497,"action_dist_inputs":[0.8821177483,-0.8859124184],"value_targets":77.4051818848} +{"eps_id":428038203,"obs":[-0.0581808425,-0.151270777,-0.0126392804,0.0297404546],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0612062588,0.0440301225,-0.0120444717,-0.2669033408],"action_prob":0.4675469995,"action_logp":-0.7602553964,"action_dist_inputs":[0.0650521442,-0.0649425983],"value_targets":77.1769561768} 
+{"eps_id":428038203,"obs":[-0.0612062588,0.0440301225,-0.0120444717,-0.2669033408],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0603256561,-0.1509178728,-0.0173825379,0.0219564494],"action_prob":0.8612937331,"action_logp":-0.1493197083,"action_dist_inputs":[0.9110773206,-0.9149992466],"value_targets":76.9464187622} +{"eps_id":428038203,"obs":[-0.0603256561,-0.1509178728,-0.0173825379,0.0219564494],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0633440167,0.0444489866,-0.01694341,-0.2761597931],"action_prob":0.4407059252,"action_logp":-0.8193774819,"action_dist_inputs":[0.1191294193,-0.1191682145],"value_targets":76.7135543823} +{"eps_id":428038203,"obs":[-0.0633440167,0.0444489866,-0.01694341,-0.2761597931],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0624550357,0.2398085296,-0.0224666055,-0.5741381645],"action_prob":0.1304387599,"action_logp":-2.0368514061,"action_dist_inputs":[0.9465005398,-0.9505842328],"value_targets":76.4783401489} +{"eps_id":428038203,"obs":[-0.0624550357,0.2398085296,-0.0224666055,-0.5741381645],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0576588623,0.045008637,-0.0339493677,-0.2886166871],"action_prob":0.936352849,"action_logp":-0.0657629222,"action_dist_inputs":[1.3407611847,-1.3478757143],"value_targets":76.2407455444} +{"eps_id":428038203,"obs":[-0.0576588623,0.045008637,-0.0339493677,-0.2886166871],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0567586906,-0.1496131569,-0.0397217013,-0.0068314103],"action_prob":0.8845328689,"action_logp":-0.1226956174,"action_dist_inputs":[1.0158753395,-1.0201981068],"value_targets":76.0007553101} 
+{"eps_id":428038203,"obs":[-0.0567586906,-0.1496131569,-0.0397217013,-0.0068314103],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0597509556,-0.3441435993,-0.03985833,0.2730589807],"action_prob":0.662103653,"action_logp":-0.4123331904,"action_dist_inputs":[0.3360647559,-0.3366182148],"value_targets":75.7583389282} +{"eps_id":428038203,"obs":[-0.0597509556,-0.3441435993,-0.03985833,0.2730589807],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.066633828,-0.1484762579,-0.0343971513,-0.0319241807],"action_prob":0.7529622912,"action_logp":-0.2837401628,"action_dist_inputs":[-0.555680573,0.5587932467],"value_targets":75.5134735107} +{"eps_id":428038203,"obs":[-0.066633828,-0.1484762579,-0.0343971513,-0.0319241807],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0696033537,0.0471216515,-0.0350356326,-0.3352582455],"action_prob":0.3063541949,"action_logp":-1.18301332,"action_dist_inputs":[0.4081511796,-0.409068346],"value_targets":75.26612854} +{"eps_id":428038203,"obs":[-0.0696033537,0.0471216515,-0.0350356326,-0.3352582455],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0686609149,-0.1474846154,-0.0417407975,-0.0538263619],"action_prob":0.9015282989,"action_logp":-0.1036638618,"action_dist_inputs":[1.1047000885,-1.1096221209],"value_targets":75.0162963867} +{"eps_id":428038203,"obs":[-0.0686609149,-0.1474846154,-0.0417407975,-0.0538263619],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0716106072,-0.341983974,-0.0428173281,0.2254003286],"action_prob":0.7378347516,"action_logp":-0.3040353656,"action_dist_inputs":[0.5167245865,-0.5180203915],"value_targets":74.7639312744} 
+{"eps_id":428038203,"obs":[-0.0716106072,-0.341983974,-0.0428173281,0.2254003286],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0784502923,-0.5364686847,-0.0383093208,0.5042755604],"action_prob":0.3257759809,"action_logp":-1.1215453148,"action_dist_inputs":[-0.3623927236,0.3649597466],"value_targets":74.5090255737} +{"eps_id":428038203,"obs":[-0.0784502923,-0.5364686847,-0.0383093208,0.5042755604],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0891796649,-0.3408283293,-0.0282238089,0.1997701675],"action_prob":0.8909375072,"action_logp":-0.1154809743,"action_dist_inputs":[-1.0474901199,1.052863121],"value_targets":74.2515411377} +{"eps_id":428038203,"obs":[-0.0891796649,-0.3408283293,-0.0282238089,0.1997701675],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0959962308,-0.5355355144,-0.0242284052,0.4834178686],"action_prob":0.3409899175,"action_logp":-1.0759023428,"action_dist_inputs":[-0.3283134401,0.3305725157],"value_targets":73.9914550781} +{"eps_id":428038203,"obs":[-0.0959962308,-0.5355355144,-0.0242284052,0.4834178686],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1067069396,-0.3400801122,-0.0145600485,0.1831984222],"action_prob":0.889085114,"action_logp":-0.1175622866,"action_dist_inputs":[-1.0381215811,1.0433088541],"value_targets":73.7287445068} +{"eps_id":428038203,"obs":[-0.1067069396,-0.3400801122,-0.0145600485,0.1831984222],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1135085449,-0.5349907279,-0.0108960802,0.4712528586],"action_prob":0.3419375122,"action_logp":-1.0731272697,"action_dist_inputs":[-0.3263214827,0.328350395],"value_targets":73.4633712769} 
+{"eps_id":428038203,"obs":[-0.1135085449,-0.5349907279,-0.0108960802,0.4712528586],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1242083609,-0.7299571037,-0.0014710226,0.7604816556],"action_prob":0.1093751639,"action_logp":-2.2129714489,"action_dist_inputs":[-1.0460416079,1.0510976315],"value_targets":73.1953277588} +{"eps_id":428038203,"obs":[-0.1242083609,-0.7299571037,-0.0014710226,0.7604816556],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1388075054,-0.5348148942,0.0137386108,0.4673362076],"action_prob":0.9418253899,"action_logp":-0.0599353835,"action_dist_inputs":[-1.3884578943,1.3959130049],"value_targets":72.9245758057} +{"eps_id":428038203,"obs":[-0.1388075054,-0.5348148942,0.0137386108,0.4673362076],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1495037973,-0.339889735,0.0230853353,0.1790150702],"action_prob":0.9004080892,"action_logp":-0.1049071699,"action_dist_inputs":[-1.098380208,1.103387475],"value_targets":72.6510848999} +{"eps_id":428038203,"obs":[-0.1495037973,-0.339889735,0.0230853353,0.1790150702],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1563015878,-0.1451056153,0.0266656373,-0.1062967703],"action_prob":0.7239108682,"action_logp":-0.3230870366,"action_dist_inputs":[-0.4810492992,0.4828950763],"value_targets":72.3748321533} +{"eps_id":428038203,"obs":[-0.1563015878,-0.1451056153,0.0266656373,-0.1062967703],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1592037082,-0.3405993581,0.0245397016,0.1946783513],"action_prob":0.6967615485,"action_logp":-0.3613120019,"action_dist_inputs":[0.414925158,-0.4169986844],"value_targets":72.0957946777} 
+{"eps_id":428038203,"obs":[-0.1592037082,-0.3405993581,0.0245397016,0.1946783513],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1660156846,-0.1458368599,0.028433267,-0.0901634768],"action_prob":0.7493667006,"action_logp":-0.2885268331,"action_dist_inputs":[-0.5466590524,0.5485783815],"value_targets":71.8139266968} +{"eps_id":428038203,"obs":[-0.1660156846,-0.1458368599,0.028433267,-0.0901634768],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.168932423,0.0488662422,0.0266299993,-0.3737418354],"action_prob":0.3346143365,"action_logp":-1.0947766304,"action_dist_inputs":[0.3427254558,-0.3446627259],"value_targets":71.5292205811} +{"eps_id":428038203,"obs":[-0.168932423,0.0488662422,0.0266299993,-0.3737418354],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1679551005,-0.1466236711,0.0191551615,-0.0727826357],"action_prob":0.8903064728,"action_logp":-0.1161895543,"action_dist_inputs":[1.0441787243,-1.0496968031],"value_targets":71.2416381836} +{"eps_id":428038203,"obs":[-0.1679551005,-0.1466236711,0.0191551615,-0.0727826357],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1708875746,0.0482184999,0.0176995099,-0.3593610227],"action_prob":0.3476627171,"action_logp":-1.0565224886,"action_dist_inputs":[0.3137566447,-0.315572232],"value_targets":70.9511489868} +{"eps_id":428038203,"obs":[-0.1708875746,0.0482184999,0.0176995099,-0.3593610227],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1699232012,-0.1471505314,0.0105122887,-0.0611499846],"action_prob":0.888740778,"action_logp":-0.1179497018,"action_dist_inputs":[1.0362403393,-1.0417019129],"value_targets":70.6577301025} 
+{"eps_id":428038203,"obs":[-0.1699232012,-0.1471505314,0.0105122887,-0.0611499846],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1728662252,-0.3424216211,0.0092892889,0.2348310202],"action_prob":0.6481106877,"action_logp":-0.4336937964,"action_dist_inputs":[0.3044930995,-0.3062517643],"value_targets":70.3613433838} +{"eps_id":428038203,"obs":[-0.1728662252,-0.3424216211,0.0092892889,0.2348310202],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1797146499,-0.1474336088,0.0139859095,-0.0549073629],"action_prob":0.7794694304,"action_logp":-0.2491417974,"action_dist_inputs":[-0.6302474737,0.632329464],"value_targets":70.061958313} +{"eps_id":428038203,"obs":[-0.1797146499,-0.1474336088,0.0139859095,-0.0549073629],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1826633215,-0.3427532613,0.0128877619,0.2421552092],"action_prob":0.6281321645,"action_logp":-0.4650047123,"action_dist_inputs":[0.2612386346,-0.2629734576],"value_targets":69.7595596313} +{"eps_id":428038203,"obs":[-0.1826633215,-0.3427532613,0.0128877619,0.2421552092],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1895183921,-0.1478177607,0.0177308656,-0.0464348793],"action_prob":0.7924432158,"action_logp":-0.2326344401,"action_dist_inputs":[-0.6688136458,0.6709022522],"value_targets":69.4540939331} +{"eps_id":428038203,"obs":[-0.1895183921,-0.1478177607,0.0177308656,-0.0464348793],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1924747378,0.0470455103,0.0168021694,-0.3334712386],"action_prob":0.3974761665,"action_logp":-0.9226202965,"action_dist_inputs":[0.2071534693,-0.2088387907],"value_targets":69.1455535889} 
+{"eps_id":428038203,"obs":[-0.1924747378,0.0470455103,0.0168021694,-0.3334712386],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1915338337,0.2419243306,0.010132744,-0.6208086014],"action_prob":0.1230130643,"action_logp":-2.0954647064,"action_dist_inputs":[0.9794207811,-0.9847807288],"value_targets":68.8338928223} +{"eps_id":428038203,"obs":[-0.1915338337,0.2419243306,0.010132744,-0.6208086014],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1866953522,0.0466623567,-0.0022834286,-0.3249517083],"action_prob":0.9391005635,"action_logp":-0.0628326908,"action_dist_inputs":[1.363861084,-1.3718372583],"value_targets":68.5190811157} +{"eps_id":428038203,"obs":[-0.1866953522,0.0466623567,-0.0022834286,-0.3249517083],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1857620925,-0.1484270096,-0.0087824622,-0.0329897441],"action_prob":0.8836607933,"action_logp":-0.1236819848,"action_dist_inputs":[1.0110914707,-1.0164721012],"value_targets":68.2010955811} +{"eps_id":428038203,"obs":[-0.1857620925,-0.1484270096,-0.0087824622,-0.0329897441],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1887306422,0.0468197763,-0.0094422577,-0.3284306526],"action_prob":0.36730057,"action_logp":-1.0015747547,"action_dist_inputs":[0.2710603178,-0.2727546394],"value_targets":67.8798904419} +{"eps_id":428038203,"obs":[-0.1887306422,0.0468197763,-0.0094422577,-0.3284306526],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1877942383,-0.1481664926,-0.0160108693,-0.0387402624],"action_prob":0.8887654543,"action_logp":-0.117921941,"action_dist_inputs":[1.0363548994,-1.0418367386],"value_targets":67.5554504395} 
+{"eps_id":428038203,"obs":[-0.1877942383,-0.1481664926,-0.0160108693,-0.0387402624],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1907575727,-0.3430552185,-0.0167856757,0.2488483787],"action_prob":0.6573132277,"action_logp":-0.4195945859,"action_dist_inputs":[0.3247548938,-0.3265890181],"value_targets":67.227722168} +{"eps_id":428038203,"obs":[-0.1907575727,-0.3430552185,-0.0167856757,0.2488483787],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1976186782,-0.1476976275,-0.0118087074,-0.0490814857],"action_prob":0.761890471,"action_logp":-0.2719525099,"action_dist_inputs":[-0.5805865526,0.5824853778],"value_targets":66.8966903687} +{"eps_id":428038203,"obs":[-0.1976186782,-0.1476976275,-0.0118087074,-0.0490814857],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2005726248,0.0475916378,-0.0127903372,-0.3454666436],"action_prob":0.3339610398,"action_logp":-1.0967309475,"action_dist_inputs":[0.344161272,-0.3461625576],"value_targets":66.5623168945} +{"eps_id":428038203,"obs":[-0.2005726248,0.0475916378,-0.0127903372,-0.3454666436],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1996207982,-0.1473460495,-0.0196996704,-0.0568442792],"action_prob":0.8967162371,"action_logp":-0.1090158373,"action_dist_inputs":[1.0777561665,-1.0835032463],"value_targets":66.2245635986} +{"eps_id":428038203,"obs":[-0.1996207982,-0.1473460495,-0.0196996704,-0.0568442792],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2025677264,-0.3421801031,-0.0208365563,0.2295587361],"action_prob":0.6933472753,"action_logp":-0.3662243187,"action_dist_inputs":[0.4068213701,-0.4089936316],"value_targets":65.883392334} 
+{"eps_id":428038203,"obs":[-0.2025677264,-0.3421801031,-0.0208365563,0.2295587361],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2094113231,-0.1467666775,-0.0162453819,-0.0696232617],"action_prob":0.7303832173,"action_logp":-0.3141858876,"action_dist_inputs":[-0.497484237,0.4990836084],"value_targets":65.5387802124} +{"eps_id":428038203,"obs":[-0.2094113231,-0.1467666775,-0.0162453819,-0.0696232617],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2123466581,-0.3416520059,-0.017637847,0.2178902179],"action_prob":0.7063269615,"action_logp":-0.347677052,"action_dist_inputs":[0.4376192391,-0.439991951],"value_targets":65.1906890869} +{"eps_id":428038203,"obs":[-0.2123466581,-0.3416520059,-0.017637847,0.2178902179],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2191796899,-0.1462824047,-0.0132800424,-0.0803038403],"action_prob":0.7190667391,"action_logp":-0.3298011124,"action_dist_inputs":[-0.4692046344,0.4706324339],"value_targets":64.8390808105} +{"eps_id":428038203,"obs":[-0.2191796899,-0.1462824047,-0.0132800424,-0.0803038403],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2221053392,0.0490273684,-0.0148861185,-0.3771468997],"action_prob":0.2836158574,"action_logp":-1.2601345778,"action_dist_inputs":[0.4620231092,-0.4645727575],"value_targets":64.4839172363} +{"eps_id":428038203,"obs":[-0.2221053392,0.0490273684,-0.0148861185,-0.3771468997],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2211247981,-0.1458800286,-0.0224290565,-0.0891945437],"action_prob":0.9075021744,"action_logp":-0.0970593169,"action_dist_inputs":[1.1386617422,-1.1448489428],"value_targets":64.1251678467} 
+{"eps_id":428038203,"obs":[-0.2211247981,-0.1458800286,-0.0224290565,-0.0891945437],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2240424007,0.0495561101,-0.0242129471,-0.3888686001],"action_prob":0.2567694783,"action_logp":-1.3595765829,"action_dist_inputs":[0.5300408602,-0.5327867866],"value_targets":63.7627983093} +{"eps_id":428038203,"obs":[-0.2240424007,0.0495561101,-0.0242129471,-0.3888686001],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2230512798,-0.1452139467,-0.0319903195,-0.1039170772],"action_prob":0.9136142731,"action_logp":-0.0903468207,"action_dist_inputs":[1.1761071682,-1.1824787855],"value_targets":63.3967666626} +{"eps_id":428038203,"obs":[-0.2230512798,-0.1452139467,-0.0319903195,-0.1039170772],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2259555459,-0.3398631811,-0.0340686627,0.1785038561],"action_prob":0.7748739123,"action_logp":-0.2550549805,"action_dist_inputs":[0.6165090799,-0.619530499],"value_targets":63.0270347595} +{"eps_id":428038203,"obs":[-0.2259555459,-0.3398631811,-0.0340686627,0.1785038561],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2327528149,-0.1442706883,-0.0304985847,-0.1247289032],"action_prob":0.6234474778,"action_logp":-0.4724907577,"action_dist_inputs":[-0.2516816258,0.25252527],"value_targets":62.6535720825} +{"eps_id":428038203,"obs":[-0.2327528149,-0.1442706883,-0.0304985847,-0.1247289032],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.235638231,0.0512746237,-0.0329931639,-0.4268756509],"action_prob":0.2049684376,"action_logp":-1.5848993063,"action_dist_inputs":[0.6760971546,-0.6794286966],"value_targets":62.2763366699} 
+{"eps_id":428038203,"obs":[-0.235638231,0.0512746237,-0.0329931639,-0.4268756509],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2346127331,0.2468479723,-0.0415306762,-0.7297742367],"action_prob":0.0761246309,"action_logp":-2.5753834248,"action_dist_inputs":[1.244687438,-1.2515178919],"value_targets":61.8952865601} +{"eps_id":428038203,"obs":[-0.2346127331,0.2468479723,-0.0415306762,-0.7297742367],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2296757847,0.4425185919,-0.0561261587,-1.0352336168],"action_prob":0.0488121472,"action_logp":-3.0197761059,"action_dist_inputs":[1.4802470207,-1.4894852638],"value_targets":61.5103912354} +{"eps_id":428038203,"obs":[-0.2296757847,0.4425185919,-0.0561261587,-1.0352336168],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2208254039,0.2481859773,-0.0768308342,-0.7606860995],"action_prob":0.9592861533,"action_logp":-0.0415658876,"action_dist_inputs":[1.5742844343,-1.585334897],"value_targets":61.1216087341} +{"eps_id":428038203,"obs":[-0.2208254039,0.2481859773,-0.0768308342,-0.7606860995],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.215861693,0.0542018488,-0.0920445547,-0.4931343794],"action_prob":0.9536172748,"action_logp":-0.0474928468,"action_dist_inputs":[1.5068660975,-1.5164686441],"value_targets":60.7288970947} +{"eps_id":428038203,"obs":[-0.215861693,0.0542018488,-0.0920445547,-0.4931343794],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2147776484,-0.1395095587,-0.1019072458,-0.230820179],"action_prob":0.9404137135,"action_logp":-0.0614353642,"action_dist_inputs":[1.3756064177,-1.383287549],"value_targets":60.3322181702} 
+{"eps_id":428038203,"obs":[-0.2147776484,-0.1395095587,-0.1019072458,-0.230820179],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2175678462,-0.3330388367,-0.1065236479,0.0280587934],"action_prob":0.9030961394,"action_logp":-0.1019262746,"action_dist_inputs":[1.1135005951,-1.11860919],"value_targets":59.9315338135} +{"eps_id":428038203,"obs":[-0.2175678462,-0.3330388367,-0.1065236479,0.0280587934],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2242286205,-0.5264846683,-0.1059624702,0.2853240371],"action_prob":0.7738062739,"action_logp":-0.2564337254,"action_dist_inputs":[0.6141568422,-0.6157731414],"value_targets":59.526802063} +{"eps_id":428038203,"obs":[-0.2242286205,-0.5264846683,-0.1059624702,0.2853240371],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2347583175,-0.719948411,-0.1002559885,0.5427972674],"action_prob":0.4446046352,"action_logp":-0.8105698228,"action_dist_inputs":[-0.1101694107,0.1123253182],"value_targets":59.117980957} +{"eps_id":428038203,"obs":[-0.2347583175,-0.719948411,-0.1002559885,0.5427972674],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2491572797,-0.5235709548,-0.0894000456,0.2202843279],"action_prob":0.8262002468,"action_logp":-0.1909180731,"action_dist_inputs":[-0.7770327926,0.7819008231],"value_targets":58.7050323486} +{"eps_id":428038203,"obs":[-0.2491572797,-0.5235709548,-0.0894000456,0.2202843279],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2596286833,-0.3272923827,-0.0849943608,-0.0992063507],"action_prob":0.4700146317,"action_logp":-0.7549914718,"action_dist_inputs":[0.0607108027,-0.0593747459],"value_targets":58.2879104614} 
+{"eps_id":428038203,"obs":[-0.2596286833,-0.3272923827,-0.0849943608,-0.0992063507],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2661745548,-0.5210998654,-0.0869784877,0.1654971838],"action_prob":0.8533454537,"action_logp":-0.1585907936,"action_dist_inputs":[0.8788327575,-0.8822522759],"value_targets":57.8665771484} +{"eps_id":428038203,"obs":[-0.2661745548,-0.5210998654,-0.0869784877,0.1654971838],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2765965462,-0.7148760557,-0.0836685374,0.4295231998],"action_prob":0.6207626462,"action_logp":-0.4768064618,"action_dist_inputs":[0.246640116,-0.2461463511],"value_targets":57.4409866333} +{"eps_id":428038203,"obs":[-0.2765965462,-0.7148760557,-0.0836685374,0.4295231998],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2908940613,-0.5186750293,-0.0750780776,0.1116813868],"action_prob":0.7268871069,"action_logp":-0.3189840913,"action_dist_inputs":[-0.4874953032,0.4913907051],"value_targets":57.0110969543} +{"eps_id":428038203,"obs":[-0.2908940613,-0.5186750293,-0.0750780776,0.1116813868],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3012675643,-0.7126453519,-0.0728444457,0.3797650337],"action_prob":0.6810269356,"action_logp":-0.3841533959,"action_dist_inputs":[0.3790849149,-0.3794103265],"value_targets":56.5768661499} +{"eps_id":428038203,"obs":[-0.3012675643,-0.7126453519,-0.0728444457,0.3797650337],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3155204654,-0.9066613913,-0.065249145,0.6486198902],"action_prob":0.3248826563,"action_logp":-1.1242911816,"action_dist_inputs":[-0.3640324771,0.3673898578],"value_targets":56.1382484436} 
+{"eps_id":428038203,"obs":[-0.3155204654,-0.9066613913,-0.065249145,0.6486198902],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3336536884,-0.710694015,-0.0522767492,0.336124748],"action_prob":0.871515274,"action_logp":-0.1375219077,"action_dist_inputs":[-0.9543248415,0.960098207],"value_targets":55.6952018738} +{"eps_id":428038203,"obs":[-0.3336536884,-0.710694015,-0.0522767492,0.336124748],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3478675783,-0.5148686171,-0.0455542542,0.0274255183],"action_prob":0.6468803287,"action_logp":-0.4355939925,"action_dist_inputs":[-0.3012580574,0.3040962517],"value_targets":55.2476768494} +{"eps_id":428038203,"obs":[-0.3478675783,-0.5148686171,-0.0455542542,0.0274255183],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3581649661,-0.3191239834,-0.0450057462,-0.2792751491],"action_prob":0.2591033578,"action_logp":-1.3505282402,"action_dist_inputs":[0.524525702,-0.5261083245],"value_targets":54.7956352234} +{"eps_id":428038203,"obs":[-0.3581649661,-0.3191239834,-0.0450057462,-0.2792751491],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3645474315,-0.5135759711,-0.050591249,-0.0011197145],"action_prob":0.9019329548,"action_logp":-0.1032150686,"action_dist_inputs":[1.10663414,-1.112254858],"value_targets":54.3390235901} +{"eps_id":428038203,"obs":[-0.3645474315,-0.5135759711,-0.050591249,-0.0011197145],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3748189509,-0.7079372406,-0.0506136417,0.2751816809],"action_prob":0.7750592828,"action_logp":-0.2548157275,"action_dist_inputs":[0.6174885631,-0.6196143627],"value_targets":53.8778038025} 
+{"eps_id":428038203,"obs":[-0.3748189509,-0.7079372406,-0.0506136417,0.2751816809],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3889777064,-0.9023019075,-0.0451100096,0.5514812469],"action_prob":0.4474902451,"action_logp":-0.8041005731,"action_dist_inputs":[-0.1044644937,0.1063519642],"value_targets":53.4119224548} +{"eps_id":428038203,"obs":[-0.3889777064,-0.9023019075,-0.0451100096,0.5514812469],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4070237279,-1.0967621803,-0.0340803824,0.8296171427],"action_prob":0.1730408221,"action_logp":-1.7542277575,"action_dist_inputs":[-0.7796996236,0.7845281959],"value_targets":52.9413375854} +{"eps_id":428038203,"obs":[-0.4070237279,-1.0967621803,-0.0340803824,0.8296171427],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4289589822,-1.2914021015,-0.01748804,1.1113897562],"action_prob":0.0831086859,"action_logp":-2.4876060486,"action_dist_inputs":[-1.1969691515,1.2038706541],"value_targets":52.4659957886} +{"eps_id":428038203,"obs":[-0.4289589822,-1.2914021015,-0.01748804,1.1113897562],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4547870159,-1.0960549116,0.0047397558,0.8132724762],"action_prob":0.9442022443,"action_logp":-0.0574149042,"action_dist_inputs":[-1.4099905491,1.4186153412],"value_targets":51.9858551025} +{"eps_id":428038203,"obs":[-0.4547870159,-1.0960549116,0.0047397558,0.8132724762],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4767081141,-1.2912414074,0.0210052058,1.1074424982],"action_prob":0.0772679076,"action_logp":-2.5604765415,"action_dist_inputs":[-1.2366273403,1.243432641],"value_targets":51.5008621216} 
+{"eps_id":428038203,"obs":[-0.4767081141,-1.2912414074,0.0210052058,1.1074424982],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.502532959,-1.0964018106,0.0431540571,0.8214226365],"action_prob":0.9465717673,"action_logp":-0.0549084842,"action_dist_inputs":[-1.432901144,1.4416060448],"value_targets":51.0109710693} +{"eps_id":428038203,"obs":[-0.502532959,-1.0964018106,0.0431540571,0.8214226365],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5244609714,-0.9018960595,0.0595825091,0.5426189899],"action_prob":0.9304864407,"action_logp":-0.0720477626,"action_dist_inputs":[-1.293648243,1.3005377054],"value_targets":50.5161323547} +{"eps_id":428038203,"obs":[-0.5244609714,-0.9018960595,0.0595825091,0.5426189899],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5424988866,-0.7076598406,0.0704348907,0.2692886293],"action_prob":0.8892222643,"action_logp":-0.117408067,"action_dist_inputs":[-1.0392081738,1.0436128378],"value_targets":50.0162963867} +{"eps_id":428038203,"obs":[-0.5424988866,-0.7076598406,0.0704348907,0.2692886293],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5566521287,-0.513610065,0.0758206621,-0.0003736233],"action_prob":0.7644588351,"action_logp":-0.2685870826,"action_dist_inputs":[-0.5879654884,0.5893172026],"value_targets":49.5114097595} +{"eps_id":428038203,"obs":[-0.5566521287,-0.513610065,0.0758206621,-0.0003736233],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.566924274,-0.7097328901,0.0758131891,0.3152353466],"action_prob":0.5386254787,"action_logp":-0.6187348366,"action_dist_inputs":[0.0763570815,-0.0784531608],"value_targets":49.0014266968} 
+{"eps_id":428038203,"obs":[-0.566924274,-0.7097328901,0.0758131891,0.3152353466],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5811189413,-0.9058482647,0.0821178928,0.630831182],"action_prob":0.1875018626,"action_logp":-1.673966527,"action_dist_inputs":[-0.7323542833,0.7339706421],"value_targets":48.486289978} +{"eps_id":428038203,"obs":[-0.5811189413,-0.9058482647,0.0821178928,0.630831182],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5992359519,-0.7119623423,0.0947345197,0.3650978208],"action_prob":0.9167565107,"action_logp":-0.0869133845,"action_dist_inputs":[-1.1970404387,1.2020317316],"value_targets":47.9659461975} +{"eps_id":428038203,"obs":[-0.5992359519,-0.7119623423,0.0947345197,0.3650978208],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6134751439,-0.5183054209,0.1020364761,0.1037252471],"action_prob":0.8567832708,"action_logp":-0.1545703113,"action_dist_inputs":[-0.8934368491,0.895388782],"value_targets":47.4403495789} +{"eps_id":428038203,"obs":[-0.6134751439,-0.5183054209,0.1020364761,0.1037252471],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6238412857,-0.3247825205,0.1041109785,-0.155102551],"action_prob":0.6840208769,"action_logp":-0.3797668815,"action_dist_inputs":[-0.386802882,0.3855092525],"value_targets":46.9094467163} +{"eps_id":428038203,"obs":[-0.6238412857,-0.3247825205,0.1041109785,-0.155102551],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6303369403,-0.5212291479,0.1010089293,0.1685251892],"action_prob":0.6378747225,"action_logp":-0.4496133924,"action_dist_inputs":[0.2808929682,-0.285258621],"value_targets":46.3731765747} 
+{"eps_id":428038203,"obs":[-0.6303369403,-0.5212291479,0.1010089293,0.1685251892],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6407614946,-0.3276871443,0.1043794304,-0.090661861],"action_prob":0.7584092021,"action_logp":-0.276532203,"action_dist_inputs":[-0.5724008679,0.5715767741],"value_targets":45.8314933777} +{"eps_id":428038203,"obs":[-0.6407614946,-0.3276871443,0.1043794304,-0.090661861],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6473152637,-0.5241383314,0.1025661975,0.2330429554],"action_prob":0.5303531289,"action_logp":-0.6342121959,"action_dist_inputs":[0.0588383526,-0.0627237186],"value_targets":45.2843360901} +{"eps_id":428038203,"obs":[-0.6473152637,-0.5241383314,0.1025661975,0.2330429554],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6577980518,-0.3306200504,0.1072270572,-0.0256073531],"action_prob":0.8130534887,"action_logp":-0.2069583535,"action_dist_inputs":[-0.7351487279,0.7348259687],"value_targets":44.7316513062} +{"eps_id":428038203,"obs":[-0.6577980518,-0.3306200504,0.1072270572,-0.0256073531],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6644104123,-0.1371861696,0.1067149043,-0.2826273739],"action_prob":0.582310915,"action_logp":-0.540750742,"action_dist_inputs":[-0.167851761,0.1644154936],"value_targets":44.1733856201} +{"eps_id":428038203,"obs":[-0.6644104123,-0.1371861696,0.1067149043,-0.2826273739],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6671541333,-0.3336555958,0.1010623574,0.0417149663],"action_prob":0.7310189605,"action_logp":-0.3133158684,"action_dist_inputs":[0.4968351722,-0.5029632449],"value_targets":43.6094818115} 
+{"eps_id":428038203,"obs":[-0.6671541333,-0.3336555958,0.1010623574,0.0417149663],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6738272309,-0.1401171535,0.1018966585,-0.2174496353],"action_prob":0.6690474749,"action_logp":-0.4019002914,"action_dist_inputs":[-0.3534233272,0.3504567742],"value_targets":43.0398788452} +{"eps_id":428038203,"obs":[-0.6738272309,-0.1401171535,0.1018966585,-0.2174496353],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6766296029,-0.3365369439,0.0975476652,0.1055576354],"action_prob":0.6502361298,"action_logp":-0.4304196835,"action_dist_inputs":[0.3071821928,-0.3128952682],"value_targets":42.4645233154} +{"eps_id":428038203,"obs":[-0.6766296029,-0.3365369439,0.0975476652,0.1055576354],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6833603382,-0.1429384202,0.0996588171,-0.154826045],"action_prob":0.7374734879,"action_logp":-0.3045251667,"action_dist_inputs":[-0.5176851749,0.5151928067],"value_targets":41.8833580017} +{"eps_id":428038203,"obs":[-0.6833603382,-0.1429384202,0.0996588171,-0.154826045],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6862190962,-0.3393355906,0.0965622962,0.1675592065],"action_prob":0.557585299,"action_logp":-0.5841397643,"action_dist_inputs":[0.113023378,-0.1183445156],"value_targets":41.2963218689} +{"eps_id":428038203,"obs":[-0.6862190962,-0.3393355906,0.0965622962,0.1675592065],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6930058002,-0.1457189471,0.0999134853,-0.093167603],"action_prob":0.7893420458,"action_logp":-0.2365555167,"action_dist_inputs":[-0.6614673734,0.6594969034],"value_targets":40.7033538818} 
+{"eps_id":428038203,"obs":[-0.6930058002,-0.1457189471,0.0999134853,-0.093167603],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.695920229,0.047839541,0.0980501324,-0.3527316451],"action_prob":0.5397548676,"action_logp":-0.6166401505,"action_dist_inputs":[-0.0821438283,0.0772120729],"value_targets":40.1044006348} +{"eps_id":428038203,"obs":[-0.695920229,0.047839541,0.0980501324,-0.3527316451],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6949633956,0.2414404303,0.090995498,-0.6129575968],"action_prob":0.2420200258,"action_logp":-1.4187347889,"action_dist_inputs":[0.5671337843,-0.5745026469],"value_targets":39.4993934631} +{"eps_id":428038203,"obs":[-0.6949633956,0.2414404303,0.090995498,-0.6129575968],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6901345849,0.045172561,0.0787363425,-0.2930580378],"action_prob":0.8976916671,"action_logp":-0.1079286411,"action_dist_inputs":[1.0813778639,-1.0904569626],"value_targets":38.8882751465} +{"eps_id":428038203,"obs":[-0.6901345849,0.045172561,0.0787363425,-0.2930580378],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6892311573,-0.1509785354,0.0728751868,0.0233816449],"action_prob":0.7156896591,"action_logp":-0.3345086277,"action_dist_inputs":[0.4580575228,-0.4651228189],"value_targets":38.2709846497} +{"eps_id":428038203,"obs":[-0.6892311573,-0.1509785354,0.0728751868,0.0233816449],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6922507286,-0.3470658362,0.073342815,0.3381382227],"action_prob":0.3391802013,"action_logp":-1.0812237263,"action_dist_inputs":[-0.335524857,0.3314248323],"value_targets":37.6474609375} 
+{"eps_id":428038203,"obs":[-0.6922507286,-0.3470658362,0.073342815,0.3381382227],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6991920471,-0.1530600041,0.0801055804,0.0694552958],"action_prob":0.8623819351,"action_logp":-0.1480570436,"action_dist_inputs":[-0.9177280068,0.9174879789],"value_targets":37.0176353455} +{"eps_id":428038203,"obs":[-0.6991920471,-0.1530600041,0.0801055804,0.0694552958],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7022532225,-0.3492335975,0.081494689,0.3862979114],"action_prob":0.2801182568,"action_logp":-1.2725434303,"action_dist_inputs":[-0.473798722,0.4700765014],"value_targets":36.3814506531} +{"eps_id":428038203,"obs":[-0.7022532225,-0.3492335975,0.081494689,0.3862979114],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7092379332,-0.5454121232,0.0892206505,0.7035220861],"action_prob":0.1217484474,"action_logp":-2.1057982445,"action_dist_inputs":[-0.9878008366,0.9881751537],"value_targets":35.7388381958} +{"eps_id":428038203,"obs":[-0.7092379332,-0.5454121232,0.0892206505,0.7035220861],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7201461792,-0.3516325057,0.1032910869,0.440204829],"action_prob":0.9233585596,"action_logp":-0.0797376707,"action_dist_inputs":[-1.2421642542,1.2467156649],"value_targets":35.0897369385} +{"eps_id":428038203,"obs":[-0.7201461792,-0.3516325057,0.1032910869,0.440204829],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.727178812,-0.1581124365,0.1120951846,0.1817854643],"action_prob":0.8934240937,"action_logp":-0.1126939133,"action_dist_inputs":[-1.0625194311,1.0636847019],"value_targets":34.4340782166} 
+{"eps_id":428038203,"obs":[-0.727178812,-0.1581124365,0.1120951846,0.1817854643],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7303410769,-0.3546450138,0.1157308966,0.5076211691],"action_prob":0.178412348,"action_logp":-1.7236578465,"action_dist_inputs":[-0.7648056149,0.7623355985],"value_targets":33.7717971802} +{"eps_id":428038203,"obs":[-0.7303410769,-0.3546450138,0.1157308966,0.5076211691],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.73743397,-0.1613276303,0.125883311,0.2535338104],"action_prob":0.9045624137,"action_logp":-0.1003039628,"action_dist_inputs":[-1.1234489679,1.1255303621],"value_targets":33.1028251648} +{"eps_id":428038203,"obs":[-0.73743397,-0.1613276303,0.125883311,0.2535338104],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7406604886,0.0317931175,0.1309539974,0.0030569166],"action_prob":0.8545830846,"action_logp":-0.1571415514,"action_dist_inputs":[-0.8862513304,0.8847573996],"value_targets":32.4270935059} +{"eps_id":428038203,"obs":[-0.7406604886,0.0317931175,0.1309539974,0.0030569166],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7400246263,0.224817574,0.1310151368,-0.2456101477],"action_prob":0.735098958,"action_logp":-0.3077501655,"action_dist_inputs":[-0.5127280951,0.5079205632],"value_targets":31.7445411682} +{"eps_id":428038203,"obs":[-0.7400246263,0.224817574,0.1310151368,-0.2456101477],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7355282903,0.4178486168,0.1261029243,-0.4942669272],"action_prob":0.4975922704,"action_logp":-0.6979742646,"action_dist_inputs":[0.0009922883,-0.0086386045],"value_targets":31.0550918579} 
+{"eps_id":428038203,"obs":[-0.7355282903,0.4178486168,0.1261029243,-0.4942669272],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7271713018,0.6109876037,0.1162175909,-0.7446989417],"action_prob":0.2501351833,"action_logp":-1.3857537508,"action_dist_inputs":[0.5441467166,-0.5537447333],"value_targets":30.3586788177} +{"eps_id":428038203,"obs":[-0.7271713018,0.6109876037,0.1162175909,-0.7446989417],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7149515748,0.4144699872,0.101323612,-0.4178198278],"action_prob":0.8798368573,"action_logp":-0.1280188113,"action_dist_inputs":[0.9902394414,-1.0006465912],"value_targets":29.6552295685} +{"eps_id":428038203,"obs":[-0.7149515748,0.4144699872,0.101323612,-0.4178198278],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.706662178,0.2180691063,0.0929672122,-0.0949912593],"action_prob":0.7007389069,"action_logp":-0.3556199074,"action_dist_inputs":[0.4207215607,-0.4300974905],"value_targets":28.9446773529} +{"eps_id":428038203,"obs":[-0.706662178,0.2180691063,0.0929672122,-0.0949912593],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.702300787,0.4117441177,0.0910673887,-0.3569562435],"action_prob":0.6362088919,"action_logp":-0.4522282779,"action_dist_inputs":[-0.2826443315,0.2763028443],"value_targets":28.2269458771} +{"eps_id":428038203,"obs":[-0.702300787,0.4117441177,0.0910673887,-0.3569562435],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6940659285,0.215453431,0.0839282647,-0.0370034687],"action_prob":0.6424819827,"action_logp":-0.4424165487,"action_dist_inputs":[0.2885145545,-0.2976383567],"value_targets":27.5019664764} 
+{"eps_id":428038203,"obs":[-0.6940659285,0.215453431,0.0839282647,-0.0370034687],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6897568107,0.0192344785,0.0831881985,0.2809349],"action_prob":0.3129869401,"action_logp":-1.1615937948,"action_dist_inputs":[-0.395938307,0.3902535439],"value_targets":26.7696628571} +{"eps_id":428038203,"obs":[-0.6897568107,0.0192344785,0.0831881985,0.2809349],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6893721223,0.2130773515,0.0888068974,0.0156052224],"action_prob":0.8535519838,"action_logp":-0.1583488584,"action_dist_inputs":[-0.8820791245,0.8806564808],"value_targets":26.0299625397} +{"eps_id":428038203,"obs":[-0.6893721223,0.2130773515,0.0888068974,0.0156052224],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.685110569,0.4068208933,0.0891189948,-0.2477899045],"action_prob":0.7366172671,"action_logp":-0.3056868017,"action_dist_inputs":[-0.5167067647,0.5117537379],"value_targets":25.2827911377} +{"eps_id":428038203,"obs":[-0.685110569,0.4068208933,0.0891189948,-0.2477899045],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6769741774,0.6005645394,0.0841631964,-0.5110857487],"action_prob":0.4976882339,"action_logp":-0.6977814436,"action_dist_inputs":[0.0004557292,-0.0087913861],"value_targets":24.5280704498} +{"eps_id":428038203,"obs":[-0.6769741774,0.6005645394,0.0841631964,-0.5110857487],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6649628878,0.404364109,0.0739414841,-0.1931120902],"action_prob":0.7547678947,"action_logp":-0.2813450098,"action_dist_inputs":[0.5569280386,-0.5672767758],"value_targets":23.7657279968} 
+{"eps_id":428038203,"obs":[-0.6649628878,0.404364109,0.0739414841,-0.1931120902],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6568756104,0.5983546972,0.0700792447,-0.4615839124],"action_prob":0.547576189,"action_logp":-0.6022536159,"action_dist_inputs":[-0.0993506387,0.0915317014],"value_targets":22.9956855774} +{"eps_id":428038203,"obs":[-0.6568756104,0.5983546972,0.0700792447,-0.4615839124],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6449084878,0.4023159146,0.0608475655,-0.1476610154],"action_prob":0.7229188085,"action_logp":-0.3244583905,"action_dist_inputs":[0.4743818045,-0.4846043587],"value_targets":22.2178649902} +{"eps_id":428038203,"obs":[-0.6449084878,0.4023159146,0.0608475655,-0.1476610154],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6368622184,0.5965161324,0.0578943454,-0.4205440879],"action_prob":0.5871882439,"action_logp":-0.5324097872,"action_dist_inputs":[-0.1798379868,0.1725158393],"value_targets":21.4321861267} +{"eps_id":428038203,"obs":[-0.6368622184,0.5965161324,0.0578943454,-0.4205440879],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6249318719,0.400623709,0.0494834632,-0.1101863459],"action_prob":0.6944465041,"action_logp":-0.3646401763,"action_dist_inputs":[0.4054617584,-0.415528357],"value_targets":20.6385707855} +{"eps_id":428038203,"obs":[-0.6249318719,0.400623709,0.0494834632,-0.1101863459],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6169193983,0.2048288882,0.0472797342,0.1976885945],"action_prob":0.3820074499,"action_logp":-0.9623151422,"action_dist_inputs":[-0.2439513803,0.2370848954],"value_targets":19.8369407654} 
+{"eps_id":428038203,"obs":[-0.6169193983,0.2048288882,0.0472797342,0.1976885945],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6128228307,0.3992438018,0.0512335077,-0.0797128528],"action_prob":0.8211662173,"action_logp":-0.1970297247,"action_dist_inputs":[-0.7633844018,0.7608841658],"value_targets":19.0272140503} +{"eps_id":428038203,"obs":[-0.6128228307,0.3992438018,0.0512335077,-0.0797128528],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.604837954,0.2034262717,0.0496392511,0.2286840975],"action_prob":0.3459194303,"action_logp":-1.0615494251,"action_dist_inputs":[-0.3217012584,0.3153233826],"value_targets":18.2093067169} +{"eps_id":428038203,"obs":[-0.604837954,0.2034262717,0.0496392511,0.2286840975],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6007694006,0.0076313964,0.0542129315,0.5366026163],"action_prob":0.1653138697,"action_logp":-1.7999093533,"action_dist_inputs":[-0.8105971813,0.8086126447],"value_targets":17.3831367493} +{"eps_id":428038203,"obs":[-0.6007694006,0.0076313964,0.0542129315,0.5366026163],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6006168127,0.2019508481,0.0649449825,0.2614824474],"action_prob":0.8979154229,"action_logp":-0.107679382,"action_dist_inputs":[-1.0861939192,1.0880810022],"value_targets":16.5486240387} +{"eps_id":428038203,"obs":[-0.6006168127,0.2019508481,0.0649449825,0.2614824474],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5965777636,0.0059648412,0.0701746345,0.5739220977],"action_prob":0.1499683261,"action_logp":-1.8973311186,"action_dist_inputs":[-0.8681067824,0.8667426705],"value_targets":15.7056808472} 
+{"eps_id":428038203,"obs":[-0.5965777636,0.0059648412,0.0701746345,0.5739220977],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5964584947,0.2000364065,0.0816530734,0.304145664],"action_prob":0.9030751586,"action_logp":-0.1019495279,"action_dist_inputs":[-1.1147148609,1.1171547174],"value_targets":14.8542232513} +{"eps_id":428038203,"obs":[-0.5964584947,0.2000364065,0.0816530734,0.304145664],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5924577713,0.3939055204,0.0877359882,0.0382902101],"action_prob":0.864641726,"action_logp":-0.145440042,"action_dist_inputs":[-0.9275094271,0.9268810153],"value_targets":13.9941644669} +{"eps_id":428038203,"obs":[-0.5924577713,0.3939055204,0.0877359882,0.0382902101],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5845796466,0.5876669288,0.0885017961,-0.2254726887],"action_prob":0.7778161168,"action_logp":-0.2512651682,"action_dist_inputs":[-0.6285244226,0.6244602799],"value_targets":13.125418663} +{"eps_id":428038203,"obs":[-0.5845796466,0.5876669288,0.0885017961,-0.2254726887],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5728263259,0.3913989365,0.0839923397,0.0937626809],"action_prob":0.4095699787,"action_logp":-0.8926475048,"action_dist_inputs":[-0.1868714988,0.1788718849],"value_targets":12.2478981018} +{"eps_id":428038203,"obs":[-0.5728263259,0.3913989365,0.0839923397,0.0937626809],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5649983287,0.5852228403,0.0858675912,-0.1712835878],"action_prob":0.8046973944,"action_logp":-0.2172890007,"action_dist_inputs":[-0.7095393538,0.706376493],"value_targets":11.3615131378} 
+{"eps_id":428038203,"obs":[-0.5649983287,0.5852228403,0.0858675912,-0.1712835878],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5532938838,0.7790175676,0.0824419186,-0.4356905222],"action_prob":0.6466149688,"action_logp":-0.4360042214,"action_dist_inputs":[-0.3056546748,0.2985382378],"value_targets":10.4661741257} +{"eps_id":428038203,"obs":[-0.5532938838,0.7790175676,0.0824419186,-0.4356905222],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5377135277,0.5828312039,0.0737281069,-0.1182000414],"action_prob":0.6038564444,"action_logp":-0.5044187903,"action_dist_inputs":[0.2055773437,-0.2159823924],"value_targets":9.5617923737} +{"eps_id":428038203,"obs":[-0.5377135277,0.5828312039,0.0737281069,-0.1182000414],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5260568857,0.7768235803,0.0713641122,-0.386741668],"action_prob":0.6863853931,"action_logp":-0.3763160408,"action_dist_inputs":[-0.3947675526,0.3885068893],"value_targets":8.6482753754} +{"eps_id":428038203,"obs":[-0.5260568857,0.7768235803,0.0713641122,-0.386741668],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5105203986,0.9708637595,0.0636292771,-0.6560978293],"action_prob":0.4421174824,"action_logp":-0.8161796331,"action_dist_inputs":[0.1113055721,-0.1212671921],"value_targets":7.7255306244} +{"eps_id":428038203,"obs":[-0.5105203986,0.9708637595,0.0636292771,-0.6560978293],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4911031425,0.7749164104,0.0505073182,-0.3440772295],"action_prob":0.7810252309,"action_logp":-0.2471477985,"action_dist_inputs":[0.6301348805,-0.641516149],"value_targets":6.7934651375} 
+{"eps_id":428038203,"obs":[-0.4911031425,0.7749164104,0.0505073182,-0.3440772295],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4756048024,0.5791137218,0.0436257757,-0.0359048471],"action_prob":0.5321618915,"action_logp":-0.6308075786,"action_dist_inputs":[0.0596475676,-0.0691778064],"value_targets":5.8519849777} +{"eps_id":428038203,"obs":[-0.4756048024,0.5791137218,0.0436257757,-0.0359048471],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.464022547,0.3833941817,0.0429076776,0.2702170312],"action_prob":0.2675468326,"action_logp":-1.3184607029,"action_dist_inputs":[-0.5060041547,0.5011006594],"value_targets":4.9009947777} +{"eps_id":428038203,"obs":[-0.464022547,0.3833941817,0.0429076776,0.2702170312],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4563546479,0.1876870245,0.0483120196,0.5761185288],"action_prob":0.1440463662,"action_logp":-1.9376200438,"action_dist_inputs":[-0.8913269043,0.8907539248],"value_targets":3.9403989315} +{"eps_id":428038203,"obs":[-0.4563546479,0.1876870245,0.0483120196,0.5761185288],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4526009262,0.3820996583,0.0598343909,0.2990380824],"action_prob":0.90229249,"action_logp":-0.1028165296,"action_dist_inputs":[-1.1101332903,1.1128269434],"value_targets":2.970099926} +{"eps_id":428038203,"obs":[-0.4526009262,0.3820996583,0.0598343909,0.2990380824],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4449589252,0.5763199329,0.0658151507,0.0258098114],"action_prob":0.8664435744,"action_logp":-0.1433582604,"action_dist_inputs":[-0.9349334836,0.9349397421],"value_targets":1.9900000095} 
+{"eps_id":428038203,"obs":[-0.4449589252,0.5763199329,0.0658151507,0.0258098114],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.4334325194,0.7704393268,0.0663313493,-0.2454031855],"action_prob":0.7867861986,"action_logp":-0.2397987545,"action_dist_inputs":[-0.6545006633,0.6511603594],"value_targets":1.0} +{"eps_id":578471551,"obs":[0.0359052904,-0.0459722243,0.0003621813,0.0016722598],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[0.0349858478,0.1491445303,0.0003956264,-0.2908963859],"action_prob":0.4976669848,"action_logp":-0.6978241205,"action_dist_inputs":[0.0048557571,-0.0044763507],"value_targets":86.6020355225} +{"eps_id":578471551,"obs":[0.0349858478,0.1491445303,0.0003956264,-0.2908963859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0379687361,-0.0459830575,-0.0054223011,0.0019113041],"action_prob":0.8499773145,"action_logp":-0.1625456214,"action_dist_inputs":[0.8654904962,-0.8689327836],"value_targets":86.4666976929} +{"eps_id":578471551,"obs":[0.0379687361,-0.0459830575,-0.0054223011,0.0019113041],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0370490775,0.1492162347,-0.005384075,-0.2924774885],"action_prob":0.484428674,"action_logp":-0.7247850895,"action_dist_inputs":[0.0313322656,-0.0309733115],"value_targets":86.3300018311} +{"eps_id":578471551,"obs":[0.0370490775,0.1492162347,-0.005384075,-0.2924774885],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0400334001,-0.0458285362,-0.0112336241,-0.0014974722],"action_prob":0.8552795053,"action_logp":-0.1563269496,"action_dist_inputs":[0.8865680695,-0.8900563717],"value_targets":86.1919174194} 
+{"eps_id":578471551,"obs":[0.0400334001,-0.0458285362,-0.0112336241,-0.0014974722],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0391168296,-0.2407875955,-0.0112635735,0.2876200378],"action_prob":0.5365890861,"action_logp":-0.6225226521,"action_dist_inputs":[0.0734576806,-0.0731608421],"value_targets":86.052444458} +{"eps_id":578471551,"obs":[0.0391168296,-0.2407875955,-0.0112635735,0.2876200378],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0343010761,-0.0455068424,-0.005511173,-0.0085939206],"action_prob":0.8381456137,"action_logp":-0.1765634269,"action_dist_inputs":[-0.8202969432,0.8241977096],"value_targets":85.9115600586} +{"eps_id":578471551,"obs":[0.0343010761,-0.0455068424,-0.005511173,-0.0085939206],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0333909392,-0.2405493259,-0.0056830514,0.2823450863],"action_prob":0.5378279686,"action_logp":-0.6202165484,"action_dist_inputs":[0.0759033635,-0.0756982043],"value_targets":85.7692489624} +{"eps_id":578471551,"obs":[0.0333909392,-0.2405493259,-0.0056830514,0.2823450863],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0285799541,-0.0453467742,-0.0000361498,-0.0121248225],"action_prob":0.8391723633,"action_logp":-0.1753391325,"action_dist_inputs":[-0.8241209388,0.8279622197],"value_targets":85.62550354} +{"eps_id":578471551,"obs":[0.0285799541,-0.0453467742,-0.0000361498,-0.0121248225],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0276730191,0.1497756988,-0.0002786462,-0.3048191667],"action_prob":0.4679661989,"action_logp":-0.7593592405,"action_dist_inputs":[0.0642315745,-0.064079538],"value_targets":85.4803085327} 
+{"eps_id":578471551,"obs":[0.0276730191,0.1497756988,-0.0002786462,-0.3048191667],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0306685325,-0.0453422852,-0.0063750292,-0.0122241192],"action_prob":0.8593959808,"action_logp":-0.1515254825,"action_dist_inputs":[0.9033129215,-0.9069693685],"value_targets":85.3336486816} +{"eps_id":578471551,"obs":[0.0306685325,-0.0453422852,-0.0063750292,-0.0122241192],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0297616869,0.1498705149,-0.0066195116,-0.3069116175],"action_prob":0.452886641,"action_logp":-0.7921134233,"action_dist_inputs":[0.0945686698,-0.0944455341],"value_targets":85.1855010986} +{"eps_id":578471551,"obs":[0.0297616869,0.1498705149,-0.0066195116,-0.3069116175],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0327590965,-0.0451564938,-0.0127577437,-0.0163236335],"action_prob":0.8648257256,"action_logp":-0.1452272683,"action_dist_inputs":[0.92612499,-0.9298379421],"value_targets":85.0358581543} +{"eps_id":578471551,"obs":[0.0327590965,-0.0451564938,-0.0127577437,-0.0163236335],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0318559669,0.1501460671,-0.013084216,-0.3130043149],"action_prob":0.4295471311,"action_logp":-0.8450238109,"action_dist_inputs":[0.1418733746,-0.1418257803],"value_targets":84.8847045898} +{"eps_id":578471551,"obs":[0.0318559669,0.1501460671,-0.013084216,-0.3130043149],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0348588899,-0.0447870605,-0.0193443038,-0.0244762935],"action_prob":0.8720313311,"action_logp":-0.1369299591,"action_dist_inputs":[0.9576131105,-0.9614260793],"value_targets":84.7320251465} 
+{"eps_id":578471551,"obs":[0.0348588899,-0.0447870605,-0.0193443038,-0.0244762935],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0339631476,-0.2396263331,-0.0198338293,0.2620410621],"action_prob":0.6018098593,"action_logp":-0.5078137517,"action_dist_inputs":[0.2064668089,-0.206544891],"value_targets":84.5778045654} +{"eps_id":578471551,"obs":[0.0339631476,-0.2396263331,-0.0198338293,0.2620410621],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0291706212,-0.0442269631,-0.014593007,-0.036831107],"action_prob":0.8051536679,"action_logp":-0.216722101,"action_dist_inputs":[-0.7076258063,0.7111960053],"value_targets":84.4220275879} +{"eps_id":578471551,"obs":[0.0291706212,-0.0442269631,-0.014593007,-0.036831107],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0282860808,-0.2391366363,-0.0153296301,0.2512120903],"action_prob":0.6154722571,"action_logp":-0.4853654206,"action_dist_inputs":[0.2350677252,-0.2353061885],"value_targets":84.2646713257} +{"eps_id":578471551,"obs":[0.0282860808,-0.2391366363,-0.0153296301,0.2512120903],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0235033482,-0.4340363741,-0.0103053879,0.5390205979],"action_prob":0.2008328289,"action_logp":-1.6052824259,"action_dist_inputs":[-0.6888229251,0.6922744513],"value_targets":84.1057281494} +{"eps_id":578471551,"obs":[0.0235033482,-0.4340363741,-0.0103053879,0.5390205979],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0148226209,-0.238771081,0.000475024,0.2431084812],"action_prob":0.9243714213,"action_logp":-0.0786412954,"action_dist_inputs":[-1.2485024929,1.2547777891],"value_targets":83.9451828003} 
+{"eps_id":578471551,"obs":[0.0148226209,-0.238771081,0.000475024,0.2431084812],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0100472001,-0.4338998199,0.0053371936,0.5359411836],"action_prob":0.1900021732,"action_logp":-1.6607197523,"action_dist_inputs":[-0.7233175635,0.726678431],"value_targets":83.7830123901} +{"eps_id":578471551,"obs":[0.0100472001,-0.4338998199,0.0053371936,0.5359411836],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0013692037,-0.2388533056,0.016056018,0.2449447513],"action_prob":0.9274538755,"action_logp":-0.0753122047,"action_dist_inputs":[-1.2709728479,1.2772475481],"value_targets":83.6192016602} +{"eps_id":578471551,"obs":[0.0013692037,-0.2388533056,0.016056018,0.2449447513],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0034078625,-0.0439643189,0.0209549125,-0.0426307917],"action_prob":0.8289753795,"action_logp":-0.1875648499,"action_dist_inputs":[-0.7875089049,0.7908739448],"value_targets":83.453742981} +{"eps_id":578471551,"obs":[-0.0034078625,-0.0439643189,0.0209549125,-0.0426307917],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0042871488,-0.2393803895,0.020102296,0.2565892637],"action_prob":0.5447397828,"action_logp":-0.6074470878,"action_dist_inputs":[0.0895501599,-0.0898888111],"value_targets":83.286605835} +{"eps_id":578471551,"obs":[-0.0042871488,-0.2393803895,0.020102296,0.2565892637],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0090747569,-0.0445511304,0.0252340809,-0.0296859145],"action_prob":0.8427219987,"action_logp":-0.17111817,"action_dist_inputs":[-0.8375837803,0.8410384655],"value_targets":83.1177825928} 
+{"eps_id":578471551,"obs":[-0.0090747569,-0.0445511304,0.0252340809,-0.0296859145],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0099657793,0.1502000242,0.0246403627,-0.31430161],"action_prob":0.4936538637,"action_logp":-0.7059206963,"action_dist_inputs":[0.0125858989,-0.0128000034],"value_targets":82.9472579956} +{"eps_id":578471551,"obs":[-0.0099657793,0.1502000242,0.0246403627,-0.31430161],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0069617787,-0.0452640988,0.0183543302,-0.0139508946],"action_prob":0.8469924331,"action_logp":-0.1660635173,"action_dist_inputs":[0.853661716,-0.8575424552],"value_targets":82.7750091553} +{"eps_id":578471551,"obs":[-0.0069617787,-0.0452640988,0.0183543302,-0.0139508946],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0078670606,0.1495898813,0.0180753134,-0.3007867634],"action_prob":0.5112490058,"action_logp":-0.6708985567,"action_dist_inputs":[-0.0225359928,0.0224675313],"value_targets":82.601020813} +{"eps_id":578471551,"obs":[-0.0078670606,0.1495898813,0.0180753134,-0.3007867634],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0048752627,-0.0457849726,0.0120595777,-0.0024585049],"action_prob":0.8426547647,"action_logp":-0.1711979359,"action_dist_inputs":[0.8371677995,-0.8409471512],"value_targets":82.4252700806} +{"eps_id":578471551,"obs":[-0.0048752627,-0.0457849726,0.0120595777,-0.0024585049],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0057909624,0.1491619647,0.0120104076,-0.2913122177],"action_prob":0.5210576057,"action_logp":-0.651894629,"action_dist_inputs":[-0.0421234034,0.0421569645],"value_targets":82.2477493286} 
+{"eps_id":578471551,"obs":[-0.0057909624,0.1491619647,0.0120104076,-0.2913122177],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0028077229,0.3441106379,0.006184163,-0.5801831484],"action_prob":0.1592516601,"action_logp":-1.8372695446,"action_dist_inputs":[0.8300465345,-0.8337600827],"value_targets":82.0684280396} +{"eps_id":578471551,"obs":[-0.0028077229,0.3441106379,0.006184163,-0.5801831484],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0040744897,0.1489025652,-0.0054194997,-0.2855585217],"action_prob":0.9296969175,"action_logp":-0.0728966296,"action_dist_inputs":[1.2877056599,-1.2943382263],"value_targets":81.8873062134} +{"eps_id":578471551,"obs":[0.0040744897,0.1489025652,-0.0054194997,-0.2855585217],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0070525408,-0.0461416766,-0.0111306701,0.0054102214],"action_prob":0.8508937359,"action_logp":-0.1614680588,"action_dist_inputs":[0.8689604998,-0.8726674318],"value_targets":81.7043457031} +{"eps_id":578471551,"obs":[0.0070525408,-0.0461416766,-0.0111306701,0.0054102214],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0061297072,0.1491381228,-0.0110224653,-0.2907636762],"action_prob":0.4822725952,"action_logp":-0.7292457819,"action_dist_inputs":[0.0355016813,-0.0354377367],"value_targets":81.5195465088} +{"eps_id":578471551,"obs":[0.0061297072,0.1491381228,-0.0110224653,-0.2907636762],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0091124699,-0.0458249375,-0.0168377385,-0.0015773914],"action_prob":0.8584362268,"action_logp":-0.1526428759,"action_dist_inputs":[0.8992869854,-0.9030751586],"value_targets":81.3328704834} 
+{"eps_id":578471551,"obs":[0.0091124699,-0.0458249375,-0.0168377385,-0.0015773914],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0081959711,-0.2407014221,-0.0168692879,0.2857458293],"action_prob":0.546174705,"action_logp":-0.6048163772,"action_dist_inputs":[0.092594184,-0.0926323682],"value_targets":81.144317627} +{"eps_id":578471551,"obs":[0.0081959711,-0.2407014221,-0.0168692879,0.2857458293],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0033819426,-0.4355787635,-0.0111543704,0.5730609298],"action_prob":0.1678134948,"action_logp":-1.7849020958,"action_dist_inputs":[-0.798813045,0.8023904562],"value_targets":80.9538574219} +{"eps_id":578471551,"obs":[0.0033819426,-0.4355787635,-0.0111543704,0.5730609298],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0053296327,-0.24030222,0.0003068479,0.2768850029],"action_prob":0.9301791787,"action_logp":-0.0723780394,"action_dist_inputs":[-1.2915239334,1.2979214191],"value_targets":80.76146698} +{"eps_id":578471551,"obs":[-0.0053296327,-0.24030222,0.0003068479,0.2768850029],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0101356767,-0.0451846421,0.0058445479,-0.0157011319],"action_prob":0.8411952853,"action_logp":-0.1729314327,"action_dist_inputs":[-0.8318260312,0.8353229165],"value_targets":80.5671386719} +{"eps_id":578471551,"obs":[-0.0101356767,-0.0451846421,0.0058445479,-0.0157011319],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0110393697,0.149853006,0.0055305255,-0.3065343201],"action_prob":0.4785621166,"action_logp":-0.7369692922,"action_dist_inputs":[0.0427969247,-0.0430072509],"value_targets":80.3708496094} 
+{"eps_id":578471551,"obs":[-0.0110393697,0.149853006,0.0055305255,-0.3065343201],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0080423094,-0.0453473143,-0.0006001611,-0.0121123483],"action_prob":0.8564463258,"action_logp":-0.1549636573,"action_dist_inputs":[0.8910592794,-0.8950231075],"value_targets":80.1725769043} +{"eps_id":578471551,"obs":[-0.0080423094,-0.0453473143,-0.0006001611,-0.0121123483],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0089492556,-0.2404606491,-0.000842408,0.2803811729],"action_prob":0.5290104747,"action_logp":-0.6367470622,"action_dist_inputs":[0.057987228,-0.0581851117],"value_targets":79.9722976685} +{"eps_id":578471551,"obs":[-0.0089492556,-0.2404606491,-0.000842408,0.2803811729],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0137584694,-0.0453266948,0.0047652153,-0.0125673413],"action_prob":0.8429939747,"action_logp":-0.1707954407,"action_dist_inputs":[-0.8385930061,0.8420829177],"value_targets":79.7699966431} +{"eps_id":578471551,"obs":[-0.0137584694,-0.0453266948,0.0047652153,-0.0125673413],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0146650029,-0.2405166626,0.0045138681,0.2816152573],"action_prob":0.5167751908,"action_logp":-0.6601473689,"action_dist_inputs":[0.0334544592,-0.0336715579],"value_targets":79.5656509399} +{"eps_id":578471551,"obs":[-0.0146650029,-0.2405166626,0.0045138681,0.2816152573],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0194753353,-0.4357027113,0.0101461736,0.5757184029],"action_prob":0.1514051855,"action_logp":-1.8877956867,"action_dist_inputs":[-0.8600659966,0.8635563254],"value_targets":79.3592453003} 
+{"eps_id":578471551,"obs":[-0.0194753353,-0.4357027113,0.0101461736,0.5757184029],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0281893909,-0.2407244444,0.0216605421,0.286248982],"action_prob":0.9341607094,"action_logp":-0.0681068003,"action_dist_inputs":[-1.3229976892,1.329433322],"value_targets":79.1507568359} +{"eps_id":578471551,"obs":[-0.0281893909,-0.2407244444,0.0216605421,0.286248982],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0330038778,-0.4361484945,0.0273855217,0.5856840014],"action_prob":0.1351732463,"action_logp":-2.0011980534,"action_dist_inputs":[-0.9262125492,0.929759264],"value_targets":78.9401550293} +{"eps_id":578471551,"obs":[-0.0330038778,-0.4361484945,0.0273855217,0.5856840014],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.04172685,-0.2414206266,0.0390992016,0.3017520607],"action_prob":0.9377708435,"action_logp":-0.0642496347,"action_dist_inputs":[-1.3530575037,1.3596248627],"value_targets":78.727432251} +{"eps_id":578471551,"obs":[-0.04172685,-0.2414206266,0.0390992016,0.3017520607],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0465552621,-0.0468771197,0.0451342426,0.0216520652],"action_prob":0.8836402297,"action_logp":-0.1237052679,"action_dist_inputs":[-1.0118196011,1.0155436993],"value_targets":78.5125579834} +{"eps_id":578471551,"obs":[-0.0465552621,-0.0468771197,0.0451342426,0.0216520652],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0474928021,0.1475694776,0.0455672853,-0.2564558387],"action_prob":0.6483738422,"action_logp":-0.4332877994,"action_dist_inputs":[-0.305870384,0.3060284853],"value_targets":78.2955093384} 
+{"eps_id":578471551,"obs":[-0.0474928021,0.1475694776,0.0455672853,-0.2564558387],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0445414148,0.3420122266,0.0404381678,-0.5344250798],"action_prob":0.2337002903,"action_logp":-1.4537158012,"action_dist_inputs":[0.5920203328,-0.5955135822],"value_targets":78.0762710571} +{"eps_id":578471551,"obs":[-0.0445414148,0.3420122266,0.0404381678,-0.5344250798],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0377011709,0.1463456303,0.0297496654,-0.229279533],"action_prob":0.913033247,"action_logp":-0.090982981,"action_dist_inputs":[1.1724706888,-1.1787756681],"value_targets":77.8548202515} +{"eps_id":578471551,"obs":[-0.0377011709,0.1463456303,0.0297496654,-0.229279533],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0347742587,0.3410301208,0.0251640752,-0.5124319196],"action_prob":0.2456237227,"action_logp":-1.4039545059,"action_dist_inputs":[0.5594201684,-0.5626704097],"value_targets":77.6311340332} +{"eps_id":578471551,"obs":[-0.0347742587,0.3410301208,0.0251640752,-0.5124319196],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0279536545,0.5357887149,0.0149154374,-0.7970799208],"action_prob":0.0878776386,"action_logp":-2.4318099022,"action_dist_inputs":[1.1668270826,-1.173001647],"value_targets":77.4051818848} +{"eps_id":578471551,"obs":[-0.0279536545,0.5357887149,0.0149154374,-0.7970799208],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0172378793,0.340465337,-0.0010261612,-0.4997424185],"action_prob":0.9466179013,"action_logp":-0.0548597351,"action_dist_inputs":[1.4335079193,-1.4419118166],"value_targets":77.1769561768} 
+{"eps_id":578471551,"obs":[-0.0172378793,0.340465337,-0.0010261612,-0.4997424185],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.010428573,0.145357877,-0.0110210096,-0.2073830664],"action_prob":0.916885078,"action_logp":-0.0867731571,"action_dist_inputs":[1.1973093748,-1.2034482956],"value_targets":76.9464187622} +{"eps_id":578471551,"obs":[-0.010428573,0.145357877,-0.0110210096,-0.2073830664],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0075214156,-0.0496047661,-0.0151686715,0.0818030164],"action_prob":0.7854780555,"action_logp":-0.2414627522,"action_dist_inputs":[0.6474030614,-0.6504773498],"value_targets":76.7135543823} +{"eps_id":578471551,"obs":[-0.0075214156,-0.0496047661,-0.0151686715,0.0818030164],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0085135112,-0.2445060313,-0.0135326106,0.3696618676],"action_prob":0.3654177189,"action_logp":-1.0067141056,"action_dist_inputs":[-0.2755913138,0.2763344646],"value_targets":76.4783401489} +{"eps_id":578471551,"obs":[-0.0085135112,-0.2445060313,-0.0135326106,0.3696618676],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0134036317,-0.049194444,-0.0061393734,0.0727428421],"action_prob":0.8873380423,"action_logp":-0.1195292771,"action_dist_inputs":[-1.0297987461,1.0340352058],"value_targets":76.2407455444} +{"eps_id":578471551,"obs":[-0.0134036317,-0.049194444,-0.0061393734,0.0727428421],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.014387521,-0.2442278415,-0.0046845167,0.3634824753],"action_prob":0.363361299,"action_logp":-1.0123575926,"action_dist_inputs":[-0.2800751626,0.280729413],"value_targets":76.0007553101} 
+{"eps_id":578471551,"obs":[-0.014387521,-0.2442278415,-0.0046845167,0.3634824753],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0192720778,-0.0490396284,0.0025851324,0.0693261102],"action_prob":0.8890452981,"action_logp":-0.1176071167,"action_dist_inputs":[-1.0384128094,1.0426127911],"value_targets":75.7583389282} +{"eps_id":578471551,"obs":[-0.0192720778,-0.0490396284,0.0025851324,0.0693261102],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0202528704,0.1460451633,0.0039716545,-0.2225400805],"action_prob":0.6487293839,"action_logp":-0.4327396452,"action_dist_inputs":[-0.3064164817,0.3070423603],"value_targets":75.5134735107} +{"eps_id":578471551,"obs":[-0.0202528704,0.1460451633,0.0039716545,-0.2225400805],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0173319671,-0.0491333269,-0.0004791469,0.0713930205],"action_prob":0.7832270861,"action_logp":-0.2443326116,"action_dist_inputs":[0.640681982,-0.6438901424],"value_targets":75.26612854} +{"eps_id":578471551,"obs":[-0.0173319671,-0.0491333269,-0.0004791469,0.0713930205],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0183146335,0.1459954828,0.0009487134,-0.2214410454],"action_prob":0.6461673975,"action_logp":-0.4366966486,"action_dist_inputs":[-0.3007952273,0.3014394641],"value_targets":75.0162963867} +{"eps_id":578471551,"obs":[-0.0183146335,0.1459954828,0.0009487134,-0.2214410454],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.015394723,-0.04914001,-0.0034801075,0.0715409964],"action_prob":0.7859698534,"action_logp":-0.2408368737,"action_dist_inputs":[0.6487998962,-0.6520015001],"value_targets":74.7639312744} 
+{"eps_id":578471551,"obs":[-0.015394723,-0.04914001,-0.0034801075,0.0715409964],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0163775235,-0.2442118973,-0.0020492875,0.3631238937],"action_prob":0.3599298894,"action_logp":-1.021846056,"action_dist_inputs":[-0.2875132859,0.2881551683],"value_targets":74.5090255737} +{"eps_id":578471551,"obs":[-0.0163775235,-0.2442118973,-0.0020492875,0.3631238937],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.021261761,-0.0490608774,0.0052131903,0.0697955042],"action_prob":0.8901563287,"action_logp":-0.1163581833,"action_dist_inputs":[-1.0440689325,1.0482702255],"value_targets":74.2515411377} +{"eps_id":578471551,"obs":[-0.021261761,-0.0490608774,0.0052131903,0.0697955042],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0222429782,0.1459859461,0.0066091004,-0.2212380916],"action_prob":0.6551088095,"action_logp":-0.4229539335,"action_dist_inputs":[-0.3204702139,0.3211022615],"value_targets":73.9914550781} +{"eps_id":578471551,"obs":[-0.0222429782,0.1459859461,0.0066091004,-0.2212380916],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0193232596,-0.0492298454,0.0021843386,0.0735222697],"action_prob":0.778062582,"action_logp":-0.2509483099,"action_dist_inputs":[0.6256088614,-0.6288028955],"value_targets":73.7287445068} +{"eps_id":578471551,"obs":[-0.0193232596,-0.0492298454,0.0021843386,0.0735222697],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0203078575,0.1458607167,0.0036547841,-0.2184706926],"action_prob":0.6557338834,"action_logp":-0.4220002592,"action_dist_inputs":[-0.3218355775,0.3225046098],"value_targets":73.4633712769} 
+{"eps_id":578471551,"obs":[-0.0203078575,0.1458607167,0.0036547841,-0.2184706926],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0173906423,-0.0493132845,-0.0007146299,0.0753628612],"action_prob":0.7787938714,"action_logp":-0.2500089109,"action_dist_inputs":[0.6277406216,-0.6309104562],"value_targets":73.1953277588} +{"eps_id":578471551,"obs":[-0.0173906423,-0.0493132845,-0.0007146299,0.0753628612],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0183769092,0.145818904,0.0007926274,-0.2175454497],"action_prob":0.6531249881,"action_logp":-0.4259867668,"action_dist_inputs":[-0.3160590231,0.3167449534],"value_targets":72.9245758057} +{"eps_id":578471551,"obs":[-0.0183769092,0.145818904,0.0007926274,-0.2175454497],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0154605303,-0.0493143685,-0.0035582816,0.0753874034],"action_prob":0.7815986276,"action_logp":-0.2464139462,"action_dist_inputs":[0.6359215379,-0.6390852332],"value_targets":72.6510848999} +{"eps_id":578471551,"obs":[-0.0154605303,-0.0493143685,-0.0035582816,0.0753874034],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0164468177,-0.2443851233,-0.0020505334,0.3669455647],"action_prob":0.3527944982,"action_logp":-1.0418695211,"action_dist_inputs":[-0.3030473888,0.3037306368],"value_targets":72.3748321533} +{"eps_id":578471551,"obs":[-0.0164468177,-0.2443851233,-0.0020505334,0.3669455647],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0213345196,-0.049234096,0.0052883779,0.0736167729],"action_prob":0.8917214274,"action_logp":-0.1146015003,"action_dist_inputs":[-1.0521044731,1.0563415289],"value_targets":72.0957946777} 
+{"eps_id":578471551,"obs":[-0.0213345196,-0.049234096,0.0052883779,0.0736167729],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0223192014,0.1458116472,0.0067607132,-0.2173929662],"action_prob":0.6622784138,"action_logp":-0.4120692313,"action_dist_inputs":[-0.3363949955,0.3370694518],"value_targets":71.8139266968} +{"eps_id":578471551,"obs":[-0.0223192014,0.1458116472,0.0067607132,-0.2173929662],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0194029696,-0.0494063012,0.0024128538,0.0774148852],"action_prob":0.7731329203,"action_logp":-0.2573043108,"action_dist_inputs":[0.6114658713,-0.6146206856],"value_targets":71.5292205811} +{"eps_id":578471551,"obs":[-0.0194029696,-0.0494063012,0.0024128538,0.0774148852],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0203910954,0.1456809789,0.0039611515,-0.2145058066],"action_prob":0.6633282304,"action_logp":-0.410485357,"action_dist_inputs":[-0.3387242258,0.3394373953],"value_targets":71.2416381836} +{"eps_id":578471551,"obs":[-0.0203910954,0.1456809789,0.0039611515,-0.2145058066],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0174774751,0.3407460749,-0.0003289644,-0.505936563],"action_prob":0.2264796048,"action_logp":-1.4851003885,"action_dist_inputs":[0.6125844121,-0.6157127023],"value_targets":70.9511489868} +{"eps_id":578471551,"obs":[-0.0174774751,0.3407460749,-0.0003289644,-0.505936563],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0106625538,0.1456287652,-0.0104476959,-0.2133573294],"action_prob":0.918125689,"action_logp":-0.0854209512,"action_dist_inputs":[1.2054816484,-1.2116671801],"value_targets":70.6577301025} 
+{"eps_id":578471551,"obs":[-0.0106625538,0.1456287652,-0.0104476959,-0.2133573294],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0077499789,0.3408985138,-0.0147148427,-0.5093175173],"action_prob":0.2084350139,"action_logp":-1.5681279898,"action_dist_inputs":[0.6656255722,-0.6687591076],"value_targets":70.3613433838} +{"eps_id":578471551,"obs":[-0.0077499789,0.3408985138,-0.0147148427,-0.5093175173],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0009320088,0.1459869295,-0.0249011926,-0.221307829],"action_prob":0.9224663973,"action_logp":-0.0807043016,"action_dist_inputs":[1.2350429296,-1.2412967682],"value_targets":70.061958313} +{"eps_id":578471551,"obs":[-0.0009320088,0.1459869295,-0.0249011926,-0.221307829],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0019877297,-0.0487704091,-0.0293273497,0.0634174123],"action_prob":0.8166989684,"action_logp":-0.2024847418,"action_dist_inputs":[0.7454514503,-0.7486891747],"value_targets":69.7595596313} +{"eps_id":578471551,"obs":[0.0019877297,-0.0487704091,-0.0293273497,0.0634174123],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0010123216,-0.2434598655,-0.028059002,0.3467050195],"action_prob":0.4349930584,"action_logp":-0.8324252367,"action_dist_inputs":[-0.1304897666,0.131018281],"value_targets":69.4540939331} +{"eps_id":578471551,"obs":[0.0010123216,-0.2434598655,-0.028059002,0.3467050195],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0038568757,-0.4381717145,-0.0211249013,0.6304095387],"action_prob":0.1329572499,"action_logp":-2.0177276134,"action_dist_inputs":[-0.9355341196,0.9395266175],"value_targets":69.1455535889} 
+{"eps_id":578471551,"obs":[-0.0038568757,-0.4381717145,-0.0211249013,0.6304095387],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0126203094,-0.242761448,-0.0085167103,0.3311493099],"action_prob":0.9361958504,"action_logp":-0.0659305677,"action_dist_inputs":[-1.3396452665,1.3463617563],"value_targets":68.8338928223} +{"eps_id":578471551,"obs":[-0.0126203094,-0.242761448,-0.0085167103,0.3311493099],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.017475538,-0.0475193076,-0.0018937244,0.0357928313],"action_prob":0.871029377,"action_logp":-0.1380795538,"action_dist_inputs":[-0.9531061053,0.9569851756],"value_targets":68.5190811157} +{"eps_id":578471551,"obs":[-0.017475538,-0.0475193076,-0.0018937244,0.0357928313],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0184259247,-0.2426140457,-0.0011778679,0.3278776705],"action_prob":0.4281593561,"action_logp":-0.8482598066,"action_dist_inputs":[-0.1445631236,0.1448017806],"value_targets":68.2010955811} +{"eps_id":578471551,"obs":[-0.0184259247,-0.2426140457,-0.0011778679,0.3278776705],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0232782066,-0.4377192259,0.0053796857,0.6201888919],"action_prob":0.1261545122,"action_logp":-2.0702478886,"action_dist_inputs":[-0.9657703638,0.969625771],"value_targets":67.8798904419} +{"eps_id":578471551,"obs":[-0.0232782066,-0.4377192259,0.0053796857,0.6201888919],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0320325904,-0.242672801,0.017783463,0.3292051554],"action_prob":0.9385361671,"action_logp":-0.0634339005,"action_dist_inputs":[-1.359572053,1.3663002253],"value_targets":67.5554504395} 
+{"eps_id":578471551,"obs":[-0.0320325904,-0.242672801,0.017783463,0.3292051554],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0368860476,-0.0478084721,0.0243675672,0.0421829373],"action_prob":0.8851645589,"action_logp":-0.1219817176,"action_dist_inputs":[-1.0191845894,1.023088932],"value_targets":67.227722168} +{"eps_id":578471551,"obs":[-0.0368860476,-0.0478084721,0.0243675672,0.0421829373],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0378422141,0.1469557285,0.0252112262,-0.2427132875],"action_prob":0.6437753439,"action_logp":-0.4404054284,"action_dist_inputs":[-0.2957336605,0.2960546911],"value_targets":66.8966903687} +{"eps_id":578471551,"obs":[-0.0378422141,0.1469557285,0.0252112262,-0.2427132875],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0349031016,-0.0485170931,0.0203569606,0.0578140914],"action_prob":0.7780657411,"action_logp":-0.250944227,"action_dist_inputs":[0.6255071759,-0.6289227605],"value_targets":66.5623168945} +{"eps_id":578471551,"obs":[-0.0349031016,-0.0485170931,0.0203569606,0.0578140914],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0358734429,-0.2439249158,0.0215132423,0.3568496406],"action_prob":0.3355413377,"action_logp":-1.0920101404,"action_dist_inputs":[-0.3413714767,0.3418560922],"value_targets":66.2245635986} +{"eps_id":578471551,"obs":[-0.0358734429,-0.2439249158,0.0215132423,0.3568496406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0407519415,-0.04911533,0.0286502354,0.0710272714],"action_prob":0.8979421854,"action_logp":-0.1076496243,"action_dist_inputs":[-1.0851954222,1.0893704891],"value_targets":65.883392334} 
+{"eps_id":578471551,"obs":[-0.0407519415,-0.04911533,0.0286502354,0.0710272714],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0417342484,0.1455844045,0.0300707798,-0.2124804705],"action_prob":0.7030231953,"action_logp":-0.3523653746,"action_dist_inputs":[-0.4305461049,0.4311900139],"value_targets":65.5387802124} +{"eps_id":578471551,"obs":[-0.0417342484,0.1455844045,0.0300707798,-0.2124804705],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0388225615,0.3402638137,0.0258211698,-0.4955280721],"action_prob":0.2705037892,"action_logp":-1.3074691296,"action_dist_inputs":[0.4944746494,-0.4975932837],"value_targets":65.1906890869} +{"eps_id":578471551,"obs":[-0.0388225615,0.3402638137,0.0258211698,-0.4955280721],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0320172831,0.1447874308,0.0159106087,-0.1948206574],"action_prob":0.9070182443,"action_logp":-0.0975927263,"action_dist_inputs":[1.1358458996,-1.141913414],"value_targets":64.8390808105} +{"eps_id":578471551,"obs":[-0.0320172831,0.1447874308,0.0159106087,-0.1948206574],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0291215349,0.3396782279,0.0120141963,-0.4824423194],"action_prob":0.2727319002,"action_logp":-1.2992659807,"action_dist_inputs":[0.4889270067,-0.491878897],"value_targets":64.4839172363} +{"eps_id":578471551,"obs":[-0.0291215349,0.3396782279,0.0120141963,-0.4824423194],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0223279707,0.1443887651,0.0023653496,-0.185997203],"action_prob":0.9080985188,"action_logp":-0.096402429,"action_dist_inputs":[1.1423224211,-1.1483129263],"value_targets":64.1251678467} 
+{"eps_id":578471551,"obs":[-0.0223279707,0.1443887651,0.0023653496,-0.185997203],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0194401946,-0.0507669412,-0.0013545945,0.1074309573],"action_prob":0.7371914983,"action_logp":-0.30490762,"action_dist_inputs":[0.5142769217,-0.5171452761],"value_targets":63.7627983093} +{"eps_id":578471551,"obs":[-0.0194401946,-0.0507669412,-0.0013545945,0.1074309573],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0204555336,-0.2458694577,0.0007940248,0.3996862173],"action_prob":0.2923428118,"action_logp":-1.2298281193,"action_dist_inputs":[-0.4415068924,0.4425257742],"value_targets":63.3967666626} +{"eps_id":578471551,"obs":[-0.0204555336,-0.2458694577,0.0007940248,0.3996862173],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0253729224,-0.441002667,0.0087877493,0.6926193833],"action_prob":0.0956117958,"action_logp":-2.3474590778,"action_dist_inputs":[-1.1212112904,1.1257513762],"value_targets":63.0270347595} +{"eps_id":578471551,"obs":[-0.0253729224,-0.441002667,0.0087877493,0.6926193833],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0341929756,-0.2460037172,0.022640137,0.402715832],"action_prob":0.9446944594,"action_logp":-0.0568937398,"action_dist_inputs":[-1.4153413773,1.4226464033],"value_targets":62.6535720825} +{"eps_id":578471551,"obs":[-0.0341929756,-0.2460037172,0.022640137,0.402715832],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0391130522,-0.0512100756,0.030694453,0.1172558665],"action_prob":0.9121143222,"action_logp":-0.0919899195,"action_dist_inputs":[-1.1675411463,1.1721881628],"value_targets":62.2763366699} 
+{"eps_id":578471551,"obs":[-0.0391130522,-0.0512100756,0.030694453,0.1172558665],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0401372537,0.1434589475,0.0330395699,-0.1655873656],"action_prob":0.7716482878,"action_logp":-0.2592264116,"action_dist_inputs":[-0.6082164645,0.6094254255],"value_targets":61.8952865601} +{"eps_id":578471551,"obs":[-0.0401372537,0.1434589475,0.0330395699,-0.1655873656],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0372680724,-0.0521200001,0.0297278222,0.1373327971],"action_prob":0.6441314816,"action_logp":-0.4398524165,"action_dist_inputs":[0.2953635156,-0.2979778945],"value_targets":61.5103912354} +{"eps_id":578471551,"obs":[-0.0372680724,-0.0521200001,0.0297278222,0.1373327971],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0383104719,-0.2476548553,0.0324744806,0.4392441809],"action_prob":0.2066746652,"action_logp":-1.5766093731,"action_dist_inputs":[-0.6718174815,0.6732700467],"value_targets":61.1216087341} +{"eps_id":578471551,"obs":[-0.0383104719,-0.2476548553,0.0324744806,0.4392441809],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0432635695,-0.0530072078,0.0412593633,0.1569724828],"action_prob":0.9222547412,"action_logp":-0.0809337795,"action_dist_inputs":[-1.234164834,1.2392195463],"value_targets":60.7288970947} +{"eps_id":578471551,"obs":[-0.0432635695,-0.0530072078,0.0412593633,0.1569724828],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0443237163,0.141500473,0.0443988107,-0.1224139258],"action_prob":0.8241865039,"action_logp":-0.1933584511,"action_dist_inputs":[-0.7716196179,0.7733531594],"value_targets":60.3322181702} 
+{"eps_id":578471551,"obs":[-0.0443237163,0.141500473,0.0443988107,-0.1224139258],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0414937064,-0.0542285256,0.041950535,0.1839392036],"action_prob":0.5309449434,"action_logp":-0.6330969334,"action_dist_inputs":[0.0609099045,-0.0630283058],"value_targets":59.9315338135} +{"eps_id":578471551,"obs":[-0.0414937064,-0.0542285256,0.041950535,0.1839392036],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0425782762,-0.2499248385,0.0456293188,0.4895552397],"action_prob":0.1545565277,"action_logp":-1.8671953678,"action_dist_inputs":[-0.8486117721,0.8506896496],"value_targets":59.526802063} +{"eps_id":578471551,"obs":[-0.0425782762,-0.2499248385,0.0456293188,0.4895552397],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0475767739,-0.055475302,0.0554204211,0.2115948945],"action_prob":0.9317180514,"action_logp":-0.0707250237,"action_dist_inputs":[-1.3038846254,1.3095003366],"value_targets":59.117980957} +{"eps_id":578471551,"obs":[-0.0475767739,-0.055475302,0.0554204211,0.2115948945],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0486862771,0.1388122588,0.059652321,-0.0631037429],"action_prob":0.8704696298,"action_logp":-0.1387223899,"action_dist_inputs":[-0.951317668,0.9537997842],"value_targets":58.7050323486} +{"eps_id":578471551,"obs":[-0.0486862771,0.1388122588,0.059652321,-0.0631037429],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0459100343,-0.0571119748,0.0583902448,0.247787267],"action_prob":0.3759701252,"action_logp":-0.9782455564,"action_dist_inputs":[-0.2540153861,0.2526730895],"value_targets":58.2879104614} 
+{"eps_id":578471551,"obs":[-0.0459100343,-0.0571119748,0.0583902448,0.247787267],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0470522717,0.137129575,0.0633459911,-0.0259215198],"action_prob":0.8876857162,"action_logp":-0.1191375032,"action_dist_inputs":[-1.032181859,1.0351353884],"value_targets":57.8665771484} +{"eps_id":578471551,"obs":[-0.0470522717,0.137129575,0.0633459911,-0.0259215198],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0443096794,0.3312885761,0.0628275573,-0.2979646325],"action_prob":0.6946766376,"action_logp":-0.3643088043,"action_dist_inputs":[-0.411444962,0.4106301367],"value_targets":57.4409866333} +{"eps_id":578471551,"obs":[-0.0443096794,0.3312885761,0.0628275573,-0.2979646325],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0376839079,0.5254613161,0.0568682663,-0.5701898932],"action_prob":0.2914733589,"action_logp":-1.2328066826,"action_dist_inputs":[0.4419319034,-0.4463072717],"value_targets":57.0110969543} +{"eps_id":578471551,"obs":[-0.0376839079,0.5254613161,0.0568682663,-0.5701898932],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0271746833,0.7197415233,0.045464471,-0.8444288969],"action_prob":0.1031637639,"action_logp":-2.271437645,"action_dist_inputs":[1.0779037476,-1.084651947],"value_targets":56.5768661499} +{"eps_id":578471551,"obs":[-0.0271746833,0.7197415233,0.045464471,-0.8444288969],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0127798533,0.5240296721,0.0285758916,-0.5378024578],"action_prob":0.9425436258,"action_logp":-0.0591730848,"action_dist_inputs":[1.3945006132,-1.4030554295],"value_targets":56.1382484436} 
+{"eps_id":578471551,"obs":[-0.0127798533,0.5240296721,0.0285758916,-0.5378024578],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0022992599,0.3285178542,0.0178198423,-0.2362543344],"action_prob":0.8979158401,"action_logp":-0.1076789498,"action_dist_inputs":[1.0838721991,-1.0904062986],"value_targets":55.6952018738} +{"eps_id":578471551,"obs":[-0.0022992599,0.3285178542,0.0178198423,-0.2362543344],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.004271097,0.1331458986,0.0130947558,0.0619958267],"action_prob":0.6932433844,"action_logp":-0.3663741648,"action_dist_inputs":[0.4057892263,-0.4095373154],"value_targets":55.2476768494} +{"eps_id":578471551,"obs":[0.004271097,0.1331458986,0.0130947558,0.0619958267],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0069340151,0.3280776739,0.0143346721,-0.2265270203],"action_prob":0.7415036559,"action_logp":-0.2990751863,"action_dist_inputs":[-0.5267381668,0.5270605683],"value_targets":54.7956352234} +{"eps_id":578471551,"obs":[0.0069340151,0.3280776739,0.0143346721,-0.2265270203],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0134955682,0.5229918361,0.0098041315,-0.5146540403],"action_prob":0.315800041,"action_logp":-1.1526460648,"action_dist_inputs":[0.3847708702,-0.3883700371],"value_targets":54.3390235901} +{"eps_id":578471551,"obs":[0.0134955682,0.5229918361,0.0098041315,-0.5146540403],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0239554048,0.3277332187,-0.0004889486,-0.2188977599],"action_prob":0.8979859948,"action_logp":-0.1076008081,"action_dist_inputs":[1.0843678713,-1.0906769037],"value_targets":53.8778038025} 
+{"eps_id":578471551,"obs":[0.0239554048,0.3277332187,-0.0004889486,-0.2188977599],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0305100698,0.1326182634,-0.0048669036,0.0736308917],"action_prob":0.7005870342,"action_logp":-0.3558366597,"action_dist_inputs":[0.4233134985,-0.4267815948],"value_targets":53.4119224548} +{"eps_id":578471551,"obs":[0.0305100698,0.1326182634,-0.0048669036,0.0736308917],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0331624337,0.3278096318,-0.0033942861,-0.2205835879],"action_prob":0.7298244238,"action_logp":-0.3149512708,"action_dist_inputs":[-0.4965979159,0.4971341193],"value_targets":52.9413375854} +{"eps_id":578471551,"obs":[0.0331624337,0.3278096318,-0.0033942861,-0.2205835879],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0397186279,0.13273637,-0.0078059579,0.0710267127],"action_prob":0.7094624043,"action_logp":-0.3432478011,"action_dist_inputs":[0.444678247,-0.4480963647],"value_targets":52.4659957886} +{"eps_id":578471551,"obs":[0.0397186279,0.13273637,-0.0078059579,0.0710267127],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0423733555,0.3279693723,-0.0063854232,-0.2241087705],"action_prob":0.7211135626,"action_logp":-0.3269586563,"action_dist_inputs":[-0.474717617,0.4752742946],"value_targets":51.9858551025} +{"eps_id":578471551,"obs":[0.0423733555,0.3279693723,-0.0063854232,-0.2241087705],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0489327423,0.1329392493,-0.0108675985,0.0665531233],"action_prob":0.7211185694,"action_logp":-0.3269516826,"action_dist_inputs":[0.4733158648,-0.4767009914],"value_targets":51.5008621216} 
+{"eps_id":578471551,"obs":[0.0489327423,0.1329392493,-0.0108675985,0.0665531233],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0515915267,-0.0620252155,-0.0095365364,0.3557875454],"action_prob":0.291048646,"action_logp":-1.2342648506,"action_dist_inputs":[-0.4448721111,0.4454243183],"value_targets":51.0109710693} +{"eps_id":578471551,"obs":[0.0515915267,-0.0620252155,-0.0095365364,0.3557875454],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0503510237,0.133231014,-0.0024207854,0.0601128154],"action_prob":0.900650382,"action_logp":-0.1046381444,"action_dist_inputs":[-1.100071907,1.1043998003],"value_targets":50.5161323547} +{"eps_id":578471551,"obs":[0.0503510237,0.133231014,-0.0024207854,0.0601128154],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0530156456,-0.0618561395,-0.0012185291,0.3520309925],"action_prob":0.2868663371,"action_logp":-1.2487388849,"action_dist_inputs":[-0.4550457001,0.4556066692],"value_targets":50.0162963867} +{"eps_id":578471551,"obs":[0.0530156456,-0.0618561395,-0.0012185291,0.3520309925],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0517785214,0.1332831234,0.0058220909,0.0589640699],"action_prob":0.9025188088,"action_logp":-0.1025657728,"action_dist_inputs":[-1.1105772257,1.1149522066],"value_targets":49.5114097595} +{"eps_id":578471551,"obs":[0.0517785214,0.1332831234,0.0058220909,0.0589640699],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0544441827,0.3283210993,0.0070013721,-0.231876269],"action_prob":0.7246898413,"action_logp":-0.3220115304,"action_dist_inputs":[-0.4836040139,0.4842412472],"value_targets":49.0014266968} 
+{"eps_id":578471551,"obs":[0.0544441827,0.3283210993,0.0070013721,-0.231876269],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0610106066,0.1330998242,0.0023638466,0.0630068704],"action_prob":0.7111949325,"action_logp":-0.3408087492,"action_dist_inputs":[0.4489647746,-0.4522297978],"value_targets":48.486289978} +{"eps_id":578471551,"obs":[0.0610106066,0.1330998242,0.0023638466,0.0630068704],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0636726022,0.3281877935,0.0036239841,-0.228929311],"action_prob":0.7245829105,"action_logp":-0.3221590817,"action_dist_inputs":[-0.4832855463,0.4840242267],"value_targets":47.9659461975} +{"eps_id":578471551,"obs":[0.0636726022,0.3281877935,0.0036239841,-0.228929311],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0702363551,0.523257792,-0.0009546023,-0.5204669237],"action_prob":0.2867334485,"action_logp":-1.2492022514,"action_dist_inputs":[0.4540629685,-0.4572392404],"value_targets":47.4403495789} +{"eps_id":578471551,"obs":[0.0702363551,0.523257792,-0.0009546023,-0.5204669237],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0807015151,0.3281492889,-0.0113639403,-0.2280849367],"action_prob":0.904990077,"action_logp":-0.0998313278,"action_dist_inputs":[1.1239614487,-1.1299804449],"value_targets":46.9094467163} +{"eps_id":578471551,"obs":[0.0807015151,0.3281492889,-0.0113639403,-0.2280849367],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0872645006,0.1331915557,-0.0159256384,0.0609918348],"action_prob":0.7385352254,"action_logp":-0.3030864894,"action_dist_inputs":[0.5176312327,-0.5207382441],"value_targets":46.3731765747} 
+{"eps_id":578471551,"obs":[0.0872645006,0.1331915557,-0.0159256384,0.0609918348],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.089928329,0.3285381794,-0.0147058023,-0.2366728932],"action_prob":0.6885926723,"action_logp":-0.373105377,"action_dist_inputs":[-0.3963865638,0.3971613944],"value_targets":45.8314933777} +{"eps_id":578471551,"obs":[0.089928329,0.3285381794,-0.0147058023,-0.2366728932],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.0964990929,0.1336293817,-0.0194392595,0.0513353497],"action_prob":0.7563860416,"action_logp":-0.2792033851,"action_dist_inputs":[0.5649225116,-0.5680447221],"value_targets":45.2843360901} +{"eps_id":578471551,"obs":[0.0964990929,0.1336293817,-0.0194392595,0.0513353497],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0991716832,-0.0612085164,-0.0184125528,0.3378221989],"action_prob":0.3353117704,"action_logp":-1.092694521,"action_dist_inputs":[-0.3417766094,0.3424806595],"value_targets":44.7316513062} +{"eps_id":578471551,"obs":[0.0991716832,-0.0612085164,-0.0184125528,0.3378221989],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.0979475081,0.1341705471,-0.0116561092,0.0393902995],"action_prob":0.891900599,"action_logp":-0.1144005731,"action_dist_inputs":[-1.0529303551,1.0573735237],"value_targets":44.1733856201} +{"eps_id":578471551,"obs":[0.0979475081,0.1341705471,-0.0116561092,0.0393902995],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1006309241,0.3294576705,-0.0108683035,-0.2569473386],"action_prob":0.6584028602,"action_logp":-0.417938292,"action_dist_inputs":[-0.3277747035,0.328410089],"value_targets":43.6094818115} 
+{"eps_id":578471551,"obs":[0.1006309241,0.3294576705,-0.0108683035,-0.2569473386],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.107220076,0.5247331262,-0.0160072502,-0.5530383587],"action_prob":0.2225071192,"action_logp":-1.5027961731,"action_dist_inputs":[0.6239653826,-0.6271499991],"value_targets":43.0398788452} +{"eps_id":578471551,"obs":[0.107220076,0.5247331262,-0.0160072502,-0.5530383587],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1177147403,0.3298395574,-0.0270680171,-0.2654414475],"action_prob":0.9186252356,"action_logp":-0.0848770663,"action_dist_inputs":[1.2088495493,-1.2149637938],"value_targets":42.4645233154} +{"eps_id":578471551,"obs":[0.1177147403,0.3298395574,-0.0270680171,-0.2654414475],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1243115291,0.135114193,-0.0323768482,0.0185825843],"action_prob":0.8075401187,"action_logp":-0.2137625515,"action_dist_inputs":[0.715446353,-0.7186585069],"value_targets":41.8833580017} +{"eps_id":578471551,"obs":[0.1243115291,0.135114193,-0.0323768482,0.0185825843],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1270138174,0.3306851387,-0.0320051946,-0.2841372192],"action_prob":0.5709622502,"action_logp":-0.5604321361,"action_dist_inputs":[-0.1426636428,0.1431146562],"value_targets":41.2963218689} +{"eps_id":578471551,"obs":[0.1270138174,0.3306851387,-0.0320051946,-0.2841372192],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.133627519,0.1360339373,-0.0376879387,-0.0017178794],"action_prob":0.8300921321,"action_logp":-0.1862185895,"action_dist_inputs":[0.791473031,-0.7948071361],"value_targets":40.7033538818} 
+{"eps_id":578471551,"obs":[0.133627519,0.1360339373,-0.0376879387,-0.0017178794],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1363481879,-0.0585278124,-0.037722297,0.2788398862],"action_prob":0.4851487875,"action_logp":-0.7232996821,"action_dist_inputs":[-0.029579889,0.0298425071],"value_targets":40.1044006348} +{"eps_id":578471551,"obs":[0.1363481879,-0.0585278124,-0.037722297,0.2788398862],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1351776421,0.1371113956,-0.0321455002,-0.0254978966],"action_prob":0.8482384086,"action_logp":-0.1645935327,"action_dist_inputs":[-0.8584410548,0.8624101281],"value_targets":39.4993934631} +{"eps_id":578471551,"obs":[0.1351776421,0.1371113956,-0.0321455002,-0.0254978966],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.137919873,-0.0575351641,-0.0326554589,0.2568719685],"action_prob":0.5232061148,"action_logp":-0.6477798223,"action_dist_inputs":[0.0464712493,-0.0464198999],"value_targets":38.8882751465} +{"eps_id":578471551,"obs":[0.137919873,-0.0575351641,-0.0326554589,0.2568719685],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1367691606,0.1380374283,-0.0275180191,-0.0459295176],"action_prob":0.8357239366,"action_logp":-0.1794569641,"action_dist_inputs":[-0.811478436,0.815271616],"value_targets":38.2709846497} +{"eps_id":578471551,"obs":[0.1367691606,0.1380374283,-0.0275180191,-0.0459295176],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1395299137,-0.0566793494,-0.0284366086,0.2379457802],"action_prob":0.5559244752,"action_logp":-0.587122798,"action_dist_inputs":[0.1122561619,-0.1123816893],"value_targets":37.6474609375} 
+{"eps_id":578471551,"obs":[0.1395299137,-0.0566793494,-0.0284366086,0.2379457802],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1383963227,0.1388370693,-0.0236776937,-0.0635695085],"action_prob":0.8234279752,"action_logp":-0.1942792237,"action_dist_inputs":[-0.7680531144,0.7716937661],"value_targets":37.0176353455} +{"eps_id":578471551,"obs":[0.1383963227,0.1388370693,-0.0236776937,-0.0635695085],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1411730647,0.3342903554,-0.0249490831,-0.363627851],"action_prob":0.4160702825,"action_logp":-0.8769010901,"action_dist_inputs":[0.1693258733,-0.1696005464],"value_targets":36.3814506531} +{"eps_id":578471551,"obs":[0.1411730647,0.3342903554,-0.0249490831,-0.363627851],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.147858873,0.1395317167,-0.0322216414,-0.0789150745],"action_prob":0.8765512705,"action_logp":-0.1317600757,"action_dist_inputs":[0.978133738,-0.9820352197],"value_targets":35.7388381958} +{"eps_id":578471551,"obs":[0.147858873,0.1395317167,-0.0322216414,-0.0789150745],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1506495029,-0.0551138669,-0.0337999426,0.2034302205],"action_prob":0.6333200336,"action_logp":-0.4567793608,"action_dist_inputs":[0.2730377913,-0.2734487355],"value_targets":35.0897369385} +{"eps_id":578471551,"obs":[0.1506495029,-0.0551138669,-0.0337999426,0.2034302205],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1495472342,-0.2497365475,-0.029731337,0.4852622151],"action_prob":0.2200776637,"action_logp":-1.5137747526,"action_dist_inputs":[-0.6309412122,0.6342725754],"value_targets":34.4340782166} 
+{"eps_id":578471551,"obs":[0.1495472342,-0.2497365475,-0.029731337,0.4852622151],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1445524991,-0.0542079285,-0.0200260933,0.1833591759],"action_prob":0.9195398688,"action_logp":-0.0838818997,"action_dist_inputs":[-1.2149004936,1.221211195],"value_targets":33.7717971802} +{"eps_id":578471551,"obs":[0.1445524991,-0.0542079285,-0.0200260933,0.1833591759],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1434683353,0.1411947608,-0.0163589101,-0.1155733615],"action_prob":0.7745348811,"action_logp":-0.255492568,"action_dist_inputs":[-0.615463078,0.6186342835],"value_targets":33.1028251648} +{"eps_id":578471551,"obs":[0.1434683353,0.1411947608,-0.0163589101,-0.1155733615],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1462922394,-0.0536890216,-0.0186703764,0.1719038785],"action_prob":0.6682856083,"action_logp":-0.4030396044,"action_dist_inputs":[0.34985587,-0.3505855501],"value_targets":32.4270935059} +{"eps_id":578471551,"obs":[0.1462922394,-0.0536890216,-0.0186703764,0.1719038785],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1452184618,0.141695112,-0.0152322995,-0.126609996],"action_prob":0.76152426,"action_logp":-0.2724332213,"action_dist_inputs":[-0.5789896846,0.5820648074],"value_targets":31.7445411682} +{"eps_id":578471551,"obs":[0.1452184618,0.141695112,-0.0152322995,-0.126609996],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1480523497,-0.0532053523,-0.0177644994,0.1612286568],"action_prob":0.68516922,"action_logp":-0.3780893981,"action_dist_inputs":[0.3884043694,-0.389226228],"value_targets":31.0550918579} 
+{"eps_id":578471551,"obs":[0.1480523497,-0.0532053523,-0.0177644994,0.1612286568],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1469882429,0.1421663463,-0.0145399263,-0.1370051503],"action_prob":0.7480649948,"action_logp":-0.2902653813,"action_dist_inputs":[-0.5426671505,0.5456515551],"value_targets":30.3586788177} +{"eps_id":578471551,"obs":[0.1469882429,0.1421663463,-0.0145399263,-0.1370051503],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1498315781,-0.0527443551,-0.0172800291,0.1510554254],"action_prob":0.7009771466,"action_logp":-0.3552800119,"action_dist_inputs":[0.4255225658,-0.4264326096],"value_targets":29.6552295685} +{"eps_id":578471551,"obs":[0.1498315781,-0.0527443551,-0.0172800291,0.1510554254],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1487766951,0.1426207125,-0.0142589211,-0.1470285058],"action_prob":0.7338545918,"action_logp":-0.3094443977,"action_dist_inputs":[-0.5056858063,0.5085821748],"value_targets":28.9446773529} +{"eps_id":578471551,"obs":[0.1487766951,0.1426207125,-0.0142589211,-0.1470285058],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1516291052,-0.0522941723,-0.0171994921,0.1411221176],"action_prob":0.7161041498,"action_logp":-0.3339296579,"action_dist_inputs":[0.4621105194,-0.4631076157],"value_targets":28.2269458771} +{"eps_id":578471551,"obs":[0.1516291052,-0.0522941723,-0.0171994921,0.1411221176],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1505832225,0.1430698335,-0.0143770492,-0.1569369435],"action_prob":0.7185252309,"action_logp":-0.3305544853,"action_dist_inputs":[-0.467174679,0.4699834585],"value_targets":27.5019664764} 
+{"eps_id":578471551,"obs":[0.1505832225,0.1430698335,-0.0143770492,-0.1569369435],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.153444618,-0.0518433563,-0.0175157879,0.1311759055],"action_prob":0.7308741212,"action_logp":-0.3135139942,"action_dist_inputs":[0.4989880323,-0.500074029],"value_targets":26.7696628571} +{"eps_id":578471551,"obs":[0.153444618,-0.0518433563,-0.0175157879,0.1311759055],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1524077505,0.143525064,-0.0148922699,-0.1669811457],"action_prob":0.701636672,"action_logp":-0.3543395698,"action_dist_inputs":[-0.4261924922,0.4289112389],"value_targets":26.0299625397} +{"eps_id":578471551,"obs":[0.1524077505,0.143525064,-0.0148922699,-0.1669811457],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1552782506,-0.0513805822,-0.018231893,0.12096674],"action_prob":0.7455438972,"action_logp":-0.2936412692,"action_dist_inputs":[0.5369032025,-0.5380825996],"value_targets":25.2827911377} +{"eps_id":578471551,"obs":[0.1552782506,-0.0513805822,-0.018231893,0.12096674],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1542506367,0.1439977735,-0.0158125572,-0.1774119586],"action_prob":0.6826668382,"action_logp":-0.3817483485,"action_dist_inputs":[-0.3817155063,0.3843392432],"value_targets":24.5280704498} +{"eps_id":578471551,"obs":[0.1542506367,0.1439977735,-0.0158125572,-0.1774119586],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.157130599,-0.050894361,-0.0193607975,0.1102409959],"action_prob":0.7603037357,"action_logp":-0.2740373015,"action_dist_inputs":[0.5765323639,-0.5778128505],"value_targets":23.7657279968} 
+{"eps_id":578471551,"obs":[0.157130599,-0.050894361,-0.0193607975,0.1102409959],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1561127156,-0.2457336038,-0.017155977,0.3967533708],"action_prob":0.3389958739,"action_logp":-1.0817673206,"action_dist_inputs":[-0.3326254189,0.3351466656],"value_targets":22.9956855774} +{"eps_id":578471551,"obs":[0.1561127156,-0.2457336038,-0.017155977,0.3967533708],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1511980295,-0.4406079948,-0.0092209103,0.6839783192],"action_prob":0.1038148254,"action_logp":-2.2651464939,"action_dist_inputs":[-1.0749065876,1.0806317329],"value_targets":22.2178649902} +{"eps_id":578471551,"obs":[0.1511980295,-0.4406079948,-0.0092209103,0.6839783192],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1423858702,-0.245359242,0.0044586565,0.3884067237],"action_prob":0.9458399415,"action_logp":-0.055681929,"action_dist_inputs":[-1.4260293245,1.4341001511],"value_targets":21.4321861267} +{"eps_id":578471551,"obs":[0.1423858702,-0.245359242,0.0044586565,0.3884067237],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1374786943,-0.0503008589,0.0122267911,0.0971329138],"action_prob":0.9030824304,"action_logp":-0.1019414514,"action_dist_inputs":[-1.1131249666,1.118827939],"value_targets":20.6385707855} +{"eps_id":578471551,"obs":[0.1374786943,-0.0503008589,0.0122267911,0.0971329138],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1364726722,-0.2455958873,0.0141694499,0.3936481476],"action_prob":0.2990499139,"action_logp":-1.2071447372,"action_dist_inputs":[-0.4246988297,0.4271272421],"value_targets":19.8369407654} 
+{"eps_id":578471551,"obs":[0.1364726722,-0.2455958873,0.0141694499,0.3936481476],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1315607578,-0.4409160316,0.0220424123,0.690764606],"action_prob":0.0911196992,"action_logp":-2.3955812454,"action_dist_inputs":[-1.1471332312,1.1529061794],"value_targets":19.0272140503} +{"eps_id":578471551,"obs":[0.1315607578,-0.4409160316,0.0220424123,0.690764606],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1227424368,-0.2461067438,0.0358577035,0.4051016271],"action_prob":0.9493698478,"action_logp":-0.0519568622,"action_dist_inputs":[-1.4615107775,1.4697396755],"value_targets":18.2093067169} +{"eps_id":578471551,"obs":[0.1227424368,-0.2461067438,0.0358577035,0.4051016271],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1178203002,-0.0515111685,0.0439597368,0.1239358038],"action_prob":0.9193997383,"action_logp":-0.0840342566,"action_dist_inputs":[-1.2141435146,1.2200753689],"value_targets":17.3831367493} +{"eps_id":578471551,"obs":[0.1178203002,-0.0515111685,0.0439597368,0.1239358038],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1167900786,0.1429543197,0.0464384519,-0.1545606405],"action_prob":0.7899683714,"action_logp":-0.2357623428,"action_dist_inputs":[-0.6609745026,0.6637603641],"value_targets":16.5486240387} +{"eps_id":578471551,"obs":[0.1167900786,0.1429543197,0.0464384519,-0.1545606405],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1196491644,0.3373816609,0.0433472395,-0.4322392344],"action_prob":0.3890517056,"action_logp":-0.9440430403,"action_dist_inputs":[0.2251737565,-0.2261263877],"value_targets":15.7056808472} 
+{"eps_id":578471551,"obs":[0.1196491644,0.3373816609,0.0433472395,-0.4322392344],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1263967901,0.1416736096,0.0347024575,-0.1262128204],"action_prob":0.8715236783,"action_logp":-0.1375122517,"action_dist_inputs":[0.9551621079,-0.9593359232],"value_targets":14.8542232513} +{"eps_id":578471551,"obs":[0.1263967901,0.1416736096,0.0347024575,-0.1262128204],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1292302758,0.336281687,0.0321782008,-0.4077486992],"action_prob":0.4192120433,"action_logp":-0.8693783879,"action_dist_inputs":[0.1626788229,-0.1633300334],"value_targets":13.9941644669} +{"eps_id":578471551,"obs":[0.1292302758,0.336281687,0.0321782008,-0.4077486992],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1359559,0.1407185942,0.0240232255,-0.1050971597],"action_prob":0.8651665449,"action_logp":-0.1448332667,"action_dist_inputs":[0.9274576306,-0.9314243197],"value_targets":13.125418663} +{"eps_id":578471551,"obs":[0.1359559,0.1407185942,0.0240232255,-0.1050971597],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1387702823,-0.0547392406,0.0219212826,0.1950671524],"action_prob":0.5623188019,"action_logp":-0.575686276,"action_dist_inputs":[0.1250815094,-0.1254966408],"value_targets":12.2478981018} +{"eps_id":578471551,"obs":[0.1387702823,-0.0547392406,0.0219212826,0.1950671524],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.137675494,0.1400623918,0.0258226264,-0.0906207561],"action_prob":0.8351832628,"action_logp":-0.1801041365,"action_dist_inputs":[-0.8096257448,0.8131913543],"value_targets":11.3615131378} 
+{"eps_id":578471551,"obs":[0.137675494,0.1400623918,0.0258226264,-0.0906207561],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1404767334,0.3348048925,0.0240102112,-0.3750461042],"action_prob":0.4719752073,"action_logp":-0.7508288026,"action_dist_inputs":[0.0559916832,-0.0562250204],"value_targets":10.4661741257} +{"eps_id":578471551,"obs":[0.1404767334,0.3348048925,0.0240102112,-0.3750461042],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1471728384,0.1393502653,0.0165092889,-0.0748903453],"action_prob":0.8506875038,"action_logp":-0.161710456,"action_dist_inputs":[0.86817801,-0.8718252182],"value_targets":9.5617923737} +{"eps_id":578471551,"obs":[0.1471728384,0.1393502653,0.0165092889,-0.0748903453],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1499598473,-0.0560044311,0.0150114819,0.2229552716],"action_prob":0.5173168182,"action_logp":-0.6590997577,"action_dist_inputs":[0.0346259475,-0.0346691236],"value_targets":8.6482753754} +{"eps_id":578471551,"obs":[0.1499598473,-0.0560044311,0.0150114819,0.2229552716],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1488397568,0.1388997883,0.0194705874,-0.0649548993],"action_prob":0.850399673,"action_logp":-0.1620488018,"action_dist_inputs":[-0.8669202924,0.8708193898],"value_targets":7.7255306244} +{"eps_id":578471551,"obs":[0.1488397568,0.1388997883,0.0194705874,-0.0649548993],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1516177505,0.3337372541,0.0181714892,-0.3514316976],"action_prob":0.5104386806,"action_logp":-0.6724848151,"action_dist_inputs":[-0.0208303072,0.0209304355],"value_targets":6.7934651375} 
+{"eps_id":578471551,"obs":[0.1516177505,0.3337372541,0.0181714892,-0.3514316976],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1582924873,0.5285961032,0.0111428555,-0.6383296251],"action_prob":0.161462009,"action_logp":-1.8234853745,"action_dist_inputs":[0.8220014572,-0.8253884315],"value_targets":5.8519849777} +{"eps_id":578471551,"obs":[0.1582924873,0.5285961032,0.0111428555,-0.6383296251],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[0.1688644141,0.3333205879,-0.0016237379,-0.3421585858],"action_prob":0.9268123507,"action_logp":-0.0760041922,"action_dist_inputs":[1.2662374973,-1.2724863291],"value_targets":4.9009947777} +{"eps_id":578471551,"obs":[0.1688644141,0.3333205879,-0.0016237379,-0.3421585858],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1755308211,0.1382217705,-0.0084669096,-0.0499881431],"action_prob":0.8479137421,"action_logp":-0.1649763882,"action_dist_inputs":[0.8575162888,-0.8608145118],"value_targets":3.9403989315} +{"eps_id":578471551,"obs":[0.1755308211,0.1382217705,-0.0084669096,-0.0499881431],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1782952696,-0.0567777492,-0.0094666723,0.2400114238],"action_prob":0.5256067514,"action_logp":-0.6432019472,"action_dist_inputs":[0.0514116436,-0.0511051305],"value_targets":2.970099926} +{"eps_id":578471551,"obs":[0.1782952696,-0.0567777492,-0.0094666723,0.2400114238],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[0.1771597117,0.138478145,-0.0046664444,-0.0556424595],"action_prob":0.841119349,"action_logp":-0.1730216891,"action_dist_inputs":[-0.8312383294,0.8353424668],"value_targets":1.9900000095} 
+{"eps_id":578471551,"obs":[0.1771597117,0.138478145,-0.0046664444,-0.0556424595],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[0.1799292713,-0.0565765873,-0.0057792934,0.2355645299],"action_prob":0.5290122628,"action_logp":-0.6367436647,"action_dist_inputs":[0.0582325682,-0.0579470433],"value_targets":1.0} +{"eps_id":1467118106,"obs":[-0.0112490868,-0.0466275066,-0.0366825201,-0.0020674383],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":0.0,"dones":false,"new_obs":[-0.0121816369,0.1490008086,-0.0367238708,-0.3060947359],"action_prob":0.4096399546,"action_logp":-0.8924766779,"action_dist_inputs":[0.1825356185,-0.1829183847],"value_targets":86.6020355225} +{"eps_id":1467118106,"obs":[-0.0121816369,0.1490008086,-0.0367238708,-0.3060947359],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0092016207,-0.0455791131,-0.0428457633,-0.0252160467],"action_prob":0.8825621009,"action_logp":-0.1249261275,"action_dist_inputs":[1.0063029528,-1.0106160641],"value_targets":86.4666976929} +{"eps_id":1467118106,"obs":[-0.0092016207,-0.0455791131,-0.0428457633,-0.0252160467],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.010113203,-0.2400612682,-0.0433500856,0.253646642],"action_prob":0.6487786174,"action_logp":-0.4326637685,"action_dist_inputs":[0.3064948022,-0.3071799576],"value_targets":86.3300018311} +{"eps_id":1467118106,"obs":[-0.010113203,-0.2400612682,-0.0433500856,0.253646642],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0149144279,-0.0443480015,-0.0382771529,-0.0523883738],"action_prob":0.7665682435,"action_logp":-0.2658315897,"action_dist_inputs":[-0.5930405259,0.5959931612],"value_targets":86.1919174194} 
+{"eps_id":1467118106,"obs":[-0.0149144279,-0.0443480015,-0.0382771529,-0.0523883738],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0158013888,-0.2389007956,-0.0393249206,0.2279763222],"action_prob":0.6874831915,"action_logp":-0.3747178912,"action_dist_inputs":[0.3936760426,-0.3947032094],"value_targets":86.052444458} +{"eps_id":1467118106,"obs":[-0.0158013888,-0.2389007956,-0.0393249206,0.2279763222],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0205794033,-0.0432395712,-0.0347653925,-0.0768471584],"action_prob":0.7376081944,"action_logp":-0.3043424785,"action_dist_inputs":[-0.5154408216,0.5181331635],"value_targets":85.9115600586} +{"eps_id":1467118106,"obs":[-0.0205794033,-0.0432395712,-0.0347653925,-0.0768471584],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0214441959,-0.2378463298,-0.0363023356,0.2046676278],"action_prob":0.7199277282,"action_logp":-0.3286044598,"action_dist_inputs":[0.471377492,-0.4727257192],"value_targets":85.7692489624} +{"eps_id":1467118106,"obs":[-0.0214441959,-0.2378463298,-0.0363023356,0.2046676278],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0262011215,-0.4324308336,-0.0322089829,0.4856815338],"action_prob":0.2929157615,"action_logp":-1.2278702259,"action_dist_inputs":[-0.4394111037,0.4418536127],"value_targets":85.62550354} +{"eps_id":1467118106,"obs":[-0.0262011215,-0.4324308336,-0.0322089829,0.4856815338],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0348497406,-0.2368695289,-0.0224953536,0.1830239296],"action_prob":0.9016740322,"action_logp":-0.103502214,"action_dist_inputs":[-1.1052974463,1.1106678247],"value_targets":85.4803085327} 
+{"eps_id":1467118106,"obs":[-0.0348497406,-0.2368695289,-0.0224953536,0.1830239296],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0395871289,-0.4316624999,-0.0188348759,0.4685263634],"action_prob":0.3024244308,"action_logp":-1.1959238052,"action_dist_inputs":[-0.4167892337,0.4189900756],"value_targets":85.3336486816} +{"eps_id":1467118106,"obs":[-0.0395871289,-0.4316624999,-0.0188348759,0.4685263634],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0482203811,-0.2362796068,-0.0094643477,0.1699668765],"action_prob":0.901157856,"action_logp":-0.1040748507,"action_dist_inputs":[-1.1024625301,1.1076934338],"value_targets":85.1855010986} +{"eps_id":1467118106,"obs":[-0.0482203811,-0.2362796068,-0.0094643477,0.1699668765],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0529459715,-0.0410234705,-0.0060650101,-0.1256866753],"action_prob":0.7008304,"action_logp":-0.355489403,"action_dist_inputs":[-0.4246100485,0.4266450405],"value_targets":85.0358581543} +{"eps_id":1467118106,"obs":[-0.0529459715,-0.0410234705,-0.0060650101,-0.1256866753],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0537664406,-0.2360580117,-0.0085787438,0.1650766581],"action_prob":0.7429356575,"action_logp":-0.2971458733,"action_dist_inputs":[0.5296479464,-0.5316348672],"value_targets":84.8847045898} +{"eps_id":1467118106,"obs":[-0.0537664406,-0.2360580117,-0.0085787438,0.1650766581],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0584876016,-0.4310561121,-0.0052772104,0.4550409317],"action_prob":0.3052556515,"action_logp":-1.1866056919,"action_dist_inputs":[-0.4102264047,0.4121681154],"value_targets":84.7320251465} 
+{"eps_id":1467118106,"obs":[-0.0584876016,-0.4310561121,-0.0052772104,0.4550409317],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0671087205,-0.2358599454,0.0038236082,0.1606992632],"action_prob":0.9019272327,"action_logp":-0.1032214165,"action_dist_inputs":[-1.1068725586,1.1119513512],"value_targets":84.5778045654} +{"eps_id":1467118106,"obs":[-0.0671087205,-0.2358599454,0.0038236082,0.1606992632],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0718259215,-0.040792942,0.0070375935,-0.1307749748],"action_prob":0.7110535502,"action_logp":-0.3410075307,"action_dist_inputs":[-0.4493247271,0.4511815012],"value_targets":84.4220275879} +{"eps_id":1467118106,"obs":[-0.0718259215,-0.040792942,0.0070375935,-0.1307749748],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0726417825,-0.236014992,0.0044220942,0.1641198993],"action_prob":0.7283335328,"action_logp":-0.3169961572,"action_dist_inputs":[0.4920405746,-0.4941435456],"value_targets":84.2646713257} +{"eps_id":1467118106,"obs":[-0.0726417825,-0.236014992,0.0044220942,0.1641198993],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0773620829,-0.0409566276,0.0077044917,-0.1271647066],"action_prob":0.7179480791,"action_logp":-0.3313580155,"action_dist_inputs":[-0.4662312269,0.4680749178],"value_targets":84.1057281494} +{"eps_id":1467118106,"obs":[-0.0773620829,-0.0409566276,0.0077044917,-0.1271647066],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0781812146,0.1540541053,0.0051611979,-0.4174070358],"action_prob":0.2785797715,"action_logp":-1.2780507803,"action_dist_inputs":[0.4747072458,-0.4768100381],"value_targets":83.9451828003} 
+{"eps_id":1467118106,"obs":[-0.0781812146,0.1540541053,0.0051611979,-0.4174070358],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0751001313,0.3491025269,-0.003186943,-0.7084584236],"action_prob":0.0941531956,"action_logp":-2.3628320694,"action_dist_inputs":[1.1291834116,-1.1347635984],"value_targets":83.7830123901} +{"eps_id":1467118106,"obs":[-0.0751001313,0.3491025269,-0.003186943,-0.7084584236],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0681180805,0.1540248841,-0.0173561107,-0.4167803526],"action_prob":0.9446604252,"action_logp":-0.0569297783,"action_dist_inputs":[1.4146106243,-1.422727108],"value_targets":83.6192016602} +{"eps_id":1467118106,"obs":[-0.0681180805,0.1540248841,-0.0173561107,-0.4167803526],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0650375858,-0.0408468544,-0.0256917179,-0.129619211],"action_prob":0.912725091,"action_logp":-0.0913205519,"action_dist_inputs":[1.1708456278,-1.1765263081],"value_targets":83.453742981} +{"eps_id":1467118106,"obs":[-0.0650375858,-0.0408468544,-0.0256917179,-0.129619211],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0658545196,-0.2355915308,-0.0282841027,0.1548488587],"action_prob":0.7748630047,"action_logp":-0.2550690472,"action_dist_inputs":[0.6168327332,-0.6191447377],"value_targets":83.286605835} +{"eps_id":1467118106,"obs":[-0.0658545196,-0.2355915308,-0.0282841027,0.1548488587],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0705663487,-0.4302973151,-0.0251871254,0.4384763241],"action_prob":0.3613431156,"action_logp":-1.017927289,"action_dist_inputs":[-0.2839696109,0.285569787],"value_targets":83.1177825928} 
+{"eps_id":1467118106,"obs":[-0.0705663487,-0.4302973151,-0.0251871254,0.4384763241],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0791722983,-0.2348280996,-0.0164175984,0.1379612535],"action_prob":0.8854320645,"action_logp":-0.1216795668,"action_dist_inputs":[-1.0200936794,1.0248141289],"value_targets":82.9472579956} +{"eps_id":1467118106,"obs":[-0.0791722983,-0.2348280996,-0.0164175984,0.1379612535],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0838688612,-0.4297111034,-0.0136583736,0.4254198074],"action_prob":0.3674048781,"action_logp":-1.0012907982,"action_dist_inputs":[-0.2709904611,0.2723755836],"value_targets":82.7750091553} +{"eps_id":1467118106,"obs":[-0.0838688612,-0.4297111034,-0.0136583736,0.4254198074],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0924630836,-0.2343983799,-0.0051499773,0.1284625977],"action_prob":0.8855640292,"action_logp":-0.1215305179,"action_dist_inputs":[-1.0208086967,1.0254006386],"value_targets":82.601020813} +{"eps_id":1467118106,"obs":[-0.0924630836,-0.2343983799,-0.0051499773,0.1284625977],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0971510485,-0.0392030329,-0.0025807251,-0.1658406258],"action_prob":0.6393774748,"action_logp":-0.4472602904,"action_dist_inputs":[-0.2857110798,0.2869521976],"value_targets":82.4252700806} +{"eps_id":1467118106,"obs":[-0.0971510485,-0.0392030329,-0.0025807251,-0.1658406258],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.0979351103,-0.2342879474,-0.0058975373,0.1260270476],"action_prob":0.7853754163,"action_logp":-0.2415934652,"action_dist_inputs":[0.6472253799,-0.6500461698],"value_targets":82.2477493286} 
+{"eps_id":1467118106,"obs":[-0.0979351103,-0.2342879474,-0.0058975373,0.1260270476],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1026208699,-0.0390820056,-0.0033769966,-0.1685106456],"action_prob":0.6337587237,"action_logp":-0.4560869634,"action_dist_inputs":[-0.2736073732,0.2747687697],"value_targets":82.0684280396} +{"eps_id":1467118106,"obs":[-0.1026208699,-0.0390820056,-0.0033769966,-0.1685106456],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1034025103,-0.2341554612,-0.0067472095,0.1231050342],"action_prob":0.7889801264,"action_logp":-0.2370141298,"action_dist_inputs":[0.6579419971,-0.6608467698],"value_targets":81.8873062134} +{"eps_id":1467118106,"obs":[-0.1034025103,-0.2341554612,-0.0067472095,0.1231050342],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1080856174,-0.4291800857,-0.0042851088,0.413651675],"action_prob":0.3730570376,"action_logp":-0.9860239625,"action_dist_inputs":[-0.2590241432,0.2601000667],"value_targets":81.7043457031} +{"eps_id":1467118106,"obs":[-0.1080856174,-0.4291800857,-0.0042851088,0.413651675],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1166692227,-0.2339976728,0.0039879247,0.1196208969],"action_prob":0.8849571347,"action_logp":-0.1222160459,"action_dist_inputs":[-1.0179299116,1.0223045349],"value_targets":81.5195465088} +{"eps_id":1467118106,"obs":[-0.1166692227,-0.2339976728,0.0039879247,0.1196208969],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1213491708,-0.0389330797,0.0063803429,-0.1718012094],"action_prob":0.6440066695,"action_logp":-0.4400461912,"action_dist_inputs":[-0.2959030569,0.2968941629],"value_targets":81.3328704834} 
+{"eps_id":1467118106,"obs":[-0.1213491708,-0.0389330797,0.0063803429,-0.1718012094],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1221278384,-0.2341457754,0.0029443186,0.122887671],"action_prob":0.7792723179,"action_logp":-0.2493946999,"action_dist_inputs":[0.6292087436,-0.6322219372],"value_targets":81.144317627} +{"eps_id":1467118106,"obs":[-0.1221278384,-0.2341457754,0.0029443186,0.122887671],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1268107444,-0.4293097854,0.0054020719,0.4164980352],"action_prob":0.3514874578,"action_logp":-1.0455812216,"action_dist_inputs":[-0.3057692647,0.3067379594],"value_targets":80.9538574219} +{"eps_id":1467118106,"obs":[-0.1268107444,-0.4293097854,0.0054020719,0.4164980352],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1353969425,-0.2342648059,0.0137320329,0.1255230457],"action_prob":0.8909083605,"action_logp":-0.1155136824,"action_dist_inputs":[-1.0478640795,1.0521893501],"value_targets":80.76146698} +{"eps_id":1467118106,"obs":[-0.1353969425,-0.2342648059,0.0137320329,0.1255230457],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1400822401,-0.0393422395,0.0162424929,-0.1627961397],"action_prob":0.675643146,"action_logp":-0.392090261,"action_dist_inputs":[-0.3664367795,0.3673837781],"value_targets":80.5671386719} +{"eps_id":1467118106,"obs":[-0.1400822401,-0.0393422395,0.0162424929,-0.1627961397],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.140869081,-0.2346929014,0.0129865706,0.134966284],"action_prob":0.7528959513,"action_logp":-0.2838282287,"action_dist_inputs":[0.555570066,-0.558547616],"value_targets":80.3708496094} 
+{"eps_id":1467118106,"obs":[-0.140869081,-0.2346929014,0.0129865706,0.134966284],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1455629468,-0.4299984276,0.0156858973,0.4317178428],"action_prob":0.3092175126,"action_logp":-1.1737103462,"action_dist_inputs":[-0.4013954997,0.4023844898],"value_targets":80.1725769043} +{"eps_id":1467118106,"obs":[-0.1455629468,-0.4299984276,0.0156858973,0.4317178428],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1541629136,-0.2351020575,0.0243202541,0.1440207064],"action_prob":0.9007412791,"action_logp":-0.1045372188,"action_dist_inputs":[-1.1005458832,1.1049425602],"value_targets":79.9722976685} +{"eps_id":1467118106,"obs":[-0.1541629136,-0.2351020575,0.0243202541,0.1440207064],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1588649601,-0.0403366983,0.0272006672,-0.1408915073],"action_prob":0.7254655957,"action_logp":-0.320941627,"action_dist_inputs":[-0.4853503406,0.4863865972],"value_targets":79.7699966431} +{"eps_id":1467118106,"obs":[-0.1588649601,-0.0403366983,0.0272006672,-0.1408915073],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.159671694,0.1543853283,0.0243828371,-0.424870491],"action_prob":0.2993037403,"action_logp":-1.2062963247,"action_dist_inputs":[0.4239115715,-0.4267039299],"value_targets":79.5656509399} +{"eps_id":1467118106,"obs":[-0.159671694,0.1543853283,0.0243828371,-0.424870491],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1565839797,0.3491535485,0.0158854276,-0.7097681761],"action_prob":0.0990634635,"action_logp":-2.3119945526,"action_dist_inputs":[1.1007957458,-1.1068782806],"value_targets":79.3592453003} 
+{"eps_id":1467118106,"obs":[-0.1565839797,0.3491535485,0.0158854276,-0.7097681761],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1496009082,0.1538152397,0.0016900643,-0.4121275246],"action_prob":0.9445518255,"action_logp":-0.0570447482,"action_dist_inputs":[1.4134193659,-1.4218411446],"value_targets":79.1507568359} +{"eps_id":1467118106,"obs":[-0.1496009082,0.1538152397,0.0016900643,-0.4121275246],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1465246081,-0.0413306244,-0.0065524862,-0.1189122573],"action_prob":0.9057309031,"action_logp":-0.0990130231,"action_dist_inputs":[1.1282610893,-1.1343278885],"value_targets":78.9401550293} +{"eps_id":1467118106,"obs":[-0.1465246081,-0.0413306244,-0.0065524862,-0.1189122573],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1473512203,0.1538845897,-0.0089307316,-0.4136552215],"action_prob":0.2743516266,"action_logp":-1.2933447361,"action_dist_inputs":[0.4849785566,-0.4876765311],"value_targets":78.727432251} +{"eps_id":1467118106,"obs":[-0.1473512203,0.1538845897,-0.0089307316,-0.4136552215],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1442735344,-0.0411096402,-0.0172038358,-0.1238011345],"action_prob":0.9099507928,"action_logp":-0.0943647474,"action_dist_inputs":[1.1534456015,-1.1595884562],"value_targets":78.5125579834} +{"eps_id":1467118106,"obs":[-0.1442735344,-0.0411096402,-0.0172038358,-0.1238011345],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1450957209,-0.2359809428,-0.0196798574,0.1634048969],"action_prob":0.7495882511,"action_logp":-0.2882312536,"action_dist_inputs":[0.5467982292,-0.5496189594],"value_targets":78.2955093384} 
+{"eps_id":1467118106,"obs":[-0.1450957209,-0.2359809428,-0.0196798574,0.1634048969],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1498153359,-0.0405828804,-0.0164117608,-0.1354209483],"action_prob":0.6786794662,"action_logp":-0.3876063228,"action_dist_inputs":[-0.3733414114,0.3743683398],"value_targets":78.0762710571} +{"eps_id":1467118106,"obs":[-0.1498153359,-0.0405828804,-0.0164117608,-0.1354209483],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1506270021,-0.2354659587,-0.0191201791,0.1520394534],"action_prob":0.7632319331,"action_logp":-0.2701933384,"action_dist_inputs":[0.5837432742,-0.5867376924],"value_targets":77.8548202515} +{"eps_id":1467118106,"obs":[-0.1506270021,-0.2354659587,-0.0191201791,0.1520394534],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1553363204,-0.4303089976,-0.0160793904,0.4386295974],"action_prob":0.3397013843,"action_logp":-1.0796883106,"action_dist_inputs":[-0.3318782151,0.3327469528],"value_targets":77.6311340332} +{"eps_id":1467118106,"obs":[-0.1553363204,-0.4303089976,-0.0160793904,0.4386295974],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1639425009,-0.2349631935,-0.0073067988,0.1409216225],"action_prob":0.8890241385,"action_logp":-0.1176308617,"action_dist_inputs":[-1.0383342505,1.0424780846],"value_targets":77.4051818848} +{"eps_id":1467118106,"obs":[-0.1639425009,-0.2349631935,-0.0073067988,0.1409216225],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1686417609,-0.0397373512,-0.0044883662,-0.1540574878],"action_prob":0.6646373868,"action_logp":-0.4085136354,"action_dist_inputs":[-0.3416576087,0.3423718512],"value_targets":77.1769561768} 
+{"eps_id":1467118106,"obs":[-0.1686417609,-0.0397373512,-0.0044883662,-0.1540574878],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1694364995,0.1554485708,-0.0075695161,-0.448153019],"action_prob":0.231666252,"action_logp":-1.4624575377,"action_dist_inputs":[0.5978325605,-0.6010939479],"value_targets":76.9464187622} +{"eps_id":1467118106,"obs":[-0.1694364995,0.1554485708,-0.0075695161,-0.448153019],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1663275361,-0.0395654887,-0.0165325757,-0.1578657031],"action_prob":0.9182053208,"action_logp":-0.0853342637,"action_dist_inputs":[1.2058184147,-1.2123906612],"value_targets":76.7135543823} +{"eps_id":1467118106,"obs":[-0.1663275361,-0.0395654887,-0.0165325757,-0.1578657031],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1671188474,-0.2344468832,-0.0196898896,0.1295560896],"action_prob":0.7887234092,"action_logp":-0.2373396009,"action_dist_inputs":[0.6569319963,-0.6603156924],"value_targets":76.4783401489} +{"eps_id":1467118106,"obs":[-0.1671188474,-0.2344468832,-0.0196898896,0.1295560896],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1718077809,-0.0390484892,-0.0170987695,-0.1692731529],"action_prob":0.6189690828,"action_logp":-0.4796999097,"action_dist_inputs":[-0.2423405349,0.2428344041],"value_targets":76.2407455444} +{"eps_id":1467118106,"obs":[-0.1718077809,-0.0390484892,-0.0170987695,-0.1692731529],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1725887507,-0.2339215726,-0.0204842314,0.1179669574],"action_prob":0.801173687,"action_logp":-0.2216775417,"action_dist_inputs":[0.6950421929,-0.6986037493],"value_targets":76.0007553101} 
+{"eps_id":1467118106,"obs":[-0.1725887507,-0.2339215726,-0.0204842314,0.1179669574],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1772671789,-0.4287441373,-0.0181248933,0.4041175544],"action_prob":0.4047353864,"action_logp":-0.9045218229,"action_dist_inputs":[-0.1927251667,0.1930474788],"value_targets":75.7583389282} +{"eps_id":1467118106,"obs":[-0.1772671789,-0.4287441373,-0.0181248933,0.4041175544],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.185842067,-0.2333698869,-0.0100425417,0.1057758778],"action_prob":0.871093452,"action_logp":-0.1380060315,"action_dist_inputs":[-0.9534977674,0.9571635127],"value_targets":75.5134735107} +{"eps_id":1467118106,"obs":[-0.185842067,-0.2333698869,-0.0100425417,0.1057758778],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1905094683,-0.0381054692,-0.0079270247,-0.19005844],"action_prob":0.5952149034,"action_logp":-0.5188327432,"action_dist_inputs":[-0.1927090585,0.1928572208],"value_targets":75.26612854} +{"eps_id":1467118106,"obs":[-0.1905094683,-0.0381054692,-0.0079270247,-0.19005844],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1912715733,0.1571289897,-0.0117281927,-0.4852314293],"action_prob":0.1897320896,"action_logp":-1.6621422768,"action_dist_inputs":[0.7239451408,-0.727806747],"value_targets":75.0162963867} +{"eps_id":1467118106,"obs":[-0.1912715733,0.1571289897,-0.0117281927,-0.4852314293],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1881289929,-0.0378255136,-0.0214328226,-0.1962678134],"action_prob":0.9265879989,"action_logp":-0.0762462318,"action_dist_inputs":[1.264192462,-1.2712298632],"value_targets":74.7639312744} 
+{"eps_id":1467118106,"obs":[-0.1881289929,-0.0378255136,-0.0214328226,-0.1962678134],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.18888551,-0.2326344401,-0.0253581777,0.0895776972],"action_prob":0.8295755386,"action_logp":-0.1868411154,"action_dist_inputs":[0.7892988324,-0.7933228612],"value_targets":74.5090255737} +{"eps_id":1467118106,"obs":[-0.18888551,-0.2326344401,-0.0253581777,0.0895776972],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.1935381889,-0.4273838997,-0.0235666241,0.3741534948],"action_prob":0.4712017775,"action_logp":-0.752468884,"action_dist_inputs":[-0.0577345006,0.0575860254],"value_targets":74.2515411377} +{"eps_id":1467118106,"obs":[-0.1935381889,-0.4273838997,-0.0235666241,0.3741534948],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2020858675,-0.2319352627,-0.0160835534,0.0741339773],"action_prob":0.848729074,"action_logp":-0.1640152782,"action_dist_inputs":[-0.8607100844,0.8639572859],"value_targets":73.9914550781} +{"eps_id":1467118106,"obs":[-0.2020858675,-0.2319352627,-0.0160835534,0.0741339773],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2067245692,-0.4268229902,-0.0146008749,0.3616994023],"action_prob":0.4803852737,"action_logp":-0.7331668735,"action_dist_inputs":[-0.0394318774,0.0390672982],"value_targets":73.7287445068} +{"eps_id":1467118106,"obs":[-0.2067245692,-0.4268229902,-0.0146008749,0.3616994023],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2152610421,-0.2314965874,-0.0073668868,0.0644485056],"action_prob":0.847061038,"action_logp":-0.1659825295,"action_dist_inputs":[-0.8543192744,0.8574143052],"value_targets":73.4633712769} 
+{"eps_id":1467118106,"obs":[-0.2152610421,-0.2314965874,-0.0073668868,0.0644485056],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.219890967,-0.426512152,-0.0060779168,0.3547980487],"action_prob":0.4788798988,"action_logp":-0.7363054156,"action_dist_inputs":[-0.0425257385,0.0420049727],"value_targets":73.1953277588} +{"eps_id":1467118106,"obs":[-0.219890967,-0.426512152,-0.0060779168,0.3547980487],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2284212112,-0.2313043028,0.0010180443,0.0602048114],"action_prob":0.8488350511,"action_logp":-0.1638904214,"action_dist_inputs":[-0.8612530828,0.8642403483],"value_targets":72.9245758057} +{"eps_id":1467118106,"obs":[-0.2284212112,-0.2313043028,0.0010180443,0.0602048114],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2330472916,-0.426440835,0.0022221406,0.3532087505],"action_prob":0.4669584036,"action_logp":-0.7615150809,"action_dist_inputs":[-0.066489324,0.0658698156],"value_targets":72.6510848999} +{"eps_id":1467118106,"obs":[-0.2330472916,-0.426440835,0.0022221406,0.3532087505],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2415761203,-0.231350556,0.0092863161,0.0612273701],"action_prob":0.8538028598,"action_logp":-0.1580549479,"action_dist_inputs":[-0.8809093833,0.8838351965],"value_targets":72.3748321533} +{"eps_id":1467118106,"obs":[-0.2415761203,-0.231350556,0.0092863161,0.0612273701],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2462031245,-0.0363629684,0.0105108628,-0.228511259],"action_prob":0.5551891923,"action_logp":-0.5884463787,"action_dist_inputs":[-0.1111615226,0.1104983091],"value_targets":72.0957946777} 
+{"eps_id":1467118106,"obs":[-0.2462031245,-0.0363629684,0.0105108628,-0.228511259],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2469303906,0.1586072147,0.005940638,-0.5178602338],"action_prob":0.1779411733,"action_logp":-1.7263022661,"action_dist_inputs":[0.7629209757,-0.7674379349],"value_targets":71.8139266968} +{"eps_id":1467118106,"obs":[-0.2469303906,0.1586072147,0.005940638,-0.5178602338],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2437582463,-0.036597874,-0.0044165668,-0.2233112305],"action_prob":0.9283927679,"action_logp":-0.0743003935,"action_dist_inputs":[1.2773957253,-1.2848635912],"value_targets":71.5292205811} +{"eps_id":1467118106,"obs":[-0.2437582463,-0.036597874,-0.0044165668,-0.2233112305],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2444901913,-0.2316564173,-0.0088827908,0.0679752678],"action_prob":0.8321153522,"action_logp":-0.1837842166,"action_dist_inputs":[0.7980692983,-0.8026243448],"value_targets":71.2416381836} +{"eps_id":1467118106,"obs":[-0.2444901913,-0.2316564173,-0.0088827908,0.0679752678],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.24912332,-0.036408253,-0.0075232857,-0.2274969369],"action_prob":0.5291743875,"action_logp":-0.6364372373,"action_dist_inputs":[-0.0587788001,0.058051575],"value_targets":70.9511489868} +{"eps_id":1467118106,"obs":[-0.24912332,-0.036408253,-0.0075232857,-0.2274969369],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.249851495,0.1588204056,-0.0120732244,-0.5225434303],"action_prob":0.1621762514,"action_logp":-1.8190715313,"action_dist_inputs":[0.8187340498,-0.823390007],"value_targets":70.6577301025} 
+{"eps_id":1467118106,"obs":[-0.249851495,0.1588204056,-0.0120732244,-0.5225434303],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.246675089,0.3541101813,-0.0225240942,-0.8190062046],"action_prob":0.0673505291,"action_logp":-2.6978445053,"action_dist_inputs":[1.3102426529,-1.3178759813],"value_targets":70.3613433838} +{"eps_id":1467118106,"obs":[-0.246675089,0.3541101813,-0.0225240942,-0.8190062046],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.23959288,0.1593036354,-0.0389042199,-0.5334920287],"action_prob":0.9541633129,"action_logp":-0.0469204187,"action_dist_inputs":[1.5130099058,-1.5227409601],"value_targets":70.061958313} +{"eps_id":1467118106,"obs":[-0.23959288,0.1593036354,-0.0389042199,-0.5334920287],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2364068031,-0.0352501944,-0.0495740585,-0.2533170879],"action_prob":0.9382324815,"action_logp":-0.0637575239,"action_dist_inputs":[1.3563894033,-1.3642308712],"value_targets":69.7595596313} +{"eps_id":1467118106,"obs":[-0.2364068031,-0.0352501944,-0.0495740585,-0.2533170879],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2371118069,-0.229630515,-0.0546404012,0.0233264025],"action_prob":0.8829230666,"action_logp":-0.1245172024,"action_dist_inputs":[1.007612586,-1.0127944946],"value_targets":69.4540939331} +{"eps_id":1467118106,"obs":[-0.2371118069,-0.229630515,-0.0546404012,0.0233264025],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2417044193,-0.4239280224,-0.0541738719,0.2982814312],"action_prob":0.6570298672,"action_logp":-0.4200257659,"action_dist_inputs":[0.3242908418,-0.3257954121],"value_targets":69.1455535889} 
+{"eps_id":1467118106,"obs":[-0.2417044193,-0.4239280224,-0.0541738719,0.2982814312],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2501829863,-0.2280773818,-0.0482082441,-0.0109828915],"action_prob":0.7389088869,"action_logp":-0.3025806546,"action_dist_inputs":[-0.5191273093,0.5211778879],"value_targets":68.8338928223} +{"eps_id":1467118106,"obs":[-0.2501829863,-0.2280773818,-0.0482082441,-0.0109828915],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2547445297,-0.4224759936,-0.0484279022,0.2661085129],"action_prob":0.7000826001,"action_logp":-0.3565569818,"action_dist_inputs":[0.4228583574,-0.4248328209],"value_targets":68.5190811157} +{"eps_id":1467118106,"obs":[-0.2547445297,-0.4224759936,-0.0484279022,0.2661085129],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2631940544,-0.616874516,-0.0431057326,0.5431321859],"action_prob":0.2974971831,"action_logp":-1.2123504877,"action_dist_inputs":[-0.4287730455,0.4304714799],"value_targets":68.2010955811} +{"eps_id":1467118106,"obs":[-0.2631940544,-0.616874516,-0.0431057326,0.5431321859],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2755315304,-0.4211741388,-0.0322430879,0.2371851355],"action_prob":0.8904532194,"action_logp":-0.1160247177,"action_dist_inputs":[-1.0453877449,1.0499911308],"value_targets":67.8798904419} +{"eps_id":1467118106,"obs":[-0.2755315304,-0.4211741388,-0.0322430879,0.2371851355],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2839550078,-0.2256067246,-0.0274993852,-0.0654913262],"action_prob":0.6870359778,"action_logp":-0.3753686547,"action_dist_inputs":[-0.3924701512,0.3938282728],"value_targets":67.5554504395} 
+{"eps_id":1467118106,"obs":[-0.2839550078,-0.2256067246,-0.0274993852,-0.0654913262],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2884671688,-0.4203238487,-0.0288092121,0.2183901072],"action_prob":0.7411346436,"action_logp":-0.2995729744,"action_dist_inputs":[0.5245807767,-0.5272935033],"value_targets":67.227722168} +{"eps_id":1467118106,"obs":[-0.2884671688,-0.4203238487,-0.0288092121,0.2183901072],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.2968736291,-0.2248021662,-0.0244414099,-0.0832394436],"action_prob":0.6637141705,"action_logp":-0.4099036455,"action_dist_inputs":[-0.3393955529,0.3404945135],"value_targets":66.8966903687} +{"eps_id":1467118106,"obs":[-0.2968736291,-0.2248021662,-0.0244414099,-0.0832394436],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3013696671,-0.4195653796,-0.0261061974,0.2016330659],"action_prob":0.7578110695,"action_logp":-0.2773211598,"action_dist_inputs":[0.5688636899,-0.571852088],"value_targets":66.5623168945} +{"eps_id":1467118106,"obs":[-0.3013696671,-0.4195653796,-0.0261061974,0.2016330659],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3097609878,-0.2240799814,-0.0220735371,-0.0991694629],"action_prob":0.6415080428,"action_logp":-0.4439335763,"action_dist_inputs":[-0.2905315757,0.2913837433],"value_targets":66.2245635986} +{"eps_id":1467118106,"obs":[-0.3097609878,-0.2240799814,-0.0220735371,-0.0991694629],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3142425716,-0.4188787341,-0.0240569264,0.1864683926],"action_prob":0.7719702125,"action_logp":-0.2588093281,"action_dist_inputs":[0.6081138253,-0.6113556623],"value_targets":65.883392334} 
+{"eps_id":1467118106,"obs":[-0.3142425716,-0.4188787341,-0.0240569264,0.1864683926],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3226201534,-0.2234209925,-0.0203275587,-0.1137054339],"action_prob":0.6201532483,"action_logp":-0.4777886868,"action_dist_inputs":[-0.244791612,0.2454069257],"value_targets":65.5387802124} +{"eps_id":1467118106,"obs":[-0.3226201534,-0.2234209925,-0.0203275587,-0.1137054339],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3270885646,-0.4182458818,-0.0226016659,0.1724956036],"action_prob":0.7843167186,"action_logp":-0.2429423332,"action_dist_inputs":[0.6437620521,-0.6472399235],"value_targets":65.1906890869} +{"eps_id":1467118106,"obs":[-0.3270885646,-0.4182458818,-0.0226016659,0.1724956036],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3354535103,-0.613037169,-0.0191517547,0.4579636157],"action_prob":0.4007020295,"action_logp":-0.9145371914,"action_dist_inputs":[-0.2010776252,0.2014632821],"value_targets":64.8390808105} +{"eps_id":1467118106,"obs":[-0.3354535103,-0.613037169,-0.0191517547,0.4579636157],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3477142453,-0.807883203,-0.0099924821,0.7445487976],"action_prob":0.1377645731,"action_logp":-1.9822090864,"action_dist_inputs":[-0.9151921272,0.9187901616],"value_targets":64.4839172363} +{"eps_id":1467118106,"obs":[-0.3477142453,-0.807883203,-0.0099924821,0.7445487976],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3638719022,-0.6126247644,0.0048984936,0.4487380385],"action_prob":0.929823041,"action_logp":-0.0727609619,"action_dist_inputs":[-1.2888237238,1.2951511145],"value_targets":64.1251678467} 
+{"eps_id":1467118106,"obs":[-0.3638719022,-0.6126247644,0.0048984936,0.4487380385],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3761244118,-0.4175724685,0.0138732549,0.1576032341],"action_prob":0.8714149594,"action_logp":-0.1376370192,"action_dist_inputs":[-0.9550269842,0.9585006237],"value_targets":63.7627983093} +{"eps_id":1467118106,"obs":[-0.3761244118,-0.4175724685,0.0138732549,0.1576032341],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.384475857,-0.6128902435,0.0170253199,0.4546303451],"action_prob":0.350413233,"action_logp":-1.0486421585,"action_dist_inputs":[-0.3085801303,0.3086431921],"value_targets":63.3967666626} +{"eps_id":1467118106,"obs":[-0.384475857,-0.6128902435,0.0170253199,0.4546303451],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.3967336416,-0.4180131555,0.0261179265,0.1673622131],"action_prob":0.8799145222,"action_logp":-0.1279304922,"action_dist_inputs":[-0.9940783978,0.9975428581],"value_targets":63.0270347595} +{"eps_id":1467118106,"obs":[-0.3967336416,-0.4180131555,0.0261179265,0.1673622131],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4050939083,-0.6134990454,0.0294651706,0.4681688249],"action_prob":0.3118811846,"action_logp":-1.1651329994,"action_dist_inputs":[-0.3956386447,0.3957006037],"value_targets":62.6535720825} +{"eps_id":1467118106,"obs":[-0.4050939083,-0.6134990454,0.0294651706,0.4681688249],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4173639119,-0.41880548,0.038828548,0.1849167645],"action_prob":0.8899967074,"action_logp":-0.1165375039,"action_dist_inputs":[-1.0435843468,1.0471235514],"value_targets":62.2763366699} 
+{"eps_id":1467118106,"obs":[-0.4173639119,-0.41880548,0.038828548,0.1849167645],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4257400036,-0.2242599726,0.0425268821,-0.0952688009],"action_prob":0.7328102589,"action_logp":-0.3108684719,"action_dist_inputs":[-0.5043970346,0.5045307875],"value_targets":61.8952865601} +{"eps_id":1467118106,"obs":[-0.4257400036,-0.2242599726,0.0425268821,-0.0952688009],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4302251935,-0.0297725257,0.0406215079,-0.3742370605],"action_prob":0.3561106622,"action_logp":-1.0325137377,"action_dist_inputs":[0.294439137,-0.2978461981],"value_targets":61.5103912354} +{"eps_id":1467118106,"obs":[-0.4302251935,-0.0297725257,0.0406215079,-0.3742370605],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4308206439,-0.2254472524,0.0331367664,-0.0690276772],"action_prob":0.8795821071,"action_logp":-0.1283083558,"action_dist_inputs":[0.9909287095,-0.9975505471],"value_targets":61.1216087341} +{"eps_id":1467118106,"obs":[-0.4308206439,-0.2254472524,0.0331367664,-0.0690276772],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4353296161,-0.0308156554,0.0317562111,-0.3510743678],"action_prob":0.3846512735,"action_logp":-0.9554181099,"action_dist_inputs":[0.2333170027,-0.2365349829],"value_targets":60.7288970947} +{"eps_id":1467118106,"obs":[-0.4353296161,-0.0308156554,0.0317562111,-0.3510743678],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4359459281,-0.226374492,0.0247347243,-0.0485492572],"action_prob":0.8725966811,"action_logp":-0.1362818033,"action_dist_inputs":[0.9588139057,-0.9653016925],"value_targets":60.3322181702} 
+{"eps_id":1467118106,"obs":[-0.4359459281,-0.226374492,0.0247347243,-0.0485492572],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4404734075,-0.0316157863,0.0237637386,-0.3333266973],"action_prob":0.4057878256,"action_logp":-0.9019248486,"action_dist_inputs":[0.189160198,-0.1922457516],"value_targets":59.9315338135} +{"eps_id":1467118106,"obs":[-0.4404734075,-0.0316157863,0.0237637386,-0.3333266973],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4411057234,0.1631600112,0.0170972049,-0.6184220314],"action_prob":0.1326088607,"action_logp":-2.0203514099,"action_dist_inputs":[0.9358462691,-0.9422399402],"value_targets":59.526802063} +{"eps_id":1467118106,"obs":[-0.4411057234,0.1631600112,0.0170972049,-0.6184220314],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4378425181,-0.0321965292,0.0047287652,-0.3204036951],"action_prob":0.9386862516,"action_logp":-0.0632740036,"action_dist_inputs":[1.3598510027,-1.3686254025],"value_targets":59.117980957} +{"eps_id":1467118106,"obs":[-0.4378425181,-0.0321965292,0.0047287652,-0.3204036951],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4384864569,-0.227385506,-0.0016793087,-0.0262332521],"action_prob":0.8725751638,"action_logp":-0.1363064498,"action_dist_inputs":[0.9587754011,-0.9651469588],"value_targets":58.7050323486} +{"eps_id":1467118106,"obs":[-0.4384864569,-0.227385506,-0.0016793087,-0.0262332521],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4430341721,-0.422483325,-0.0022039737,0.2659193575],"action_prob":0.6054518223,"action_logp":-0.5017802715,"action_dist_inputs":[0.2126130462,-0.2156206816],"value_targets":58.2879104614} 
+{"eps_id":1467118106,"obs":[-0.4430341721,-0.422483325,-0.0022039737,0.2659193575],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4514838159,-0.2273299992,0.0031144135,-0.0274579022],"action_prob":0.7735699415,"action_logp":-0.2567391694,"action_dist_inputs":[-0.6140520573,0.6145281792],"value_targets":57.8665771484} +{"eps_id":1467118106,"obs":[-0.4514838159,-0.2273299992,0.0031144135,-0.0274579022],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4560304284,-0.4224964678,0.0025652554,0.2662060261],"action_prob":0.5957785845,"action_logp":-0.5178861618,"action_dist_inputs":[0.1924166083,-0.1954896897],"value_targets":57.4409866333} +{"eps_id":1467118106,"obs":[-0.4560304284,-0.4224964678,0.0025652554,0.2662060261],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4644803703,-0.2274112254,0.007889376,-0.0256666932],"action_prob":0.7789815068,"action_logp":-0.249767974,"action_dist_inputs":[-0.6296668649,0.6300740838],"value_targets":57.0110969543} +{"eps_id":1467118106,"obs":[-0.4644803703,-0.2274112254,0.007889376,-0.0256666932],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4690285921,-0.032403294,0.0073760422,-0.3158500195],"action_prob":0.4192624092,"action_logp":-0.8692582846,"action_dist_inputs":[0.161345914,-0.1644562632],"value_targets":56.5768661499} +{"eps_id":1467118106,"obs":[-0.4690285921,-0.032403294,0.0073760422,-0.3158500195],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4696766436,-0.2276295274,0.0010590416,-0.0208501052],"action_prob":0.865175426,"action_logp":-0.1448229551,"action_dist_inputs":[0.9262674451,-0.9326902628],"value_targets":56.1382484436} 
+{"eps_id":1467118106,"obs":[-0.4696766436,-0.2276295274,0.0010590416,-0.0208501052],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4742292464,-0.4227666557,0.0006420395,0.2721667588],"action_prob":0.5845128298,"action_logp":-0.5369765759,"action_dist_inputs":[0.1691011041,-0.1722258031],"value_targets":55.6952018738} +{"eps_id":1467118106,"obs":[-0.4742292464,-0.4227666557,0.0006420395,0.2721667588],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4826845825,-0.2276538759,0.0060853749,-0.0203135815],"action_prob":0.7822743058,"action_logp":-0.2455498576,"action_dist_inputs":[-0.6393256783,0.6396436095],"value_targets":55.2476768494} +{"eps_id":1467118106,"obs":[-0.4826845825,-0.2276538759,0.0060853749,-0.0203135815],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4872376621,-0.4228625596,0.0056791035,0.2742831111],"action_prob":0.571300745,"action_logp":-0.5598395467,"action_dist_inputs":[0.1419931352,-0.1451669484],"value_targets":54.7956352234} +{"eps_id":1467118106,"obs":[-0.4872376621,-0.4228625596,0.0056791035,0.2742831111],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.4956949055,-0.2278220952,0.0111647658,-0.0166032072],"action_prob":0.7890433073,"action_logp":-0.2369340956,"action_dist_inputs":[-0.6594494581,0.6597189903],"value_targets":54.3390235901} +{"eps_id":1467118106,"obs":[-0.4956949055,-0.2278220952,0.0111647658,-0.0166032072],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5002513528,-0.4231023788,0.0108327018,0.2795813084],"action_prob":0.5523470044,"action_logp":-0.5935788155,"action_dist_inputs":[0.1034815237,-0.1066764891],"value_targets":53.8778038025} 
+{"eps_id":1467118106,"obs":[-0.5002513528,-0.4231023788,0.0108327018,0.2795813084],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5087133646,-0.228136614,0.0164243281,-0.0096654091],"action_prob":0.7981448174,"action_logp":-0.2254652381,"action_dist_inputs":[-0.6872430444,0.6874964833],"value_targets":53.4119224548} +{"eps_id":1467118106,"obs":[-0.5087133646,-0.228136614,0.0164243281,-0.0096654091],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5132761002,-0.0332540125,0.016231019,-0.297121346],"action_prob":0.4726169705,"action_logp":-0.7494699955,"action_dist_inputs":[0.0532264374,-0.05641542],"value_targets":52.9413375854} +{"eps_id":1467118106,"obs":[-0.5132761002,-0.0332540125,0.016231019,-0.297121346],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5139412284,-0.2286035419,0.0102885924,0.000636053],"action_prob":0.840781033,"action_logp":-0.1734239906,"action_dist_inputs":[0.8288396597,-0.8352114558],"value_targets":52.4659957886} +{"eps_id":1467118106,"obs":[-0.5139412284,-0.2286035419,0.0102885924,0.000636053],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5185132623,-0.4238715172,0.0103013143,0.2965473235],"action_prob":0.5200938582,"action_logp":-0.6537460089,"action_dist_inputs":[0.0386334732,-0.0417852215],"value_targets":51.9858551025} +{"eps_id":1467118106,"obs":[-0.5185132623,-0.4238715172,0.0103013143,0.2965473235],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5269907117,-0.2288979292,0.0162322596,0.007130973],"action_prob":0.8102584481,"action_logp":-0.2104020268,"action_dist_inputs":[-0.7257044911,0.7259859443],"value_targets":51.5008621216} 
+{"eps_id":1467118106,"obs":[-0.5269907117,-0.2288979292,0.0162322596,0.007130973],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5315686464,-0.0340124816,0.0163748804,-0.2803865969],"action_prob":0.5048986673,"action_logp":-0.6833974719,"action_dist_inputs":[-0.0113733616,0.008221956],"value_targets":51.0109710693} +{"eps_id":1467118106,"obs":[-0.5315686464,-0.0340124816,0.0163748804,-0.2803865969],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5322489142,0.1608721018,0.0107671479,-0.5678603053],"action_prob":0.1750355959,"action_logp":-1.7427659035,"action_dist_inputs":[0.772025764,-0.7783250809],"value_targets":50.5161323547} +{"eps_id":1467118106,"obs":[-0.5322489142,0.1608721018,0.0107671479,-0.5678603053],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5290314555,-0.0343992077,-0.0005900582,-0.2718048096],"action_prob":0.9295168519,"action_logp":-0.0730903372,"action_dist_inputs":[1.2853119373,-1.2939801216],"value_targets":50.0162963867} +{"eps_id":1467118106,"obs":[-0.5290314555,-0.0343992077,-0.0005900582,-0.2718048096],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5297194719,-0.2295127362,-0.0060261544,0.0206919406],"action_prob":0.8337211013,"action_logp":-0.1818563789,"action_dist_inputs":[0.8029716015,-0.8092608452],"value_targets":49.5114097595} +{"eps_id":1467118106,"obs":[-0.5297194719,-0.2295127362,-0.0060261544,0.0206919406],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5343096852,-0.0343048833,-0.0056123156,-0.2738862038],"action_prob":0.4862885773,"action_logp":-0.7209530473,"action_dist_inputs":[0.025866041,-0.0289934091],"value_targets":49.0014266968} 
+{"eps_id":1467118106,"obs":[-0.5343096852,-0.0343048833,-0.0056123156,-0.2738862038],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5349957943,-0.2293463051,-0.0110900402,0.0170213189],"action_prob":0.8388448358,"action_logp":-0.1757294983,"action_dist_inputs":[0.8216573596,-0.8280006051],"value_targets":48.486289978} +{"eps_id":1467118106,"obs":[-0.5349957943,-0.2293463051,-0.0110900402,0.0170213189],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5395827293,-0.4243074656,-0.0107496139,0.3061846793],"action_prob":0.5292238593,"action_logp":-0.6363437772,"action_dist_inputs":[0.0569113716,-0.0601173528],"value_targets":47.9659461975} +{"eps_id":1467118106,"obs":[-0.5395827293,-0.4243074656,-0.0107496139,0.3061846793],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.548068881,-0.2290340066,-0.0046259197,0.0101310713],"action_prob":0.7984570861,"action_logp":-0.225074023,"action_dist_inputs":[-0.6882882714,0.688390553],"value_targets":47.4403495789} +{"eps_id":1467118106,"obs":[-0.548068881,-0.2290340066,-0.0046259197,0.0101310713],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5526495576,-0.4240893126,-0.0044232984,0.3013508916],"action_prob":0.5269419551,"action_logp":-0.6406648755,"action_dist_inputs":[0.0522773899,-0.0555948578],"value_targets":46.9094467163} +{"eps_id":1467118106,"obs":[-0.5526495576,-0.4240893126,-0.0044232984,0.3013508916],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5611313581,-0.22890459,0.0016037192,0.0072762272],"action_prob":0.7996545434,"action_logp":-0.2235754728,"action_dist_inputs":[-0.6920679808,0.6920686364],"value_targets":46.3731765747} 
+{"eps_id":1467118106,"obs":[-0.5611313581,-0.22890459,0.0016037192,0.0072762272],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5657094121,-0.4240494967,0.0017492437,0.3004647195],"action_prob":0.5181236267,"action_logp":-0.6575414538,"action_dist_inputs":[0.0345655978,-0.0379605666],"value_targets":45.8314933777} +{"eps_id":1467118106,"obs":[-0.5657094121,-0.4240494967,0.0017492437,0.3004647195],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5741904378,-0.228952527,0.0077585382,0.0083339773],"action_prob":0.8036462069,"action_logp":-0.2185961157,"action_dist_inputs":[-0.7046526074,0.7045887113],"value_targets":45.2843360901} +{"eps_id":1467118106,"obs":[-0.5741904378,-0.228952527,0.0077585382,0.0083339773],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.578769505,-0.0339427032,0.007925218,-0.2818909883],"action_prob":0.4971297979,"action_logp":-0.6989040971,"action_dist_inputs":[0.0040202863,-0.0074605951],"value_targets":44.7316513062} +{"eps_id":1467118106,"obs":[-0.578769505,-0.0339427032,0.007925218,-0.2818909883],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5794483423,-0.2291767895,0.0022873981,0.0132809598],"action_prob":0.8266498446,"action_logp":-0.1903740615,"action_dist_inputs":[0.7777844667,-0.7842835188],"value_targets":44.1733856201} +{"eps_id":1467118106,"obs":[-0.5794483423,-0.2291767895,0.0022873981,0.0132809598],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5840318799,-0.034087725,0.0025530173,-0.2786794007],"action_prob":0.4960717857,"action_logp":-0.7010346055,"action_dist_inputs":[0.0061339331,-0.0095792329],"value_targets":43.6094818115} 
+{"eps_id":1467118106,"obs":[-0.5840318799,-0.034087725,0.0025530173,-0.2786794007],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5847136378,-0.2292460054,-0.0030205704,0.014807662],"action_prob":0.8280758262,"action_logp":-0.1886505187,"action_dist_inputs":[0.7827708125,-0.7892807722],"value_targets":43.0398788452} +{"eps_id":1467118106,"obs":[-0.5847136378,-0.2292460054,-0.0030205704,0.014807662],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5892985463,-0.4243245125,-0.0027244173,0.3065360487],"action_prob":0.5105293393,"action_logp":-0.6723071337,"action_dist_inputs":[0.0193232354,-0.0228004418],"value_targets":42.4645233154} +{"eps_id":1467118106,"obs":[-0.5892985463,-0.4243245125,-0.0027244173,0.3065360487],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.5977850556,-0.2291638404,0.0034063035,0.0129951416],"action_prob":0.8031795621,"action_logp":-0.2191769928,"action_dist_inputs":[-0.7032399178,0.7030462027],"value_targets":41.8833580017} +{"eps_id":1467118106,"obs":[-0.5977850556,-0.2291638404,0.0034063035,0.0129951416],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6023682952,-0.0340909027,0.0036662063,-0.2786111236],"action_prob":0.5000332594,"action_logp":-0.6930806637,"action_dist_inputs":[-0.0018390636,-0.0017059678],"value_targets":41.2963218689} +{"eps_id":1467118106,"obs":[-0.6023682952,-0.0340909027,0.0036662063,-0.2786111236],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6030501127,-0.2292649597,-0.0019060159,0.0152258724],"action_prob":0.8241453767,"action_logp":-0.19340837,"action_dist_inputs":[0.7690628767,-0.7756265998],"value_targets":40.7033538818} 
+{"eps_id":1467118106,"obs":[-0.6030501127,-0.2292649597,-0.0019060159,0.0152258724],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6076354384,-0.034115728,-0.0016014985,-0.2780578136],"action_prob":0.4946813583,"action_logp":-0.7038414478,"action_dist_inputs":[0.0088525452,-0.0124227768],"value_targets":40.1044006348} +{"eps_id":1467118106,"obs":[-0.6076354384,-0.034115728,-0.0016014985,-0.2780578136],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6083177328,-0.2292148024,-0.0071626548,0.0141195711],"action_prob":0.8275826573,"action_logp":-0.189246282,"action_dist_inputs":[0.7809993625,-0.787591815],"value_targets":39.4993934631} +{"eps_id":1467118106,"obs":[-0.6083177328,-0.2292148024,-0.0071626548,0.0141195711],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6129020452,-0.4242332876,-0.0068802633,0.304534018],"action_prob":0.5161679387,"action_logp":-0.6613231301,"action_dist_inputs":[0.0305365156,-0.0341577642],"value_targets":38.8882751465} +{"eps_id":1467118106,"obs":[-0.6129020452,-0.4242332876,-0.0068802633,0.304534018],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6213867068,-0.2290139794,-0.0007895831,0.009689182],"action_prob":0.7969421148,"action_logp":-0.2269732207,"action_dist_inputs":[-0.6838430166,0.6834480762],"value_targets":38.2709846497} +{"eps_id":1467118106,"obs":[-0.6213867068,-0.2290139794,-0.0007895831,0.009689182],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6259669662,-0.4241245985,-0.0005957995,0.3021228909],"action_prob":0.5101904273,"action_logp":-0.672971189,"action_dist_inputs":[0.0185294561,-0.0222379863],"value_targets":37.6474609375} 
+{"eps_id":1467118106,"obs":[-0.6259669662,-0.4241245985,-0.0005957995,0.3021228909],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.634449482,-0.2289941609,0.005446658,0.0092521114],"action_prob":0.7996774912,"action_logp":-0.2235467732,"action_dist_inputs":[-0.6923757792,0.6919042468],"value_targets":37.0176353455} +{"eps_id":1467118106,"obs":[-0.634449482,-0.2289941609,0.005446658,0.0092521114],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6390293837,-0.0339507386,0.0056317006,-0.2817073762],"action_prob":0.5020111203,"action_logp":-0.6891329885,"action_dist_inputs":[-0.0059045139,0.002140075],"value_targets":36.3814506531} +{"eps_id":1467118106,"obs":[-0.6390293837,-0.0339507386,0.0056317006,-0.2817073762],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6397083998,-0.2291525602,-0.0000024469,0.012746444],"action_prob":0.8189600706,"action_logp":-0.1997199506,"action_dist_inputs":[0.7513121963,-0.7580054402],"value_targets":35.7388381958} +{"eps_id":1467118106,"obs":[-0.6397083998,-0.2291525602,-0.0000024469,0.012746444],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6442914009,-0.034030579,0.000252482,-0.2799372673],"action_prob":0.498608768,"action_logp":-0.6959335208,"action_dist_inputs":[0.0008934992,-0.0046715438],"value_targets":35.0897369385} +{"eps_id":1467118106,"obs":[-0.6442914009,-0.034030579,0.000252482,-0.2799372673],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6449720263,-0.2291561365,-0.0053462633,0.0128252935],"action_prob":0.8215610981,"action_logp":-0.1965489686,"action_dist_inputs":[0.7601242661,-0.7668358684],"value_targets":34.4340782166} 
+{"eps_id":1467118106,"obs":[-0.6449720263,-0.2291561365,-0.0053462633,0.0128252935],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6495551467,-0.0339579172,-0.005089757,-0.2815396488],"action_prob":0.4898024201,"action_logp":-0.7137531638,"action_dist_inputs":[0.0184894167,-0.0223066248],"value_targets":33.7717971802} +{"eps_id":1467118106,"obs":[-0.6495551467,-0.0339579172,-0.005089757,-0.2815396488],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6502343416,0.1612362564,-0.0107205501,-0.5758234859],"action_prob":0.1732783318,"action_logp":-1.7528561354,"action_dist_inputs":[0.7779084444,-0.7846604586],"value_targets":33.1028251648} +{"eps_id":1467118106,"obs":[-0.6502343416,0.1612362564,-0.0107205501,-0.5758234859],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6470096111,-0.0337337814,-0.0222370196,-0.2865370214],"action_prob":0.9303327203,"action_logp":-0.0722130239,"action_dist_inputs":[1.2914460897,-1.3003644943],"value_targets":32.4270935059} +{"eps_id":1467118106,"obs":[-0.6470096111,-0.0337337814,-0.0222370196,-0.2865370214],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6476842761,-0.2285316586,-0.0279677603,-0.0009495874],"action_prob":0.8448024392,"action_logp":-0.1686524451,"action_dist_inputs":[0.8437903523,-0.8506139517],"value_targets":31.7445411682} +{"eps_id":1467118106,"obs":[-0.6476842761,-0.2285316586,-0.0279677603,-0.0009495874],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6522548795,-0.0330200046,-0.0279867519,-0.3023238182],"action_prob":0.4270762503,"action_logp":-0.850792706,"action_dist_inputs":[0.1448996663,-0.1488903016],"value_targets":31.0550918579} 
+{"eps_id":1467118106,"obs":[-0.6522548795,-0.0330200046,-0.0279867519,-0.3023238182],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6529152989,0.1624894142,-0.0340332277,-0.603700161],"action_prob":0.1416271329,"action_logp":-1.954557538,"action_dist_inputs":[0.8974403739,-0.9044004679],"value_targets":30.3586788177} +{"eps_id":1467118106,"obs":[-0.6529152989,0.1624894142,-0.0340332277,-0.603700161],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6496655345,-0.0321404412,-0.0461072326,-0.3219282627],"action_prob":0.9393453002,"action_logp":-0.062572144,"action_dist_inputs":[1.3654280901,-1.3745580912],"value_targets":29.6552295685} +{"eps_id":1467118106,"obs":[-0.6496655345,-0.0321404412,-0.0461072326,-0.3219282627],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.650308311,-0.226576522,-0.0525457971,-0.044134818],"action_prob":0.8803443313,"action_logp":-0.127442196,"action_dist_inputs":[0.9942777753,-1.0014169216],"value_targets":28.9446773529} +{"eps_id":1467118106,"obs":[-0.650308311,-0.226576522,-0.0525457971,-0.044134818],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6548398733,-0.4209071398,-0.0534284934,0.231517449],"action_prob":0.6800351739,"action_logp":-0.3856107295,"action_dist_inputs":[0.3747622371,-0.3791711926],"value_targets":28.2269458771} +{"eps_id":1467118106,"obs":[-0.6548398733,-0.4209071398,-0.0534284934,0.231517449],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6632580161,-0.6152265668,-0.0487981439,0.5068801045],"action_prob":0.332367748,"action_logp":-1.1015132666,"action_dist_inputs":[-0.3494730294,0.3480224609],"value_targets":27.5019664764} 
+{"eps_id":1467118106,"obs":[-0.6632580161,-0.6152265668,-0.0487981439,0.5068801045],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.675562501,-0.4194521904,-0.0386605412,0.1992271841],"action_prob":0.8565257192,"action_logp":-0.1548709571,"action_dist_inputs":[-0.8925566077,0.8941714764],"value_targets":26.7696628571} +{"eps_id":1467118106,"obs":[-0.675562501,-0.4194521904,-0.0386605412,0.1992271841],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6839515567,-0.2237992287,-0.0346759968,-0.1053963304],"action_prob":0.6470957994,"action_logp":-0.4352609515,"action_dist_inputs":[-0.3040421307,0.3022555411],"value_targets":26.0299625397} +{"eps_id":1467118106,"obs":[-0.6839515567,-0.2237992287,-0.0346759968,-0.1053963304],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.6884275675,-0.4184075296,-0.0367839262,0.1761479378],"action_prob":0.7279148698,"action_logp":-0.317571193,"action_dist_inputs":[0.4895398617,-0.4945292473],"value_targets":25.2827911377} +{"eps_id":1467118106,"obs":[-0.6884275675,-0.4184075296,-0.0367839262,0.1761479378],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.696795702,-0.2227789909,-0.0332609676,-0.1279084086],"action_prob":0.6183363795,"action_logp":-0.4807226658,"action_dist_inputs":[-0.242264092,0.2402288914],"value_targets":24.5280704498} +{"eps_id":1467118106,"obs":[-0.696795702,-0.2227789909,-0.0332609676,-0.1279084086],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7012512684,-0.4174090624,-0.0358191356,0.154098317],"action_prob":0.7498264313,"action_logp":-0.2879135609,"action_dist_inputs":[0.5462381244,-0.5514487624],"value_targets":23.7657279968} 
+{"eps_id":1467118106,"obs":[-0.7012512684,-0.4174090624,-0.0358191356,0.154098317],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7095994949,-0.6120003462,-0.0327371657,0.4352694154],"action_prob":0.4114983678,"action_logp":-0.8879502416,"action_dist_inputs":[-0.180025205,0.1777494252],"value_targets":22.9956855774} +{"eps_id":1467118106,"obs":[-0.7095994949,-0.6120003462,-0.0327371657,0.4352694154],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7218394876,-0.4164305925,-0.0240317788,0.1324489862],"action_prob":0.8279093504,"action_logp":-0.18885158,"action_dist_inputs":[-0.7850599885,0.7858226895],"value_targets":22.2178649902} +{"eps_id":1467118106,"obs":[-0.7218394876,-0.4164305925,-0.0240317788,0.1324489862],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7301681042,-0.2209727913,-0.0213827994,-0.167717725],"action_prob":0.577477634,"action_logp":-0.5490855575,"action_dist_inputs":[-0.1574792713,0.1549480408],"value_targets":21.4321861267} +{"eps_id":1467118106,"obs":[-0.7301681042,-0.2209727913,-0.0213827994,-0.167717725],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7345875502,-0.415782243,-0.0247371551,0.1181435883],"action_prob":0.7715220451,"action_logp":-0.2593900263,"action_dist_inputs":[0.6056625843,-0.6112627387],"value_targets":20.6385707855} +{"eps_id":1467118106,"obs":[-0.7345875502,-0.415782243,-0.0247371551,0.1181435883],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.742903173,-0.6105412245,-0.0223742817,0.4029206634],"action_prob":0.4440043569,"action_logp":-0.8119208813,"action_dist_inputs":[-0.1138183475,0.1111076102],"value_targets":19.8369407654} 
+{"eps_id":1467118106,"obs":[-0.742903173,-0.6105412245,-0.0223742817,0.4029206634],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7551140189,-0.8053388,-0.0143158697,0.6884664297],"action_prob":0.1857836246,"action_logp":-1.6831725836,"action_dist_inputs":[-0.7386632562,0.7389801145],"value_targets":19.0272140503} +{"eps_id":1467118106,"obs":[-0.7551140189,-0.8053388,-0.0143158697,0.6884664297],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7712208033,-0.6100211143,-0.0005465414,0.3913111687],"action_prob":0.9025995135,"action_logp":-0.1024763584,"action_dist_inputs":[-1.1114445925,1.115003109],"value_targets":18.2093067169} +{"eps_id":1467118106,"obs":[-0.7712208033,-0.6100211143,-0.0005465414,0.3913111687],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.7834212184,-0.8051353097,0.007279682,0.6838217378],"action_prob":0.1790945828,"action_logp":-1.7198412418,"action_dist_inputs":[-0.7611790895,0.7613148093],"value_targets":17.3831367493} +{"eps_id":1467118106,"obs":[-0.7834212184,-0.8051353097,0.007279682,0.6838217378],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.79952389,-0.6101151705,0.0209561177,0.3934395313],"action_prob":0.9049790502,"action_logp":-0.0998435169,"action_dist_inputs":[-1.1251432896,1.1286706924],"value_targets":16.5486240387} +{"eps_id":1467118106,"obs":[-0.79952389,-0.6101151705,0.0209561177,0.3934395313],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8117262125,-0.8055281639,0.0288249068,0.6926552653],"action_prob":0.1662937999,"action_logp":-1.7939991951,"action_dist_inputs":[-0.8060209751,0.8061039448],"value_targets":15.7056808472} 
+{"eps_id":1467118106,"obs":[-0.8117262125,-0.8055281639,0.0288249068,0.6926552653],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8278367519,-0.6108176708,0.0426780134,0.4091842473],"action_prob":0.9089660048,"action_logp":-0.0954476073,"action_dist_inputs":[-1.1487182379,1.1523556709],"value_targets":14.8542232513} +{"eps_id":1467118106,"obs":[-0.8278367519,-0.6108176708,0.0426780134,0.4091842473],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8400531411,-0.4163259864,0.0508616976,0.1302558929],"action_prob":0.8501812816,"action_logp":-0.1623056531,"action_dist_inputs":[-0.8679217696,0.8681018949],"value_targets":13.9941644669} +{"eps_id":1467118106,"obs":[-0.8400531411,-0.4163259864,0.0508616976,0.1302558929],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8483796716,-0.2219681293,0.0534668155,-0.1459569931],"action_prob":0.6866050959,"action_logp":-0.3759959936,"action_dist_inputs":[-0.3936639726,0.3906311989],"value_targets":13.125418663} +{"eps_id":1467118106,"obs":[-0.8483796716,-0.2219681293,0.0534668155,-0.1459569931],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8528190255,-0.0276510119,0.0505476743,-0.4213044047],"action_prob":0.3778954744,"action_logp":-0.9731376171,"action_dist_inputs":[0.2463898063,-0.2521006167],"value_targets":12.2478981018} +{"eps_id":1467118106,"obs":[-0.8528190255,-0.0276510119,0.0505476743,-0.4213044047],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8533720374,-0.2234513313,0.0421215855,-0.1131243706],"action_prob":0.8473789692,"action_logp":-0.1656072289,"action_dist_inputs":[0.8531172276,-0.8610727191],"value_targets":11.3615131378} 
+{"eps_id":1467118106,"obs":[-0.8533720374,-0.2234513313,0.0421215855,-0.1131243706],"actions":0,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8578410745,-0.4191507399,0.0398591012,0.1925444454],"action_prob":0.5933074951,"action_logp":-0.5220425129,"action_dist_inputs":[0.1860627681,-0.1915926337],"value_targets":10.4661741257} +{"eps_id":1467118106,"obs":[-0.8578410745,-0.4191507399,0.0398591012,0.1925444454],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8662240505,-0.224620983,0.0437099896,-0.087302953],"action_prob":0.7356031537,"action_logp":-0.3070645034,"action_dist_inputs":[-0.5129570365,0.5102826953],"value_targets":9.5617923737} +{"eps_id":1467118106,"obs":[-0.8662240505,-0.224620983,0.0437099896,-0.087302953],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8707165122,-0.4203413427,0.0419639312,0.2188438028],"action_prob":0.552913785,"action_logp":-0.5925531983,"action_dist_inputs":[0.1035227254,-0.1089279652],"value_targets":8.6482753754} +{"eps_id":1467118106,"obs":[-0.8707165122,-0.4203413427,0.0419639312,0.2188438028],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8791233301,-0.2258435786,0.0463408045,-0.0603121929],"action_prob":0.7587116957,"action_logp":-0.2761334181,"action_dist_inputs":[-0.5740776062,0.5715517998],"value_targets":7.7255306244} +{"eps_id":1467118106,"obs":[-0.8791233301,-0.2258435786,0.0463408045,-0.0603121929],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8836401701,-0.0314156413,0.045134563,-0.3380216956],"action_prob":0.4904168546,"action_logp":-0.7124994993,"action_dist_inputs":[0.0165312383,-0.0218058713],"value_targets":6.7934651375} 
+{"eps_id":1467118106,"obs":[-0.8836401701,-0.0314156413,0.045134563,-0.3380216956],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8842685223,-0.2271498293,0.0383741297,-0.0314543694],"action_prob":0.7874501944,"action_logp":-0.2389551252,"action_dist_inputs":[0.6509921551,-0.6586318016],"value_targets":5.8519849777} +{"eps_id":1467118106,"obs":[-0.8842685223,-0.2271498293,0.0383741297,-0.0314543694],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8888115287,-0.0325985886,0.0377450399,-0.3117871583],"action_prob":0.5187550783,"action_logp":-0.6563233733,"action_dist_inputs":[-0.0400859788,0.0349696018],"value_targets":4.9009947777} +{"eps_id":1467118106,"obs":[-0.8888115287,-0.0325985886,0.0377450399,-0.3117871583],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8894634843,-0.2282373756,0.0315092988,-0.0074435757],"action_prob":0.7708887458,"action_logp":-0.2602111995,"action_dist_inputs":[0.6029022932,-0.6104338765],"value_targets":3.9403989315} +{"eps_id":1467118106,"obs":[-0.8894634843,-0.2282373756,0.0315092988,-0.0074435757],"actions":1,"rewards":1.0,"prev_actions":0,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8940282464,-0.0335811563,0.0313604251,-0.2900207937],"action_prob":0.5415246487,"action_logp":-0.6133666635,"action_dist_inputs":[-0.0857351124,0.080746986],"value_targets":2.970099926} +{"eps_id":1467118106,"obs":[-0.8940282464,-0.0335811563,0.0313604251,-0.2900207937],"actions":1,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":false,"new_obs":[-0.8946998715,0.1610799134,0.0255600102,-0.572650373],"action_prob":0.2436147183,"action_logp":-1.4121673107,"action_dist_inputs":[0.5627604127,-0.5702024698],"value_targets":1.9900000095} 
+{"eps_id":1467118106,"obs":[-0.8946998715,0.1610799134,0.0255600102,-0.572650373],"actions":0,"rewards":1.0,"prev_actions":1,"prev_rewards":1.0,"dones":true,"new_obs":[-0.8914782405,-0.0343909338,0.0141070038,-0.2720260322],"action_prob":0.8994359374,"action_logp":-0.1059874669,"action_dist_inputs":[1.0909270048,-1.1000454426],"value_targets":1.0} diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000000_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000000_000000.parquet new file mode 100644 index 0000000000000..15b91fc49262d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000000_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000001_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000001_000000.parquet new file mode 100644 index 0000000000000..7e5707875fc27 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000001_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000002_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000002_000000.parquet new file mode 100644 index 0000000000000..b80c2a454db17 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000002_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000003_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000003_000000.parquet new file mode 100644 index 0000000000000..c190b8130a365 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000003_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000004_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000004_000000.parquet new file mode 100644 index 0000000000000..b58f759e18b63 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000004_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000005_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000005_000000.parquet new file mode 100644 index 0000000000000..a098068a87a54 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000005_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000006_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000006_000000.parquet new file mode 100644 index 0000000000000..4d6df469c7932 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000006_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000007_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000007_000000.parquet new file mode 100644 index 0000000000000..658a24b409c72 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000007_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000008_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000008_000000.parquet new file mode 100644 index 0000000000000..9d23b33cff0ab Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000008_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000009_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000009_000000.parquet new file mode 100644 index 0000000000000..150d9133170ca Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000009_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000010_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000010_000000.parquet new file mode 100644 index 0000000000000..d60fdce6d283d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000010_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000011_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000011_000000.parquet new file mode 100644 index 0000000000000..e2f050c328dbc Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000011_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000012_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000012_000000.parquet new file mode 100644 index 0000000000000..aa927ab033aa1 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000012_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000013_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000013_000000.parquet new file mode 100644 index 0000000000000..da0df74fb7e56 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000013_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000014_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000014_000000.parquet new file mode 100644 index 0000000000000..58944cc89412a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000014_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000015_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000015_000000.parquet new file mode 100644 index 0000000000000..09da85e0b9daa Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000015_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000016_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000016_000000.parquet new file mode 100644 index 0000000000000..17bc70de993f3 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000016_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000017_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000017_000000.parquet new file mode 100644 index 0000000000000..c2d95d378d028 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000017_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000018_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000018_000000.parquet new file mode 100644 index 0000000000000..183ccafeefe8b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000018_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000019_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000019_000000.parquet new file mode 100644 index 0000000000000..8c2738ef33b1b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000019_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000020_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000020_000000.parquet new file mode 100644 index 0000000000000..c89cdaddc707c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000020_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000021_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000021_000000.parquet new file mode 100644 index 0000000000000..26f3ec49319bb Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000021_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000022_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000022_000000.parquet new file mode 100644 index 0000000000000..ac0d1862c02ad Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000022_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000023_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000023_000000.parquet new file mode 100644 index 0000000000000..f9c72940cd36f Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000023_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000024_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000024_000000.parquet new file mode 100644 index 0000000000000..11811144e824b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000024_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000025_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000025_000000.parquet new file mode 100644 index 0000000000000..840d94eb31f48 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000025_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000026_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000026_000000.parquet new file mode 100644 index 0000000000000..11065627c115d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000026_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000027_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000027_000000.parquet new file mode 100644 index 0000000000000..8cdfc4d19dd12 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000027_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000028_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000028_000000.parquet new file mode 100644 index 0000000000000..e38ddcf0aa921 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000028_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000029_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000029_000000.parquet new file mode 100644 index 0000000000000..1cc34e7bd8e4e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000029_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000030_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000030_000000.parquet new file mode 100644 index 0000000000000..96cb32b7d4900 Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000030_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000031_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000031_000000.parquet new file mode 100644 index 0000000000000..b99224903b1eb Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000031_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000032_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000032_000000.parquet new file mode 100644 index 0000000000000..c943862b793e0 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000032_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000033_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000033_000000.parquet new file mode 100644 index 0000000000000..74d3a9e8d03e4 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000033_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000034_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000034_000000.parquet new file mode 100644 index 0000000000000..e5c7aaccccd9f Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000034_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000035_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000035_000000.parquet new file mode 100644 index 0000000000000..f7107c9666e75 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000035_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000036_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000036_000000.parquet new file mode 100644 index 0000000000000..193ccc078efa4 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000036_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000037_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000037_000000.parquet new file mode 100644 index 0000000000000..a2d1efad16ff4 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000037_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000038_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000038_000000.parquet new file mode 100644 index 0000000000000..a78956d76e572 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000038_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000039_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000039_000000.parquet new file mode 100644 index 0000000000000..55048135f262d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000039_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000040_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000040_000000.parquet new file mode 100644 index 0000000000000..a3e885725ad40 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000040_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000041_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000041_000000.parquet new file mode 100644 index 0000000000000..5fa961a2e871a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000041_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000042_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000042_000000.parquet new file mode 100644 index 0000000000000..2f0367021119f Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000042_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000043_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000043_000000.parquet new file mode 100644 index 0000000000000..7ecc3ec9ec4e0 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000043_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000044_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000044_000000.parquet new file mode 100644 index 0000000000000..7f60a39ac4dfe Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000044_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000045_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000045_000000.parquet new file mode 100644 index 0000000000000..60b2342baac83 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000045_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000046_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000046_000000.parquet new file mode 100644 index 0000000000000..cbc3b79d2fd36 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000046_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000047_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000047_000000.parquet new file mode 100644 index 0000000000000..251dcdeb65f37 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000047_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000048_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000048_000000.parquet new file mode 100644 index 0000000000000..57582daaef6d7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000048_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000049_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000049_000000.parquet new file mode 100644 index 0000000000000..5f8dfd34449fc Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000049_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000050_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000050_000000.parquet new file mode 100644 index 0000000000000..5c1434a285214 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000050_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000051_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000051_000000.parquet new file mode 100644 index 0000000000000..616f92f32c4e0 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000051_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000052_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000052_000000.parquet new file mode 100644 index 0000000000000..2680661fd5c64 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000052_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000053_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000053_000000.parquet new file mode 100644 index 0000000000000..21429fcee09bb Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000053_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000054_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000054_000000.parquet new file mode 100644 index 0000000000000..abef8c5872557 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000054_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000055_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000055_000000.parquet new file mode 100644 index 0000000000000..cf254f4cf4bcc Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000055_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000056_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000056_000000.parquet new file mode 100644 index 0000000000000..5077dc723f309 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000056_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000057_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000057_000000.parquet new file mode 100644 index 0000000000000..0c8c42adfedc5 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000057_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000058_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000058_000000.parquet new file mode 100644 index 0000000000000..61b25ada11bb0 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000058_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000059_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000059_000000.parquet new file mode 100644 index 0000000000000..536a1eaa8005d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000059_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000060_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000060_000000.parquet new file mode 100644 index 0000000000000..6e65425cc6f4e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000060_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000061_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000061_000000.parquet new file mode 100644 index 0000000000000..d2eea56a97f93 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000061_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000062_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000062_000000.parquet new file mode 100644 index 0000000000000..03a46300fb13e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000062_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000063_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000063_000000.parquet new file mode 100644 index 0000000000000..ecde2c83d384e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000063_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000064_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000064_000000.parquet new file mode 100644 index 0000000000000..247bc660ab9b7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000064_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000065_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000065_000000.parquet new file mode 100644 index 0000000000000..5c594e2d52f6d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000065_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000066_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000066_000000.parquet new file mode 100644 index 0000000000000..c39631502be3c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000066_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000067_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000067_000000.parquet new file mode 100644 index 0000000000000..b9b50a6e72f42 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000067_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000068_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000068_000000.parquet new file mode 100644 index 0000000000000..2d6276f25aa9a Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000068_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000069_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000069_000000.parquet new file mode 100644 index 0000000000000..563c897b1fa1d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000069_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000070_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000070_000000.parquet new file mode 100644 index 0000000000000..5be38c27c146a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000070_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000071_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000071_000000.parquet new file mode 100644 index 0000000000000..445dde8051cf1 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000071_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000072_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000072_000000.parquet new file mode 100644 index 0000000000000..acddd75d05438 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000072_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000073_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000073_000000.parquet new file mode 100644 index 0000000000000..5cd76d0cd1d7f Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000073_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000074_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000074_000000.parquet new file mode 100644 index 0000000000000..bafe9f02c160e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000074_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000075_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000075_000000.parquet new file mode 100644 index 0000000000000..7b67628efcf2e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000075_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000076_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000076_000000.parquet new file mode 100644 index 0000000000000..f37ed3280bbb2 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000076_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000077_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000077_000000.parquet new file mode 100644 index 0000000000000..76f27defc6546 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000077_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000078_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000078_000000.parquet new file mode 100644 index 0000000000000..207e074148ee7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000078_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000079_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000079_000000.parquet new file mode 100644 index 0000000000000..f4e1fd5406c1c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000079_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000080_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000080_000000.parquet new file mode 100644 index 0000000000000..f74c09d3e41d5 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000080_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000081_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000081_000000.parquet new file mode 100644 index 0000000000000..44c56c513cd19 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000081_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000082_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000082_000000.parquet new file mode 100644 index 0000000000000..a16c6a803cb79 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000082_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000083_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000083_000000.parquet new file mode 100644 index 0000000000000..f6fcce463fbcc Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000083_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000084_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000084_000000.parquet new file mode 100644 index 0000000000000..801a336c32b0d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000084_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000085_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000085_000000.parquet new file mode 100644 index 0000000000000..086e8f15dd5ce Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000085_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000086_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000086_000000.parquet new file mode 100644 index 0000000000000..790a0be7dfd66 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000086_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000087_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000087_000000.parquet new file mode 100644 index 0000000000000..fc1ad1b80d3a5 Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000087_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000088_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000088_000000.parquet new file mode 100644 index 0000000000000..33adb0687751a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000088_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000089_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000089_000000.parquet new file mode 100644 index 0000000000000..c7afb1a57ccb8 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000089_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000090_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000090_000000.parquet new file mode 100644 index 0000000000000..9d3ea549f270c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000090_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000091_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000091_000000.parquet new file mode 100644 index 0000000000000..a433ac58abc5b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000091_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000092_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000092_000000.parquet new file mode 100644 index 0000000000000..1861edb392d21 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000092_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000093_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000093_000000.parquet new file mode 100644 index 0000000000000..03bed277cf8d7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000093_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000094_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000094_000000.parquet new file mode 100644 index 0000000000000..8c84141c52c6b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000094_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000095_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000095_000000.parquet new file mode 100644 index 0000000000000..6d804ca59cc20 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000095_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000096_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000096_000000.parquet new file mode 100644 index 0000000000000..a9169e6f00d52 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000096_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000097_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000097_000000.parquet new file mode 100644 index 0000000000000..d941ea0d757fc Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000097_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000098_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000098_000000.parquet new file mode 100644 index 0000000000000..444223c0d5143 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000098_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000099_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000099_000000.parquet new file mode 100644 index 0000000000000..f593e858e4c8c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000099_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000100_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000100_000000.parquet new file mode 100644 index 0000000000000..14bf50be1a2b2 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000100_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000101_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000101_000000.parquet new file mode 100644 index 0000000000000..bf435c5ab3e56 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000101_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000102_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000102_000000.parquet new file mode 100644 index 0000000000000..56d25eff6f19f Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000102_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000103_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000103_000000.parquet new file mode 100644 index 0000000000000..435c19cde94fd Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000103_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000104_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000104_000000.parquet new file mode 100644 index 0000000000000..63ef476a57db9 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000104_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000105_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000105_000000.parquet new file mode 100644 index 0000000000000..5091a592b0e75 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000105_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000106_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000106_000000.parquet new file mode 100644 index 0000000000000..13d75efcbeeab Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000106_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000107_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000107_000000.parquet new file mode 100644 index 0000000000000..0706709ed1480 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000107_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000108_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000108_000000.parquet new file mode 100644 index 0000000000000..262fa41766abd Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000108_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000109_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000109_000000.parquet new file mode 100644 index 0000000000000..75df48893bbc3 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000109_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000110_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000110_000000.parquet new file mode 100644 index 0000000000000..a46d352221b61 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000110_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000111_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000111_000000.parquet new file mode 100644 index 0000000000000..3109b20ef0193 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000111_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000112_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000112_000000.parquet new file mode 100644 index 0000000000000..9f07bc747f372 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000112_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000113_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000113_000000.parquet new file mode 100644 index 0000000000000..9172150317ae9 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000113_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000114_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000114_000000.parquet new file mode 100644 index 0000000000000..48fb2a8f7139e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000114_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000115_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000115_000000.parquet new file mode 100644 index 0000000000000..3a126c1809d23 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000115_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000116_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000116_000000.parquet new file mode 100644 index 0000000000000..ea2dcc2d0653b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000116_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000117_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000117_000000.parquet new file mode 100644 index 0000000000000..7351931ea06d4 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000117_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000118_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000118_000000.parquet new file mode 100644 index 0000000000000..47e3d36492aab Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000118_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000119_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000119_000000.parquet new file mode 100644 index 0000000000000..fbb16d2e68744 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000119_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000120_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000120_000000.parquet new file mode 100644 index 0000000000000..148b4bbede0a2 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000120_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000121_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000121_000000.parquet new file mode 100644 index 0000000000000..514603d8272b7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000121_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000122_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000122_000000.parquet new file mode 100644 index 0000000000000..10c87ab607801 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000122_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000123_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000123_000000.parquet new file mode 100644 index 0000000000000..c60447d261192 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000123_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000124_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000124_000000.parquet new file mode 100644 index 0000000000000..461602434a6e1 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000124_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000125_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000125_000000.parquet new file mode 100644 index 0000000000000..0787fd4d42204 Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000125_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000126_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000126_000000.parquet new file mode 100644 index 0000000000000..af0b09b6740ed Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000126_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000127_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000127_000000.parquet new file mode 100644 index 0000000000000..56dc1b4f978b0 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000127_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000128_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000128_000000.parquet new file mode 100644 index 0000000000000..0824f48f5b8c7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000128_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000129_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000129_000000.parquet new file mode 100644 index 0000000000000..9b85ec5b87ac1 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000129_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000130_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000130_000000.parquet new file mode 100644 index 0000000000000..c4a25f95c8dd4 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000130_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000131_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000131_000000.parquet new file mode 100644 index 0000000000000..e9c2dada72da1 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000131_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000132_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000132_000000.parquet new file mode 100644 index 0000000000000..0a8c12533646e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000132_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000133_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000133_000000.parquet new file mode 100644 index 0000000000000..041bd7d62ae4c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000133_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000134_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000134_000000.parquet new file mode 100644 index 0000000000000..168e4c0d23995 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000134_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000135_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000135_000000.parquet new file mode 100644 index 0000000000000..e1c5a56d5137c Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000135_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000136_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000136_000000.parquet new file mode 100644 index 0000000000000..7f49dd91a6b9d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000136_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000137_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000137_000000.parquet new file mode 100644 index 0000000000000..eaeb971c00155 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000137_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000138_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000138_000000.parquet new file mode 100644 index 0000000000000..88c179b838a99 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000138_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000139_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000139_000000.parquet new file mode 100644 index 0000000000000..f25f104ff137b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000139_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000140_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000140_000000.parquet new file mode 100644 index 0000000000000..d436c8bbbcfed Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000140_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000141_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000141_000000.parquet new file mode 100644 index 0000000000000..d87dddc16f561 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000141_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000142_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000142_000000.parquet new file mode 100644 index 0000000000000..82a8fade88fa5 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000142_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000143_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000143_000000.parquet new file mode 100644 index 0000000000000..a0bf135da9dbe Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000143_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000144_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000144_000000.parquet new file mode 100644 index 0000000000000..050ecd91878d8 Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000144_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000145_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000145_000000.parquet new file mode 100644 index 0000000000000..b51f8c25bd4c5 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000145_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000146_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000146_000000.parquet new file mode 100644 index 0000000000000..aa7826041ffb3 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000146_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000147_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000147_000000.parquet new file mode 100644 index 0000000000000..cb0f6d5324472 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000147_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000148_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000148_000000.parquet new file mode 100644 index 0000000000000..5e6fc4c4f267e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000148_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000149_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000149_000000.parquet new file mode 100644 index 0000000000000..e02dff1d8c1ea Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000149_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000150_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000150_000000.parquet new file mode 100644 index 0000000000000..ca144d3821c97 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000150_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000151_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000151_000000.parquet new file mode 100644 index 0000000000000..529288980032d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000151_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000152_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000152_000000.parquet new file mode 100644 index 0000000000000..adc31d05872e9 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000152_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000153_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000153_000000.parquet new file mode 100644 index 0000000000000..5e40edb97207a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000153_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000154_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000154_000000.parquet new file mode 100644 index 0000000000000..1f5e353212acf Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000154_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000155_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000155_000000.parquet new file mode 100644 index 0000000000000..13ac65e5e5a42 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000155_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000156_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000156_000000.parquet new file mode 100644 index 0000000000000..c0bd8211880fd Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000156_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000157_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000157_000000.parquet new file mode 100644 index 0000000000000..5c7d8e6d03e43 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000157_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000158_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000158_000000.parquet new file mode 100644 index 0000000000000..e0e95d8a56afd Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000158_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000159_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000159_000000.parquet new file mode 100644 index 0000000000000..6c616a5fcf08b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000159_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000160_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000160_000000.parquet new file mode 100644 index 0000000000000..c16ba7123464a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000160_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000161_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000161_000000.parquet new file mode 100644 index 0000000000000..42942646c1974 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000161_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000162_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000162_000000.parquet new file mode 100644 index 0000000000000..9a425b6319f4e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000162_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000163_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000163_000000.parquet new file mode 100644 index 0000000000000..c377ce91f90f5 Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000163_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000164_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000164_000000.parquet new file mode 100644 index 0000000000000..651d0c1126b93 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000164_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000165_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000165_000000.parquet new file mode 100644 index 0000000000000..afce633848d68 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000165_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000166_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000166_000000.parquet new file mode 100644 index 0000000000000..2393cc4105506 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000166_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000167_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000167_000000.parquet new file mode 100644 index 0000000000000..04661215d10c6 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000167_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000168_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000168_000000.parquet new file mode 100644 index 0000000000000..80116fe4c59da Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000168_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000169_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000169_000000.parquet new file mode 100644 index 0000000000000..b90f6542b4f9e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000169_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000170_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000170_000000.parquet new file mode 100644 index 0000000000000..c27667017d594 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000170_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000171_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000171_000000.parquet new file mode 100644 index 0000000000000..0bda5a366be2d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000171_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000172_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000172_000000.parquet new file mode 100644 index 0000000000000..b47c6c067b7d7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000172_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000173_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000173_000000.parquet new file mode 100644 index 0000000000000..2e4d17a667acd Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000173_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000174_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000174_000000.parquet new file mode 100644 index 0000000000000..b93667f32a769 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000174_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000175_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000175_000000.parquet new file mode 100644 index 0000000000000..e17ef31804c60 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000175_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000176_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000176_000000.parquet new file mode 100644 index 0000000000000..fadd565483ffc Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000176_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000177_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000177_000000.parquet new file mode 100644 index 0000000000000..5058cfb3fe1d5 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000177_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000178_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000178_000000.parquet new file mode 100644 index 0000000000000..1926f4c0cc12a Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000178_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000179_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000179_000000.parquet new file mode 100644 index 0000000000000..965a89a19e283 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000179_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000180_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000180_000000.parquet new file mode 100644 index 0000000000000..bd87260258c19 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000180_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000181_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000181_000000.parquet new file mode 100644 index 0000000000000..95c9b8567108b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000181_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000182_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000182_000000.parquet new file mode 100644 index 0000000000000..69fa65ff8ac86 Binary 
files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000182_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000183_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000183_000000.parquet new file mode 100644 index 0000000000000..70753cd131a7f Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000183_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000184_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000184_000000.parquet new file mode 100644 index 0000000000000..96eadd5f171fc Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000184_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000185_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000185_000000.parquet new file mode 100644 index 0000000000000..81cd5fdff72b7 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000185_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000186_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000186_000000.parquet new file mode 100644 index 0000000000000..b8c0317fe49b1 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000186_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000187_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000187_000000.parquet new file mode 100644 index 0000000000000..a5f8a5f14fe5e Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000187_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000188_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000188_000000.parquet new file mode 100644 index 0000000000000..7acc639669198 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000188_000000.parquet differ diff --git 
a/rllib/tests/data/cartpole/cartpole-v1_large/1_000189_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000189_000000.parquet new file mode 100644 index 0000000000000..5180424b92adc Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000189_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000190_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000190_000000.parquet new file mode 100644 index 0000000000000..66143f28c29fa Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000190_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000191_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000191_000000.parquet new file mode 100644 index 0000000000000..77b050eebb1dd Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000191_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000192_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000192_000000.parquet new file mode 100644 index 0000000000000..af8911d73330b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000192_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000193_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000193_000000.parquet new file mode 100644 index 0000000000000..e747d1b1ca6ce Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000193_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000194_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000194_000000.parquet new file mode 100644 index 0000000000000..011d9055cb657 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000194_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000195_000000.parquet 
b/rllib/tests/data/cartpole/cartpole-v1_large/1_000195_000000.parquet new file mode 100644 index 0000000000000..af3de0251cf45 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000195_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000196_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000196_000000.parquet new file mode 100644 index 0000000000000..7516d2df1f358 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000196_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000197_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000197_000000.parquet new file mode 100644 index 0000000000000..a966556265ee9 Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000197_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000198_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000198_000000.parquet new file mode 100644 index 0000000000000..f0d6a2cb51d0b Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000198_000000.parquet differ diff --git a/rllib/tests/data/cartpole/cartpole-v1_large/1_000199_000000.parquet b/rllib/tests/data/cartpole/cartpole-v1_large/1_000199_000000.parquet new file mode 100644 index 0000000000000..9db863441e01d Binary files /dev/null and b/rllib/tests/data/cartpole/cartpole-v1_large/1_000199_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000000_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000000_000000.parquet new file mode 100644 index 0000000000000..1ee2a6dad07c9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000000_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000001_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000001_000000.parquet new file mode 100644 index 0000000000000..9a65798fba724 Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000001_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000002_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000002_000000.parquet new file mode 100644 index 0000000000000..d066013a034e2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000002_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000003_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000003_000000.parquet new file mode 100644 index 0000000000000..2753277094a69 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000003_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000004_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000004_000000.parquet new file mode 100644 index 0000000000000..998405efed617 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000004_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000005_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000005_000000.parquet new file mode 100644 index 0000000000000..1c3a267df692f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000005_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000006_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000006_000000.parquet new file mode 100644 index 0000000000000..aec4472db1cd0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000006_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000007_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000007_000000.parquet new file mode 100644 index 0000000000000..c140e83969f17 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000007_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000008_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000008_000000.parquet new file mode 100644 index 0000000000000..c838620ee4c51 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000008_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000009_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000009_000000.parquet new file mode 100644 index 0000000000000..2bc09508a2b58 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000009_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000010_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000010_000000.parquet new file mode 100644 index 0000000000000..deb290a1a1086 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000010_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000011_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000011_000000.parquet new file mode 100644 index 0000000000000..25b63cd1c52d6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000011_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000012_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000012_000000.parquet new file mode 100644 index 0000000000000..a89b73ca14eae Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000012_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000013_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000013_000000.parquet new file mode 100644 index 0000000000000..1786c143ec51b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000013_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000014_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000014_000000.parquet new file mode 100644 index 0000000000000..5cc27af905b8d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000014_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000015_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000015_000000.parquet new file mode 100644 index 0000000000000..3ecf016767708 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000015_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000016_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000016_000000.parquet new file mode 100644 index 0000000000000..9493c0f8761f2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000016_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000017_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000017_000000.parquet new file mode 100644 index 0000000000000..ddf6fafa678d7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000017_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000018_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000018_000000.parquet new file mode 100644 index 0000000000000..b7dc8f62320a4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000018_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000019_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000019_000000.parquet new file mode 100644 index 0000000000000..133cdcae7514d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000019_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000020_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000020_000000.parquet new file mode 100644 index 0000000000000..a235845822e0c Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000020_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000021_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000021_000000.parquet new file mode 100644 index 0000000000000..3e7088573141d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000021_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000022_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000022_000000.parquet new file mode 100644 index 0000000000000..cd56a9d056ab4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000022_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000023_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000023_000000.parquet new file mode 100644 index 0000000000000..a697717aea207 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000023_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000024_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000024_000000.parquet new file mode 100644 index 0000000000000..1b0e2afba036f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000024_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000025_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000025_000000.parquet new file mode 100644 index 0000000000000..fc388f610e455 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000025_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000026_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000026_000000.parquet new file mode 100644 index 0000000000000..34a5baa563c7f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000026_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000027_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000027_000000.parquet new file mode 100644 index 0000000000000..22e72032134f9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000027_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000028_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000028_000000.parquet new file mode 100644 index 0000000000000..62fcfa0d40ce4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000028_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000029_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000029_000000.parquet new file mode 100644 index 0000000000000..91d123770ff2c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000029_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000030_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000030_000000.parquet new file mode 100644 index 0000000000000..73eb45fdea877 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000030_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000031_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000031_000000.parquet new file mode 100644 index 0000000000000..65f69e278767e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000031_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000032_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000032_000000.parquet new file mode 100644 index 0000000000000..608de797d24f2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000032_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000033_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000033_000000.parquet new file mode 100644 index 0000000000000..c036023f510fe Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000033_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000034_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000034_000000.parquet new file mode 100644 index 0000000000000..301a2b23ef413 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000034_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000035_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000035_000000.parquet new file mode 100644 index 0000000000000..e73bd7e67a3ec Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000035_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000036_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000036_000000.parquet new file mode 100644 index 0000000000000..8b0ee5e847ea1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000036_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000037_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000037_000000.parquet new file mode 100644 index 0000000000000..1e25acb7f55a5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000037_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000038_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000038_000000.parquet new file mode 100644 index 0000000000000..7c45ae3f1291b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000038_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000039_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000039_000000.parquet new file mode 100644 index 0000000000000..d7a774cbd23ac Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000039_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000040_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000040_000000.parquet new file mode 100644 index 0000000000000..9e8b3d9acf619 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000040_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000041_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000041_000000.parquet new file mode 100644 index 0000000000000..a9699d89725f3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000041_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000042_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000042_000000.parquet new file mode 100644 index 0000000000000..58512548a5767 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000042_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000043_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000043_000000.parquet new file mode 100644 index 0000000000000..b1f5c6c00edce Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000043_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000044_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000044_000000.parquet new file mode 100644 index 0000000000000..8d1e4a4a0e111 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000044_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000045_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000045_000000.parquet new file mode 100644 index 0000000000000..52bbf3daa0a07 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000045_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000046_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000046_000000.parquet new file mode 100644 index 0000000000000..bfe51c4fb485f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000046_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000047_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000047_000000.parquet new file mode 100644 index 0000000000000..4b537caa6ecba Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000047_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000048_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000048_000000.parquet new file mode 100644 index 0000000000000..fca3515f7d3f9 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000048_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000049_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000049_000000.parquet new file mode 100644 index 0000000000000..085a5aa7b2b7a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000049_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000050_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000050_000000.parquet new file mode 100644 index 0000000000000..272df69df0645 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000050_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000051_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000051_000000.parquet new file mode 100644 index 0000000000000..2c99c2b3a1d1e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000051_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000052_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000052_000000.parquet new file mode 100644 index 0000000000000..e6da53f7d34c1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000052_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000053_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000053_000000.parquet new file mode 100644 index 0000000000000..6cca8ea31f018 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000053_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000054_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000054_000000.parquet new file mode 100644 index 0000000000000..54348d6de8408 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000054_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000055_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000055_000000.parquet new file mode 100644 index 0000000000000..015416b6ed7e3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000055_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000056_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000056_000000.parquet new file mode 100644 index 0000000000000..5465c7c699f24 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000056_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000057_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000057_000000.parquet new file mode 100644 index 0000000000000..7bb9bf97183bf Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000057_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000058_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000058_000000.parquet new file mode 100644 index 0000000000000..d240d7e182131 Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000058_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000059_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000059_000000.parquet new file mode 100644 index 0000000000000..0f76cdbae80ce Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000059_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000060_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000060_000000.parquet new file mode 100644 index 0000000000000..62fc5de0ba559 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000060_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000061_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000061_000000.parquet new file mode 100644 index 0000000000000..5ee2bfa8b09de Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000061_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000062_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000062_000000.parquet new file mode 100644 index 0000000000000..cd1187c25470a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000062_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000063_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000063_000000.parquet new file mode 100644 index 0000000000000..78e52ac0edccb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000063_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000064_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000064_000000.parquet new file mode 100644 index 0000000000000..bd7a58cfc91be Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000064_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000065_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000065_000000.parquet new file mode 100644 index 0000000000000..fdf4bac38cdf6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000065_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000066_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000066_000000.parquet new file mode 100644 index 0000000000000..ce9751d00b336 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000066_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000067_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000067_000000.parquet new file mode 100644 index 0000000000000..0ee3d2f106f21 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000067_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000068_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000068_000000.parquet new file mode 100644 index 0000000000000..a2ac6f522b544 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000068_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000069_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000069_000000.parquet new file mode 100644 index 0000000000000..0375b6d6a46dd Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000069_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000070_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000070_000000.parquet new file mode 100644 index 0000000000000..fe4f91c726a18 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000070_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000071_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000071_000000.parquet new file mode 100644 index 0000000000000..b263e40e0537e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000071_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000072_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000072_000000.parquet new file mode 100644 index 0000000000000..30e9310096bb8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000072_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000073_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000073_000000.parquet new file mode 100644 index 0000000000000..8cc2e3c4fe025 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000073_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000074_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000074_000000.parquet new file mode 100644 index 0000000000000..704beb68aab0d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000074_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000075_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000075_000000.parquet new file mode 100644 index 0000000000000..255b95c38e3b8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000075_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000076_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000076_000000.parquet new file mode 100644 index 0000000000000..d851d52a140cc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000076_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000077_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000077_000000.parquet new file mode 100644 index 0000000000000..0e504eaa382ca Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000077_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000078_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000078_000000.parquet new file mode 100644 index 0000000000000..812b420c0b4df Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000078_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000079_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000079_000000.parquet new file mode 100644 index 0000000000000..e47ce803a5a89 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000079_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000080_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000080_000000.parquet new file mode 100644 index 0000000000000..d4d383b9725aa Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000080_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000081_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000081_000000.parquet new file mode 100644 index 0000000000000..6773ab962fde0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000081_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000082_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000082_000000.parquet new file mode 100644 index 0000000000000..e90d879820fcd Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000082_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000083_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000083_000000.parquet new file mode 100644 index 0000000000000..d930bcfdcb5a1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000083_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000084_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000084_000000.parquet new file mode 100644 index 0000000000000..ca0d1efc05112 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000084_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000085_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000085_000000.parquet new file mode 100644 index 0000000000000..536e115436003 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000085_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000086_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000086_000000.parquet new file mode 100644 index 0000000000000..868729c58bf5d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000086_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000087_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000087_000000.parquet new file mode 100644 index 0000000000000..a337fee34918a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000087_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000088_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000088_000000.parquet new file mode 100644 index 0000000000000..82859502046a6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000088_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000089_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000089_000000.parquet new file mode 100644 index 0000000000000..5bddfa0aaf2b8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000089_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000090_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000090_000000.parquet new file mode 100644 index 0000000000000..528a38f6de80a Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000090_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000091_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000091_000000.parquet new file mode 100644 index 0000000000000..c5ace388a680b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000091_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000092_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000092_000000.parquet new file mode 100644 index 0000000000000..8b92e1dc7f403 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000092_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000093_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000093_000000.parquet new file mode 100644 index 0000000000000..74e5bb96059ed Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000093_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000094_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000094_000000.parquet new file mode 100644 index 0000000000000..c486d56e375f4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000094_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000095_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000095_000000.parquet new file mode 100644 index 0000000000000..d7e0f1a2ac697 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000095_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000096_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000096_000000.parquet new file mode 100644 index 0000000000000..b8f1009bf5c35 Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000096_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000097_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000097_000000.parquet new file mode 100644 index 0000000000000..9b59caf9aa622 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000097_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000098_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000098_000000.parquet new file mode 100644 index 0000000000000..f87dab0aeb25c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000098_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000099_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000099_000000.parquet new file mode 100644 index 0000000000000..7a640d064b954 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000099_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000100_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000100_000000.parquet new file mode 100644 index 0000000000000..71f6003682ea5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000100_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000101_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000101_000000.parquet new file mode 100644 index 0000000000000..fcf17abe7d2fe Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000101_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000102_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000102_000000.parquet new file mode 100644 index 0000000000000..6ff98653933be Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000102_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000103_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000103_000000.parquet new file mode 100644 index 0000000000000..add71213e429f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000103_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000104_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000104_000000.parquet new file mode 100644 index 0000000000000..b3e76ddf2b928 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000104_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000105_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000105_000000.parquet new file mode 100644 index 0000000000000..f2714a82a67e8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000105_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000106_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000106_000000.parquet new file mode 100644 index 0000000000000..e9b5163cd83dc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000106_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000107_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000107_000000.parquet new file mode 100644 index 0000000000000..03381166f4eb3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000107_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000108_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000108_000000.parquet new file mode 100644 index 0000000000000..0e8551eeec920 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000108_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000109_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000109_000000.parquet new file mode 100644 index 0000000000000..b27cc76373e1f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000109_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000110_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000110_000000.parquet new file mode 100644 index 0000000000000..822f152289856 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000110_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000111_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000111_000000.parquet new file mode 100644 index 0000000000000..ba6f59c8c2c04 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000111_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000112_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000112_000000.parquet new file mode 100644 index 0000000000000..b05e1ce847a8c Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000112_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000113_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000113_000000.parquet new file mode 100644 index 0000000000000..7d4ef2d775225 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000113_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000114_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000114_000000.parquet new file mode 100644 index 0000000000000..34969bfcebf0b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000114_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000115_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000115_000000.parquet new file mode 100644 index 0000000000000..6ced5c4080005 Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000115_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000116_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000116_000000.parquet new file mode 100644 index 0000000000000..0aa28ecf6afdc Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000116_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000117_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000117_000000.parquet new file mode 100644 index 0000000000000..44568ef0c9db0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000117_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000118_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000118_000000.parquet new file mode 100644 index 0000000000000..3f2f457e22e49 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000118_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000119_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000119_000000.parquet new file mode 100644 index 0000000000000..a00ec14e22cb6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000119_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000120_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000120_000000.parquet new file mode 100644 index 0000000000000..09e4c0b4ccc9b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000120_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000121_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000121_000000.parquet new file mode 100644 index 0000000000000..7037ffe916be7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000121_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000122_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000122_000000.parquet new file mode 100644 index 0000000000000..57222458ce20b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000122_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000123_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000123_000000.parquet new file mode 100644 index 0000000000000..0a6f01a0de0fa Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000123_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000124_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000124_000000.parquet new file mode 100644 index 0000000000000..d8b50234c6906 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000124_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000125_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000125_000000.parquet new file mode 100644 index 0000000000000..e3f7ad3fa3494 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000125_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000126_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000126_000000.parquet new file mode 100644 index 0000000000000..66dd026275b3e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000126_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000127_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000127_000000.parquet new file mode 100644 index 0000000000000..a89ea37f51cd3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000127_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000128_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000128_000000.parquet new file mode 100644 index 0000000000000..a3974d47ddec2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000128_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000129_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000129_000000.parquet new file mode 100644 index 0000000000000..e82ea8d31a6b0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000129_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000130_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000130_000000.parquet new file mode 100644 index 0000000000000..7469de4ba3ecb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000130_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000131_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000131_000000.parquet new file mode 100644 index 0000000000000..919a41bb5e03d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000131_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000132_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000132_000000.parquet new file mode 100644 index 0000000000000..7aa29f9cf298e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000132_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000133_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000133_000000.parquet new file mode 100644 index 0000000000000..2fa1e20220b01 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000133_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000134_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000134_000000.parquet new file mode 100644 index 0000000000000..bd04f46c8adea Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000134_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000135_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000135_000000.parquet new file mode 100644 index 0000000000000..9ebb3ac313cf4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000135_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000136_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000136_000000.parquet new file mode 100644 index 0000000000000..9c6216aa23290 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000136_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000137_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000137_000000.parquet new file mode 100644 index 0000000000000..39dd5134809aa Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000137_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000138_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000138_000000.parquet new file mode 100644 index 0000000000000..956cdbaad6505 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000138_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000139_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000139_000000.parquet new file mode 100644 index 0000000000000..6666874123b00 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000139_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000140_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000140_000000.parquet new file mode 100644 index 0000000000000..e481be831dfe0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000140_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000141_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000141_000000.parquet new file mode 100644 index 0000000000000..c9c14ef8529a6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000141_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000142_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000142_000000.parquet new file mode 100644 index 0000000000000..043784c006dac Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000142_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000143_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000143_000000.parquet new file mode 100644 index 0000000000000..9e22e201d3cf4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000143_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000144_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000144_000000.parquet new file mode 100644 index 0000000000000..48ee98d33176e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000144_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000145_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000145_000000.parquet new file mode 100644 index 0000000000000..a8c267703168b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000145_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000146_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000146_000000.parquet new file mode 100644 index 0000000000000..4ada5816e55cd Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000146_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000147_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000147_000000.parquet new file mode 100644 index 0000000000000..c563ea6b32bc1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000147_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000148_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000148_000000.parquet new file mode 100644 index 0000000000000..89e49ee0c0c88 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000148_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000149_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000149_000000.parquet new file mode 100644 index 0000000000000..a37e7233b3863 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000149_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000150_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000150_000000.parquet new file mode 100644 index 0000000000000..6217cf1801e67 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000150_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000151_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000151_000000.parquet new file mode 100644 index 0000000000000..3e70224ffaef1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000151_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000152_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000152_000000.parquet new file mode 100644 index 0000000000000..dcba3f9799fe1 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000152_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000153_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000153_000000.parquet new file mode 100644 index 0000000000000..e13b3e328116a Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000153_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000154_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000154_000000.parquet new file mode 100644 index 0000000000000..e2df10b7ea0a5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000154_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000155_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000155_000000.parquet new file mode 100644 index 0000000000000..30323c7c8e6ae Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000155_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000156_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000156_000000.parquet new file mode 100644 index 0000000000000..a4e17bd3c6d6b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000156_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000157_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000157_000000.parquet new file mode 100644 index 0000000000000..3afeeedd34985 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000157_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000158_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000158_000000.parquet new file mode 100644 index 0000000000000..f71928c87897d Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000158_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000159_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000159_000000.parquet new file mode 100644 index 0000000000000..baeb28d277f12 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000159_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000160_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000160_000000.parquet new file mode 100644 index 0000000000000..a6c8daac62399 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000160_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000161_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000161_000000.parquet new file mode 100644 index 0000000000000..4403d01d30626 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000161_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000162_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000162_000000.parquet new file mode 100644 index 0000000000000..245c6d310f4af Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000162_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000163_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000163_000000.parquet new file mode 100644 index 0000000000000..244df2f282b9b Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000163_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000164_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000164_000000.parquet new file mode 100644 index 0000000000000..1ef0a76a9a681 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000164_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000165_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000165_000000.parquet new file mode 100644 index 0000000000000..9d0d439d558fb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000165_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000166_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000166_000000.parquet new file mode 100644 index 0000000000000..1cb394ffdc2c6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000166_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000167_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000167_000000.parquet new file mode 100644 index 0000000000000..fe42dcc5dfda5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000167_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000168_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000168_000000.parquet new file mode 100644 index 0000000000000..76a62aa42dda4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000168_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000169_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000169_000000.parquet new file mode 100644 index 0000000000000..97347ad53d569 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000169_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000170_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000170_000000.parquet new file mode 100644 index 0000000000000..547c9305dbd49 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000170_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000171_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000171_000000.parquet new file mode 100644 index 0000000000000..88146650990a6 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000171_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000172_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000172_000000.parquet new file mode 100644 index 0000000000000..1975a79d3c1ac Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000172_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000173_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000173_000000.parquet new file mode 100644 index 0000000000000..0b6f2475bae19 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000173_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000174_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000174_000000.parquet new file mode 100644 index 0000000000000..26d412d751d88 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000174_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000175_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000175_000000.parquet new file mode 100644 index 0000000000000..e02801877af98 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000175_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000176_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000176_000000.parquet new file mode 100644 index 0000000000000..11520ebc82025 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000176_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000177_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000177_000000.parquet new file mode 100644 index 0000000000000..f0cb687f5a0f4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000177_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000178_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000178_000000.parquet new file mode 100644 index 0000000000000..1ee2b13b0b3b2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000178_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000179_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000179_000000.parquet new file mode 100644 index 0000000000000..6cd818937e775 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000179_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000180_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000180_000000.parquet new file mode 100644 index 0000000000000..93b408cb255b7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000180_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000181_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000181_000000.parquet new file mode 100644 index 0000000000000..41e68e3e1e08f Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000181_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000182_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000182_000000.parquet new file mode 100644 index 0000000000000..e93c27e5b81f5 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000182_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000183_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000183_000000.parquet new file mode 100644 index 0000000000000..7c9fa111c172e Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000183_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000184_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000184_000000.parquet new file mode 100644 index 0000000000000..fc9b8578d5b16 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000184_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000185_000000.parquet 
b/rllib/tests/data/pendulum/pendulum-v1_large/1_000185_000000.parquet new file mode 100644 index 0000000000000..02d1ae234a157 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000185_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000186_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000186_000000.parquet new file mode 100644 index 0000000000000..c5f3d12e4cddb Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000186_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000187_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000187_000000.parquet new file mode 100644 index 0000000000000..e44c6ebb1d0d2 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000187_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000188_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000188_000000.parquet new file mode 100644 index 0000000000000..cf44cb37c56c7 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000188_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000189_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000189_000000.parquet new file mode 100644 index 0000000000000..987ec243ec763 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000189_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000190_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000190_000000.parquet new file mode 100644 index 0000000000000..4b5753a2fc2c8 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000190_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000191_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000191_000000.parquet new file mode 100644 index 0000000000000..e0ffa2a9f468d Binary 
files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000191_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000192_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000192_000000.parquet new file mode 100644 index 0000000000000..1a3dfc2673eb0 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000192_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000193_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000193_000000.parquet new file mode 100644 index 0000000000000..75de11c1e1da4 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000193_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000194_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000194_000000.parquet new file mode 100644 index 0000000000000..99a44b0fd31be Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000194_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000195_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000195_000000.parquet new file mode 100644 index 0000000000000..5cab3b1e96efe Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000195_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000196_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000196_000000.parquet new file mode 100644 index 0000000000000..d68e8b4e0f363 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000196_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000197_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000197_000000.parquet new file mode 100644 index 0000000000000..25878ea638536 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000197_000000.parquet differ diff --git 
a/rllib/tests/data/pendulum/pendulum-v1_large/1_000198_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000198_000000.parquet new file mode 100644 index 0000000000000..c0375918b1db3 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000198_000000.parquet differ diff --git a/rllib/tests/data/pendulum/pendulum-v1_large/1_000199_000000.parquet b/rllib/tests/data/pendulum/pendulum-v1_large/1_000199_000000.parquet new file mode 100644 index 0000000000000..dfb1a983ac331 Binary files /dev/null and b/rllib/tests/data/pendulum/pendulum-v1_large/1_000199_000000.parquet differ diff --git a/rllib/tuned_examples/bc/cartpole-bc.yaml b/rllib/tuned_examples/bc/cartpole-bc.yaml index b48c0f14ba3df..cea32542ecad3 100644 --- a/rllib/tuned_examples/bc/cartpole-bc.yaml +++ b/rllib/tuned_examples/bc/cartpole-bc.yaml @@ -11,6 +11,8 @@ cartpole-bc: config: # Works for both torch and tf. framework: torch + enable_rl_module_and_learner: false + enable_env_runner_and_connector_v2: false # In order to evaluate on an actual environment, use these following # settings: evaluation_num_env_runners: 1 diff --git a/rllib/tuned_examples/bc/cartpole_bc.py b/rllib/tuned_examples/bc/cartpole_bc.py new file mode 100644 index 0000000000000..ab30f2ad14f37 --- /dev/null +++ b/rllib/tuned_examples/bc/cartpole_bc.py @@ -0,0 +1,69 @@ +from pathlib import Path + +from ray.rllib.algorithms.bc import BCConfig +from ray.rllib.utils.metrics import ( + ENV_RUNNER_RESULTS, + EPISODE_RETURN_MEAN, + EVALUATION_RESULTS, + TRAINING_ITERATION_TIMER, +) +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) + +parser = add_rllib_example_script_args() +# Use `parser` to add your own custom command line options to this script +# and (if needed) use their values to set up `config` below.
+args = parser.parse_args() + +assert ( + args.env == "CartPole-v1" or args.env is None +), "This tuned example works only with `CartPole-v1`." + +# Define the data paths. +data_path = "tests/data/cartpole/cartpole-v1_large" +base_path = Path(__file__).parents[2] +print(f"base_path={base_path}") +data_path = "local://" + base_path.joinpath(data_path).as_posix() +print(f"data_path={data_path}") + +# Define the BC config. +config = ( + BCConfig() + .environment(env="CartPole-v1") + .api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) + .evaluation( + evaluation_interval=3, + evaluation_num_env_runners=1, + evaluation_duration=5, + evaluation_parallel_to_training=True, + ) + # Note, the `input_` argument is the major argument for the + # new offline API. Via the `input_read_method_kwargs` the + # arguments for the `ray.data.Dataset` read method can be + # configured. The read method needs at least as many blocks + # as remote learners. + .offline_data( + input_=[data_path], + input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 1)}, + prelearner_module_synch_period=20, + ) + .training( + # To increase learning speed with multiple learners, + # increase the learning rate correspondingly. 
+ lr=0.0008 * max(1, args.num_gpus**0.5), + train_batch_size_per_learner=2000, + ) +) + +stop = { + f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 120.0, + TRAINING_ITERATION_TIMER: 350.0, +} + +if __name__ == "__main__": + run_rllib_example_script_experiment(config, args, stop=stop) diff --git a/rllib/tuned_examples/bc/pendulum_bc.py b/rllib/tuned_examples/bc/pendulum_bc.py new file mode 100644 index 0000000000000..ffd26e471b533 --- /dev/null +++ b/rllib/tuned_examples/bc/pendulum_bc.py @@ -0,0 +1,68 @@ +from pathlib import Path + +from ray.rllib.algorithms.bc import BCConfig +from ray.rllib.utils.metrics import ( + ENV_RUNNER_RESULTS, + EPISODE_RETURN_MEAN, + EVALUATION_RESULTS, + TRAINING_ITERATION_TIMER, +) +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) + +parser = add_rllib_example_script_args() +# Use `parser` to add your own custom command line options to this script +# and (if needed) use their values to set up `config` below. +args = parser.parse_args() + +assert ( + args.env == "Pendulum-v1" or args.env is None +), "This tuned example works only with `Pendulum-v1`." + +# Define the data paths. +data_path = "tests/data/pendulum/pendulum-v1_large" +base_path = Path(__file__).parents[2] +print(f"base_path={base_path}") +data_path = "local://" + base_path.joinpath(data_path).as_posix() +print(f"data_path={data_path}") + +# Define the BC config. +config = ( + BCConfig() + .environment(env="Pendulum-v1") + .api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) + .evaluation( + evaluation_interval=3, + evaluation_num_env_runners=1, + evaluation_duration=5, + evaluation_parallel_to_training=True, + ) + # Note, the `input_` argument is the major argument for the + # new offline API. Via the `input_read_method_kwargs` the + # arguments for the `ray.data.Dataset` read method can be + # configured.
The read method needs at least as many blocks + # as remote learners. + .offline_data( + input_=[data_path], + input_read_method_kwargs={"override_num_blocks": max(args.num_gpus, 1)}, + ) + .training( + # To increase learning speed with multiple learners, + # increase the learning rate correspondingly. + lr=0.0008 * max(1, args.num_gpus**0.5), + train_batch_size_per_learner=2000, + ) +) + +stop = { + f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": -200.0, + TRAINING_ITERATION_TIMER: 350.0, +} + +if __name__ == "__main__": + run_rllib_example_script_experiment(config, args, stop=stop) diff --git a/rllib/tuned_examples/cql/pendulum-cql.yaml b/rllib/tuned_examples/cql/pendulum-cql.yaml index 15082a4578f66..9d45e39ad4484 100644 --- a/rllib/tuned_examples/cql/pendulum-cql.yaml +++ b/rllib/tuned_examples/cql/pendulum-cql.yaml @@ -13,6 +13,9 @@ pendulum-cql: # Works for both torch and tf. framework: torch + # Set seed. + seed: 0 + # Use one or more offline files or "input: sampler" for online learning. 
input: 'dataset' input_config: diff --git a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py b/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py index 043ea844448ef..59c1fa5bbf364 100644 --- a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py +++ b/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py @@ -27,7 +27,7 @@ .environment(env="multi_agent_pendulum") .training( initial_alpha=1.001, - lr=3e-4, + lr=8e-4, target_entropy="auto", n_step=1, tau=0.005, diff --git a/rllib/utils/metrics/__init__.py b/rllib/utils/metrics/__init__.py index 9a2a7fea4a8e0..33d9c592af42f 100644 --- a/rllib/utils/metrics/__init__.py +++ b/rllib/utils/metrics/__init__.py @@ -94,6 +94,7 @@ GRAD_WAIT_TIMER = "grad_wait" SAMPLE_TIMER = "sample" # @OldAPIStack ENV_RUNNER_SAMPLING_TIMER = "env_runner_sampling_timer" +OFFLINE_SAMPLING_TIMER = "offline_sampling_timer" REPLAY_BUFFER_SAMPLE_TIMER = "replay_buffer_sampling_timer" REPLAY_BUFFER_UPDATE_PRIOS_TIMER = "replay_buffer_update_prios_timer" LEARNER_UPDATE_TIMER = "learner_update_timer"