From 3054a0f9e8aec6cb17c12cb6be296852c7a7836f Mon Sep 17 00:00:00 2001
From: VictorS67 <185000048@qq.com>
Date: Wed, 10 Aug 2022 17:45:28 -0400
Subject: [PATCH] modify context distribution and add summaries

---
 datasets/bandits.py        | 15 ++++++++-
 datasets/contexts.py       | 46 +++++++++++++++++---------
 datasets/policies.py       | 15 +++++----
 main.py                    | 66 ++++++++++++++++++++++++--------------
 metrics/arm_summary.py     |  8 +++++
 metrics/context_summary.py |  8 +++++
 metrics/evaluator.py       | 25 +++++++++++++--
 requirements.txt           |  1 +
 8 files changed, 134 insertions(+), 50 deletions(-)
 create mode 100644 metrics/arm_summary.py
 create mode 100644 metrics/context_summary.py

diff --git a/datasets/bandits.py b/datasets/bandits.py
index dd81b25..92056cc 100644
--- a/datasets/bandits.py
+++ b/datasets/bandits.py
@@ -47,7 +47,13 @@ def _init_contexts(self, contexts: Dict) -> Dict:
         for context in contexts:
             context = dict(context)
             context_name = context["name"]
-            contexts_dict[context_name] = ContextAllocateData(context["values"], context["allocations"])
+            contexts_dict[context_name] = ContextAllocateData(
+                context["min_value"],
+                context["max_value"],
+                context["value_type"],
+                context["normalize"],
+                context["distribution"]
+            )
             if context['extra'] is True:
                 self.terms.append(context_name)
             if context['interaction'] is True:
@@ -61,3 +67,10 @@ def get_actions(self) -> List:
 
     def get_contextual_variables(self) -> List:
         return list(self.contexts_dict.keys())
+
+    def get_noncont_contextual_variables(self) -> List:
+        lst = []
+        for context in list(self.contexts_dict.keys()):
+            if self.contexts_dict[context].type != "CONT":
+                lst.append(context)
+        return lst
diff --git a/datasets/contexts.py b/datasets/contexts.py
index 267938c..5eb7811 100644
--- a/datasets/contexts.py
+++ b/datasets/contexts.py
@@ -1,21 +1,35 @@
-from typing import List
+from typing import List, Dict
+from scipy.stats import randint, bernoulli, uniform, norm
+
+import numpy as np
+
 
 class ContextAllocateData:
-    values: List[float]
-    allocations: List[float]
+    min_val: float
+    max_val: float
+    type: str
+    normalize: bool
+    distribution: Dict
 
-    def __init__(self, values: List, allocations: List) -> None:
-        if len(values) == 0:
-            print("no context values!")
-            return None
+    def __init__(self, min_val: float, max_val: float, type: str, normalize: bool, distribution: Dict) -> None:
+        self.min_val = min_val
+        self.max_val = max_val
+        self.type = type
+        self.normalize = normalize
+        self.distribution = distribution
 
-        if len(values) != len(allocations):
-            print("can't allocate context values!")
-            return None
-
-        if sum(allocations) != 1:
-            print("allocation invalid!")
-            return None
+    def get_rvs(self) -> float:
+        distribution_copy = self.distribution.copy()
+        dis_type = distribution_copy.pop('type', None)
 
-        self.values = values
-        self.allocations = allocations
+        if dis_type is not None:
+            random_val = eval(dis_type).rvs(**distribution_copy)
+            if self.type != "CONT":
+                random_val = np.floor(random_val)
+            random_val = np.clip(random_val, self.min_val, self.max_val)
+
+            if self.normalize:
+                random_val = (random_val - self.min_val) / (self.max_val - self.min_val)
+
+            return round(random_val, 2)
+        return None
diff --git a/datasets/policies.py b/datasets/policies.py
index 2c75982..0980583 100644
--- a/datasets/policies.py
+++ b/datasets/policies.py
@@ -330,7 +330,7 @@ def __init__(self, policy_configs: Dict, bandit: Bandit) -> None:
 
     def get_contexts(self, new_learner_dict: Dict={}) -> Dict:
         for var in self.bandit.contexts_dict:
-            new_learner_dict[var] = np.random.choice(self.bandit.contexts_dict[var].values, size=1, p=self.bandit.contexts_dict[var].allocations)[0]
+            new_learner_dict[var] = self.bandit.contexts_dict[var].get_rvs()
 
         return new_learner_dict
 
@@ -386,14 +386,17 @@ def get_reward(self, new_learner_df: pd.DataFrame) -> pd.DataFrame:
                 terms.append(var)
         terms = list(set(terms))
 
-        # Get scale of error.
-        err_scale = (reward.max_value - reward.min_value) / 3
-
         # Update reward for the new learner dataframe.
         for index, row in new_learner_df.iterrows():
             row_terms = row[terms]
-            error = np.random.normal(0, err_scale, 1)[0]
-            true_reward = calculate_outcome(row_terms.to_dict(), np.array(true_coef_mean), include_intercept, true_estimate) + error
+            raw_reward = calculate_outcome(row_terms.to_dict(), np.array(true_coef_mean), include_intercept, true_estimate)
+            if reward.value_type == "BIN":
+                true_reward = np.random.binomial(1, raw_reward, 1)[0]
+            else:
+                # Get scale of error.
+                err_scale = (reward.max_value - reward.min_value) / 6
+                error = np.random.normal(0, err_scale, 1)[0]
+                true_reward = raw_reward + error
             row[reward_name] = reward.get_reward(true_reward)
 
         return new_learner_df
diff --git a/main.py b/main.py
index 2d44ff5..7a64a42 100644
--- a/main.py
+++ b/main.py
@@ -5,8 +5,9 @@
 import numpy as np
 import json
 
+from io import BytesIO
+
 from datasets.arms import ArmData
-from datasets.contexts import ContextAllocateData
 from datasets.bandits import Bandit
 from datasets.policies import PolicyFactory
 from metrics.evaluator import EvaluatorFactory
@@ -62,13 +63,13 @@ def simulate(
     # The priority of unique in rewards is higher than the priority of unique in contexts, in terms of sorting.
     all_policies.sort(key=lambda x: 10 * int(x.is_unique_reward()) + int(x.is_unique_contexts()), reverse=True)
 
-    # Print the reward generating plan for all policies.
-    first_policy = all_policies[0]
-    print(
-        "Policy {} is used for generating rewards: {}".format(
-            first_policy.get_name(), first_policy.get_reward_generate_plan()
-        )
-    )
+    # # Print the reward generating plan for all policies.
+    # first_policy = all_policies[0]
+    # print(
+    #     "Policy {} is used for generating rewards: {}".format(
+    #         first_policy.get_name(), first_policy.get_reward_generate_plan()
+    #     )
+    # )
 
     reward_pool = None
     contexts_pool = None
@@ -82,14 +83,25 @@
         simulation_df = pd.DataFrame(columns=columns)
 
         # Check whether the reward and contexts should be merged before running simulation.
-        is_unique_reward = policy.is_unique_reward() and reward_pool is None
-        is_unique_contexts = policy.is_unique_contexts() and contexts_pool is None
+        is_unique_reward = policy.is_unique_reward()
+        is_unique_contexts = policy.is_unique_contexts()
         if not is_unique_reward:
             if not is_unique_contexts:
-                merged_pool = pd.concat([reward_pool, contexts_pool], axis=1)
-                simulation_df = pd.concat([simulation_df, merged_pool])
+                if reward_pool is not None and contexts_pool is not None:
+                    merged_pool = pd.concat([reward_pool, contexts_pool], axis=1)
+                    simulation_df = pd.concat([simulation_df, merged_pool])
             else:
-                simulation_df = pd.concat([simulation_df, reward_pool])
+                if reward_pool is not None:
+                    simulation_df = pd.concat([simulation_df, reward_pool])
+        elif not is_unique_contexts:
+            if contexts_pool is None:
+                simulation_df = pd.concat([simulation_df, contexts_pool])
+        else:
+            print(
+                "Policy {} is used for generating rewards: {}".format(
+                    policy.get_name(), policy.get_reward_generate_plan()
+                )
+            )
 
         for trail in tqdm(range(numTrails), desc='Trails'):
             # Initialize one update batch of datapoints
@@ -158,21 +170,27 @@ def simulate(
         # Create the contexts pool if policy is unique in contexts.
         if is_unique_contexts:
             contexts_pool = simulation_df[policy.bandit.get_contextual_variables()]
-        
+
         simulation_output_path = configs["simulation"]
         os.makedirs(f"{output_path}/{simulation_output_path}", exist_ok=True)
-        
-        simulation_result_name = "{}_results".format(policy.get_name())
-        simulation_df.to_csv(f"{output_path}/{simulation_output_path}/{simulation_result_name}.csv")
+
+        writer = pd.ExcelWriter(f"{output_path}/{simulation_output_path}/{policy.get_name()}.xlsx", engine='xlsxwriter')
+        simulation_result_name = "simulation_results"
+        simulation_df.to_excel(writer, sheet_name=f'{simulation_result_name}')
 
         # Evaluate
-        evaluator = EvaluatorFactory(simulation_df, policy).get_evaluator()
-        evaluation_output_path = configs["evaluation"]
-        os.makedirs(f"{output_path}/{evaluation_output_path}", exist_ok=True)
-        os.makedirs(f"{output_path}/{evaluation_output_path}/metrics", exist_ok=True)
-        for metric in list(evaluator.metrics.keys()):
-            metric_name = "{}_{}".format(policy.get_name(), metric)
-            evaluator.metrics[metric].to_csv(f"{output_path}/{evaluation_output_path}/metrics/{metric_name}.csv")
+        checkpoints = configs.get("checkpoints", ["all"])
+        for checkpoint in tqdm(checkpoints, desc='Checkpoints'):
+            checkpoint_df = simulation_df.head(n=checkpoint)
+            evaluator = EvaluatorFactory(checkpoint_df, policy).get_evaluator()
+            evaluation_output_path = configs["evaluation"]
+            # os.makedirs(f"{output_path}/{evaluation_output_path}", exist_ok=True)
+            # os.makedirs(f"{output_path}/{evaluation_output_path}/metrics", exist_ok=True)
+            for metric in list(evaluator.metrics.keys()):
+                metric_name = "{}_{}".format(checkpoint, metric)
+                evaluator.metrics[metric].to_excel(writer, sheet_name=f"{metric_name}")
+
+        writer.save()
 
         # if checkpoint_path is None:
         #     # Get columns of simulation dataframe
diff --git a/metrics/arm_summary.py b/metrics/arm_summary.py
new file mode 100644
index 0000000..ca162e2
--- /dev/null
+++ b/metrics/arm_summary.py
@@ -0,0 +1,8 @@
+import pandas as pd
+import numpy as np
+
+
+def arm_summary(simulation_df: pd.DataFrame, reward_name: str) -> pd.DataFrame:
+    arm_group = simulation_df.groupby(by=["arm"]).agg({reward_name: ['min', 'max', 'mean', 'std', 'sem', 'count'], 'arm' : ['count']})
+
+    return arm_group.unstack(level=0).unstack()
diff --git a/metrics/context_summary.py b/metrics/context_summary.py
new file mode 100644
index 0000000..459974d
--- /dev/null
+++ b/metrics/context_summary.py
@@ -0,0 +1,8 @@
+import pandas as pd
+import numpy as np
+
+
+def context_summary(simulation_df: pd.DataFrame, reward_name: str, context: str) -> pd.DataFrame:
+    context_group = simulation_df.groupby(by=[context, "arm"]).agg({reward_name: ['min', 'max', 'mean', 'std', 'sem', 'count'], 'arm' : ['count']})
+
+    return context_group.stack(level=[0, 1]).unstack(level=[1, 0])
diff --git a/metrics/evaluator.py b/metrics/evaluator.py
index 0514f64..f64b072 100644
--- a/metrics/evaluator.py
+++ b/metrics/evaluator.py
@@ -1,11 +1,13 @@
 import pandas as pd
 
-from typing import Dict, Union
+from typing import List, Dict, Union
 
 from datasets.policies import Policy
 from policies.types import PolicyType
 from metrics.confidence_interval import estimate_confidence_interval
 from metrics.wald_test import perfrom_wald_test
+from metrics.arm_summary import arm_summary
+from metrics.context_summary import context_summary
 
 
 class Evaluator:
@@ -29,6 +31,9 @@ def __init__(self, simulation_df: pd.DataFrame, policy: Policy) -> None:
 
     def _test_wald(self) -> pd.DataFrame:
         return perfrom_wald_test(self.simulation_df, self.policy)
+
+    def _arm_summary(self, reward: str) -> pd.DataFrame:
+        return arm_summary(self.simulation_df, reward)
 
 
 class TSPostDiffEvaluator(Evaluator):
@@ -41,6 +46,9 @@ def __init__(self, simulation_df: pd.DataFrame, policy: Policy) -> None:
 
     def _test_wald(self) -> pd.DataFrame:
         return perfrom_wald_test(self.simulation_df, self.policy)
+
+    def _arm_summary(self, reward: str) -> pd.DataFrame:
+        return arm_summary(self.simulation_df, reward)
 
 
 class TSContextualEvaluator(Evaluator):
@@ -48,13 +56,24 @@ class TSContextualEvaluator(Evaluator):
     def __init__(self, simulation_df: pd.DataFrame, policy: Policy) -> None:
         super().__init__(simulation_df, policy)
         regression_formula = self.policy.configs["regression_formula"]
+        reward = self.policy.bandit.reward.name
+        noncont_contexts = self.policy.bandit.get_noncont_contextual_variables()
 
         self.metrics = {
-            "confidence_interval": self._evaluate_confidence_interval(regression_formula)
+            "confidence_interval": self._evaluate_confidence_interval(regression_formula),
+            "arm_summary": self._arm_summary(reward)
         }
-    
+        for context in noncont_contexts:
+            self.metrics["{}_summary".format(context)] = self._context_summary(reward, context)
+
     def _evaluate_confidence_interval(self, regression_formula: str) -> pd.DataFrame:
         return estimate_confidence_interval(self.simulation_df, regression_formula)
 
+    def _arm_summary(self, reward: str) -> pd.DataFrame:
+        return arm_summary(self.simulation_df, reward)
+
+    def _context_summary(self, reward: str, context: str) -> pd.DataFrame:
+        return context_summary(self.simulation_df, reward, context)
+
 class EvaluatorFactory:
     evaluator: Union[TSPostDiffEvaluator, TSContextualEvaluator]
diff --git a/requirements.txt b/requirements.txt
index 3d30520..a783dd3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ scipy==1.8.0
 six==1.16.0
 termcolor==1.1.0
 tqdm==4.64.0
+xlsxwriter
\ No newline at end of file