
Commit

modify context distribution and add summaries
VictorS67 committed Aug 10, 2022
1 parent b37b041 commit 3054a0f
Showing 8 changed files with 134 additions and 50 deletions.
15 changes: 14 additions & 1 deletion datasets/bandits.py
@@ -47,7 +47,13 @@ def _init_contexts(self, contexts: Dict) -> Dict:
for context in contexts:
context = dict(context)
context_name = context["name"]
contexts_dict[context_name] = ContextAllocateData(context["values"], context["allocations"])
contexts_dict[context_name] = ContextAllocateData(
context["min_value"],
context["max_value"],
context["value_type"],
context["normalize"],
context["distribution"]
)
if context['extra'] is True:
self.terms.append(context_name)
if context['interaction'] is True:
@@ -61,3 +67,10 @@ def get_actions(self) -> List:

def get_contextual_variables(self) -> List:
return list(self.contexts_dict.keys())

def get_noncont_contextual_variables(self) -> List:
lst = []
for context in list(self.contexts_dict.keys()):
if self.contexts_dict[context].type != "CONT":
lst.append(context)
return lst
46 changes: 30 additions & 16 deletions datasets/contexts.py
@@ -1,21 +1,35 @@
from typing import List
from typing import List, Dict
from scipy.stats import randint, bernoulli, uniform, norm

import numpy as np


class ContextAllocateData:
values: List[float]
allocations: List[float]
min_val: float
max_val: float
type: str
normalize: bool
distribution: Dict

def __init__(self, values: List, allocations: List) -> None:
if len(values) == 0:
print("no context values!")
return None
def __init__(self, min_val: float, max_val: float, type: str, normalize: bool, distribution: Dict) -> None:
self.min_val = min_val
self.max_val = max_val
self.type = type
self.normalize = normalize
self.distribution = distribution

if len(values) != len(allocations):
print("can't allocate context values!")
return None

if sum(allocations) != 1:
print("allocation invalid!")
return None
def get_rvs(self) -> float:
distribution_copy = self.distribution.copy()
dis_type = distribution_copy.pop('type', None)

self.values = values
self.allocations = allocations
if dis_type is not None:
random_val = eval(dis_type).rvs(**distribution_copy)
if self.type != "CONT":
random_val = np.floor(random_val)
random_val = np.clip(random_val, self.min_val, self.max_val)

if self.normalize:
random_val = (random_val - self.min_val) / (self.max_val - self.min_val)

return round(random_val, 2)
return None
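
A minimal usage sketch of the reworked ContextAllocateData, based on the constructor and get_rvs shown above. The "type" key in the distribution dict names one of the imported scipy.stats distributions (randint, bernoulli, uniform, norm) and the remaining keys are passed through to its rvs call; the context names, value types, and parameter values below are illustrative only:

    from datasets.contexts import ContextAllocateData

    # Hypothetical integer-valued context: norm.rvs(loc=3, scale=1) is floored
    # (value type is not "CONT"), clipped to [0, 6], and left un-normalized.
    age_band = ContextAllocateData(
        min_val=0,
        max_val=6,
        type="ORD",          # any non-"CONT" type is floored to a whole number
        normalize=False,
        distribution={"type": "norm", "loc": 3, "scale": 1},
    )
    print(age_band.get_rvs())    # e.g. 2.0

    # Hypothetical continuous context, rescaled to [0, 1] because normalize=True.
    engagement = ContextAllocateData(0.0, 10.0, "CONT", True,
                                     {"type": "uniform", "loc": 0, "scale": 10})
    print(engagement.get_rvs())  # e.g. 0.37
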
15 changes: 9 additions & 6 deletions datasets/policies.py
@@ -330,7 +330,7 @@ def __init__(self, policy_configs: Dict, bandit: Bandit) -> None:

def get_contexts(self, new_learner_dict: Dict={}) -> Dict:
for var in self.bandit.contexts_dict:
new_learner_dict[var] = np.random.choice(self.bandit.contexts_dict[var].values, size=1, p=self.bandit.contexts_dict[var].allocations)[0]
new_learner_dict[var] = self.bandit.contexts_dict[var].get_rvs()

return new_learner_dict

@@ -386,14 +386,17 @@ def get_reward(self, new_learner_df: pd.DataFrame) -> pd.DataFrame:
terms.append(var)
terms = list(set(terms))

# Get scale of error.
err_scale = (reward.max_value - reward.min_value) / 3

# Update reward for the new learner dataframe.
for index, row in new_learner_df.iterrows():
row_terms = row[terms]
error = np.random.normal(0, err_scale, 1)[0]
true_reward = calculate_outcome(row_terms.to_dict(), np.array(true_coef_mean), include_intercept, true_estimate) + error
raw_reward = calculate_outcome(row_terms.to_dict(), np.array(true_coef_mean), include_intercept, true_estimate)
if reward.value_type == "BIN":
true_reward = np.random.binomial(1, raw_reward, 1)[0]
else:
# Get scale of error.
err_scale = (reward.max_value - reward.min_value) / 6
error = np.random.normal(0, err_scale, 1)[0]
true_reward = raw_reward + error
row[reward_name] = reward.get_reward(true_reward)

return new_learner_df
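
The reward generation above now branches on the reward's value type: for a "BIN" reward the raw regression outcome is treated as a Bernoulli success probability, while for other reward types Gaussian noise is added with a scale tightened from (max_value - min_value) / 3 to (max_value - min_value) / 6. A self-contained sketch of that branching, with an illustrative helper name and the assumption that the raw outcome is already a valid probability in the binary case:

    import numpy as np

    def noisy_reward(raw_reward: float, value_type: str,
                     min_value: float, max_value: float) -> float:
        if value_type == "BIN":
            # Interpret the raw outcome as a success probability and draw 0 or 1.
            return float(np.random.binomial(1, raw_reward))
        # Continuous reward: additive Gaussian noise with scale = range / 6.
        err_scale = (max_value - min_value) / 6
        return raw_reward + np.random.normal(0, err_scale)

    print(noisy_reward(0.7, "BIN", 0, 1))    # 0.0 or 1.0
    print(noisy_reward(3.2, "CONT", 0, 10))  # e.g. 3.05
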
66 changes: 42 additions & 24 deletions main.py
@@ -5,8 +5,9 @@
import numpy as np
import json

from io import BytesIO

from datasets.arms import ArmData
from datasets.contexts import ContextAllocateData
from datasets.bandits import Bandit
from datasets.policies import PolicyFactory
from metrics.evaluator import EvaluatorFactory
@@ -62,13 +63,13 @@ def simulate(
# The priority of unique in rewards is higher than the priority of unique in contexts, in terms of sorting.
all_policies.sort(key=lambda x: 10 * int(x.is_unique_reward()) + int(x.is_unique_contexts()), reverse=True)

# Print the reward generating plan for all policies.
first_policy = all_policies[0]
print(
"Policy {} is used for generating rewards: {}".format(
first_policy.get_name(), first_policy.get_reward_generate_plan()
)
)
# # Print the reward generating plan for all policies.
# first_policy = all_policies[0]
# print(
# "Policy {} is used for generating rewards: {}".format(
# first_policy.get_name(), first_policy.get_reward_generate_plan()
# )
# )

reward_pool = None
contexts_pool = None
@@ -82,14 +83,25 @@
simulation_df = pd.DataFrame(columns=columns)

# Check whether the reward and contexts should be merged before running simulation.
is_unique_reward = policy.is_unique_reward() and reward_pool is None
is_unique_contexts = policy.is_unique_contexts() and contexts_pool is None
is_unique_reward = policy.is_unique_reward()
is_unique_contexts = policy.is_unique_contexts()
if not is_unique_reward:
if not is_unique_contexts:
merged_pool = pd.concat([reward_pool, contexts_pool], axis=1)
simulation_df = pd.concat([simulation_df, merged_pool])
if reward_pool is not None and contexts_pool is not None:
merged_pool = pd.concat([reward_pool, contexts_pool], axis=1)
simulation_df = pd.concat([simulation_df, merged_pool])
else:
simulation_df = pd.concat([simulation_df, reward_pool])
if reward_pool is not None:
simulation_df = pd.concat([simulation_df, reward_pool])
elif not is_unique_contexts:
if contexts_pool is None:
simulation_df = pd.concat([simulation_df, contexts_pool])
else:
print(
"Policy {} is used for generating rewards: {}".format(
policy.get_name(), policy.get_reward_generate_plan()
)
)

for trail in tqdm(range(numTrails), desc='Trails'):
# Initialize one update batch of datapoints
@@ -158,21 +170,27 @@ def simulate(
# Create the contexts pool if policy is unique in contexts.
if is_unique_contexts:
contexts_pool = simulation_df[policy.bandit.get_contextual_variables()]

simulation_output_path = configs["simulation"]
os.makedirs(f"{output_path}/{simulation_output_path}", exist_ok=True)

simulation_result_name = "{}_results".format(policy.get_name())
simulation_df.to_csv(f"{output_path}/{simulation_output_path}/{simulation_result_name}.csv")

writer = pd.ExcelWriter(f"{output_path}/{simulation_output_path}/{policy.get_name()}.xlsx", engine='xlsxwriter')
simulation_result_name = "simulation_results"
simulation_df.to_excel(writer, sheet_name=f'{simulation_result_name}')

# Evaluate
evaluator = EvaluatorFactory(simulation_df, policy).get_evaluator()
evaluation_output_path = configs["evaluation"]
os.makedirs(f"{output_path}/{evaluation_output_path}", exist_ok=True)
os.makedirs(f"{output_path}/{evaluation_output_path}/metrics", exist_ok=True)
for metric in list(evaluator.metrics.keys()):
metric_name = "{}_{}".format(policy.get_name(), metric)
evaluator.metrics[metric].to_csv(f"{output_path}/{evaluation_output_path}/metrics/{metric_name}.csv")
checkpoints = configs.get("checkpoints", ["all"])
for checkpoint in tqdm(checkpoints, desc='Checkpoints'):
checkpoint_df = simulation_df.head(n=checkpoint)
evaluator = EvaluatorFactory(checkpoint_df, policy).get_evaluator()
evaluation_output_path = configs["evaluation"]
# os.makedirs(f"{output_path}/{evaluation_output_path}", exist_ok=True)
# os.makedirs(f"{output_path}/{evaluation_output_path}/metrics", exist_ok=True)
for metric in list(evaluator.metrics.keys()):
metric_name = "{}_{}".format(checkpoint, metric)
evaluator.metrics[metric].to_excel(writer, sheet_name=f"{metric_name}")

writer.save()

# if checkpoint_path is None:
# # Get columns of simulation dataframe
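
With this change each policy's output goes into a single .xlsx workbook via pandas.ExcelWriter (hence the new xlsxwriter dependency): one sheet for the full simulation results and one sheet per checkpoint/metric pair, where each checkpoint evaluates a head() slice of the simulation dataframe. A rough sketch of the same pattern on dummy data; the stand-in frames, the stand-in metric, the "checkpoints" values, and the special-casing of the default "all" value are assumptions, and a context manager is used here instead of the explicit writer.save() call:

    import pandas as pd

    # Stand-in for a policy's simulation_df.
    simulation_df = pd.DataFrame({"arm": ["A", "B", "A", "B"], "reward": [1, 0, 1, 1]})

    with pd.ExcelWriter("example_policy.xlsx", engine="xlsxwriter") as writer:
        simulation_df.to_excel(writer, sheet_name="simulation_results")
        for checkpoint in [2, "all"]:
            # "all" keeps every row; numeric checkpoints truncate with head().
            checkpoint_df = simulation_df if checkpoint == "all" else simulation_df.head(n=checkpoint)
            # Stand-in metric: per-arm mean reward at this checkpoint.
            metric_df = checkpoint_df.groupby("arm")["reward"].mean().to_frame("mean_reward")
            metric_df.to_excel(writer, sheet_name=f"{checkpoint}_mean_reward")
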
8 changes: 8 additions & 0 deletions metrics/arm_summary.py
@@ -0,0 +1,8 @@
import pandas as pd
import numpy as np


def arm_summary(simulation_df: pd.DataFrame, reward_name: str) -> pd.DataFrame:
arm_group = simulation_df.groupby(by=["arm"]).agg({reward_name: ['min', 'max', 'mean', 'std', 'sem', 'count'], 'arm' : ['count']})

return arm_group.unstack(level=0).unstack()
8 changes: 8 additions & 0 deletions metrics/context_summary.py
@@ -0,0 +1,8 @@
import pandas as pd
import numpy as np


def context_summary(simulation_df: pd.DataFrame, reward_name: str, context: str) -> pd.DataFrame:
context_group = simulation_df.groupby(by=[context, "arm"]).agg({reward_name: ['min', 'max', 'mean', 'std', 'sem', 'count'], 'arm' : ['count']})

return context_group.stack(level=[0, 1]).unstack(level=[1, 0])
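
Both new helpers are thin wrappers around DataFrame.groupby(...).agg(...): arm_summary keys the reward statistics by arm alone, while context_summary keys them by a (context, arm) pair. A toy invocation, assuming the repo's metrics package is importable and using made-up column names for the context and reward:

    import pandas as pd

    from metrics.arm_summary import arm_summary
    from metrics.context_summary import context_summary

    toy_df = pd.DataFrame({
        "arm": ["A", "A", "B", "B"],
        "is_mobile": [0, 1, 0, 1],       # hypothetical non-continuous context
        "reward": [1.0, 0.0, 0.5, 1.0],  # hypothetical reward column
    })

    print(arm_summary(toy_df, "reward"))                   # min/max/mean/std/sem/count per arm
    print(context_summary(toy_df, "reward", "is_mobile"))  # same statistics per (context value, arm)
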
25 changes: 22 additions & 3 deletions metrics/evaluator.py
@@ -1,11 +1,13 @@
import pandas as pd

from typing import Dict, Union
from typing import List, Dict, Union

from datasets.policies import Policy
from policies.types import PolicyType
from metrics.confidence_interval import estimate_confidence_interval
from metrics.wald_test import perfrom_wald_test
from metrics.arm_summary import arm_summary
from metrics.context_summary import context_summary


class Evaluator:
@@ -29,6 +31,9 @@ def __init__(self, simulation_df: pd.DataFrame, policy: Policy) -> None:

def _test_wald(self) -> pd.DataFrame:
return perfrom_wald_test(self.simulation_df, self.policy)

def _arm_summary(self, reward: str) -> pd.DataFrame:
return arm_summary(self.simulation_df, reward)


class TSPostDiffEvaluator(Evaluator):
@@ -41,20 +46,34 @@ def __init__(self, simulation_df: pd.DataFrame, policy: Policy) -> None:

def _test_wald(self) -> pd.DataFrame:
return perfrom_wald_test(self.simulation_df, self.policy)

def _arm_summary(self, reward: str) -> pd.DataFrame:
return arm_summary(self.simulation_df, reward)


class TSContextualEvaluator(Evaluator):

def __init__(self, simulation_df: pd.DataFrame, policy: Policy) -> None:
super().__init__(simulation_df, policy)
regression_formula = self.policy.configs["regression_formula"]
reward = self.policy.bandit.reward.name
noncont_contexts = self.policy.bandit.get_noncont_contextual_variables()
self.metrics = {
"confidence_interval": self._evaluate_confidence_interval(regression_formula)
"confidence_interval": self._evaluate_confidence_interval(regression_formula),
"arm_summary": self._arm_summary(reward)
}

for context in noncont_contexts:
self.metrics["{}_summary".format(context)] = self._context_summary(reward, context)

def _evaluate_confidence_interval(self, regression_formula: str) -> pd.DataFrame:
return estimate_confidence_interval(self.simulation_df, regression_formula)

def _arm_summary(self, reward: str) -> pd.DataFrame:
return arm_summary(self.simulation_df, reward)

def _context_summary(self, reward: str, context: str) -> pd.DataFrame:
return context_summary(self.simulation_df, reward, context)


class EvaluatorFactory:
evaluator: Union[TSPostDiffEvaluator, TSContextualEvaluator]
1 change: 1 addition & 0 deletions requirements.txt
@@ -7,3 +7,4 @@ scipy==1.8.0
six==1.16.0
termcolor==1.1.0
tqdm==4.64.0
xlsxwriter
