Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed references to CAO #14

Merged
merged 1 commit into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions src/prsdk/data/cao_mapping.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import joblib
import torch

from data.cao_mapping import CAOMapping
from persistence.serializers.serializer import Serializer
from predictors.neural_network.torch_neural_net import TorchNeuralNet
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor
Expand All @@ -31,9 +30,6 @@ def save(self, model: NeuralNetPredictor, path: Path):

# Note: we don't save the model's device, as it's not guaranteed to be available on load
config = {
"context": model.cao.context,
"actions": model.cao.actions,
"outcomes": model.cao.outcomes,
"features": model.features,
"label": model.label,
"hidden_sizes": model.hidden_sizes,
Expand Down Expand Up @@ -68,9 +64,7 @@ def load(self, path: Path) -> NeuralNetPredictor:
# Initialize model with config
with open(path / "config.json", "r", encoding="utf-8") as file:
config = json.load(file)
# Grab CAO out of config
cao = CAOMapping(config.pop("context"), config.pop("actions"), config.pop("outcomes"))
nnp = NeuralNetPredictor(cao, config)
nnp = NeuralNetPredictor(config)

nnp.model = TorchNeuralNet(len(config["features"]),
config["hidden_sizes"],
Expand Down
12 changes: 2 additions & 10 deletions src/prsdk/persistence/serializers/sklearn_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import joblib

from data.cao_mapping import CAOMapping
from persistence.serializers.serializer import Serializer
from predictors.sklearn_predictors.sklearn_predictor import SKLearnPredictor

Expand All @@ -24,13 +23,8 @@ def save(self, model: SKLearnPredictor, path: Path):
"""
path.mkdir(parents=True, exist_ok=True)

# Add CAO to the config
config = dict(model.config.items())
cao_dict = {"context": model.cao.context, "actions": model.cao.actions, "outcomes": model.cao.outcomes}
config.update(cao_dict)

with open(path / "config.json", "w", encoding="utf-8") as file:
json.dump(config, file)
json.dump(model.config, file)
joblib.dump(model.model, path / "model.joblib")

def load(self, path: Path) -> "SKLearnPredictor":
Expand All @@ -44,11 +38,9 @@ def load(self, path: Path) -> "SKLearnPredictor":
if not (load_path / "config.json").exists() or not (load_path / "model.joblib").exists():
raise FileNotFoundError("Model files not found in path.")

# Extract CAO from config
with open(load_path / "config.json", "r", encoding="utf-8") as file:
config = json.load(file)
cao = CAOMapping(config.pop("context"), config.pop("actions"), config.pop("outcomes"))

model = joblib.load(load_path / "model.joblib")
sklearn_predictor = SKLearnPredictor(cao, model, config)
sklearn_predictor = SKLearnPredictor(model, config)
return sklearn_predictor
8 changes: 2 additions & 6 deletions src/prsdk/predictors/neural_network/neural_net_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from data.cao_mapping import CAOMapping
from data.torch_data import TorchDataset
from predictors.predictor import Predictor
from predictors.neural_network.torch_neural_net import TorchNeuralNet
Expand All @@ -29,11 +28,8 @@ class NeuralNetPredictor(Predictor):
Data is automatically standardized and the scaler is saved with the model.
TODO: We want to be able to have custom scaling in the future.
"""
def __init__(self, cao: CAOMapping, model_config: dict):
def __init__(self, model_config: dict):
"""
:param context: list of context features.
:param actions: list of action features.
:param outcomes: list of outcomes to predict.
:param model_config: dictionary of model configuration parameters.
Model config should contain the following:
features: list of features to use in the model (optional, defaults to all features)
Expand All @@ -48,7 +44,7 @@ def __init__(self, cao: CAOMapping, model_config: dict):
train_pct: percentage of training data to use (defaults to 1)
step_lr_params: dictionary of parameters to pass to the step learning rate scheduler (defaults to 1, 0.1)
"""
super().__init__(cao)
super().__init__()
self.features = model_config.get("features", None)
self.label = model_config.get("label", None)

Expand Down
13 changes: 2 additions & 11 deletions src/prsdk/predictors/predictor.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,17 @@
"""
Abstract class for predictors to inherit from.
Interface for predictors to implement.
"""
from abc import ABC, abstractmethod

import pandas as pd

from data.cao_mapping import CAOMapping


class Predictor(ABC):
"""
Abstract class for predictors to inherit from.
Interface for predictors to implement.
Predictors must be able to be fit and predict on a DataFrame.
It is up to the Predictor to keep track of the proper label to label the output DataFrame.
"""
def __init__(self, cao: CAOMapping):
"""
Initializes the Predictor with the context, actions, and outcomes.
:param cao: CAOMapping object with context, actions, and outcomes.
"""
self.cao = cao

@abstractmethod
def fit(self, X_train: pd.DataFrame, y_train: pd.Series):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
from sklearn.linear_model import LinearRegression

from data.cao_mapping import CAOMapping
from predictors.sklearn_predictors.sklearn_predictor import SKLearnPredictor


Expand All @@ -12,14 +11,13 @@ class LinearRegressionPredictor(SKLearnPredictor):
Simple linear regression predictor.
See SKLearnPredictor for more details.
"""
def __init__(self, cao: CAOMapping, model_config: dict):
def __init__(self, model_config: dict):
"""
:param cao: CAOMapping object with context, actions, and outcomes for super constructor.
:param model_config: Configuration to pass into the SKLearn constructor. Also contains the keys "features" and
"label" to keep track of the features and label to predict.
"""
if not model_config:
model_config = {}
lr_config = {key: value for key, value in model_config.items() if key not in ["features", "label"]}
model = LinearRegression(**lr_config)
super().__init__(cao, model, model_config)
super().__init__(model, model_config)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
from sklearn.ensemble import RandomForestRegressor

from data.cao_mapping import CAOMapping
from predictors.sklearn_predictors.sklearn_predictor import SKLearnPredictor


Expand All @@ -12,12 +11,11 @@ class RandomForestPredictor(SKLearnPredictor):
Simple random forest predictor.
See SKLearnPredictor for more details.
"""
def __init__(self, cao: CAOMapping, model_config: dict):
def __init__(self, model_config: dict):
"""
:param cao: CAOMapping object with context, actions, and outcomes for super constructor.
:param model_config: Configuration to pass into the SKLearn constructor. Also contains the keys "features" and
"label" to keep track of the features and label to predict.
"""
rf_config = {key: value for key, value in model_config.items() if key not in ["features", "label"]}
model = RandomForestRegressor(**rf_config)
super().__init__(cao, model, model_config)
super().__init__(model, model_config)
5 changes: 2 additions & 3 deletions src/prsdk/predictors/sklearn_predictors/sklearn_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import pandas as pd

from data.cao_mapping import CAOMapping
from predictors.predictor import Predictor


Expand All @@ -15,14 +14,14 @@ class SKLearnPredictor(Predictor, ABC):
Simple abstract class for sklearn predictors.
Keeps track of features fit on and label to predict.
"""
def __init__(self, cao: CAOMapping, model, model_config: dict):
def __init__(self, model, model_config: dict):
"""
Model config contains the following:
features: list of features to use for prediction (optional, defaults to all features)
label: name of the label to predict (optional, defaults to passed label during fit)
Any other parameters are passed to the model.
"""
super().__init__(cao)
super().__init__()
self.config = model_config
self.model = model

Expand Down
14 changes: 2 additions & 12 deletions src/prsdk/prescriptors/prescriptor.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
"""
Abstract prescriptor class to be implemented.
Interface for prescriptors to implement.
"""
from abc import ABC, abstractmethod

import pandas as pd

from data.cao_mapping import CAOMapping


# pylint: disable=too-few-public-methods
class Prescriptor(ABC):
"""
Abstract class for prescriptors to allow us to experiment with different implementations.
Interface for prescriptors to implement.
"""
def __init__(self, cao: CAOMapping):
"""
We keep track of the context, actions, and outcomes in the CAO mapping to ensure the prescriptor is compatible
with the project it's in.
:param cao: CAOMapping object with context, actions, and outcomes.
"""
self.cao = cao

@abstractmethod
def prescribe(self, context_df: pd.DataFrame) -> pd.DataFrame:
"""
Expand Down
1 change: 0 additions & 1 deletion tests/persistence/test_hf_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ class TestHuggingFacePersistence(unittest.TestCase):
"""
Tests the HuggingFace Persistor. We can't test the actual upload but we can test the download with an
arbitrary model from HuggingFace.
TODO: We have to update our models to match the new configs that save CAO
"""
def setUp(self):
self.temp_dir = Path("tests/temp")
Expand Down
6 changes: 2 additions & 4 deletions tests/persistence/test_predictor_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import pandas as pd

from data.cao_mapping import CAOMapping
from persistence.serializers.neural_network_serializer import NeuralNetSerializer
from persistence.serializers.sklearn_serializer import SKLearnSerializer
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor
Expand All @@ -25,7 +24,6 @@ def setUp(self):
2 models with the same parameters, load one from the other's save, and check if
their predictions are the same.
"""
self.cao = CAOMapping(["a", "b"], ["c"], ["label"])
self.models = [
NeuralNetPredictor,
LinearRegressionPredictor,
Expand Down Expand Up @@ -56,7 +54,7 @@ def test_save_file_names(self):
]
for model, serializer, config, test_names in zip(self.models, self.serializers, self.configs, save_file_names):
with self.subTest(model=model):
predictor = model(self.cao, config)
predictor = model(config)
predictor.fit(self.dummy_data, self.dummy_target)
serializer.save(predictor, self.temp_path)
files = [f.name for f in self.temp_path.glob("**/*") if f.is_file()]
Expand All @@ -71,7 +69,7 @@ def test_loaded_same(self):
"""
for model, serializer, config in zip(self.models, self.serializers, self.configs):
with self.subTest(model=model):
predictor = model(self.cao, config)
predictor = model(config)
predictor.fit(self.dummy_data.iloc[:2], self.dummy_target.iloc[:2])
output = predictor.predict(self.dummy_data.iloc[2:])
serializer.save(predictor, self.temp_path)
Expand Down
10 changes: 3 additions & 7 deletions tests/predictors/test_neural_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,18 @@

import pandas as pd

from data.cao_mapping import CAOMapping
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor


class TestNeuralNet(unittest.TestCase):
"""
Specifically tests the neural net predictor
"""
def setUp(self):
self.cao = CAOMapping(["a", "b"], ["c"], ["label"])

def test_single_input(self):
"""
Tests the neural net with a single input.
"""
predictor = NeuralNetPredictor(self.cao, {"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})
predictor = NeuralNetPredictor({"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})

train_data = pd.DataFrame({"a": [1], "b": [2], "c": [3], "label": [4]})
test_data = pd.DataFrame({"a": [4], "b": [5], "c": [6]})
Expand All @@ -33,7 +29,7 @@ def test_multi_input(self):
"""
Tests the neural net with multiple inputs.
"""
predictor = NeuralNetPredictor(self.cao, {"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})
predictor = NeuralNetPredictor({"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})

train_data = pd.DataFrame({"a": [1, 2], "b": [2, 3], "c": [3, 4], "label": [4, 5]})
test_data = pd.DataFrame({"a": [4, 5], "b": [5, 6], "c": [6, 7]})
Expand All @@ -46,7 +42,7 @@ def test_batched_input(self):
"""
Tests the neural network with batched inputs.
"""
predictor = NeuralNetPredictor(self.cao, {"hidden_sizes": [4], "epochs": 1, "batch_size": 2, "device": "cpu"})
predictor = NeuralNetPredictor({"hidden_sizes": [4], "epochs": 1, "batch_size": 2, "device": "cpu"})

train_data = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5], "label": [4, 5, 6]})
test_data = pd.DataFrame({"a": [4, 5], "b": [5, 6], "c": [6, 7]})
Expand Down
Loading