Skip to content

Commit

Permalink
Merge pull request #14 from Project-Resilience/remove-cao
Browse files: browse the repository at this point in the history
Removed references to CAO
  • Loading branch information
danyoungday authored Jul 30, 2024
2 parents 94e79fb + a7ffdf7 commit 1ecd52a
Show file tree
Hide file tree
Showing 12 changed files with 20 additions and 83 deletions.
14 changes: 0 additions & 14 deletions src/prsdk/data/cao_mapping.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import joblib
import torch

from data.cao_mapping import CAOMapping
from persistence.serializers.serializer import Serializer
from predictors.neural_network.torch_neural_net import TorchNeuralNet
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor
Expand All @@ -31,9 +30,6 @@ def save(self, model: NeuralNetPredictor, path: Path):

# Note: we don't save the model's device, as it's not guaranteed to be available on load
config = {
"context": model.cao.context,
"actions": model.cao.actions,
"outcomes": model.cao.outcomes,
"features": model.features,
"label": model.label,
"hidden_sizes": model.hidden_sizes,
Expand Down Expand Up @@ -68,9 +64,7 @@ def load(self, path: Path) -> NeuralNetPredictor:
# Initialize model with config
with open(path / "config.json", "r", encoding="utf-8") as file:
config = json.load(file)
# Grab CAO out of config
cao = CAOMapping(config.pop("context"), config.pop("actions"), config.pop("outcomes"))
nnp = NeuralNetPredictor(cao, config)
nnp = NeuralNetPredictor(config)

nnp.model = TorchNeuralNet(len(config["features"]),
config["hidden_sizes"],
Expand Down
12 changes: 2 additions & 10 deletions src/prsdk/persistence/serializers/sklearn_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import joblib

from data.cao_mapping import CAOMapping
from persistence.serializers.serializer import Serializer
from predictors.sklearn_predictors.sklearn_predictor import SKLearnPredictor

Expand All @@ -24,13 +23,8 @@ def save(self, model: SKLearnPredictor, path: Path):
"""
path.mkdir(parents=True, exist_ok=True)

# Add CAO to the config
config = dict(model.config.items())
cao_dict = {"context": model.cao.context, "actions": model.cao.actions, "outcomes": model.cao.outcomes}
config.update(cao_dict)

with open(path / "config.json", "w", encoding="utf-8") as file:
json.dump(config, file)
json.dump(model.config, file)
joblib.dump(model.model, path / "model.joblib")

def load(self, path: Path) -> "SKLearnPredictor":
Expand All @@ -44,11 +38,9 @@ def load(self, path: Path) -> "SKLearnPredictor":
if not (load_path / "config.json").exists() or not (load_path / "model.joblib").exists():
raise FileNotFoundError("Model files not found in path.")

# Extract CAO from config
with open(load_path / "config.json", "r", encoding="utf-8") as file:
config = json.load(file)
cao = CAOMapping(config.pop("context"), config.pop("actions"), config.pop("outcomes"))

model = joblib.load(load_path / "model.joblib")
sklearn_predictor = SKLearnPredictor(cao, model, config)
sklearn_predictor = SKLearnPredictor(model, config)
return sklearn_predictor
8 changes: 2 additions & 6 deletions src/prsdk/predictors/neural_network/neural_net_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from data.cao_mapping import CAOMapping
from data.torch_data import TorchDataset
from predictors.predictor import Predictor
from predictors.neural_network.torch_neural_net import TorchNeuralNet
Expand All @@ -29,11 +28,8 @@ class NeuralNetPredictor(Predictor):
Data is automatically standardized and the scaler is saved with the model.
TODO: We want to be able to have custom scaling in the future.
"""
def __init__(self, cao: CAOMapping, model_config: dict):
def __init__(self, model_config: dict):
"""
:param context: list of context features.
:param actions: list of action features.
:param outcomes: list of outcomes to predict.
:param model_config: dictionary of model configuration parameters.
Model config should contain the following:
features: list of features to use in the model (optional, defaults to None)
Expand All @@ -48,7 +44,7 @@ def __init__(self, cao: CAOMapping, model_config: dict):
train_pct: percentage of training data to use (defaults to 1)
step_lr_params: dictionary of parameters to pass to the step learning rate scheduler (defaults to 1, 0.1)
"""
super().__init__(cao)
super().__init__()
self.features = model_config.get("features", None)
self.label = model_config.get("label", None)

Expand Down
13 changes: 2 additions & 11 deletions src/prsdk/predictors/predictor.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,17 @@
"""
Abstract class for predictors to inherit from.
Interface for predictors to implement.
"""
from abc import ABC, abstractmethod

import pandas as pd

from data.cao_mapping import CAOMapping


class Predictor(ABC):
"""
Abstract class for predictors to inherit from.
Interface for predictors to implement.
Predictors must be able to be fit and predict on a DataFrame.
It is up to the Predictor to keep track of the proper label to label the output DataFrame.
"""
def __init__(self, cao: CAOMapping):
"""
Initializes the Predictor with the context, actions, and outcomes.
:param cao: CAOMapping object with context, actions, and outcomes.
"""
self.cao = cao

@abstractmethod
def fit(self, X_train: pd.DataFrame, y_train: pd.Series):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
from sklearn.linear_model import LinearRegression

from data.cao_mapping import CAOMapping
from predictors.sklearn_predictors.sklearn_predictor import SKLearnPredictor


Expand All @@ -12,14 +11,13 @@ class LinearRegressionPredictor(SKLearnPredictor):
Simple linear regression predictor.
See SKLearnPredictor for more details.
"""
def __init__(self, cao: CAOMapping, model_config: dict):
def __init__(self, model_config: dict):
"""
:param cao: CAOMapping object with context, actions, and outcomes for super constructor.
:param model_config: Configuration to pass into the SKLearn constructor. Also contains the keys "features" and
"label" to keep track of the features and label to predict.
"""
if not model_config:
model_config = {}
lr_config = {key: value for key, value in model_config.items() if key not in ["features", "label"]}
model = LinearRegression(**lr_config)
super().__init__(cao, model, model_config)
super().__init__(model, model_config)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
from sklearn.ensemble import RandomForestRegressor

from data.cao_mapping import CAOMapping
from predictors.sklearn_predictors.sklearn_predictor import SKLearnPredictor


Expand All @@ -12,12 +11,11 @@ class RandomForestPredictor(SKLearnPredictor):
Simple random forest predictor.
See SKLearnPredictor for more details.
"""
def __init__(self, cao: CAOMapping, model_config: dict):
def __init__(self, model_config: dict):
"""
:param cao: CAOMapping object with context, actions, and outcomes for super constructor.
:param model_config: Configuration to pass into the SKLearn constructor. Also contains the keys "features" and
"label" to keep track of the features and label to predict.
"""
rf_config = {key: value for key, value in model_config.items() if key not in ["features", "label"]}
model = RandomForestRegressor(**rf_config)
super().__init__(cao, model, model_config)
super().__init__(model, model_config)
5 changes: 2 additions & 3 deletions src/prsdk/predictors/sklearn_predictors/sklearn_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import pandas as pd

from data.cao_mapping import CAOMapping
from predictors.predictor import Predictor


Expand All @@ -15,14 +14,14 @@ class SKLearnPredictor(Predictor, ABC):
Simple abstract class for sklearn predictors.
Keeps track of features fit on and label to predict.
"""
def __init__(self, cao: CAOMapping, model, model_config: dict):
def __init__(self, model, model_config: dict):
"""
Model config contains the following:
features: list of features to use for prediction (optional, defaults to all features)
label: name of the label to predict (optional, defaults to passed label during fit)
Any other parameters are passed to the model.
"""
super().__init__(cao)
super().__init__()
self.config = model_config
self.model = model

Expand Down
14 changes: 2 additions & 12 deletions src/prsdk/prescriptors/prescriptor.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
"""
Abstract prescriptor class to be implemented.
Interface for prescriptors to implement.
"""
from abc import ABC, abstractmethod

import pandas as pd

from data.cao_mapping import CAOMapping


# pylint: disable=too-few-public-methods
class Prescriptor(ABC):
"""
Abstract class for prescriptors to allow us to experiment with different implementations.
Interface for prescriptors to implement.
"""
def __init__(self, cao: CAOMapping):
"""
We keep track of the context, actions, and outcomes in the CAO mapping to ensure the prescriptor is compatible
with the project it's in.
:param cao: CAOMapping object with context, actions, and outcomes.
"""
self.cao = cao

@abstractmethod
def prescribe(self, context_df: pd.DataFrame) -> pd.DataFrame:
"""
Expand Down
1 change: 0 additions & 1 deletion tests/persistence/test_hf_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ class TestHuggingFacePersistence(unittest.TestCase):
"""
Tests the HuggingFace Persistor. We can't test the actual upload but we can test the download with an
arbitrary model from HuggingFace.
TODO: We have to update our models to match the new configs that save CAO
"""
def setUp(self):
self.temp_dir = Path("tests/temp")
Expand Down
6 changes: 2 additions & 4 deletions tests/persistence/test_predictor_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import pandas as pd

from data.cao_mapping import CAOMapping
from persistence.serializers.neural_network_serializer import NeuralNetSerializer
from persistence.serializers.sklearn_serializer import SKLearnSerializer
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor
Expand All @@ -25,7 +24,6 @@ def setUp(self):
2 models with the same parameters, load one from the other's save, and check if
their predictions are the same.
"""
self.cao = CAOMapping(["a", "b"], ["c"], ["label"])
self.models = [
NeuralNetPredictor,
LinearRegressionPredictor,
Expand Down Expand Up @@ -56,7 +54,7 @@ def test_save_file_names(self):
]
for model, serializer, config, test_names in zip(self.models, self.serializers, self.configs, save_file_names):
with self.subTest(model=model):
predictor = model(self.cao, config)
predictor = model(config)
predictor.fit(self.dummy_data, self.dummy_target)
serializer.save(predictor, self.temp_path)
files = [f.name for f in self.temp_path.glob("**/*") if f.is_file()]
Expand All @@ -71,7 +69,7 @@ def test_loaded_same(self):
"""
for model, serializer, config in zip(self.models, self.serializers, self.configs):
with self.subTest(model=model):
predictor = model(self.cao, config)
predictor = model(config)
predictor.fit(self.dummy_data.iloc[:2], self.dummy_target.iloc[:2])
output = predictor.predict(self.dummy_data.iloc[2:])
serializer.save(predictor, self.temp_path)
Expand Down
10 changes: 3 additions & 7 deletions tests/predictors/test_neural_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,18 @@

import pandas as pd

from data.cao_mapping import CAOMapping
from predictors.neural_network.neural_net_predictor import NeuralNetPredictor


class TestNeuralNet(unittest.TestCase):
"""
Specifically tests the neural net predictor
"""
def setUp(self):
self.cao = CAOMapping(["a", "b"], ["c"], ["label"])

def test_single_input(self):
"""
Tests the neural net with a single input.
"""
predictor = NeuralNetPredictor(self.cao, {"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})
predictor = NeuralNetPredictor({"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})

train_data = pd.DataFrame({"a": [1], "b": [2], "c": [3], "label": [4]})
test_data = pd.DataFrame({"a": [4], "b": [5], "c": [6]})
Expand All @@ -33,7 +29,7 @@ def test_multi_input(self):
"""
Tests the neural net with multiple inputs.
"""
predictor = NeuralNetPredictor(self.cao, {"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})
predictor = NeuralNetPredictor({"hidden_sizes": [4], "epochs": 1, "batch_size": 1, "device": "cpu"})

train_data = pd.DataFrame({"a": [1, 2], "b": [2, 3], "c": [3, 4], "label": [4, 5]})
test_data = pd.DataFrame({"a": [4, 5], "b": [5, 6], "c": [6, 7]})
Expand All @@ -46,7 +42,7 @@ def test_batched_input(self):
"""
Tests the neural network with batched inputs.
"""
predictor = NeuralNetPredictor(self.cao, {"hidden_sizes": [4], "epochs": 1, "batch_size": 2, "device": "cpu"})
predictor = NeuralNetPredictor({"hidden_sizes": [4], "epochs": 1, "batch_size": 2, "device": "cpu"})

train_data = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5], "label": [4, 5, 6]})
test_data = pd.DataFrame({"a": [4, 5], "b": [5, 6], "c": [6, 7]})
Expand Down

0 comments on commit 1ecd52a

Please sign in to comment.