Bug fixes and code clean-up (#46)
jzsmoreno authored Jan 19, 2025
1 parent 74b109a commit 23df6b1
Showing 8 changed files with 231 additions and 98 deletions.
106 changes: 55 additions & 51 deletions examples/Deep_Models.ipynb

Large diffs are not rendered by default.

168 changes: 146 additions & 22 deletions examples/Simulation_Models.ipynb
@@ -28,24 +28,104 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Color Tamaño Género Edad Precio Cantidad\n",
"0 Rojo Pequeño Mujer 25 100.50 5\n",
"1 Azul Mediano Hombre 30 150.75 10\n",
"2 Rojo Grande Mujer 22 200.00 3\n",
"3 Verde Mediano Hombre 35 80.25 8\n",
"4 Rojo Pequeño Mujer 28 120.00 7\n"
]
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Color</th>\n",
" <th>Tamaño</th>\n",
" <th>Género</th>\n",
" <th>Edad</th>\n",
" <th>Precio</th>\n",
" <th>Cantidad</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rojo</td>\n",
" <td>Pequeño</td>\n",
" <td>Mujer</td>\n",
" <td>25</td>\n",
" <td>100.50</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Azul</td>\n",
" <td>Mediano</td>\n",
" <td>Hombre</td>\n",
" <td>30</td>\n",
" <td>150.75</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Rojo</td>\n",
" <td>Grande</td>\n",
" <td>Mujer</td>\n",
" <td>22</td>\n",
" <td>200.00</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Verde</td>\n",
" <td>Mediano</td>\n",
" <td>Hombre</td>\n",
" <td>35</td>\n",
" <td>80.25</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Rojo</td>\n",
" <td>Pequeño</td>\n",
" <td>Mujer</td>\n",
" <td>28</td>\n",
" <td>120.00</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Color Tamaño Género Edad Precio Cantidad\n",
"0 Rojo Pequeño Mujer 25 100.50 5\n",
"1 Azul Mediano Hombre 30 150.75 10\n",
"2 Rojo Grande Mujer 22 200.00 3\n",
"3 Verde Mediano Hombre 35 80.25 8\n",
"4 Rojo Pequeño Mujer 28 120.00 7"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load data from a parquet file into a pandas DataFrame\n",
"data = pd.read_parquet(\"./sample_data.parquet\")\n",
"df = pd.DataFrame(data)\n",
"# Display the first few rows of the dataframe to verify it's loaded correctly\n",
"print(df.head())"
"df.head()"
]
},
{
@@ -54,19 +134,63 @@
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Color Tamaño Género Edad Precio Cantidad\n",
"5 Amarillo Grande Hombre 40 250.0 4\n"
]
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Color</th>\n",
" <th>Tamaño</th>\n",
" <th>Género</th>\n",
" <th>Edad</th>\n",
" <th>Precio</th>\n",
" <th>Cantidad</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Azul</td>\n",
" <td>Pequeño</td>\n",
" <td>Hombre</td>\n",
" <td>29</td>\n",
" <td>125.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Color Tamaño Género Edad Precio Cantidad\n",
"12 Azul Pequeño Hombre 29 125.0 8"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample one row from the dataframe for demonstration purposes\n",
"df_aux = df.sample(1)\n",
"# Display the sampled row\n",
"print(df_aux.head())"
"df_aux.head()"
]
},
{
@@ -129,8 +253,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The predicted value is 225.39\n",
"The probability of this value is: 15.75%\n",
"The predicted value is 81.15\n",
"The probability of this value is: 23.84%\n",
"The variable is an inlier\n"
]
}
@@ -180,7 +304,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"The probability of this value is: 15.75%\n",
"The probability of this value is: 23.84%\n",
"The variable is an inlier\n"
]
}
@@ -222,7 +346,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
2 changes: 1 addition & 1 deletion likelihood/VERSION
@@ -1 +1 @@
1.2.25
1.3.0
6 changes: 3 additions & 3 deletions likelihood/graph/graph.py
@@ -45,8 +45,8 @@ def _add_nodes(self) -> None:

def draw(self, name="graph.html", **kwargs) -> None:
"""Display the network using HTML format"""
spring_length = kwargs["spring_length"] if "spring_length" in kwargs else 500
node_distance = kwargs["node_distance"] if "node_distance" in kwargs else 100
spring_length = kwargs.get("spring_length", 500)
node_distance = kwargs.get("node_distance", 100)
self.G.repulsion(node_distance=node_distance, spring_length=spring_length)
self.G.show_buttons(filter_=["physics"])
self.G.show(name)
@@ -89,5 +89,5 @@ def pyvis_to_networkx(self):
df["y"] = y
# Instantiate DynamicGraph
fs = DynamicGraph(df, n_importances=2)
print(fs.fit())
fs.fit()
fs.draw()
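
Note: `draw` now resolves its layout options with `kwargs.get`, so `spring_length` and `node_distance` fall back to 500 and 100 when omitted. A minimal usage sketch, assuming a `DynamicGraph` instance `fs` built as in the demo above (the output file name is illustrative):

```python
# Defaults apply when the keyword arguments are omitted:
fs.draw()  # spring_length=500, node_distance=100

# Or override them explicitly:
fs.draw("custom_graph.html", spring_length=300, node_distance=150)
```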
2 changes: 1 addition & 1 deletion likelihood/graph/nn.py
@@ -96,7 +96,7 @@ def cal_adjacency_matrix(

assert len(df_categorical) > 0

similarity = kwargs["similarity"] if "similarity" in kwargs else len(df_categorical.columns) - 1
similarity = kwargs.get("similarity", len(df_categorical.columns) - 1)
assert similarity <= df_categorical.shape[1]

adj_dict = {}
25 changes: 22 additions & 3 deletions likelihood/models/deep/autoencoders.py
@@ -79,6 +79,8 @@ def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
num_layers : `int`
The number of hidden layers in the classifier. Default is 1.
dropout : `float`
The dropout rate to use in the classifier. Default is None.
"""
super(AutoClassifier, self).__init__()
self.input_shape_parm = input_shape_parm
@@ -91,6 +93,7 @@ def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
self.classifier = None
self.classifier_activation = kwargs.get("classifier_activation", "softmax")
self.num_layers = kwargs.get("num_layers", 1)
self.dropout = kwargs.get("dropout", None)

def build(self, input_shape):
self.encoder = tf.keras.Sequential(
@@ -113,6 +116,8 @@ def build(self, input_shape):
self.classifier.add(
tf.keras.layers.Dense(units=self.units, activation=self.activation)
)
if self.dropout:
self.classifier.add(tf.keras.layers.Dropout(self.dropout))
self.classifier.add(
tf.keras.layers.Dense(units=self.num_classes, activation=self.classifier_activation)
)
@@ -132,6 +137,7 @@ def get_config(self):
"activation": self.activation,
"classifier_activation": self.classifier_activation,
"num_layers": self.num_layers,
"dropout": self.dropout,
}
base_config = super(AutoClassifier, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
@@ -145,6 +151,7 @@ def from_config(cls, config):
activation=config["activation"],
classifier_activation=config["classifier_activation"],
num_layers=config["num_layers"],
dropout=config["dropout"],
)


@@ -156,6 +163,7 @@ def call_existing_code(
input_shape_parm: None | int = None,
num_classes: None | int = None,
num_layers: int = 1,
**kwargs,
) -> AutoClassifier:
"""
Calls an existing AutoClassifier instance.
@@ -180,12 +188,14 @@
`AutoClassifier`
The AutoClassifier instance.
"""
dropout = kwargs.get("dropout", None)
model = AutoClassifier(
input_shape_parm=input_shape_parm,
num_classes=num_classes,
units=units,
activation=activation,
num_layers=num_layers,
dropout=dropout,
)
model.compile(
optimizer=optimizer,
@@ -254,6 +264,11 @@ def build_model(
if "num_layers" not in hyperparameters_keys
else hyperparameters["num_layers"]
)
dropout = (
hp.Float("dropout", min_value=0.1, max_value=0.9, sampling="log")
if "dropout" not in hyperparameters_keys
else hyperparameters["dropout"]
)

model = call_existing_code(
units=units,
Expand All @@ -263,6 +278,7 @@ def build_model(
input_shape_parm=input_shape_parm,
num_classes=num_classes,
num_layers=num_layers,
dropout=dropout,
)
return model
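
Note: `hp.Float(..., sampling="log")` samples a value on a log scale between the bounds, so the new `dropout` hyperparameter is only searched when no override is supplied in `hyperparameters`. An illustrative sketch of that default behaviour, assuming `keras_tuner` is the tuner library in use:

```python
import keras_tuner as kt

# A fresh HyperParameters object returns each entry's default value;
# for a Float with no explicit default, that is min_value.
hp = kt.HyperParameters()
rate = hp.Float("dropout", min_value=0.1, max_value=0.9, sampling="log")
print(rate)  # 0.1
```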

@@ -408,10 +424,8 @@ def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
self.model = model
self.encoder_layer = self.model.encoder.layers[0]
self.decoder_layer = self.model.decoder.layers[0]
self.classifier_layer = self.model.classifier.layers[-2]
self.encoder_weights = self.encoder_layer.get_weights()[0]
self.decoder_weights = self.decoder_layer.get_weights()[0]
self.classifier_weights = self.classifier_layer.get_weights()[0]
colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

by_hsv = sorted(
@@ -580,7 +594,12 @@ def _viz_tsne_repr(self, **kwargs) -> None:
y = np.asarray(y).astype(np.float32)

model = AutoClassifier(
input_shape_parm=X.shape[1], num_classes=3, units=27, activation="selu", num_layers=2
input_shape_parm=X.shape[1],
num_classes=3,
units=27,
activation="tanh",
num_layers=2,
dropout=0.2,
)
model.compile(
optimizer="adam",
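
Note: because `get_config`/`from_config` now round-trip the `dropout` field, a saved `AutoClassifier` restores it on reload. A minimal save/load sketch, for illustration only: the shapes, file name, and the forward pass used to build the weights are assumptions, and it presumes a Keras version that supports the native `.keras` format for subclassed models.

```python
import numpy as np
import tensorflow as tf

from likelihood.models.deep.autoencoders import AutoClassifier

model = AutoClassifier(
    input_shape_parm=6, num_classes=3, units=8, activation="tanh", num_layers=2, dropout=0.2
)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model(np.zeros((1, 6), dtype=np.float32))  # one forward pass so the layers are built

model.save("auto_classifier.keras")
restored = tf.keras.models.load_model(
    "auto_classifier.keras", custom_objects={"AutoClassifier": AutoClassifier}
)
print(restored.dropout)  # 0.2, restored via from_config
```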
6 changes: 0 additions & 6 deletions likelihood/models/simulation.py
@@ -2,31 +2,25 @@
import warnings
from typing import List, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame

from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf

# Suppress RankWarning
warnings.simplefilter("ignore", np.RankWarning)


# --------------------------------------------------------------------------------------------------------------------------------------
def categories_by_quartile(df: DataFrame, column: str) -> Tuple[str, str]:
# Count the frequency of each category in the column
freq = df[column].value_counts()

# Calculate the 25th percentile (Q1) and 75th percentile (Q3)
q1 = freq.quantile(0.25)
q3 = freq.quantile(0.75)

# Filter categories that are below the 25th percentile and above the 75th percentile
least_frequent = freq[freq <= q1]
most_frequent = freq[freq >= q3]

# Get the least frequent category (25th percentile) and the most frequent category (75th percentile)
least_frequent_category = least_frequent.idxmin() if not least_frequent.empty else None
most_frequent_category = most_frequent.idxmax() if not most_frequent.empty else None

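
Note: `categories_by_quartile` ranks a column's categories by frequency and returns the ones in the lower and upper quartile tails of those counts. A small usage sketch with illustrative data; it assumes the function is importable from `likelihood.models.simulation` and that the return statement (truncated in this diff) yields `(least_frequent_category, most_frequent_category)`.

```python
import pandas as pd

from likelihood.models.simulation import categories_by_quartile

# Frequencies: Rojo=3, Verde=2, Azul=1
df = pd.DataFrame({"Color": ["Rojo", "Rojo", "Rojo", "Verde", "Verde", "Azul"]})

least, most = categories_by_quartile(df, "Color")
print(least, most)  # Azul Rojo -- Azul sits at/below Q1 of the counts, Rojo at/above Q3
```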