Skip to content

Commit

Permalink
Update tools.py
Browse files Browse the repository at this point in the history
* Add parameters to `DataFrameEncoder`
  • Loading branch information
jzsmoreno committed Jan 29, 2024
1 parent f1bc568 commit a0b2c06
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 21 deletions.
56 changes: 36 additions & 20 deletions likelihood/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,24 +551,36 @@ class DataFrameEncoder:
"""Allows encoding and decoding Dataframes"""

def __init__(self, data: DataFrame) -> None:
"""Sets the columns of the dataframe"""
"""Sets the columns of the `DataFrame`"""
self._df = data.copy()
self._names = data.columns
self._encode_columns = []
self.encoding_list = []
self.decoding_list = []

def load_config(self, path_to_dictionaries: str = "./") -> None:
"""Loads dictionaries from a given directory"""
with open(os.path.join(path_to_dictionaries, "labelencoder_dictionary.pkl"), "rb") as file:
def load_config(self, path_to_dictionaries: str = "./", **kwargs) -> None:
"""Loads dictionaries from a given directory
Keyword Arguments:
----------
- dictionary_name (`str`): An optional string parameter. By default it is set to `labelencoder_dictionary`
"""
dictionary_name = (
kwargs["dictionary_name"] if "dictionary_name" in kwargs else "labelencoder_dictionary"
)
with open(os.path.join(path_to_dictionaries, dictionary_name + ".pkl"), "rb") as file:
labelencoder = pickle.load(file)
self.encoding_list = labelencoder[0]
self.decoding_list = labelencoder[1]
self._encode_columns = labelencoder[2]
print("Configuration successfully uploaded")

def train(self, save_mode: bool = True) -> None:
"""Trains the encoders and decoders using the dataframe"""
def train(self, path_to_save: str, **kwargs) -> None:
"""Trains the encoders and decoders using the `DataFrame`"""
save_mode = kwargs["save_mode"] if "save_mode" in kwargs else True
dictionary_name = (
kwargs["dictionary_name"] if "dictionary_name" in kwargs else "labelencoder_dictionary"
)
for i in self._names:
if self._df[i].dtype == "object":
self._encode_columns.append(i)
Expand All @@ -582,12 +594,18 @@ def train(self, save_mode: bool = True) -> None:
self.encoding_list.append(encode_dict)
self.decoding_list.append(decode_dict)
if save_mode:
self._save_encoder()
self._save_encoder(path_to_save, dictionary_name)

def encode(self) -> DataFrame:
"""Encodes the object type columns of the dataframe"""
def encode(self, path_to_save: str = "./", **kwargs) -> DataFrame:
"""Encodes the `object` type columns of the dataframe
Keyword Arguments:
----------
- save_mode (`bool`): An optional boolean parameter. By default it is set to `True`
- dictionary_name (`str`): An optional string parameter. By default it is set to `labelencoder_dictionary`
"""
if len(self.encoding_list) == 0:
self.train()
self.train(path_to_save, **kwargs)
return self._df

else:
Expand All @@ -601,7 +619,7 @@ def encode(self) -> DataFrame:
return self._df

def decode(self) -> DataFrame:
"""Decodes the int type columns of the dataframe"""
"""Decodes the `int` type columns of the `DataFrame`"""
j = 0
df_decoded = self._df.copy()
try:
Expand All @@ -620,7 +638,7 @@ def decode(self) -> DataFrame:
print(f"{warning_type}: {msg}")

def get_dictionaries(self) -> Tuple[List[dict], List[dict]]:
"""Allows to return the list of dictionaries for encoding and decoding"""
"""Returns the `list` of dictionaries used for `encoding` and `decoding`"""
try:
return self.encoding_list, self.decoding_list
except ValueError as e:
Expand All @@ -629,24 +647,24 @@ def get_dictionaries(self) -> Tuple[List[dict], List[dict]]:
msg += "Error: {%s}" % e
print(f"{warning_type}: {msg}")

def _save_encoder(self, path_to_save: str = "./") -> None:
"""Method to serialize the encoding_list, decoding_list and _encode_columns list"""
with open(path_to_save + "labelencoder_dictionary.pkl", "wb") as f:
def _save_encoder(self, path_to_save: str, dictionary_name: str) -> None:
"""Method to serialize the `encoding_list`, `decoding_list` and `_encode_columns` list"""
with open(path_to_save + dictionary_name + ".pkl", "wb") as f:
pickle.dump([self.encoding_list, self.decoding_list, self._encode_columns], f)

def _code_transformation_to(self, character: str, dictionary_list: List[dict]) -> int:
"""Auxiliary function to perform data transformation using a dictionary
Parameters
----------
character : str
character : `str`
A character data type.
dictionary_list : List[dict]
dictionary_list : List[`dict`]
An object of dictionary type.
Returns
-------
dict_type[character] or np.nan if dict_type[character] doesn't exist.
dict_type[`character`] or `np.nan` if dict_type[`character`] doesn't exist.
"""
try:
return dictionary_list[character]
Expand Down Expand Up @@ -757,8 +775,6 @@ def _confu_mat(self, y_true: ndarray, y_pred: ndarray, labels: list) -> ndarray:
helper._summary_pred(y_true, y_pred, labels)
print(helper._f1_score(y_true, y_pred, labels))

breakpoint()

# Use DataFrameEncoder
# Create a DataFrame
data = {"Name": ["John", "Alice", "Bob"], "Age": [25, 30, 35]}
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="likelihood", # Replace with your own username
version="1.2.2",
version="1.2.3",
author="J. A. Moreno-Guerra",
author_email="[email protected]",
description="A package that performs the maximum likelihood algorithm.",
Expand Down

0 comments on commit a0b2c06

Please sign in to comment.