-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4b09997
commit def980e
Showing
1 changed file
with
127 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"provenance": [], | ||
"authorship_tag": "ABX9TyNXuzwVU1r7VkwSe9K3urmL", | ||
"include_colab_link": true | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "view-in-github", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"<a href=\"https://colab.research.google.com/github/Saeidhoseinipour/100Data/blob/main/ModelTrainer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "DjzHGr01XPn5" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from sklearn.model_selection import train_test_split\n", | ||
"from sklearn.linear_model import LogisticRegression, LinearRegression\n", | ||
"from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n", | ||
"from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", | ||
"from sklearn.cluster import KMeans\n", | ||
"from sklearn.decomposition import PCA\n", | ||
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, silhouette_score\n", | ||
"\n", | ||
"class ModelTrainer:\n", | ||
" def __init__(self, datasets):\n", | ||
" \"\"\"\n", | ||
" Initialize the class with cleaned datasets.\n", | ||
" :param datasets: Dictionary of cleaned datasets from BioinformaticsDatasetCleaner.\n", | ||
" \"\"\"\n", | ||
" self.datasets = datasets\n", | ||
"\n", | ||
" def train_model(self, dataset_name, target_column):\n", | ||
" \"\"\"\n", | ||
" Train a machine learning model based on the dataset type.\n", | ||
" :param dataset_name: Name of the dataset.\n", | ||
" :param target_column: Name of the target column.\n", | ||
" \"\"\"\n", | ||
" if dataset_name in self.datasets:\n", | ||
" dataset = self.datasets[dataset_name]['data']\n", | ||
" dataset_type = self.datasets[dataset_name]['type']\n", | ||
"\n", | ||
" # Split data into features and target\n", | ||
" X = dataset.drop(columns=[target_column])\n", | ||
" y = dataset[target_column]\n", | ||
"\n", | ||
" # Split into training and testing sets\n", | ||
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", | ||
"\n", | ||
" # Train a model based on dataset type\n", | ||
" if dataset_type == 'quantitative':\n", | ||
" # Regression model for quantitative data\n", | ||
" model = RandomForestRegressor()\n", | ||
" model.fit(X_train, y_train)\n", | ||
" y_pred = model.predict(X_test)\n", | ||
"\n", | ||
" # Evaluate regression model\n", | ||
" mse = mean_squared_error(y_test, y_pred)\n", | ||
" print(f\"Mean Squared Error for {dataset_name}: {mse:.2f}\")\n", | ||
"\n", | ||
" elif dataset_type == 'qualitative':\n", | ||
" # Classification model for qualitative data\n", | ||
" model = RandomForestClassifier()\n", | ||
" model.fit(X_train, y_train)\n", | ||
" y_pred = model.predict(X_test)\n", | ||
"\n", | ||
" # Evaluate classification model\n", | ||
" accuracy = accuracy_score(y_test, y_pred)\n", | ||
" print(f\"Model Accuracy for {dataset_name}: {accuracy:.2f}\")\n", | ||
" print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n", | ||
" print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_pred))\n", | ||
"\n", | ||
" elif dataset_type == 'unsupervised':\n", | ||
" # Clustering model for unsupervised data\n", | ||
" model = KMeans(n_clusters=3)\n", | ||
" model.fit(X)\n", | ||
" labels = model.labels_\n", | ||
"\n", | ||
" # Evaluate clustering model\n", | ||
" silhouette = silhouette_score(X, labels)\n", | ||
" print(f\"Silhouette Score for {dataset_name}: {silhouette:.2f}\")\n", | ||
"\n", | ||
" else:\n", | ||
" raise ValueError(f\"No model defined for {dataset_type} data.\")\n", | ||
"\n", | ||
" return model, X_test, y_test\n", | ||
" else:\n", | ||
" raise KeyError(f\"Dataset {dataset_name} not found.\")\n", | ||
"\n", | ||
" def apply_pca(self, dataset_name, n_components=2):\n", | ||
" \"\"\"\n", | ||
" Apply PCA for dimensionality reduction.\n", | ||
" :param dataset_name: Name of the dataset.\n", | ||
" :param n_components: Number of components for PCA.\n", | ||
" \"\"\"\n", | ||
" if dataset_name in self.datasets:\n", | ||
" dataset = self.datasets[dataset_name]['data']\n", | ||
" pca = PCA(n_components=n_components)\n", | ||
" transformed_data = pca.fit_transform(dataset)\n", | ||
" print(f\"PCA applied to {dataset_name}. Transformed data shape: {transformed_data.shape}\")\n", | ||
" return transformed_data\n", | ||
" else:\n", | ||
" raise KeyError(f\"Dataset {dataset_name} not found.\")" | ||
] | ||
} | ||
] | ||
} |