Skip to content

Commit

Permalink
Created using Colab_Class2
Browse files Browse the repository at this point in the history
  • Loading branch information
Saeidhoseinipour committed Jan 19, 2025
1 parent 4b09997 commit def980e
Showing 1 changed file with 127 additions and 0 deletions.
127 changes: 127 additions & 0 deletions ModelTrainer.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNXuzwVU1r7VkwSe9K3urmL",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/Saeidhoseinipour/100Data/blob/main/ModelTrainer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DjzHGr01XPn5"
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression, LinearRegression\n",
"from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n",
"from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, silhouette_score\n",
"\n",
"class ModelTrainer:\n",
" def __init__(self, datasets):\n",
" \"\"\"\n",
" Initialize the class with cleaned datasets.\n",
" :param datasets: Dictionary of cleaned datasets from BioinformaticsDatasetCleaner.\n",
" \"\"\"\n",
" self.datasets = datasets\n",
"\n",
" def train_model(self, dataset_name, target_column):\n",
" \"\"\"\n",
" Train a machine learning model based on the dataset type.\n",
" :param dataset_name: Name of the dataset.\n",
" :param target_column: Name of the target column.\n",
" \"\"\"\n",
" if dataset_name in self.datasets:\n",
" dataset = self.datasets[dataset_name]['data']\n",
" dataset_type = self.datasets[dataset_name]['type']\n",
"\n",
" # Split data into features and target\n",
" X = dataset.drop(columns=[target_column])\n",
" y = dataset[target_column]\n",
"\n",
" # Split into training and testing sets\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
" # Train a model based on dataset type\n",
" if dataset_type == 'quantitative':\n",
" # Regression model for quantitative data\n",
" model = RandomForestRegressor()\n",
" model.fit(X_train, y_train)\n",
" y_pred = model.predict(X_test)\n",
"\n",
" # Evaluate regression model\n",
" mse = mean_squared_error(y_test, y_pred)\n",
" print(f\"Mean Squared Error for {dataset_name}: {mse:.2f}\")\n",
"\n",
" elif dataset_type == 'qualitative':\n",
" # Classification model for qualitative data\n",
" model = RandomForestClassifier()\n",
" model.fit(X_train, y_train)\n",
" y_pred = model.predict(X_test)\n",
"\n",
" # Evaluate classification model\n",
" accuracy = accuracy_score(y_test, y_pred)\n",
" print(f\"Model Accuracy for {dataset_name}: {accuracy:.2f}\")\n",
" print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n",
" print(\"Confusion Matrix:\\n\", confusion_matrix(y_test, y_pred))\n",
"\n",
" elif dataset_type == 'unsupervised':\n",
" # Clustering model for unsupervised data\n",
" model = KMeans(n_clusters=3)\n",
" model.fit(X)\n",
" labels = model.labels_\n",
"\n",
" # Evaluate clustering model\n",
" silhouette = silhouette_score(X, labels)\n",
" print(f\"Silhouette Score for {dataset_name}: {silhouette:.2f}\")\n",
"\n",
" else:\n",
" raise ValueError(f\"No model defined for {dataset_type} data.\")\n",
"\n",
" return model, X_test, y_test\n",
" else:\n",
" raise KeyError(f\"Dataset {dataset_name} not found.\")\n",
"\n",
" def apply_pca(self, dataset_name, n_components=2):\n",
" \"\"\"\n",
" Apply PCA for dimensionality reduction.\n",
" :param dataset_name: Name of the dataset.\n",
" :param n_components: Number of components for PCA.\n",
" \"\"\"\n",
" if dataset_name in self.datasets:\n",
" dataset = self.datasets[dataset_name]['data']\n",
" pca = PCA(n_components=n_components)\n",
" transformed_data = pca.fit_transform(dataset)\n",
" print(f\"PCA applied to {dataset_name}. Transformed data shape: {transformed_data.shape}\")\n",
" return transformed_data\n",
" else:\n",
" raise KeyError(f\"Dataset {dataset_name} not found.\")"
]
}
]
}

0 comments on commit def980e

Please sign in to comment.