diff --git a/src/notebooks/supervized_regression.ipynb b/src/notebooks/supervized_regression.ipynb
index 0b0073a..151f9ff 100644
--- a/src/notebooks/supervized_regression.ipynb
+++ b/src/notebooks/supervized_regression.ipynb
@@ -351,7 +351,7 @@
"metadata": {},
"outputs": [],
"source": [
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
]
},
{
@@ -486,6 +486,166 @@
"source": [
"En moyenne, le modèle de régression linéaire parvient à prévoir la durée nécessaire au financement à 10 jours près. "
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Forêt aléatoire"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
RandomForestRegressor(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
+ ],
+ "text/plain": [
+ "RandomForestRegressor(random_state=42)"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fit a 100-tree random forest on the training split; the fixed seed makes the fit repeatable\n",
+ "random_forest = RandomForestRegressor(random_state=42, n_estimators=100)\n",
+ "random_forest.fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variable | \n",
+ " Importance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " const | \n",
+ " 0.00000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " has_video | \n",
+ " 0.03096 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " goal | \n",
+ " 0.41027 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " facebook_friends | \n",
+ " 0.55877 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variable Importance\n",
+ "0 const 0.00000\n",
+ "1 has_video 0.03096\n",
+ "2 goal 0.41027\n",
+ "3 facebook_friends 0.55877"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per-feature importances of the fitted forest (one value per training column, in column order)\n",
+ "feature_importances = random_forest.feature_importances_\n",
+ "\n",
+ "# One row per feature: column name paired with its importance score\n",
+ "importances_df = pd.DataFrame({\n",
+ "    'Variable': list(X.columns),\n",
+ "    'Importance': feature_importances,\n",
+ "})\n",
+ "importances_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Random Forest Mean Squared Error = 229.26\n",
+ "Random Forest Mean Absolute Error = 11.66\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Score the fitted forest on the held-out test split\n",
+ "y_pred_rf = random_forest.predict(X_test)\n",
+ "\n",
+ "# MSE penalises large misses more heavily; MAE is expressed in the target's own unit\n",
+ "rf_mse = mean_squared_error(y_test, y_pred_rf)\n",
+ "rf_mae = mean_absolute_error(y_test, y_pred_rf)\n",
+ "\n",
+ "print(f\"Random Forest Mean Squared Error = {round(rf_mse, 2)}\")\n",
+ "print(f\"Random Forest Mean Absolute Error = {round(rf_mae, 2)}\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "En moyenne, le modèle de forêt aléatoire prévoit la durée nécessaire au financement à environ 12 jours près (erreur absolue moyenne de 11,66 jours).\n",
+ "\n",
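+ "**L'erreur absolue moyenne de la forêt aléatoire (≈ 12 jours) étant supérieure à celle de la régression linéaire (≈ 10 jours), on conservera donc le modèle de régression linéaire pour la prédiction.**"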
+ ]
}
],
"metadata": {