From 57a89e52c824ec7df8c386e5c5b6c6f74260ca9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Crist=C3=B3bal=20Herreros?= Date: Sat, 21 Jan 2023 22:30:24 +0100 Subject: [PATCH 1/2] Feat: add some cleaning and test lr accurancy --- entrega.ipynb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/entrega.ipynb b/entrega.ipynb index db3ebda..ff6bb8c 100644 --- a/entrega.ipynb +++ b/entrega.ipynb @@ -24,9 +24,16 @@ "![nba-logo](images/nba-logo.png)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Una empresa dedicada a las apuestas, necesita un modelo de entrenamiento fiable en el cual poder saber si gana un equipo u otro dependiendo de unos datos de entrada. En este ejercicio se haran pruebas de distintos modelos y se seleccionaran distintas variables para probar su eficacia." + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 223, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +42,10 @@ "from IPython.display import display\n", "import zipfile as zp\n", "import matplotlib.pyplot as plt\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", "import sklearn.preprocessing as prep\n", + "from sklearn.feature_selection import SelectKBest, chi2\n", "from sklearn.metrics import precision_score, recall_score, confusion_matrix, mean_absolute_error, mean_absolute_percentage_error, r2_score, accuracy_score, mean_squared_error" ] }, From bf833e679a7bb06d22908f407523d1e2a7c79d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Crist=C3=B3bal=20Herreros?= Date: Sat, 21 Jan 2023 22:32:14 +0100 Subject: [PATCH 2/2] Feat: add again the previous commit --- entrega.ipynb | 2056 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2041 insertions(+), 15 deletions(-) diff --git a/entrega.ipynb b/entrega.ipynb index ff6bb8c..765346f 100644 --- a/entrega.ipynb +++ b/entrega.ipynb @@ -58,15 +58,14 @@ }, { "cell_type": "code", - - 
"execution_count": 3, + "execution_count": 224, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_6687/1756342927.py:3: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_5804/1756342927.py:3: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df_games_details = pd.read_csv(zf.open('games_details.csv'))\n" ] } @@ -89,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 225, "metadata": {}, "outputs": [ { @@ -935,7 +934,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 226, "metadata": {}, "outputs": [ { @@ -1051,7 +1050,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 227, "metadata": {}, "outputs": [ { @@ -1248,7 +1247,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 228, "metadata": {}, "outputs": [ { @@ -1672,7 +1671,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 229, "metadata": {}, "outputs": [ { @@ -2091,7 +2090,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 230, "metadata": {}, "outputs": [ { @@ -2166,7 +2165,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 231, "metadata": {}, "outputs": [ { @@ -2460,7 +2459,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 232, "metadata": { "scrolled": true }, @@ -2646,7 +2645,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 233, "metadata": {}, "outputs": [], "source": [ @@ -2655,8 +2654,10 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 234, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -2685,7 +2686,7 @@ "dtype: int64" ] }, - "execution_count": 15, + "execution_count": 234, "metadata": {}, "output_type": "execute_result" } @@ -2693,6 
+2694,2031 @@ "source": [ "df_games.isnull().sum()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Para saber que equipo es, podríamos fusionar algunos datos del dataframe de partidos con el de equipos de alguna manera" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['GAME_DATE_EST', 'GAME_ID', 'GAME_STATUS_TEXT', 'HOME_TEAM_ID',\n", + " 'VISITOR_TEAM_ID', 'SEASON', 'TEAM_ID_home', 'PTS_home', 'FG_PCT_home',\n", + " 'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home', 'TEAM_ID_away',\n", + " 'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away',\n", + " 'REB_away', 'HOME_TEAM_WINS'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Index(['LEAGUE_ID', 'TEAM_ID', 'MIN_YEAR', 'MAX_YEAR', 'ABBREVIATION',\n", + " 'NICKNAME', 'YEARFOUNDED', 'CITY', 'ARENA', 'ARENACAPACITY', 'OWNER',\n", + " 'GENERALMANAGER', 'HEADCOACH', 'DLEAGUEAFFILIATION'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df_games.columns)\n", + "display(df_teams.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vemos como podemos sustituir los IDS por los nombres de los equipos" + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [], + "source": [ + "df_teams = df_teams[['TEAM_ID', 'NICKNAME']]\n", + "\n", + "# Reemplaza HOME_TEAM_ID por los nombres del dataframe teams\n", + "nombres_local = df_teams.copy()\n", + "nombres_local.columns = ['HOME_TEAM_ID', 'NICKNAME']\n", + "# Se unen el ID de lequipo por el nickname\n", + "result_1 = pd.merge(df_games['HOME_TEAM_ID'], nombres_local, how =\"left\", on=\"HOME_TEAM_ID\") \n", + "df_games['HOME_TEAM_ID'] = result_1['NICKNAME']\n" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "Vemos como se ha cambiado el HOME_TEAM_ID por el nombre del equipo, haremos lo mismo con el visitante" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GAME_DATE_ESTGAME_IDGAME_STATUS_TEXTHOME_TEAM_IDVISITOR_TEAM_IDSEASONTEAM_ID_homePTS_homeFG_PCT_homeFT_PCT_home...AST_homeREB_homeTEAM_ID_awayPTS_awayFG_PCT_awayFT_PCT_awayFG3_PCT_awayAST_awayREB_awayHOME_TEAM_WINS
02022-12-2222200477FinalPelicans161061275920221610612740126.00.4840.926...25.046.01610612759117.00.4780.8150.32123.044.01
12022-12-2222200478FinalJazz161061276420221610612762120.00.4880.952...16.040.01610612764112.00.5610.7650.33320.037.01
22022-12-2122200466FinalCavaliers161061274920221610612739114.00.4820.786...22.037.01610612749106.00.4700.6820.43320.046.01
32022-12-2122200467Final76ers161061276520221610612755113.00.4410.909...27.049.0161061276593.00.3920.7350.26115.046.01
42022-12-2122200468FinalHawks161061274120221610612737108.00.4291.000...22.047.01610612741110.00.5000.7730.29220.047.00
52022-12-2122200469FinalCeltics161061275420221610612738112.00.3860.840...26.062.01610612754117.00.4690.7780.46227.047.00
62022-12-2122200470FinalNets161061274420221610612751143.00.6430.875...42.032.01610612744113.00.4940.7600.36432.036.01
72022-12-2122200471FinalKnicks161061276120221610612752106.00.5530.611...25.038.01610612761113.00.4470.9090.26517.038.00
82022-12-2122200472FinalRockets161061275320221610612745110.00.4660.647...22.049.01610612753116.00.4510.6970.29719.045.00
92022-12-2122200473FinalTimberwolves16106127422022161061275099.00.4940.700...23.039.01610612742104.00.4530.8520.33317.039.00
102022-12-2122200474FinalThunder161061275720221610612760101.00.4680.840...19.037.0161061275798.00.4940.6670.38929.036.01
112022-12-2122200475FinalKings161061274720221610612758134.00.5050.750...29.046.01610612747120.00.5000.8330.45825.039.01
122022-12-2122200476FinalClippers161061276620221610612746126.00.5060.913...29.048.01610612766105.00.4020.7590.29025.040.01
132022-12-2022200461FinalPistons161061276220221610612765111.00.5060.741...22.043.01610612762126.00.5050.6320.43527.043.00
142022-12-2022200462FinalHeat161061274120221610612748103.00.4690.706...26.035.01610612741113.00.5480.8330.41924.039.00
152022-12-2022200463FinalKnicks161061274420221610612752132.00.5170.781...27.047.0161061274494.00.4730.9230.34323.029.01
162022-12-2022200464FinalSuns161061276420221610612756110.00.4610.789...26.044.01610612764113.00.4750.7030.40722.041.00
172022-12-2022200465FinalNuggets161061276320221610612743105.00.4490.600...28.048.0161061276391.00.4440.6670.19225.042.01
182022-12-1922200452FinalCavaliers161061276220221610612739122.00.6140.808...24.045.0161061276299.00.3870.7390.29419.035.01
192022-12-1922200453Final76ers161061276120221610612755104.00.4000.926...22.041.01610612761101.00.4200.8000.27524.050.01
\n", + "

20 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " GAME_DATE_EST GAME_ID GAME_STATUS_TEXT HOME_TEAM_ID VISITOR_TEAM_ID \\\n", + "0 2022-12-22 22200477 Final Pelicans 1610612759 \n", + "1 2022-12-22 22200478 Final Jazz 1610612764 \n", + "2 2022-12-21 22200466 Final Cavaliers 1610612749 \n", + "3 2022-12-21 22200467 Final 76ers 1610612765 \n", + "4 2022-12-21 22200468 Final Hawks 1610612741 \n", + "5 2022-12-21 22200469 Final Celtics 1610612754 \n", + "6 2022-12-21 22200470 Final Nets 1610612744 \n", + "7 2022-12-21 22200471 Final Knicks 1610612761 \n", + "8 2022-12-21 22200472 Final Rockets 1610612753 \n", + "9 2022-12-21 22200473 Final Timberwolves 1610612742 \n", + "10 2022-12-21 22200474 Final Thunder 1610612757 \n", + "11 2022-12-21 22200475 Final Kings 1610612747 \n", + "12 2022-12-21 22200476 Final Clippers 1610612766 \n", + "13 2022-12-20 22200461 Final Pistons 1610612762 \n", + "14 2022-12-20 22200462 Final Heat 1610612741 \n", + "15 2022-12-20 22200463 Final Knicks 1610612744 \n", + "16 2022-12-20 22200464 Final Suns 1610612764 \n", + "17 2022-12-20 22200465 Final Nuggets 1610612763 \n", + "18 2022-12-19 22200452 Final Cavaliers 1610612762 \n", + "19 2022-12-19 22200453 Final 76ers 1610612761 \n", + "\n", + " SEASON TEAM_ID_home PTS_home FG_PCT_home FT_PCT_home ... AST_home \\\n", + "0 2022 1610612740 126.0 0.484 0.926 ... 25.0 \n", + "1 2022 1610612762 120.0 0.488 0.952 ... 16.0 \n", + "2 2022 1610612739 114.0 0.482 0.786 ... 22.0 \n", + "3 2022 1610612755 113.0 0.441 0.909 ... 27.0 \n", + "4 2022 1610612737 108.0 0.429 1.000 ... 22.0 \n", + "5 2022 1610612738 112.0 0.386 0.840 ... 26.0 \n", + "6 2022 1610612751 143.0 0.643 0.875 ... 42.0 \n", + "7 2022 1610612752 106.0 0.553 0.611 ... 25.0 \n", + "8 2022 1610612745 110.0 0.466 0.647 ... 22.0 \n", + "9 2022 1610612750 99.0 0.494 0.700 ... 23.0 \n", + "10 2022 1610612760 101.0 0.468 0.840 ... 19.0 \n", + "11 2022 1610612758 134.0 0.505 0.750 ... 29.0 \n", + "12 2022 1610612746 126.0 0.506 0.913 ... 
29.0 \n", + "13 2022 1610612765 111.0 0.506 0.741 ... 22.0 \n", + "14 2022 1610612748 103.0 0.469 0.706 ... 26.0 \n", + "15 2022 1610612752 132.0 0.517 0.781 ... 27.0 \n", + "16 2022 1610612756 110.0 0.461 0.789 ... 26.0 \n", + "17 2022 1610612743 105.0 0.449 0.600 ... 28.0 \n", + "18 2022 1610612739 122.0 0.614 0.808 ... 24.0 \n", + "19 2022 1610612755 104.0 0.400 0.926 ... 22.0 \n", + "\n", + " REB_home TEAM_ID_away PTS_away FG_PCT_away FT_PCT_away FG3_PCT_away \\\n", + "0 46.0 1610612759 117.0 0.478 0.815 0.321 \n", + "1 40.0 1610612764 112.0 0.561 0.765 0.333 \n", + "2 37.0 1610612749 106.0 0.470 0.682 0.433 \n", + "3 49.0 1610612765 93.0 0.392 0.735 0.261 \n", + "4 47.0 1610612741 110.0 0.500 0.773 0.292 \n", + "5 62.0 1610612754 117.0 0.469 0.778 0.462 \n", + "6 32.0 1610612744 113.0 0.494 0.760 0.364 \n", + "7 38.0 1610612761 113.0 0.447 0.909 0.265 \n", + "8 49.0 1610612753 116.0 0.451 0.697 0.297 \n", + "9 39.0 1610612742 104.0 0.453 0.852 0.333 \n", + "10 37.0 1610612757 98.0 0.494 0.667 0.389 \n", + "11 46.0 1610612747 120.0 0.500 0.833 0.458 \n", + "12 48.0 1610612766 105.0 0.402 0.759 0.290 \n", + "13 43.0 1610612762 126.0 0.505 0.632 0.435 \n", + "14 35.0 1610612741 113.0 0.548 0.833 0.419 \n", + "15 47.0 1610612744 94.0 0.473 0.923 0.343 \n", + "16 44.0 1610612764 113.0 0.475 0.703 0.407 \n", + "17 48.0 1610612763 91.0 0.444 0.667 0.192 \n", + "18 45.0 1610612762 99.0 0.387 0.739 0.294 \n", + "19 41.0 1610612761 101.0 0.420 0.800 0.275 \n", + "\n", + " AST_away REB_away HOME_TEAM_WINS \n", + "0 23.0 44.0 1 \n", + "1 20.0 37.0 1 \n", + "2 20.0 46.0 1 \n", + "3 15.0 46.0 1 \n", + "4 20.0 47.0 0 \n", + "5 27.0 47.0 0 \n", + "6 32.0 36.0 1 \n", + "7 17.0 38.0 0 \n", + "8 19.0 45.0 0 \n", + "9 17.0 39.0 0 \n", + "10 29.0 36.0 1 \n", + "11 25.0 39.0 1 \n", + "12 25.0 40.0 1 \n", + "13 27.0 43.0 0 \n", + "14 24.0 39.0 0 \n", + "15 23.0 29.0 1 \n", + "16 22.0 41.0 0 \n", + "17 25.0 42.0 1 \n", + "18 19.0 35.0 1 \n", + "19 24.0 50.0 1 \n", + "\n", + "[20 rows 
x 21 columns]" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_games.head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Haremos lo mismo con los equipos visitantes" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": {}, + "outputs": [], + "source": [ + "# Reemplaza VISITOR_TEAM_ID por los nombres del dataframe teams\n", + "nombres_visitante = df_teams.copy()\n", + "nombres_visitante.columns = ['VISITOR_TEAM_ID', 'NICKNAME']\n", + "# Se unen el ID del equipo por el nickname\n", + "result_2 = pd.merge(df_games['VISITOR_TEAM_ID'], nombres_visitante, how =\"left\", on=\"VISITOR_TEAM_ID\") \n", + "df_games['VISITOR_TEAM_ID'] = result_2['NICKNAME']" + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GAME_DATE_ESTGAME_IDGAME_STATUS_TEXTHOME_TEAM_IDVISITOR_TEAM_IDSEASONTEAM_ID_homePTS_homeFG_PCT_homeFT_PCT_home...AST_homeREB_homeTEAM_ID_awayPTS_awayFG_PCT_awayFT_PCT_awayFG3_PCT_awayAST_awayREB_awayHOME_TEAM_WINS
02022-12-2222200477FinalPelicansSpurs20221610612740126.00.4840.926...25.046.01610612759117.00.4780.8150.32123.044.01
12022-12-2222200478FinalJazzWizards20221610612762120.00.4880.952...16.040.01610612764112.00.5610.7650.33320.037.01
22022-12-2122200466FinalCavaliersBucks20221610612739114.00.4820.786...22.037.01610612749106.00.4700.6820.43320.046.01
32022-12-2122200467Final76ersPistons20221610612755113.00.4410.909...27.049.0161061276593.00.3920.7350.26115.046.01
42022-12-2122200468FinalHawksBulls20221610612737108.00.4291.000...22.047.01610612741110.00.5000.7730.29220.047.00
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " GAME_DATE_EST GAME_ID GAME_STATUS_TEXT HOME_TEAM_ID VISITOR_TEAM_ID \\\n", + "0 2022-12-22 22200477 Final Pelicans Spurs \n", + "1 2022-12-22 22200478 Final Jazz Wizards \n", + "2 2022-12-21 22200466 Final Cavaliers Bucks \n", + "3 2022-12-21 22200467 Final 76ers Pistons \n", + "4 2022-12-21 22200468 Final Hawks Bulls \n", + "\n", + " SEASON TEAM_ID_home PTS_home FG_PCT_home FT_PCT_home ... AST_home \\\n", + "0 2022 1610612740 126.0 0.484 0.926 ... 25.0 \n", + "1 2022 1610612762 120.0 0.488 0.952 ... 16.0 \n", + "2 2022 1610612739 114.0 0.482 0.786 ... 22.0 \n", + "3 2022 1610612755 113.0 0.441 0.909 ... 27.0 \n", + "4 2022 1610612737 108.0 0.429 1.000 ... 22.0 \n", + "\n", + " REB_home TEAM_ID_away PTS_away FG_PCT_away FT_PCT_away FG3_PCT_away \\\n", + "0 46.0 1610612759 117.0 0.478 0.815 0.321 \n", + "1 40.0 1610612764 112.0 0.561 0.765 0.333 \n", + "2 37.0 1610612749 106.0 0.470 0.682 0.433 \n", + "3 49.0 1610612765 93.0 0.392 0.735 0.261 \n", + "4 47.0 1610612741 110.0 0.500 0.773 0.292 \n", + "\n", + " AST_away REB_away HOME_TEAM_WINS \n", + "0 23.0 44.0 1 \n", + "1 20.0 37.0 1 \n", + "2 20.0 46.0 1 \n", + "3 15.0 46.0 1 \n", + "4 20.0 47.0 0 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 239, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_games.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exploración de los valores únicos de las variables del dataframe de partidos" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['2022-12-22' '2022-12-21' '2022-12-20' ... '2014-10-06' '2014-10-05'\n", + " '2014-10-04']\n", + "[22200477 22200478 22200466 ... 
11400005 11400002 11400001]\n", + "['Final']\n", + "['Pelicans' 'Jazz' 'Cavaliers' '76ers' 'Hawks' 'Celtics' 'Nets' 'Knicks'\n", + " 'Rockets' 'Timberwolves' 'Thunder' 'Kings' 'Clippers' 'Pistons' 'Heat'\n", + " 'Suns' 'Nuggets' 'Pacers' 'Raptors' 'Lakers' 'Spurs' 'Bucks' 'Hornets'\n", + " 'Bulls' 'Mavericks' 'Grizzlies' 'Magic' 'Wizards' 'Trail Blazers'\n", + " 'Warriors']\n", + "['Spurs' 'Wizards' 'Bucks' 'Pistons' 'Bulls' 'Pacers' 'Warriors' 'Raptors'\n", + " 'Magic' 'Mavericks' 'Trail Blazers' 'Lakers' 'Hornets' 'Jazz' 'Grizzlies'\n", + " 'Knicks' 'Nets' 'Heat' 'Pelicans' 'Hawks' 'Kings' 'Timberwolves'\n", + " 'Nuggets' 'Suns' 'Cavaliers' 'Celtics' 'Thunder' 'Clippers' 'Rockets'\n", + " '76ers']\n", + "[2022 2021 2020 2019 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004\n", + " 2003 2018 2017 2016 2015 2014]\n", + "[1610612740 1610612762 1610612739 1610612755 1610612737 1610612738\n", + " 1610612751 1610612752 1610612745 1610612750 1610612760 1610612758\n", + " 1610612746 1610612765 1610612748 1610612756 1610612743 1610612754\n", + " 1610612761 1610612747 1610612759 1610612749 1610612766 1610612741\n", + " 1610612742 1610612763 1610612753 1610612764 1610612757 1610612744]\n", + "[126. 120. 114. 113. 108. 112. 143. 106. 110. 99. 101. 134. 111. 103.\n", + " 132. 105. 122. 104. 116. 119. 123. 130. 92. 121. 150. 102. 100. 95.\n", + " 115. 118. 109. 91. 142. 125. 135. 90. 141. 128. 82. 133. 129. 117.\n", + " 131. 97. 107. 144. 124. 98. 96. 139. 137. 140. 94. 138. 127. 153.\n", + " 88. 86. 80. 75. 87. 89. 81. 93. 77. 83. 136. 85. 84. 152.\n", + " 146. 78. 79. 59. 147. 154. 145. 73. 149. 68. 76. 151. 63. 69.\n", + " 67. 74. 72. 65. 71. 70. 62. 64. 66. 168. 36. 157. 60. 158.\n", + " 148. 
161.]\n", + "[0.484 0.488 0.482 0.441 0.429 0.386 0.643 0.553 0.466 0.494 0.468 0.505\n", + " 0.506 0.469 0.517 0.461 0.449 0.614 0.4 0.511 0.426 0.472 0.424 0.455\n", + " 0.495 0.474 0.348 0.433 0.477 0.421 0.655 0.457 0.5 0.43 0.533 0.413\n", + " 0.47 0.398 0.459 0.417 0.556 0.527 0.549 0.378 0.412 0.548 0.464 0.392\n", + " 0.651 0.382 0.512 0.42 0.476 0.473 0.354 0.602 0.419 0.539 0.489 0.581\n", + " 0.462 0.554 0.48 0.44 0.481 0.435 0.471 0.635 0.547 0.518 0.592 0.542\n", + " 0.57 0.446 0.443 0.479 0.535 0.516 0.513 0.366 0.431 0.447 0.467 0.524\n", + " 0.404 0.437 0.405 0.407 0.465 0.355 0.391 0.56 0.528 0.529 0.452 0.557\n", + " 0.46 0.537 0.532 0.388 0.483 0.543 0.608 0.523 0.53 0.453 0.571 0.591\n", + " 0.448 0.568 0.526 0.55 0.456 0.558 0.432 0.425 0.438 0.463 0.38 0.515\n", + " 0.544 0.478 0.385 0.536 0.356 0.434 0.451 0.6 0.538 0.427 0.337 0.597\n", + " 0.551 0.387 0.578 0.416 0.304 0.439 0.519 0.596 0.409 0.458 0.415 0.534\n", + " 0.559 0.418 0.493 0.374 0.45 0.525 0.444 0.372 0.422 0.613 0.609 0.373\n", + " 0.361 0.51 0.411 0.394 0.486 0.384 0.414 0.522 0.65 0.436 0.579 0.475\n", + " 0.349 0.442 0.565 0.454 0.49 0.351 0.402 0.389 0.561 0.393 0.408 0.34\n", + " 0.344 0.312 0.383 0.546 0.369 0.319 0.397 0.379 0.624 0.645 0.396 0.333\n", + " 0.562 0.52 0.521 0.642 0.541 0.589 0.507 0.583 0.615 0.353 0.403 0.423\n", + " 0.563 0.607 0.593 0.595 0.61 0.584 0.36 0.485 0.487 0.367 0.375 0.569\n", + " 0.323 0.314 0.54 0.566 0.545 0.39 0.288 0.632 0.567 0.352 0.358 0.327\n", + " 0.573 0.531 0.514 0.377 0.41 0.338 0.627 0.406 0.37 0.347 0.619 0.381\n", + " 0.552 0.345 0.625 0.376 0.365 0.603 0.346 0.321 0.371 0.653 0.326 0.395\n", + " 0.309 0.274 0.364 0.325 0.383 0.438 0.566 0.443 0.393 0.571 0.58 0.464\n", + " 0.448 0.621 0.388 0.631 0.459 0.357 0.612 0.453 0.342 0.343 0.463 0.538\n", + " 0.404 0.403 0.454 0.468 0.553 0.558 0.458 0.398 0.554 0.445 0.543 0.564\n", + " 0.427 0.432 0.394 0.575 0.654 0.277 0.533 0.586 0.574 0.598 0.359 0.372\n", + " 0.437 0.559 
0.549 0.378 0.548 0.298 0.368 0.333 0.617 0.576 0.363 0.295\n", + " 0.343 0.508 0.323 0.592 0.35 0.362 0.588 0.269 0.3 0.577 0.585 0.633\n", + " 0.329 0.59 0.605 0.671 0.582 0.367 0.628 0.492 0.338 0.318 0.634 0.293\n", + " 0.313 0.33 0.341 0.257 0.311 0.611 0.303 0.316 0.32 0.64 0.328 0.63\n", + " 0.31 0.301 0.289 0.587 0.641 0.567 0.62 0.291 0.315 0.658 0.644 0.606\n", + " 0.308 0.623 0.648 0.646 0.622 0.675 0.279 0.324 0.618 0.317 0.278 0.594\n", + " 0.604 0.636 0.662 0.652 0.306 0.296 0.284 0.297 0.339 0.283 0.616 0.29\n", + " 0.307 0.629 0.275 0.305 0.322 0.626 0.299 0.509 0.496 0.639 0.282 0.286\n", + " 0.651 0.684 0.302 0.294 0.667]\n", + "[0.926 0.952 0.786 0.909 1. 0.84 0.875 0.611 0.647 0.7 0.75 0.913\n", + " 0.741 0.706 0.781 0.789 0.6 0.808 0.9 0.645 0.833 0.708 0.865 0.548\n", + " 0.857 0.652 0.853 0.773 0.813 0.727 0.615 0.758 0.591 0.69 0.818 0.714\n", + " 0.684 0.806 0.824 0.862 0.81 0.846 0.783 0.811 0.793 0.852 0.864 0.739\n", + " 0.579 0.731 0.829 0.778 0.912 0.667 0.733 0.682 0.688 0.762 0.889 0.917\n", + " 0.895 0.692 0.8 0.893 0.85 0.905 0.531 0.417 0.867 0.903 0.882 0.842\n", + " 0.765 0.769 0.897 0.722 0.885 0.72 0.5 0.643 0.737 0.583 0.88 0.87\n", + " 0.923 0.65 0.63 0.4 0.774 0.759 0.947 0.538 0.76 0.767 0.815 0.826\n", + " 0.654 0.559 0.619 0.957 0.633 0.64 0.886 0.571 0.844 0.526 0.725 0.55\n", + " 0.792 0.556 0.721 0.828 0.613 0.78 0.938 0.879 0.524 0.632 0.871 0.931\n", + " 0.704 0.95 0.696 0.839 0.821 0.717 0.533 0.962 0.825 0.724 0.655 0.794\n", + " 0.816 0.625 0.742 0.581 0.577 0.955 0.636 0.861 0.719 0.609 0.676 0.735\n", + " 0.784 0.944 0.542 0.649 0.71 0.963 0.757 0.621 0.545 0.929 0.586 0.941\n", + " 0.775 0.939 0.744 0.622 0.914 0.795 0.677 0.674 0.593 0.755 0.958 0.529\n", + " 0.565 0.848 0.933 0.698 0.68 0.707 0.763 0.791 0.906 0.629 0.711 0.658\n", + " 0.628 0.788 0.385 0.679 0.607 0.697 0.656 0.588 0.935 0.514 0.705 0.686\n", + " 0.854 0.516 0.444 0.567 0.92 0.563 0.964 0.756 0.308 0.474 0.476 0.56\n", + " 0.694 0.595 0.605 
0.739 0.697 0.606 0.844 0.793 0.714 0.838 0.594 0.786\n", + " 0.842 0.96 0.389 0.692 0.781 0.806 0.656 0.45 0.478 0.676 0.808 0.783\n", + " 0.824 0.864 0.571 0.438 0.719 0.723 0.738 0.333 0.576 0.724 0.536 0.704\n", + " 0.525 0.744 0.771 0.829 0.657 0.788 0.839 0.886 0.822 0.811 0.48 0.429\n", + " 0.766 0.333 0.614 0.577 0.854 0.644 0.634 0.743 0.73 0.538 0.827 0.97\n", + " 0.868 0.481 0.694 0.841 0.805 0.86 0.881 0.462 0.435 0.639 0.703 0.552\n", + " 0.533 0.756 0.675 0.658 0.618 0.686 0.448 0.745 0.685 0.804 0.872 0.553\n", + " 0.66 0.568 0.718 0.972 0.641 0.59 0.52 0.578 0.943 0.617 0.635 0.567\n", + " 0.522 0.559 0.732 0.566 0.791 0.809 0.659 0.375 0.902 0.467 0.548 0.966\n", + " 0.357 0.971 0.535 0.925 0.455 0.541 0.519 0.387 0.683 0.407 0.976 0.364\n", + " 0.433 0.949 0.884 0.471 0.787 0.608 0.543 0.391 0.878 0.561 0.892 0.946\n", + " 0.83 0.837 0.729 0.707 0.761 0.646 0.575 0.412 0.911 0.653 0.782 0.919\n", + " 0.907 0.843 0.61 0.904 0.814 0.968 0.74 0.927 0.273 0.564 0.651 0.421\n", + " 0.512 0.717 0.457 0.796 0.517 0.485 0.689 0.318 0.436 0.558 0.405 0.604\n", + " 0.42 0.382 0.648 0.528 0.363 0.361 0.596 0.537 0.836 0.381 0.921 0.754\n", + " 0.681 0.873 0.484 0.3 0.316 0.851 0.638 0.286 0.472 0.585 0.967 0.458\n", + " 0.863 0.776 0.143 0.483 0.969 0.409 0.313 0.486 0.587 0.898 0.915 0.167\n", + " 0.513 0.734]\n", + "[0.382 0.457 0.313 0.297 0.378 0.317 0.636 0.423 0.395 0.267 0.333 0.362\n", + " 0.469 0.36 0.341 0.425 0.406 0.37 0.577 0.357 0.375 0.208 0.387 0.4\n", + " 0.355 0.513 0.364 0.255 0.342 0.394 0.535 0.407 0.414 0.194 0.103 0.474\n", + " 0.422 0.366 0.349 0.345 0.239 0.452 0.419 0.273 0.383 0.41 0.444 0.286\n", + " 0.321 0.379 0.25 0.304 0.343 0.319 0.276 0.282 0.32 0.229 0.481 0.222\n", + " 0.477 0.471 0.559 0.308 0.231 0.439 0.385 0.433 0.45 0.281 0.296 0.295\n", + " 0.325 0.361 0.472 0.279 0.435 0.368 0.424 0.348 0.429 0.259 0.353 0.359\n", + " 0.488 0.346 0.371 0.172 0.256 0.302 0.323 0.31 0.344 0.381 0.367 0.459\n", + " 0.441 0.517 0.489 
0.2 0.531 0.455 0.438 0.372 0.486 0.484 0.462 0.388\n", + " 0.188 0.278 0.3 0.5 0.257 0.432 0.269 0.241 0.182 0.261 0.39 0.316\n", + " 0.294 0.373 0.263 0.526 0.283 0.483 0.298 0.436 0.351 0.28 0.525 0.326\n", + " 0.464 0.238 0.214 0.516 0.35 0.219 0.515 0.218 0.389 0.27 0.207 0.511\n", + " 0.475 0.174 0.421 0.442 0.417 0.212 0.391 0.412 0.324 0.156 0.48 0.458\n", + " 0.268 0.447 0.242 0.277 0.235 0.29 0.528 0.289 0.275 0.404 0.46 0.314\n", + " 0.303 0.533 0.588 0.265 0.133 0.476 0.258 0.22 0.34 0.206 0.386 0.356\n", + " 0.293 0.288 0.192 0.393 0.262 0.405 0.392 0.216 0.467 0.536 0.318 0.179\n", + " 0.224 0.209 0.233 0.468 0.171 0.225 0.465 0.377 0.243 0.485 0.52 0.311\n", + " 0.226 0.545 0.397 0.556 0.514 0.542 0.548 0.523 0.415 0.292 0.44 0.408\n", + " 0.571 0.15 0.217 0.463 0.487 0.354 0.306 0.136 0.63 0.167 0.236 0.154\n", + " 0.147 0.244 0.583 0.111 0.347 0.237 0.575 0.529 0.409 0.479 0.095 0.184\n", + " 0.478 0.185 0.449 0.56 0.196 0.426 0.413 0.227 0.327 0.121 0.563 0.524\n", + " 0.17 0.24 0.205 0.271 0.42 0.158 0.541 0.161 0.512 0.684 0.593 0.394\n", + " 0.333 0.323 0.533 0.438 0.135 0.162 0.378 0.586 0.343 0.393 0.404 0.448\n", + " 0.229 0.211 0.543 0.224 0.194 0.464 0.458 0.383 0.204 0.519 0.396 0.148\n", + " 0.459 0.463 0.645 0.152 0.473 0.234 0.559 0.418 0.615 0.189 0.432 0.367\n", + " 0.16 0.468 0.522 0.089 0.579 0.548 0.509 0.532 0.167 0.269 0.552 0.643\n", + " 0.277 0.219 0.621 0.175 0.38 0.189 0.178 0.72 0.629 0.339 0.537 0.372\n", + " 0.6 0.448 0.227 0.143 0.13 0.591 0.565 0.118 0.571 0.279 0.538 0.26\n", + " 0.138 0.125 0.176 0.067 0.611 0.19 0.625 0.55 0.577 0.077 0.704 0.1\n", + " 0.688 0.105 0.667 0.692 0.091 0. 0.083 0.739 0.594 0.609 0.056 0.059\n", + " 0.632 0.714 0.647 0.063 0.607 0.75 0.65 0.818 0.64 0.053 0.071 0.652\n", + " 0.833 0.706 0.619 0.8 0.12 0.889 0.05 0.786 0.769 0.7 0.129 0.733\n", + " 0.722 0.682 0.727 0.045 0.875 1. 
0.291 0.307 0.285 0.824 0.778 0.087\n", + " 0.618 0.048 0.142 0.09 0.21 0.272 0.23 0.076 0.193 0.352 0.315 0.153\n", + " 0.312 0.117 0.68 0.49 0.266 0.213 0.195 0.633 0.119 0.339 0.329 0.842\n", + " 0.576 0.606 0.521 0.159 0.388 0.094 0.568 0.115 0.107 0.59 0.567 0.585\n", + " 0.086 0.595 0.08 0.613]\n", + "[25. 16. 22. 27. 26. 42. 23. 19. 29. 28. 24. 30. 20. 21. 38. 33. 17. 31.\n", + " 34. 36. 32. 18. 15. 40. 14. 35. 39. 37. 41. 11. 12. 13. 10. 50. 43. 9.\n", + " 8. 7. 6. 44. 45. 47.]\n", + "[46. 40. 37. 49. 47. 62. 32. 38. 39. 48. 43. 35. 44. 45. 41. 51. 42. 54.\n", + " 55. 53. 34. 56. 52. 31. 33. 67. 59. 50. 30. 36. 57. 29. 58. 26. 60. 24.\n", + " 64. 66. 61. 70. 28. 63. 68. 65. 27. 25. 23. 17. 22. 21. 69. 18. 71. 15.\n", + " 72.]\n", + "[1610612759 1610612764 1610612749 1610612765 1610612741 1610612754\n", + " 1610612744 1610612761 1610612753 1610612742 1610612757 1610612747\n", + " 1610612766 1610612762 1610612763 1610612752 1610612751 1610612748\n", + " 1610612740 1610612737 1610612758 1610612750 1610612743 1610612756\n", + " 1610612739 1610612738 1610612760 1610612746 1610612745 1610612755]\n", + "[117. 112. 106. 93. 110. 113. 116. 104. 98. 120. 105. 126. 94. 91.\n", + " 99. 101. 125. 124. 128. 121. 95. 109. 115. 111. 107. 97. 114. 122.\n", + " 119. 108. 129. 141. 88. 103. 100. 87. 92. 136. 102. 118. 89. 123.\n", + " 133. 81. 130. 135. 96. 85. 140. 127. 137. 143. 132. 131. 83. 145.\n", + " 134. 82. 90. 86. 80. 79. 77. 139. 146. 78. 153. 150. 149. 142.\n", + " 138. 157. 158. 75. 84. 74. 152. 154. 147. 144. 76. 69. 71. 70.\n", + " 72. 66. 73. 62. 63. 68. 61. 58. 67. 65. 64. 56. 60. 57.\n", + " 59. 151. 54. 33. 161. 159. 168. 
148.]\n", + "[0.478 0.561 0.47 0.392 0.5 0.469 0.494 0.447 0.451 0.453 0.402 0.505\n", + " 0.548 0.473 0.475 0.444 0.387 0.42 0.474 0.554 0.408 0.556 0.448 0.516\n", + " 0.384 0.55 0.531 0.523 0.427 0.463 0.429 0.512 0.409 0.471 0.378 0.584\n", + " 0.547 0.524 0.462 0.525 0.415 0.605 0.438 0.481 0.443 0.372 0.465 0.484\n", + " 0.495 0.43 0.526 0.527 0.419 0.413 0.4 0.32 0.398 0.442 0.386 0.356\n", + " 0.477 0.489 0.434 0.467 0.48 0.367 0.538 0.52 0.426 0.458 0.456 0.437\n", + " 0.459 0.542 0.441 0.532 0.424 0.53 0.449 0.411 0.534 0.376 0.425 0.488\n", + " 0.37 0.389 0.485 0.44 0.506 0.521 0.432 0.349 0.517 0.487 0.482 0.529\n", + " 0.435 0.535 0.416 0.543 0.522 0.433 0.446 0.563 0.461 0.41 0.575 0.383\n", + " 0.537 0.649 0.513 0.479 0.58 0.476 0.414 0.391 0.436 0.61 0.472 0.549\n", + " 0.573 0.511 0.39 0.528 0.388 0.357 0.422 0.345 0.375 0.545 0.457 0.518\n", + " 0.36 0.625 0.396 0.394 0.6 0.45 0.468 0.327 0.533 0.322 0.51 0.455\n", + " 0.407 0.393 0.38 0.493 0.515 0.536 0.577 0.418 0.464 0.404 0.568 0.567\n", + " 0.395 0.54 0.439 0.483 0.565 0.519 0.405 0.49 0.379 0.507 0.358 0.341\n", + " 0.371 0.423 0.403 0.466 0.365 0.397 0.353 0.347 0.323 0.355 0.412 0.333\n", + " 0.354 0.417 0.368 0.351 0.452 0.421 0.377 0.514 0.373 0.593 0.609 0.544\n", + " 0.551 0.564 0.326 0.558 0.602 0.578 0.406 0.598 0.56 0.277 0.454 0.348\n", + " 0.591 0.385 0.486 0.541 0.344 0.305 0.366 0.566 0.557 0.546 0.359 0.431\n", + " 0.581 0.574 0.352 0.374 0.46 0.586 0.539 0.337 0.509 0.343 0.57 0.553\n", + " 0.571 0.362 0.382 0.329 0.687 0.34 0.63 0.603 0.295 0.381 0.298 0.317\n", + " 0.582 0.579 0.369 0.585 0.364 0.394 0.553 0.464 0.398 0.533 0.383 0.438\n", + " 0.543 0.459 0.443 0.538 0.445 0.621 0.404 0.576 0.453 0.458 0.655 0.314\n", + " 0.333 0.64 0.388 0.549 0.378 0.567 0.548 0.454 0.448 0.616 0.468 0.329\n", + " 0.393 0.463 0.361 0.372 0.437 0.554 0.427 0.432 0.559 0.558 0.566 0.628\n", + " 0.363 0.59 0.571 0.615 0.552 0.582 0.367 0.343 0.315 0.569 0.607 0.613\n", + " 0.612 0.33 0.346 
0.674 0.308 0.594 0.304 0.491 0.403 0.577 0.562 0.321\n", + " 0.597 0.35 0.623 0.27 0.282 0.636 0.288 0.325 0.606 0.323 0.294 0.318\n", + " 0.272 0.646 0.589 0.307 0.274 0.291 0.324 0.587 0.338 0.293 0.492 0.342\n", + " 0.638 0.303 0.301 0.629 0.588 0.595 0.278 0.313 0.246 0.256 0.31 0.296\n", + " 0.311 0.508 0.622 0.299 0.583 0.319 0.641 0.657 0.269 0.627 0.292 0.302\n", + " 0.289 0.592 0.658 0.316 0.632 0.284 0.67 0.271 0.286 0.29 0.259 0.306\n", + " 0.328 0.312 0.608 0.596 0.28 0.279 0.287 0.268 0.267 0.297 0.275 0.244\n", + " 0.309 0.273 0.624 0.614 0.667 0.634 0.618 0.653 0.645]\n", + "[0.815 0.765 0.682 0.735 0.773 0.778 0.76 0.909 0.697 0.852 0.667 0.833\n", + " 0.759 0.632 0.923 0.703 0.739 0.8 0.92 0.789 0.806 0.81 0.824 0.895\n", + " 0.767 0.913 0.846 0.75 0.792 0.696 0.737 0.5 0.784 0.769 0.828 0.857\n", + " 0.783 0.719 0.56 0.821 0.862 0.889 0.85 0.706 0.565 0.733 0.947 0.688\n", + " 0.952 0.941 0.526 0.692 0.657 0.868 0.867 0.826 0.774 0.938 0.731 0.818\n", + " 0.875 0.625 0.741 0.905 0.522 0.914 0.69 0.87 0.813 0.871 0.958 0.714\n", + " 0.611 0.9 0.724 0.727 0.842 0.786 0.808 0.743 0.538 0.722 0.882 0.962\n", + " 0.791 0.483 1. 
0.676 0.6 0.684 0.762 0.591 0.88 0.788 0.848 0.742\n", + " 0.64 0.865 0.944 0.745 0.885 0.619 0.579 0.805 0.7 0.926 0.829 0.917\n", + " 0.548 0.864 0.3 0.781 0.381 0.929 0.839 0.654 0.647 0.68 0.964 0.793\n", + " 0.771 0.756 0.775 0.95 0.633 0.72 0.65 0.677 0.84 0.571 0.963 0.563\n", + " 0.933 0.96 0.704 0.708 0.636 0.594 0.643 0.645 0.618 0.711 0.844 0.545\n", + " 0.655 0.73 0.588 0.652 0.893 0.957 0.794 0.52 0.615 0.894 0.556 0.763\n", + " 0.861 0.795 0.586 0.755 0.853 0.583 0.955 0.931 0.757 0.529 0.886 0.474\n", + " 0.63 0.758 0.45 0.725 0.609 0.421 0.679 0.606 0.462 0.686 0.621 0.607\n", + " 0.524 0.55 0.4 0.536 0.897 0.656 0.71 0.375 0.577 0.935 0.639 0.417\n", + " 0.517 0.641 0.444 0.967 0.783 0.542 0.686 0.714 0.854 0.806 0.786 0.842\n", + " 0.455 0.837 0.744 0.816 0.793 0.864 0.824 0.571 0.808 0.844 0.781 0.829\n", + " 0.739 0.704 0.697 0.595 0.838 0.629 0.316 0.567 0.692 0.974 0.724 0.788\n", + " 0.839 0.533 0.707 0.903 0.906 0.78 0.719 0.718 0.593 0.538 0.912 0.651\n", + " 0.811 0.675 0.969 0.676 0.694 0.892 0.581 0.559 0.516 0.519 0.577 0.641\n", + " 0.429 0.756 0.649 0.656 0.613 0.471 0.879 0.435 0.771 0.814 0.385 0.622\n", + " 0.825 0.628 0.48 0.467 0.61 0.478 0.391 0.543 0.548 0.673 0.333 0.448\n", + " 0.412 0.886 0.674 0.653 0.35 0.804 0.605 0.576 0.644 0.44 0.617 0.452\n", + " 0.568 0.414 0.553 0.552 0.791 0.881 0.438 0.683 0.841 0.464 0.476 0.353\n", + " 0.968 0.721 0.143 0.736 0.872 0.86 0.368 0.946 0.83 0.732 0.469 0.919\n", + " 0.357 0.878 0.843 0.971 0.558 0.973 0.702 0.766 0.423 0.891 0.523 0.698\n", + " 0.761 0.776 0.458 0.689 0.827 0.717 0.658 0.884 0.531 0.634 0.972 0.738\n", + " 0.541 0.729 0.515 0.705 0.308 0.59 0.712 0.313 0.66 0.459 0.939 0.902\n", + " 0.787 0.659 0.402 0.418 0.454 0.614 0.453 0.442 0.461 0.723 0.575 0.691\n", + " 0.389 0.681 0.37 0.74 0.592 0.796 0.822 0.764 0.585 0.2 0.441 0.263\n", + " 0.921 0.93 0.176 0.943 0.485 0.635 0.587 0.907 0.537 0.393 0.62 0.484\n", + " 0.32 0.851 0.409]\n", + "[0.321 0.333 0.433 0.261 
0.292 0.462 0.364 0.265 0.297 0.389 0.458 0.29\n", + " 0.435 0.419 0.343 0.407 0.192 0.294 0.275 0.353 0.5 0.422 0.325 0.432\n", + " 0.429 0.323 0.414 0.308 0.517 0.421 0.316 0.256 0.36 0.367 0.306 0.361\n", + " 0.34 0.267 0.467 0.387 0.273 0.395 0.346 0.386 0.48 0.351 0.26 0.4\n", + " 0.281 0.45 0.424 0.37 0.529 0.487 0.182 0.238 0.255 0.148 0.396 0.296\n", + " 0.324 0.217 0.231 0.342 0.423 0.24 0.259 0.279 0.345 0.25 0.3 0.366\n", + " 0.382 0.286 0.485 0.355 0.314 0.344 0.472 0.417 0.167 0.212 0.368 0.452\n", + " 0.357 0.356 0.463 0.35 0.385 0.258 0.514 0.229 0.303 0.393 0.375 0.107\n", + " 0.348 0.413 0.289 0.27 0.293 0.16 0.457 0.444 0.379 0.184 0.313 0.347\n", + " 0.426 0.214 0.415 0.282 0.227 0.515 0.406 0.22 0.244 0.237 0.263 0.447\n", + " 0.38 0.276 0.233 0.28 0.194 0.511 0.206 0.216 0.359 0.269 0.318 0.516\n", + " 0.371 0.455 0.235 0.257 0.31 0.2 0.548 0.481 0.226 0.383 0.262 0.436\n", + " 0.158 0.438 0.412 0.195 0.304 0.425 0.442 0.581 0.268 0.219 0.464 0.32\n", + " 0.138 0.295 0.483 0.448 0.405 0.326 0.404 0.593 0.409 0.441 0.243 0.311\n", + " 0.352 0.222 0.362 0.242 0.372 0.484 0.394 0.176 0.317 0.341 0.152 0.349\n", + " 0.512 0.121 0.378 0.41 0.469 0.288 0.207 0.1 0.154 0.476 0.567 0.278\n", + " 0.189 0.391 0.39 0.553 0.576 0.538 0.475 0.528 0.42 0.459 0.354 0.575\n", + " 0.381 0.474 0.471 0.535 0.486 0.241 0.531 0.143 0.188 0.271 0.365 0.55\n", + " 0.465 0.571 0.208 0.468 0.302 0.171 0.477 0.552 0.478 0.298 0.545 0.533\n", + " 0.439 0.556 0.563 0.283 0.46 0.44 0.225 0.489 0.179 0.543 0.542 0.172\n", + " 0.479 0.51 0.568 0.163 0.526 0.319 0.373 0.19 0.524 0.522 0.52 0.583\n", + " 0.315 0.077 0.205 0.15 0.185 0.224 0.091 0.269 0.378 0.449 0.333 0.394\n", + " 0.533 0.458 0.377 0.279 0.343 0.519 0.393 0.636 0.585 0.571 0.125 0.438\n", + " 0.464 0.161 0.559 0.323 0.087 0.448 0.327 0.538 0.468 0.404 0.083 0.56\n", + " 0.569 0.459 0.194 0.219 0.209 0.488 0.367 0.229 0.564 0.463 0.574 0.186\n", + " 0.594 0.432 0.277 0.541 0.588 0.211 0.227 0.339 0.372 0.513 
0.586 0.383\n", + " 0.548 0.397 0.565 0.59 0.105 0.358 0.431 0.667 0.6 0.156 0.408 0.128\n", + " 0.607 0.213 0.189 0.625 0.536 0.094 0.167 0.053 0.071 0.577 0.05 0.579\n", + " 0.619 0.133 0.136 0.174 0.095 0.118 0.611 0.643 0.765 0.615 0.111 0.632\n", + " 0.63 0. 0.115 0.591 0.067 0.706 0.688 0.103 0.692 0.063 0.778 0.13\n", + " 0.652 0.059 0.613 0.696 0.7 0.75 0.682 0.567 0.609 0.65 0.714 0.647\n", + " 0.737 0.633 0.622 0.135 0.621 1. 0.12 0.727 0.181 0.153 0.388 0.266\n", + " 0.076 0.64 0.8 0.833 0.818 0.857 0.733 0.285 0.142 0.157 0.166 0.187\n", + " 0.291 0.392 0.173 0.307 0.23 0.363 0.543 0.68 0.147 0.175 0.309 0.525\n", + " 0.129 0.537 0.783 0.196 0.576 0.191 0.08 0.731 0.04 ]\n", + "[23. 20. 15. 27. 32. 17. 19. 29. 25. 24. 22. 30. 26. 13. 31. 34. 21. 40.\n", + " 18. 28. 16. 38. 36. 14. 12. 33. 35. 37. 39. 9. 41. 11. 8. 10. 7. 5.\n", + " 6. 4. 42. 43. 46.]\n", + "[44. 37. 46. 47. 36. 38. 45. 39. 40. 43. 29. 41. 42. 35. 50. 56. 49. 30.\n", + " 48. 53. 33. 52. 24. 51. 28. 59. 31. 32. 34. 55. 23. 54. 58. 61. 57. 60.\n", + " 62. 25. 27. 64. 66. 26. 68. 65. 63. 22. 21. 19. 71. 67. 81. 20.]\n", + "[1 0]\n" + ] + } + ], + "source": [ + "for column in df_games.columns:\n", + " print(df_games[column].unique())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Al menos GAME_STATUS_TEXT nos sobra ya que solo tiene un valor unico y no tiene relevancia" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GAME_DATE_ESTGAME_IDHOME_TEAM_IDVISITOR_TEAM_IDSEASONTEAM_ID_homePTS_homeFG_PCT_homeFT_PCT_homeFG3_PCT_homeAST_homeREB_homeTEAM_ID_awayPTS_awayFG_PCT_awayFT_PCT_awayFG3_PCT_awayAST_awayREB_awayHOME_TEAM_WINS
02022-12-2222200477PelicansSpurs20221610612740126.00.4840.9260.38225.046.01610612759117.00.4780.8150.32123.044.01
12022-12-2222200478JazzWizards20221610612762120.00.4880.9520.45716.040.01610612764112.00.5610.7650.33320.037.01
22022-12-2122200466CavaliersBucks20221610612739114.00.4820.7860.31322.037.01610612749106.00.4700.6820.43320.046.01
32022-12-212220046776ersPistons20221610612755113.00.4410.9090.29727.049.0161061276593.00.3920.7350.26115.046.01
42022-12-2122200468HawksBulls20221610612737108.00.4291.0000.37822.047.01610612741110.00.5000.7730.29220.047.00
\n", + "
" + ], + "text/plain": [ + " GAME_DATE_EST GAME_ID HOME_TEAM_ID VISITOR_TEAM_ID SEASON TEAM_ID_home \\\n", + "0 2022-12-22 22200477 Pelicans Spurs 2022 1610612740 \n", + "1 2022-12-22 22200478 Jazz Wizards 2022 1610612762 \n", + "2 2022-12-21 22200466 Cavaliers Bucks 2022 1610612739 \n", + "3 2022-12-21 22200467 76ers Pistons 2022 1610612755 \n", + "4 2022-12-21 22200468 Hawks Bulls 2022 1610612737 \n", + "\n", + " PTS_home FG_PCT_home FT_PCT_home FG3_PCT_home AST_home REB_home \\\n", + "0 126.0 0.484 0.926 0.382 25.0 46.0 \n", + "1 120.0 0.488 0.952 0.457 16.0 40.0 \n", + "2 114.0 0.482 0.786 0.313 22.0 37.0 \n", + "3 113.0 0.441 0.909 0.297 27.0 49.0 \n", + "4 108.0 0.429 1.000 0.378 22.0 47.0 \n", + "\n", + " TEAM_ID_away PTS_away FG_PCT_away FT_PCT_away FG3_PCT_away AST_away \\\n", + "0 1610612759 117.0 0.478 0.815 0.321 23.0 \n", + "1 1610612764 112.0 0.561 0.765 0.333 20.0 \n", + "2 1610612749 106.0 0.470 0.682 0.433 20.0 \n", + "3 1610612765 93.0 0.392 0.735 0.261 15.0 \n", + "4 1610612741 110.0 0.500 0.773 0.292 20.0 \n", + "\n", + " REB_away HOME_TEAM_WINS \n", + "0 44.0 1 \n", + "1 37.0 1 \n", + "2 46.0 1 \n", + "3 46.0 1 \n", + "4 47.0 0 " + ] + }, + "execution_count": 241, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_games = df_games.drop(columns=['GAME_STATUS_TEXT'])\n", + "df_games.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selecciono solo la temporada 2022" + ] + }, + { + "cell_type": "code", + "execution_count": 242, + "metadata": {}, + "outputs": [], + "source": [ + "df_games_2022 = df_games.loc[df_games['SEASON'] == 2022]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vemos cantidad de filas y columnas del nuevo dataframe seleccionando 2022" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(542, 20)" + ] + }, + "execution_count": 243, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "df_games_2022.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Guardamos las columnas en una variable llamada variables" + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['GAME_DATE_EST', 'GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON', 'TEAM_ID_home', 'PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home', 'TEAM_ID_away', 'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away', 'REB_away', 'HOME_TEAM_WINS']\n" + ] + } + ], + "source": [ + "variables = list(df_games_2022.columns)\n", + "print(variables)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Guardamos las variables numéricas en una lista y luego creamos otra lista con las mismas variables pero añadiendo el sufijo _norm para saber que están normalizadas" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "metadata": {}, + "outputs": [], + "source": [ + "variables_elegidas = ['PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home', 'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away', 'REB_away']\n", + "variables_elegidas_norm = ['PTS_home_norm', 'FG_PCT_home_norm', 'FT_PCT_home_norm', 'FG3_PCT_home_norm', 'AST_home_norm', 'REB_home_norm', 'PTS_away_norm', 'FG_PCT_away_norm', 'FT_PCT_away_norm', 'FG3_PCT_away_norm', 'AST_away_norm', 'REB_away_norm']\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Añadimos al dataframe \"X\" las variables que hemos elegido para entrenar y a \"y\" el target HOME_TEAM_WINS, que vale 1 si gana el equipo local y 0 si gana el visitante" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "metadata": {}, + "outputs": [], + "source": [ + "X = df_games_2022[variables_elegidas]\n", + "y = 
df_games_2022[\"HOME_TEAM_WINS\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Muestra el conjunto de datos de la temporada 2022 y el target" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PTS_homeFG_PCT_homeFT_PCT_homeFG3_PCT_homeAST_homeREB_homePTS_awayFG_PCT_awayFT_PCT_awayFG3_PCT_awayAST_awayREB_away
0126.00.4840.9260.38225.046.0117.00.4780.8150.32123.044.0
1120.00.4880.9520.45716.040.0112.00.5610.7650.33320.037.0
2114.00.4820.7860.31322.037.0106.00.4700.6820.43320.046.0
3113.00.4410.9090.29727.049.093.00.3920.7350.26115.046.0
4108.00.4291.0000.37822.047.0110.00.5000.7730.29220.047.0
\n", + "
" + ], + "text/plain": [ + " PTS_home FG_PCT_home FT_PCT_home FG3_PCT_home AST_home REB_home \\\n", + "0 126.0 0.484 0.926 0.382 25.0 46.0 \n", + "1 120.0 0.488 0.952 0.457 16.0 40.0 \n", + "2 114.0 0.482 0.786 0.313 22.0 37.0 \n", + "3 113.0 0.441 0.909 0.297 27.0 49.0 \n", + "4 108.0 0.429 1.000 0.378 22.0 47.0 \n", + "\n", + " PTS_away FG_PCT_away FT_PCT_away FG3_PCT_away AST_away REB_away \n", + "0 117.0 0.478 0.815 0.321 23.0 44.0 \n", + "1 112.0 0.561 0.765 0.333 20.0 37.0 \n", + "2 106.0 0.470 0.682 0.433 20.0 46.0 \n", + "3 93.0 0.392 0.735 0.261 15.0 46.0 \n", + "4 110.0 0.500 0.773 0.292 20.0 47.0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + "Name: HOME_TEAM_WINS, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(X.head())\n", + "display(y.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hacemos el split de train y de test, 0.7 y 0.3 respectivamente" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, \n", + " y, \n", + " test_size=0.3, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalizamos las variables elegidas en train" + ] + }, + { + "cell_type": "code", + "execution_count": 249, + "metadata": {}, + "outputs": [], + "source": [ + "scaler = prep.MinMaxScaler()\n", + "X = scaler.fit_transform(X_train[variables_elegidas])\n", + "\n", + "# Creamos columnas con datos normalizados\n", + "X_train[variables_elegidas_norm] = X\n", + "# Borramos las variables no normalizadas\n", + "X_train = X_train.select_dtypes(include = 'number').drop(variables_elegidas, axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "metadata": { + "scrolled": true + }, + "outputs": [ + { 
+ "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PTS_home_normFG_PCT_home_normFT_PCT_home_normFG3_PCT_home_normAST_home_normREB_home_normPTS_away_normFG_PCT_away_normFT_PCT_away_normFG3_PCT_away_normAST_away_normREB_away_norm
4400.2054790.3901730.4000.3670100.080.4418600.4761900.4316110.8814290.4574710.6296300.684211
2470.1506850.0953760.6880.2288660.040.5813950.2698410.2492400.6828570.3103450.4814810.578947
4340.3287670.4104050.6000.1628870.480.3720930.3809520.3647420.7357140.4873560.2592590.763158
570.7534250.5809250.6660.7030930.560.5116280.6666670.5471120.8414290.4252870.6666670.421053
2340.6438360.8612720.4660.7587630.760.4883720.5238100.3465050.3542860.3816090.5925930.552632
\n", + "
" + ], + "text/plain": [ + " PTS_home_norm FG_PCT_home_norm FT_PCT_home_norm FG3_PCT_home_norm \\\n", + "440 0.205479 0.390173 0.400 0.367010 \n", + "247 0.150685 0.095376 0.688 0.228866 \n", + "434 0.328767 0.410405 0.600 0.162887 \n", + "57 0.753425 0.580925 0.666 0.703093 \n", + "234 0.643836 0.861272 0.466 0.758763 \n", + "\n", + " AST_home_norm REB_home_norm PTS_away_norm FG_PCT_away_norm \\\n", + "440 0.08 0.441860 0.476190 0.431611 \n", + "247 0.04 0.581395 0.269841 0.249240 \n", + "434 0.48 0.372093 0.380952 0.364742 \n", + "57 0.56 0.511628 0.666667 0.547112 \n", + "234 0.76 0.488372 0.523810 0.346505 \n", + "\n", + " FT_PCT_away_norm FG3_PCT_away_norm AST_away_norm REB_away_norm \n", + "440 0.881429 0.457471 0.629630 0.684211 \n", + "247 0.682857 0.310345 0.481481 0.578947 \n", + "434 0.735714 0.487356 0.259259 0.763158 \n", + "57 0.841429 0.425287 0.666667 0.421053 \n", + "234 0.354286 0.381609 0.592593 0.552632 " + ] + }, + "execution_count": 250, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalizamos test" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [], + "source": [ + "X = scaler.transform(X_test[variables_elegidas])\n", + "\n", + "# Creamos columnas con datos normalizados\n", + "X_test[variables_elegidas_norm] = X\n", + "# Borramos las variables no normalizadas\n", + "X_test = X_test.select_dtypes(include = 'number').drop(variables_elegidas, axis = 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Seleccionamos las 6 variables con mayor puntuación chi-cuadrado (chi2) respecto a y, y mostramos también la puntuación de todas" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['PTS_home_norm' 'FG_PCT_home_norm' 'AST_home_norm' 'PTS_away_norm'\n", + " 'FG_PCT_away_norm' 
'FG3_PCT_away_norm']\n", + "Variable PTS_home_norm: 4.3868\n", + "Variable FG_PCT_home_norm: 3.7384\n", + "Variable FT_PCT_home_norm: 0.3319\n", + "Variable FG3_PCT_home_norm: 3.0046\n", + "Variable AST_home_norm: 3.0950\n", + "Variable REB_home_norm: 0.5436\n", + "Variable PTS_away_norm: 5.7462\n", + "Variable FG_PCT_away_norm: 3.8130\n", + "Variable FT_PCT_away_norm: 0.1706\n", + "Variable FG3_PCT_away_norm: 4.5008\n", + "Variable AST_away_norm: 2.6714\n", + "Variable REB_away_norm: 1.6384\n" + ] + } + ], + "source": [ + "selector = SelectKBest(chi2, k=6)\n", + "\n", + "X_select = selector.fit_transform(X_train, y_train)\n", + "print(selector.get_feature_names_out())\n", + "for var, value in zip(selector.feature_names_in_, selector.scores_):\n", + " print('Variable %s: %.4f' % (var, value))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "APLICAMOS EL ENTRENAMIENTO DE MODELOS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Empezamos usando regresión logística" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy en train: 0.9287598944591029\n", + "Accuracy en test: 0.9141104294478528\n" + ] + } + ], + "source": [ + "# Creamos el objeto del modelo con parámetros por defecto, fijando la semilla para evitar aleatoriedad\n", + "logreg = LogisticRegression(random_state=42)\n", + "\n", + "# Entrenamos con el conjunto de train y su target\n", + "logreg.fit(X_train, y_train)\n", + "print('Accuracy en train: ', logreg.score(X_train, y_train))\n", + "print('Accuracy en test: ', logreg.score(X_test, y_test))" + ] } ], "metadata": {