From 75975be8a198d37f6a2cec2e1d154fdd2db9bb88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Crist=C3=B3bal=20Herreros?= Date: Thu, 19 Jan 2023 22:55:56 +0100 Subject: [PATCH] Feat: analyze null values --- entrega.ipynb | 1665 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 1635 insertions(+), 30 deletions(-) diff --git a/entrega.ipynb b/entrega.ipynb index f25edac..ec4468c 100644 --- a/entrega.ipynb +++ b/entrega.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -48,9 +48,18 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_13296/1756342927.py:3: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_games_details = pd.read_csv(zf.open('games_details.csv'))\n" + ] + } + ], "source": [ "zf = zp.ZipFile('dataframes/nba.zip')\n", "df_games = pd.read_csv(zf.open('games.csv'))\n", @@ -69,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -111,7 +120,7 @@ " PTS_home\n", " FG_PCT_home\n", " FT_PCT_home\n", - " FG3_PCT_home\n", + " ...\n", " AST_home\n", " REB_home\n", " TEAM_ID_away\n", @@ -137,7 +146,7 @@ " 126.0\n", " 0.484\n", " 0.926\n", - " 0.382\n", + " ...\n", " 25.0\n", " 46.0\n", " 1610612759\n", @@ -161,7 +170,7 @@ " 120.0\n", " 0.488\n", " 0.952\n", - " 0.457\n", + " ...\n", " 16.0\n", " 40.0\n", " 1610612764\n", @@ -185,7 +194,7 @@ " 114.0\n", " 0.482\n", " 0.786\n", - " 0.313\n", + " ...\n", " 22.0\n", " 37.0\n", " 1610612749\n", @@ -209,7 +218,7 @@ " 113.0\n", " 0.441\n", " 0.909\n", - " 0.297\n", + " ...\n", " 27.0\n", " 49.0\n", " 1610612765\n", @@ -233,7 +242,7 @@ " 108.0\n", " 0.429\n", " 1.000\n", - " 0.378\n", + " ...\n", " 22.0\n", " 47.0\n", " 1610612741\n", @@ -247,6 +256,7 @@ " \n", " \n", "\n", + "

5 rows × 21 columns

\n", "" ], "text/plain": [ @@ -257,26 +267,28 @@ "3 2022-12-21 22200467 Final 1610612755 1610612765 \n", "4 2022-12-21 22200468 Final 1610612737 1610612741 \n", "\n", - " SEASON TEAM_ID_home PTS_home FG_PCT_home FT_PCT_home FG3_PCT_home \\\n", - "0 2022 1610612740 126.0 0.484 0.926 0.382 \n", - "1 2022 1610612762 120.0 0.488 0.952 0.457 \n", - "2 2022 1610612739 114.0 0.482 0.786 0.313 \n", - "3 2022 1610612755 113.0 0.441 0.909 0.297 \n", - "4 2022 1610612737 108.0 0.429 1.000 0.378 \n", + " SEASON TEAM_ID_home PTS_home FG_PCT_home FT_PCT_home ... AST_home \\\n", + "0 2022 1610612740 126.0 0.484 0.926 ... 25.0 \n", + "1 2022 1610612762 120.0 0.488 0.952 ... 16.0 \n", + "2 2022 1610612739 114.0 0.482 0.786 ... 22.0 \n", + "3 2022 1610612755 113.0 0.441 0.909 ... 27.0 \n", + "4 2022 1610612737 108.0 0.429 1.000 ... 22.0 \n", "\n", - " AST_home REB_home TEAM_ID_away PTS_away FG_PCT_away FT_PCT_away \\\n", - "0 25.0 46.0 1610612759 117.0 0.478 0.815 \n", - "1 16.0 40.0 1610612764 112.0 0.561 0.765 \n", - "2 22.0 37.0 1610612749 106.0 0.470 0.682 \n", - "3 27.0 49.0 1610612765 93.0 0.392 0.735 \n", - "4 22.0 47.0 1610612741 110.0 0.500 0.773 \n", + " REB_home TEAM_ID_away PTS_away FG_PCT_away FT_PCT_away FG3_PCT_away \\\n", + "0 46.0 1610612759 117.0 0.478 0.815 0.321 \n", + "1 40.0 1610612764 112.0 0.561 0.765 0.333 \n", + "2 37.0 1610612749 106.0 0.470 0.682 0.433 \n", + "3 49.0 1610612765 93.0 0.392 0.735 0.261 \n", + "4 47.0 1610612741 110.0 0.500 0.773 0.292 \n", "\n", - " FG3_PCT_away AST_away REB_away HOME_TEAM_WINS \n", - "0 0.321 23.0 44.0 1 \n", - "1 0.333 20.0 37.0 1 \n", - "2 0.433 20.0 46.0 1 \n", - "3 0.261 15.0 46.0 1 \n", - "4 0.292 20.0 47.0 0 " + " AST_away REB_away HOME_TEAM_WINS \n", + "0 23.0 44.0 1 \n", + "1 20.0 37.0 1 \n", + "2 20.0 46.0 1 \n", + "3 15.0 46.0 1 \n", + "4 20.0 47.0 0 \n", + "\n", + "[5 rows x 21 columns]" ] }, "metadata": {}, @@ -912,7 +924,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -1018,6 +1030,1599 @@ "print(\"----------------------------\")\n", "display(df_teams.shape)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vemos la información" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- PARTIDOS ----------\n", + "-----------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "GAME_DATE_EST object\n", + "GAME_ID int64\n", + "GAME_STATUS_TEXT object\n", + "HOME_TEAM_ID int64\n", + "VISITOR_TEAM_ID int64\n", + "SEASON int64\n", + "TEAM_ID_home int64\n", + "PTS_home float64\n", + "FG_PCT_home float64\n", + "FT_PCT_home float64\n", + "FG3_PCT_home float64\n", + "AST_home float64\n", + "REB_home float64\n", + "TEAM_ID_away int64\n", + "PTS_away float64\n", + "FG_PCT_away float64\n", + "FT_PCT_away float64\n", + "FG3_PCT_away float64\n", + "AST_away float64\n", + "REB_away float64\n", + "HOME_TEAM_WINS int64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- DETALLES PARTIDOS ----------\n", + "--------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "GAME_ID int64\n", + "TEAM_ID int64\n", + "TEAM_ABBREVIATION object\n", + "TEAM_CITY object\n", + "PLAYER_ID int64\n", + "PLAYER_NAME object\n", + "NICKNAME object\n", + "START_POSITION object\n", + "COMMENT object\n", + "MIN object\n", + "FGM float64\n", + "FGA float64\n", + "FG_PCT float64\n", + "FG3M float64\n", + "FG3A float64\n", + "FG3_PCT float64\n", + "FTM float64\n", + "FTA float64\n", + "FT_PCT float64\n", + "OREB float64\n", + "DREB float64\n", + "REB float64\n", + "AST float64\n", + "STL float64\n", + "BLK float64\n", + "TO float64\n", + "PF float64\n", + "PTS float64\n", + "PLUS_MINUS float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- JUGADORES ----------\n", + "------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "PLAYER_NAME object\n", + "TEAM_ID int64\n", + "PLAYER_ID int64\n", + "SEASON int64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- RANKING LIGA ----------\n", + "---------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "TEAM_ID int64\n", + "LEAGUE_ID int64\n", + "SEASON_ID int64\n", + "STANDINGSDATE object\n", + "CONFERENCE object\n", + "TEAM object\n", + "G int64\n", + "W int64\n", + "L int64\n", + "W_PCT float64\n", + "HOME_RECORD object\n", + "ROAD_RECORD object\n", + "RETURNTOPLAY float64\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- EQUIPOS ----------\n", + "----------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "LEAGUE_ID int64\n", + "TEAM_ID int64\n", + "MIN_YEAR int64\n", + "MAX_YEAR int64\n", + "ABBREVIATION object\n", + "NICKNAME object\n", + "YEARFOUNDED int64\n", + "CITY object\n", + "ARENA object\n", + "ARENACAPACITY float64\n", + "OWNER object\n", + "GENERALMANAGER object\n", + "HEADCOACH object\n", + "DLEAGUEAFFILIATION object\n", + "dtype: object" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"--------- PARTIDOS ----------\")\n", + "print(\"-----------------------------\")\n", + "display(df_games.dtypes)\n", + "print(\"--------- DETALLES PARTIDOS ----------\")\n", + "print(\"--------------------------------------\")\n", + "display(df_games_details.dtypes)\n", + "print(\"--------- JUGADORES ----------\")\n", + "print(\"------------------------------\")\n", + "display(df_players.dtypes)\n", + "print(\"--------- RANKING LIGA ----------\")\n", + "print(\"---------------------------------\")\n", + "display(df_ranking.dtypes)\n", + "print(\"--------- EQUIPOS ----------\")\n", + "print(\"----------------------------\")\n", + "display(df_teams.dtypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ver si hay nulos en los distintos dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- PARTIDOS ----------\n", + "-----------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "GAME_DATE_EST 0\n", + "GAME_ID 0\n", + "GAME_STATUS_TEXT 0\n", + "HOME_TEAM_ID 0\n", + "VISITOR_TEAM_ID 0\n", + "SEASON 0\n", + "TEAM_ID_home 0\n", + "PTS_home 99\n", + "FG_PCT_home 99\n", + "FT_PCT_home 99\n", + "FG3_PCT_home 99\n", + "AST_home 99\n", + "REB_home 99\n", + "TEAM_ID_away 0\n", + "PTS_away 99\n", + "FG_PCT_away 99\n", + "FT_PCT_away 99\n", + "FG3_PCT_away 99\n", + "AST_away 99\n", + "REB_away 99\n", + "HOME_TEAM_WINS 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GAME_DATE_ESTGAME_IDGAME_STATUS_TEXTHOME_TEAM_IDVISITOR_TEAM_IDSEASONTEAM_ID_homePTS_homeFG_PCT_homeFT_PCT_home...AST_homeREB_homeTEAM_ID_awayPTS_awayFG_PCT_awayFT_PCT_awayFG3_PCT_awayAST_awayREB_awayHOME_TEAM_WINS
191752003-10-2410300116Final1610612753161061276220031610612753NaNNaNNaN...NaNNaN1610612762NaNNaNNaNNaNNaNNaN0
191762003-10-2410300108Final1610612737161061276420031610612737NaNNaNNaN...NaNNaN1610612764NaNNaNNaNNaNNaNNaN0
191772003-10-2410300109Final1610612738161061275120031610612738NaNNaNNaN...NaNNaN1610612751NaNNaNNaNNaNNaNNaN0
191782003-10-2410300113Final1610612759161061274520031610612759NaNNaNNaN...NaNNaN1610612745NaNNaNNaNNaNNaNNaN0
191792003-10-2410300112Final1610612749161061276520031610612749NaNNaNNaN...NaNNaN1610612765NaNNaNNaNNaNNaNNaN0
..................................................................
192692003-10-0910300019Final1610612743161061275620031610612743NaNNaNNaN...NaNNaN1610612756NaNNaNNaNNaNNaNNaN0
192702003-10-0910300022Final1610612757161061275820031610612757NaNNaNNaN...NaNNaN1610612758NaNNaNNaNNaNNaNNaN0
192712003-10-0810300013Final1610612759161061276320031610612759NaNNaNNaN...NaNNaN1610612763NaNNaNNaNNaNNaNNaN0
192782003-10-0810300015Final1610612747161061274420031610612747NaNNaNNaN...NaNNaN1610612744NaNNaNNaNNaNNaNNaN0
192792003-10-0710300006Final1610612747161061274420031610612747NaNNaNNaN...NaNNaN1610612744NaNNaNNaNNaNNaNNaN0
\n", + "

99 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " GAME_DATE_EST GAME_ID GAME_STATUS_TEXT HOME_TEAM_ID VISITOR_TEAM_ID \\\n", + "19175 2003-10-24 10300116 Final 1610612753 1610612762 \n", + "19176 2003-10-24 10300108 Final 1610612737 1610612764 \n", + "19177 2003-10-24 10300109 Final 1610612738 1610612751 \n", + "19178 2003-10-24 10300113 Final 1610612759 1610612745 \n", + "19179 2003-10-24 10300112 Final 1610612749 1610612765 \n", + "... ... ... ... ... ... \n", + "19269 2003-10-09 10300019 Final 1610612743 1610612756 \n", + "19270 2003-10-09 10300022 Final 1610612757 1610612758 \n", + "19271 2003-10-08 10300013 Final 1610612759 1610612763 \n", + "19278 2003-10-08 10300015 Final 1610612747 1610612744 \n", + "19279 2003-10-07 10300006 Final 1610612747 1610612744 \n", + "\n", + " SEASON TEAM_ID_home PTS_home FG_PCT_home FT_PCT_home ... \\\n", + "19175 2003 1610612753 NaN NaN NaN ... \n", + "19176 2003 1610612737 NaN NaN NaN ... \n", + "19177 2003 1610612738 NaN NaN NaN ... \n", + "19178 2003 1610612759 NaN NaN NaN ... \n", + "19179 2003 1610612749 NaN NaN NaN ... \n", + "... ... ... ... ... ... ... \n", + "19269 2003 1610612743 NaN NaN NaN ... \n", + "19270 2003 1610612757 NaN NaN NaN ... \n", + "19271 2003 1610612759 NaN NaN NaN ... \n", + "19278 2003 1610612747 NaN NaN NaN ... \n", + "19279 2003 1610612747 NaN NaN NaN ... \n", + "\n", + " AST_home REB_home TEAM_ID_away PTS_away FG_PCT_away FT_PCT_away \\\n", + "19175 NaN NaN 1610612762 NaN NaN NaN \n", + "19176 NaN NaN 1610612764 NaN NaN NaN \n", + "19177 NaN NaN 1610612751 NaN NaN NaN \n", + "19178 NaN NaN 1610612745 NaN NaN NaN \n", + "19179 NaN NaN 1610612765 NaN NaN NaN \n", + "... ... ... ... ... ... ... \n", + "19269 NaN NaN 1610612756 NaN NaN NaN \n", + "19270 NaN NaN 1610612758 NaN NaN NaN \n", + "19271 NaN NaN 1610612763 NaN NaN NaN \n", + "19278 NaN NaN 1610612744 NaN NaN NaN \n", + "19279 NaN NaN 1610612744 NaN NaN NaN \n", + "\n", + " FG3_PCT_away AST_away REB_away HOME_TEAM_WINS \n", + "19175 NaN NaN NaN 0 \n", + "19176 NaN NaN NaN 0 \n", + "19177 NaN NaN NaN 0 \n", + "19178 NaN NaN NaN 0 \n", + "19179 NaN NaN NaN 0 \n", + "... ... ... ... ... \n", + "19269 NaN NaN NaN 0 \n", + "19270 NaN NaN NaN 0 \n", + "19271 NaN NaN NaN 0 \n", + "19278 NaN NaN NaN 0 \n", + "19279 NaN NaN NaN 0 \n", + "\n", + "[99 rows x 21 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"--------- PARTIDOS ----------\")\n", + "print(\"-----------------------------\")\n", + "display(df_games.isnull().sum())\n", + "display(df_games[df_games.isna().any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- DETALLES PARTIDOS ----------\n", + "--------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "GAME_ID 0\n", + "TEAM_ID 0\n", + "TEAM_ABBREVIATION 0\n", + "TEAM_CITY 0\n", + "PLAYER_ID 0\n", + "PLAYER_NAME 0\n", + "NICKNAME 615591\n", + "START_POSITION 412863\n", + "COMMENT 558939\n", + "MIN 109690\n", + "FGM 109690\n", + "FGA 109690\n", + "FG_PCT 109690\n", + "FG3M 109690\n", + "FG3A 109690\n", + "FG3_PCT 109690\n", + "FTM 109690\n", + "FTA 109690\n", + "FT_PCT 109690\n", + "OREB 109690\n", + "DREB 109690\n", + "REB 109690\n", + "AST 109690\n", + "STL 109690\n", + "BLK 109690\n", + "TO 109690\n", + "PF 109690\n", + "PTS 109690\n", + "PLUS_MINUS 133351\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GAME_IDTEAM_IDTEAM_ABBREVIATIONTEAM_CITYPLAYER_IDPLAYER_NAMENICKNAMESTART_POSITIONCOMMENTMIN...OREBDREBREBASTSTLBLKTOPFPTSPLUS_MINUS
0222004771610612759SASSan Antonio1629641Romeo LangfordRomeoFNaN18:06...1.01.02.00.01.00.02.05.02.0-2.0
1222004771610612759SASSan Antonio1631110Jeremy SochanJeremyFNaN31:01...6.03.09.06.01.00.02.01.023.0-14.0
2222004771610612759SASSan Antonio1627751Jakob PoeltlJakobCNaN21:42...1.03.04.01.01.00.02.04.013.0-4.0
3222004771610612759SASSan Antonio1630170Devin VassellDevinGNaN30:20...0.09.09.05.03.00.02.01.010.0-18.0
4222004771610612759SASSan Antonio1630200Tre JonesTreGNaN27:44...0.02.02.03.00.00.02.02.019.00.0
..................................................................
668623112000051610612743DENDenver202706Jordan HamiltonNaNNaNNaN19...0.02.02.00.02.00.01.03.017.0NaN
668624112000051610612743DENDenver202702Kenneth FariedNaNNaNNaN23...1.00.01.01.01.00.03.03.018.0NaN
668625112000051610612743DENDenver201585Kosta KoufosNaNNaNNaN15...3.05.08.00.01.00.00.03.06.0NaN
668626112000051610612743DENDenver202389Timofey MozgovNaNNaNNaN19...1.02.03.01.00.00.04.02.02.0NaN
668627112000051610612743DENDenver201951Ty LawsonNaNNaNNaN27...0.02.02.06.02.00.06.01.08.0NaN
\n", + "

668628 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " GAME_ID TEAM_ID TEAM_ABBREVIATION TEAM_CITY PLAYER_ID \\\n", + "0 22200477 1610612759 SAS San Antonio 1629641 \n", + "1 22200477 1610612759 SAS San Antonio 1631110 \n", + "2 22200477 1610612759 SAS San Antonio 1627751 \n", + "3 22200477 1610612759 SAS San Antonio 1630170 \n", + "4 22200477 1610612759 SAS San Antonio 1630200 \n", + "... ... ... ... ... ... \n", + "668623 11200005 1610612743 DEN Denver 202706 \n", + "668624 11200005 1610612743 DEN Denver 202702 \n", + "668625 11200005 1610612743 DEN Denver 201585 \n", + "668626 11200005 1610612743 DEN Denver 202389 \n", + "668627 11200005 1610612743 DEN Denver 201951 \n", + "\n", + " PLAYER_NAME NICKNAME START_POSITION COMMENT MIN ... OREB \\\n", + "0 Romeo Langford Romeo F NaN 18:06 ... 1.0 \n", + "1 Jeremy Sochan Jeremy F NaN 31:01 ... 6.0 \n", + "2 Jakob Poeltl Jakob C NaN 21:42 ... 1.0 \n", + "3 Devin Vassell Devin G NaN 30:20 ... 0.0 \n", + "4 Tre Jones Tre G NaN 27:44 ... 0.0 \n", + "... ... ... ... ... ... ... ... \n", + "668623 Jordan Hamilton NaN NaN NaN 19 ... 0.0 \n", + "668624 Kenneth Faried NaN NaN NaN 23 ... 1.0 \n", + "668625 Kosta Koufos NaN NaN NaN 15 ... 3.0 \n", + "668626 Timofey Mozgov NaN NaN NaN 19 ... 1.0 \n", + "668627 Ty Lawson NaN NaN NaN 27 ... 0.0 \n", + "\n", + " DREB REB AST STL BLK TO PF PTS PLUS_MINUS \n", + "0 1.0 2.0 0.0 1.0 0.0 2.0 5.0 2.0 -2.0 \n", + "1 3.0 9.0 6.0 1.0 0.0 2.0 1.0 23.0 -14.0 \n", + "2 3.0 4.0 1.0 1.0 0.0 2.0 4.0 13.0 -4.0 \n", + "3 9.0 9.0 5.0 3.0 0.0 2.0 1.0 10.0 -18.0 \n", + "4 2.0 2.0 3.0 0.0 0.0 2.0 2.0 19.0 0.0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "668623 2.0 2.0 0.0 2.0 0.0 1.0 3.0 17.0 NaN \n", + "668624 0.0 1.0 1.0 1.0 0.0 3.0 3.0 18.0 NaN \n", + "668625 5.0 8.0 0.0 1.0 0.0 0.0 3.0 6.0 NaN \n", + "668626 2.0 3.0 1.0 0.0 0.0 4.0 2.0 2.0 NaN \n", + "668627 2.0 2.0 6.0 2.0 0.0 6.0 1.0 8.0 NaN \n", + "\n", + "[668628 rows x 29 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"--------- DETALLES PARTIDOS ----------\")\n", + "print(\"--------------------------------------\")\n", + "display(df_games_details.isnull().sum())\n", + "display(df_games_details[df_games_details.isna().any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- DETALLES PARTIDOS ----------\n", + "--------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "PLAYER_NAME 0\n", + "TEAM_ID 0\n", + "PLAYER_ID 0\n", + "SEASON 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PLAYER_NAMETEAM_IDPLAYER_IDSEASON
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [PLAYER_NAME, TEAM_ID, PLAYER_ID, SEASON]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"--------- JUGADORES ----------\")\n", + "print(\"------------------------------\")\n", + "display(df_players.isnull().sum())\n", + "display(df_players[df_players.isna().any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- RANKING LIGA ----------\n", + "---------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "TEAM_ID 0\n", + "LEAGUE_ID 0\n", + "SEASON_ID 0\n", + "STANDINGSDATE 0\n", + "CONFERENCE 0\n", + "TEAM 0\n", + "G 0\n", + "W 0\n", + "L 0\n", + "W_PCT 0\n", + "HOME_RECORD 0\n", + "ROAD_RECORD 0\n", + "RETURNTOPLAY 206352\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TEAM_IDLEAGUE_IDSEASON_IDSTANDINGSDATECONFERENCETEAMGWLW_PCTHOME_RECORDROAD_RECORDRETURNTOPLAY
016106127430220222022-12-22WestDenver3019110.63310-39-8NaN
116106127630220222022-12-22WestMemphis3019110.63313-26-9NaN
216106127400220222022-12-22WestNew Orleans3119120.61313-46-8NaN
316106127560220222022-12-22WestPhoenix3219130.59414-45-9NaN
416106127460220222022-12-22WestLA Clippers3319140.57611-78-7NaN
..........................................
21033716106127650220132014-09-01EastDetroit8229530.35417-2412-29NaN
21033816106127380220132014-09-01EastBoston8225570.30516-259-32NaN
21033916106127530220132014-09-01EastOrlando8223590.28019-224-37NaN
21034016106127550220132014-09-01EastPhiladelphia8219630.23210-319-32NaN
21034116106127490220132014-09-01EastMilwaukee8215670.18310-315-36NaN
\n", + "

206352 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " TEAM_ID LEAGUE_ID SEASON_ID STANDINGSDATE CONFERENCE \\\n", + "0 1610612743 0 22022 2022-12-22 West \n", + "1 1610612763 0 22022 2022-12-22 West \n", + "2 1610612740 0 22022 2022-12-22 West \n", + "3 1610612756 0 22022 2022-12-22 West \n", + "4 1610612746 0 22022 2022-12-22 West \n", + "... ... ... ... ... ... \n", + "210337 1610612765 0 22013 2014-09-01 East \n", + "210338 1610612738 0 22013 2014-09-01 East \n", + "210339 1610612753 0 22013 2014-09-01 East \n", + "210340 1610612755 0 22013 2014-09-01 East \n", + "210341 1610612749 0 22013 2014-09-01 East \n", + "\n", + " TEAM G W L W_PCT HOME_RECORD ROAD_RECORD RETURNTOPLAY \n", + "0 Denver 30 19 11 0.633 10-3 9-8 NaN \n", + "1 Memphis 30 19 11 0.633 13-2 6-9 NaN \n", + "2 New Orleans 31 19 12 0.613 13-4 6-8 NaN \n", + "3 Phoenix 32 19 13 0.594 14-4 5-9 NaN \n", + "4 LA Clippers 33 19 14 0.576 11-7 8-7 NaN \n", + "... ... .. .. .. ... ... ... ... \n", + "210337 Detroit 82 29 53 0.354 17-24 12-29 NaN \n", + "210338 Boston 82 25 57 0.305 16-25 9-32 NaN \n", + "210339 Orlando 82 23 59 0.280 19-22 4-37 NaN \n", + "210340 Philadelphia 82 19 63 0.232 10-31 9-32 NaN \n", + "210341 Milwaukee 82 15 67 0.183 10-31 5-36 NaN \n", + "\n", + "[206352 rows x 13 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"--------- RANKING LIGA ----------\")\n", + "print(\"---------------------------------\")\n", + "display(df_ranking.isnull().sum())\n", + "display(df_ranking[df_ranking.isna().any(axis=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------- EQUIPOS ----------\n", + "----------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "LEAGUE_ID 0\n", + "TEAM_ID 0\n", + "MIN_YEAR 0\n", + "MAX_YEAR 0\n", + "ABBREVIATION 0\n", + "NICKNAME 0\n", + "YEARFOUNDED 0\n", + "CITY 0\n", + "ARENA 0\n", + "ARENACAPACITY 4\n", + "OWNER 0\n", + "GENERALMANAGER 0\n", + "HEADCOACH 0\n", + "DLEAGUEAFFILIATION 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LEAGUE_IDTEAM_IDMIN_YEARMAX_YEARABBREVIATIONNICKNAMEYEARFOUNDEDCITYARENAARENACAPACITYOWNERGENERALMANAGERHEADCOACHDLEAGUEAFFILIATION
20161061274020022019NOPPelicans2002New OrleansSmoothie King CenterNaNTom BensonTrajan LangdonAlvin GentryNo Affiliate
120161061275119762019BKNNets1976BrooklynBarclays CenterNaNJoe TsaiSean MarksKenny AtkinsonLong Island Nets
160161061275519492019PHI76ers1949PhiladelphiaWells Fargo CenterNaNJoshua HarrisElton BrandBrett BrownDelaware Blue Coats
170161061275619682019PHXSuns1968PhoenixTalking Stick Resort ArenaNaNRobert SarverJames JonesMonty WilliamsNorthern Arizona Suns
\n", + "
" + ], + "text/plain": [ + " LEAGUE_ID TEAM_ID MIN_YEAR MAX_YEAR ABBREVIATION NICKNAME \\\n", + "2 0 1610612740 2002 2019 NOP Pelicans \n", + "12 0 1610612751 1976 2019 BKN Nets \n", + "16 0 1610612755 1949 2019 PHI 76ers \n", + "17 0 1610612756 1968 2019 PHX Suns \n", + "\n", + " YEARFOUNDED CITY ARENA ARENACAPACITY \\\n", + "2 2002 New Orleans Smoothie King Center NaN \n", + "12 1976 Brooklyn Barclays Center NaN \n", + "16 1949 Philadelphia Wells Fargo Center NaN \n", + "17 1968 Phoenix Talking Stick Resort Arena NaN \n", + "\n", + " OWNER GENERALMANAGER HEADCOACH DLEAGUEAFFILIATION \n", + "2 Tom Benson Trajan Langdon Alvin Gentry No Affiliate \n", + "12 Joe Tsai Sean Marks Kenny Atkinson Long Island Nets \n", + "16 Joshua Harris Elton Brand Brett Brown Delaware Blue Coats \n", + "17 Robert Sarver James Jones Monty Williams Northern Arizona Suns " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(\"--------- EQUIPOS ----------\")\n", + "print(\"----------------------------\")\n", + "display(df_teams.isnull().sum())\n", + "display(df_teams[df_teams.isna().any(axis=1)])" + ] } ], "metadata": { @@ -1036,7 +2641,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6 (main, Aug 1 2022, 20:38:21) [GCC 5.4.0 20160609]" + "version": "3.10.6" }, "vscode": { "interpreter": {