In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Notebook-wide Plotly styling: give the template registered as "plotly" the
# Vivid qualitative palette as its colorway (the colour cycle used for traces).
pio.templates["plotly"].layout.colorway = px.colors.qualitative.Vivid
# Default width for every Plotly Express figure (presumably pixels — Plotly's
# usual unit for figure size); individual calls can still pass their own width.
px.defaults.width = 800
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import sklearn.linear_model as lm
We'll continue with the `games` dataset from last lecture.
In [2]:
# Load the raw table; the code below treats it as holding several rows per
# GAME_ID (presumably one row per team per game — confirm against the CSV).
basketball = pd.read_csv("data/nba.csv")

# Collapse every game to a "first" entry and a "last" entry, both indexed by GAME_ID.
# NOTE(review): GroupBy.first()/last() pick the first/last *non-null* value per
# column, which equals the first/last row only when the data has no missing values.
per_game = basketball.groupby("GAME_ID")
first_team = per_game.first()
second_team = per_game.last()

# Put both entries side by side: columns from the first entry keep their names,
# columns from the last entry get an "_OPP" suffix.
games = first_team.merge(
    second_team,
    left_index=True,
    right_index=True,
    suffixes=["", "_OPP"],
)

# Feature: this team's field-goal percentage minus the opponent's.
games["GOAL_DIFF"] = games["FG_PCT"].sub(games["FG_PCT_OPP"])
# Label: 1 when the WL column reads "W", otherwise 0.
games["WON"] = games["WL"].eq("W").astype(int)

# Keep only the columns used in the rest of the analysis.
kept_columns = ["TEAM_NAME", "TEAM_NAME_OPP", "MATCHUP", "WON", "WL", "AST", "GOAL_DIFF"]
games = games[kept_columns]
games
Out[2]:
TEAM_NAME | TEAM_NAME_OPP | MATCHUP | WON | WL | AST | GOAL_DIFF | |
---|---|---|---|---|---|---|---|
GAME_ID | |||||||
21700001 | Boston Celtics | Cleveland Cavaliers | BOS @ CLE | 0 | L | 24 | -0.049 |
21700002 | Golden State Warriors | Houston Rockets | GSW vs. HOU | 0 | L | 34 | 0.053 |
21700003 | Charlotte Hornets | Detroit Pistons | CHA @ DET | 0 | L | 16 | -0.030 |
21700004 | Indiana Pacers | Brooklyn Nets | IND vs. BKN | 1 | W | 29 | 0.041 |
21700005 | Orlando Magic | Miami Heat | ORL vs. MIA | 1 | W | 22 | 0.042 |
... | ... | ... | ... | ... | ... | ... | ... |
21701226 | New Orleans Pelicans | San Antonio Spurs | NOP vs. SAS | 1 | W | 30 | 0.189 |
21701227 | Oklahoma City Thunder | Memphis Grizzlies | OKC vs. MEM | 1 | W | 32 | 0.069 |
21701228 | LA Clippers | Los Angeles Lakers | LAC vs. LAL | 0 | L | 27 | 0.017 |
21701229 | Utah Jazz | Portland Trail Blazers | UTA @ POR | 0 | L | 18 | -0.090 |
21701230 | Houston Rockets | Sacramento Kings | HOU @ SAC | 0 | L | 11 | -0.097 |
1230 rows × 7 columns
As before, we will use the `"GOAL_DIFF"` feature (the team's field-goal percentage minus its opponent's) to classify whether a team won (1) or lost (0) its game.
In [3]:
# Re-seed NumPy's global generator so the jitter below is identical on every run.
np.random.seed(42)

# Add uniform noise in [-0.1, 0.1) to the 0/1 label so that points sharing the
# same outcome do not all sit on one horizontal line in the plot.
games["JitterWON"] = games["WON"] + np.random.uniform(
    low=-0.1, high=0.1, size=games.shape[0]
)

# Jittered outcome against the feature, coloured by the W/L label.
px.scatter(games, x="GOAL_DIFF", y="JitterWON", color="WL")