by Suraj Rampure
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import as px
import plotly.graph_objs as go
from scipy.optimize import minimize
import sklearn.linear_model as lm
# Notebook-wide matplotlib defaults: 4x4 inch figures at 150 DPI, line width 3.
plt.rcParams.update({
    'figure.figsize': (4, 4),
    'figure.dpi': 150,
    'lines.linewidth': 3,
})
So far, our logistic regression model predicts probabilities. But we originally set out on a mission to create a classifier. How can we use our predicted probabilities to create classifications?
Let's get back the NBA data we had last time.
# Load the raw game log. Presumably each GAME_ID appears on two rows, one per
# team -- TODO confirm against nba.csv.
df = pd.read_csv('nba.csv')

# Encode the win/loss flag as 1/0 with an explicit mapping. Replacing strings
# with ints via Series.replace relies on implicit object -> int downcasting,
# which recent pandas deprecates; without it the column stays object dtype.
# Any value other than "W"/"L" becomes NaN instead of passing through.
WIN_LOSS_CODES = {"W": 1, "L": 0}
df["WON"] = df["WL"].map(WIN_LOSS_CODES)

# Build one row per game: columns from the first row of each GAME_ID keep
# their names, columns from the last row get the "_OPP" suffix.
# Note: GroupBy.first()/last() pick the first/last non-null value per column.
one_team = df.groupby("GAME_ID").first()
opponent = df.groupby("GAME_ID").last()
games = one_team.merge(
    opponent,
    left_index=True,
    right_index=True,
    suffixes=["", "_OPP"],
)

# Features are differences between the team's stat and the opponent's stat.
games["FG_PCT_DIFF"] = games["FG_PCT"] - games["FG_PCT_OPP"]
games["PF_DIFF"] = games["PF"] - games["PF_OPP"]

# Response variable: 1 if the (non-suffixed) team won, 0 if it lost.
games["WON"] = games["WL"].map(WIN_LOSS_CODES)

# Keep only the identifying columns, the response, and the two features.
games = games[['TEAM_NAME', 'MATCHUP', 'WON', 'FG_PCT_DIFF', 'PF_DIFF']]
Let's call this model `basic_model`, since it only has one feature. (Eventually, we will use more features.)
It is the same model we fit in the last lecture.
# Logistic regression with no regularization and no intercept term, fit on the
# single feature FG_PCT_DIFF to predict WON.
# NOTE(review): newer scikit-learn releases (1.2+) deprecate the string 'none'
# in favor of penalty=None -- confirm against the installed version.
basic_model = lm.LogisticRegression(penalty='none', fit_intercept=False, solver='lbfgs')
basic_model.fit(games[['FG_PCT_DIFF']], games['WON'])
As before, we can use `.predict_proba` to get the predicted probabilities for each class under our logistic regression model.
We can plot our model, too:
# Feature values in ascending order, shaped as a single-column 2-D array
# (n_games, 1) so they can be passed to predict_proba.
x_sorted = games['FG_PCT_DIFF'].sort_values().to_numpy().reshape(-1, 1)

# Second column of predict_proba: the predicted probability of the second
# class (WON == 1, given the 0/1 labels) at each sorted feature value.
basic_model_ps_sorted = basic_model.predict_proba(x_sorted)[:, 1]
# Scatter trace of the observed outcomes (WON) against the feature.
# NOTE(review): the original call was never closed, so a trailing keyword
# argument (possibly marker styling) may have been lost -- confirm against
# the source notebook.
points = go.Scatter(
    name='true observations',
    x=games['FG_PCT_DIFF'],
    y=games['WON'],
    mode='markers',
)

# Line trace of the model's predicted probabilities over the sorted feature
# values; flatten() turns the (n, 1) column back into a 1-D x axis.
lr_line = go.Scatter(
    name='Logistic Regression model',
    x=x_sorted.flatten(),
    y=basic_model_ps_sorted,
)

# Overlay the observations and the fitted curve in one figure.
fig = go.Figure([points, lr_line])