import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
In this notebook we briefly review the normal equations, introduce the scikit-learn framework and basic evaluation metrics, and describe methods to visualize model fit.
To enable easy visualization of the model fitting process we will use a simple synthetic data set.
data = pd.read_csv("data/synthetic_data.csv")
data.head()
|   | X0 | X1 | Y |
|---|---|---|---|
| 0 | -1.254599 | 4.507143 | 0.669332 |
| 1 | 2.319939 | 0.986585 | 5.430523 |
| 2 | -3.439814 | -3.440055 | 7.640933 |
| 3 | -4.419164 | 3.661761 | -1.836007 |
| 4 | 1.011150 | 2.080726 | 6.148833 |
We can visualize the data in three dimensions:
data_scatter = go.Scatter3d(x=data["X0"], y=data["X1"], z=data["Y"],
mode="markers",
marker=dict(size=2))
layout = dict(margin=dict(l=0, r=0, t=0, b=0),
height=600,
scene = dict(xaxis_title='X0', yaxis_title='X1', zaxis_title='Y'))
go.Figure([data_scatter], layout)
Given a model of the form:

$$ \hat{\mathbb{Y}} = f_\theta(\mathbb{X}) = \mathbb{X} \theta $$

and taking the average squared loss over our data:

$$ R(\theta) = \frac{1}{n}\sum_{i=1}^n \left(\mathbb{Y}_i - (\mathbb{X}\theta)_i\right)^2 $$

In lecture, we showed that the $\hat{\theta}$ that minimizes this loss:

$$ \hat{\theta} = \arg\min_\theta R(\theta) $$

is given by the solution to the normal equations:

$$ \hat{\theta} = \left( \mathbb{X}^T \mathbb{X} \right)^{-1} \mathbb{X}^T \mathbb{Y} $$

We can directly implement this expression using standard linear algebra routines:
from numpy.linalg import inv
def ordinary_least_squares(X, Y):
    # Normal equations: theta_hat = (X^T X)^{-1} X^T Y
    return inv(X.T @ X) @ X.T @ Y
theta_hat = ordinary_least_squares(data[["X0", "X1"]], data[["Y"]])
theta_hat
|   | Y |
|---|---|
| 0 | 0.166823 |
| 1 | -0.603339 |
A more efficient way to solve the normal equations is to use the `solve` function. The linear system:

$$ \mathbb{X}^T \mathbb{X} \, \hat{\theta} = \mathbb{X}^T \mathbb{Y} $$

can be written as:

$$ A \theta = b $$

where $A=\mathbb{X}^T \mathbb{X}$ and $b=\mathbb{X}^T \mathbb{Y}$:
from numpy.linalg import solve
def ordinary_least_squares(X, Y):
    # Solve (X^T X) theta = X^T Y directly rather than forming an explicit inverse.
    return solve(X.T @ X, X.T @ Y)
theta_hat = ordinary_least_squares(data[["X0", "X1"]], data[["Y"]])
theta_hat
array([[ 0.16682271],
       [-0.60333908]])
Notice that this second implementation returns a NumPy array rather than a DataFrame. The first implementation's matrix operations dispatch through Pandas, which preserves the DataFrame type, while the `solve` routine is entirely in NumPy and returns a plain array.
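To see the difference, we can compare the return types of the two implementations. This is just a quick sketch for illustration, reusing the `inv`-based expression from above:

# Sketch: the inv-based computation keeps the DataFrame type, while solve returns an ndarray.
theta_inv = inv(data[["X0", "X1"]].T @ data[["X0", "X1"]]) @ data[["X0", "X1"]].T @ data[["Y"]]
print(type(theta_inv))   # <class 'pandas.core.frame.DataFrame'>
print(type(theta_hat))   # <class 'numpy.ndarray'>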
We can use our $\hat{\theta}$ to make predictions:

$$ \hat{\mathbb{Y}} = \mathbb{X} \hat{\theta} $$

data["Yhat"] = data[["X0", "X1"]] @ theta_hat
data
|   | X0 | X1 | Y | Yhat |
|---|---|---|---|---|
| 0 | -1.254599 | 4.507143 | 0.669332 | -2.928631 |
| 1 | 2.319939 | 0.986585 | 5.430523 | -0.208227 |
| 2 | -3.439814 | -3.440055 | 7.640933 | 1.501680 |
| 3 | -4.419164 | 3.661761 | -1.836007 | -2.946501 |
| 4 | 1.011150 | 2.080726 | 6.148833 | -1.086700 |
| ... | ... | ... | ... | ... |
| 495 | 2.994159 | 1.946965 | 5.910233 | -0.675186 |
| 496 | -2.278549 | 0.902307 | 4.166517 | -0.924511 |
| 497 | -1.390261 | -4.084179 | 8.897440 | 2.232218 |
| 498 | 4.173136 | -3.631814 | 4.823716 | 2.887389 |
| 499 | 4.502374 | -0.539942 | 5.124975 | 1.076866 |

500 rows × 4 columns
How good are our predictions? We can plot $Y$ vs. $\hat{Y}$:
fig = px.scatter(data, x="Yhat", y="Y")
fig.add_trace(go.Scatter(x=[-5,5], y=[-5,5], name="y=yhat"))
fig.update_layout(xaxis_title ="Yhat")
We can also plot the residual distribution.
data['residuals'] = data['Y'] - data['Yhat']
data
|   | X0 | X1 | Y | Yhat | residuals |
|---|---|---|---|---|---|
| 0 | -1.254599 | 4.507143 | 0.669332 | -2.928631 | 3.597963 |
| 1 | 2.319939 | 0.986585 | 5.430523 | -0.208227 | 5.638749 |
| 2 | -3.439814 | -3.440055 | 7.640933 | 1.501680 | 6.139253 |
| 3 | -4.419164 | 3.661761 | -1.836007 | -2.946501 | 1.110494 |
| 4 | 1.011150 | 2.080726 | 6.148833 | -1.086700 | 7.235534 |
| ... | ... | ... | ... | ... | ... |
| 495 | 2.994159 | 1.946965 | 5.910233 | -0.675186 | 6.585419 |
| 496 | -2.278549 | 0.902307 | 4.166517 | -0.924511 | 5.091027 |
| 497 | -1.390261 | -4.084179 | 8.897440 | 2.232218 | 6.665222 |
| 498 | 4.173136 | -3.631814 | 4.823716 | 2.887389 | 1.936327 |
| 499 | 4.502374 | -0.539942 | 5.124975 | 1.076866 | 4.048109 |

500 rows × 5 columns
px.histogram(data, x='residuals', nbins=100)
For the synthetic data set we can visualize the model in three dimensions. To do this we will use the following plotting function, which evaluates the model at evenly spaced grid points in `X0` and `X1`.
def make_surface(f, X, grid_points=30, name="Linear Model"):
    # Build an evenly spaced grid spanning the observed range of each feature.
    u = np.linspace(X[:, 0].min(), X[:, 0].max(), grid_points)
    v = np.linspace(X[:, 1].min(), X[:, 1].max(), grid_points)
    xu, xv = np.meshgrid(u, v)
    # Evaluate the model at every grid point and render the predictions as a surface.
    X = np.vstack((xu.flatten(), xv.flatten())).transpose()
    z = f(X)
    return go.Surface(x=xu, y=xv, z=z.reshape(xu.shape), opacity=0.8, name=name, showscale=False)
Plotting the data and the plane:
simple_plane = make_surface(
lambda X: X @ theta_hat,
data[["X0", "X1"]].to_numpy(),
grid_points=5,
name="plane")
go.Figure([data_scatter, simple_plane], layout)
Notice that the plane is constrained to pass through the origin. To fix this, we will need to add a constant term to the model. However, to simplify the process, let's switch to using the scikit-learn Python library for our modeling.
In this class, we introduce the normal equations as well as several other algorithms to provide some insight into how these techniques work and, perhaps more importantly, how they fail. However, in practice you will seldom need to implement the core algorithms yourself and will instead use various machine learning software packages. In this class, we will focus on the widely used scikit-learn package.
Scikit-learn, or as the cool kids call it sklearn (pronounced s-k-learn), is a large package of useful machine learning algorithms. For this lecture, we will use the `LinearRegression` model in the `linear_model` module. The fact that there is an entire module with many different models within the `linear_model` module might suggest that we have a lot to cover still (we do!).
What you should know about `sklearn` models:

model = SuperCoolModelType(args)
model.fit(df[['X1', 'X2']], df[['Y']])
model.predict(df2[['X1', 'X2']])

The neat part about sklearn is that most models behave like this, so if you want to try a cool new model you just change the class of model you are using.
We import the `LinearRegression` model:
from sklearn.linear_model import LinearRegression
Create an instance of the model. This time we will add an intercept term to the model directly.
model = LinearRegression(fit_intercept=True)
Fit the model by passing it the $X$ and $Y$ data:
model.fit(data[["X0", "X1"]], data[["Y"]])
LinearRegression()
model.coef_
array([[ 0.34667351, -0.68472445]])
model.intercept_
array([4.87661395])
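Adding an intercept is equivalent to augmenting $\mathbb{X}$ with a column of ones. As a quick sketch (an aside, not needed for the rest of the notebook), we can check that our `solve`-based implementation recovers the same parameters:

# Sketch: reproduce the sklearn fit by adding an explicit column of ones for the intercept term.
X_ones = np.hstack([np.ones((len(data), 1)), data[["X0", "X1"]].to_numpy()])
theta_with_intercept = solve(X_ones.T @ X_ones, X_ones.T @ data[["Y"]].to_numpy())
print(theta_with_intercept)  # first entry ≈ model.intercept_, remaining entries ≈ model.coef_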
Make some predictions and even save them back to the original DataFrame:
data['Yhat_sklearn'] = model.predict(data[["X0", "X1"]])
data
|   | X0 | X1 | Y | Yhat | residuals | Yhat_sklearn |
|---|---|---|---|---|---|---|
| 0 | -1.254599 | 4.507143 | 0.669332 | -2.928631 | 3.597963 | 1.355527 |
| 1 | 2.319939 | 0.986585 | 5.430523 | -0.208227 | 5.638749 | 5.005337 |
| 2 | -3.439814 | -3.440055 | 7.640933 | 1.501680 | 6.139253 | 6.039611 |
| 3 | -4.419164 | 3.661761 | -1.836007 | -2.946501 | 1.110494 | 0.837309 |
| 4 | 1.011150 | 2.080726 | 6.148833 | -1.086700 | 7.235534 | 3.802429 |
| ... | ... | ... | ... | ... | ... | ... |
| 495 | 2.994159 | 1.946965 | 5.910233 | -0.675186 | 6.585419 | 4.581475 |
| 496 | -2.278549 | 0.902307 | 4.166517 | -0.924511 | 5.091027 | 3.468870 |
| 497 | -1.390261 | -4.084179 | 8.897440 | 2.232218 | 6.665222 | 7.191185 |
| 498 | 4.173136 | -3.631814 | 4.823716 | 2.887389 | 1.936327 | 8.810121 |
| 499 | 4.502374 | -0.539942 | 5.124975 | 1.076866 | 4.048109 | 6.807179 |

500 rows × 6 columns
Analyzing the fit again:
fig = px.scatter(data, x="Yhat_sklearn", y="Y")
fig.add_trace(go.Scatter(x=[0,10], y=[0,10], name="y=yhat"))
fig.update_layout(xaxis_title ="yhat")
We can also plot the residual distribution.
data['residuals_sklearn'] = data['Y'] - data['Yhat_sklearn']
data
|   | X0 | X1 | Y | Yhat | residuals | Yhat_sklearn | residuals_sklearn |
|---|---|---|---|---|---|---|---|
| 0 | -1.254599 | 4.507143 | 0.669332 | -2.928631 | 3.597963 | 1.355527 | -0.686195 |
| 1 | 2.319939 | 0.986585 | 5.430523 | -0.208227 | 5.638749 | 5.005337 | 0.425186 |
| 2 | -3.439814 | -3.440055 | 7.640933 | 1.501680 | 6.139253 | 6.039611 | 1.601322 |
| 3 | -4.419164 | 3.661761 | -1.836007 | -2.946501 | 1.110494 | 0.837309 | -2.673316 |
| 4 | 1.011150 | 2.080726 | 6.148833 | -1.086700 | 7.235534 | 3.802429 | 2.346404 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 495 | 2.994159 | 1.946965 | 5.910233 | -0.675186 | 6.585419 | 4.581475 | 1.328758 |
| 496 | -2.278549 | 0.902307 | 4.166517 | -0.924511 | 5.091027 | 3.468870 | 0.697647 |
| 497 | -1.390261 | -4.084179 | 8.897440 | 2.232218 | 6.665222 | 7.191185 | 1.706255 |
| 498 | 4.173136 | -3.631814 | 4.823716 | 2.887389 | 1.936327 | 8.810121 | -3.986405 |
| 499 | 4.502374 | -0.539942 | 5.124975 | 1.076866 | 4.048109 | 6.807179 | -1.682204 |

500 rows × 7 columns
px.histogram(data['residuals_sklearn'], nbins=100)
plane_with_bias = make_surface(model.predict,
data[["X0", "X1"]].to_numpy(),
grid_points=5,
name="Plane with Bias")
go.Figure([data_scatter, simple_plane, plane_with_bias], layout)
As we tune the features in our model it will be important to define some useful error metrics.
from sklearn.metrics import mean_squared_error, mean_absolute_error
print("Mean Squared Error:",
mean_squared_error(data["Y"], data["Yhat_sklearn"]))
Mean Squared Error: 3.7756044320694873
print("Mean Absolute Error:",
mean_absolute_error(data["Y"], data["Yhat_sklearn"]))
Mean Absolute Error: 1.5446935067491216
print("Root Mean Squared Error:",
np.sqrt(mean_squared_error(data["Y"], data["Yhat_sklearn"])))
Root Mean Squared Error: 1.9430914626104163
print("Standard Deviation of Residuals:",
np.std(data['residuals_sklearn']))
Standard Deviation of Residuals: 1.9430914626104165
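The RMSE matches the standard deviation of the residuals here because the model includes an intercept, which forces the residuals to have (numerically) zero mean. As a quick sketch, the same metrics can also be computed directly with NumPy:

# Sketch: recompute the metrics directly from the residuals.
resid = data["residuals_sklearn"].to_numpy()
print("Mean Absolute Error:", np.mean(np.abs(resid)))
print("Root Mean Squared Error:", np.sqrt(np.mean(resid**2)))
print("Mean residual:", resid.mean())  # ≈ 0, so the RMSE equals the residual standard deviation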
As we play with the model, we might want a standard visualization:
from plotly.subplots import make_subplots
def evaluate_model(predict, grid_points=30):
# Compute Y and Yhat
Y = data["Y"].to_numpy()
Yhat = predict(data[["X0", "X1"]].to_numpy()).flatten()
# Compute and print error metrics
print("Mean Absolute Error:", mean_absolute_error(Y, Yhat))
print("Root Mean Squared Error:", np.sqrt(mean_squared_error(Y, Yhat)))
# Make Side by Side Plots
fig = make_subplots(rows=1, cols=2,
specs=[[{'type': 'scene'}, {'type': 'xy'}]])
fig.add_trace(data_scatter, row=1, col=1)
surf = make_surface(predict,
data[["X0", "X1"]].to_numpy(),
grid_points=grid_points)
fig.add_trace(surf, row=1, col=1)
fig.update_layout(layout)
fig.add_trace(go.Scatter(x=Yhat, y=Y, mode="markers"), row=1, col=2)
ymin = np.min([np.min(Y), np.min(Yhat)])
ymax = np.max([np.max(Y), np.max(Yhat)])
fig.add_trace(go.Scatter(x=[ymin,ymax], y=[ymin,ymax], name="y=yhat"))
fig.update_layout(xaxis_title ="yhat", yaxis_title="Y")
fig.update_layout(showlegend=False)
fig.show()
Examining our latest model:
evaluate_model(model.predict, grid_points=5)
Mean Absolute Error: 1.5446935067491216
Root Mean Squared Error: 1.9430914626104163
Our first model without the intercept term:
no_intercept_model = LinearRegression(fit_intercept=False)
no_intercept_model.fit(data[["X0", "X1"]], data[["Y"]])
LinearRegression(fit_intercept=False)
evaluate_model(no_intercept_model.predict, grid_points=5)
Mean Absolute Error: 4.814916851370774
Root Mean Squared Error: 5.217264403053303
Examining the above data, we see that there is some periodic structure as well as some curvature. Can we fit this data with a linear model?