import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
In this notebook, we will use basic feature transformations (feature engineering) to model non-linear relationships with linear models.
To make the model-fitting process easy to visualize, we will use a simple synthetic dataset.
data = pd.read_csv("data/synthetic_data.csv")
data.head()
|   | X0 | X1 | Y |
|---|---|---|---|
| 0 | -1.254599 | 4.507143 | 0.669332 |
| 1 | 2.319939 | 0.986585 | 5.430523 |
| 2 | -3.439814 | -3.440055 | 7.640933 |
| 3 | -4.419164 | 3.661761 | -1.836007 |
| 4 | 1.011150 | 2.080726 | 6.148833 |
We can visualize the data in three dimensions:
data_scatter = go.Scatter3d(x=data["X0"], y=data["X1"], z=data["Y"],
                            mode="markers",
                            marker=dict(size=2))
layout = dict(margin=dict(l=0, r=0, t=0, b=0),
              height=600,
              scene=dict(xaxis_title='X0', yaxis_title='X1', zaxis_title='Y'))
go.Figure([data_scatter], layout)
We normally start with a basic linear model with an intercept term.
model = LinearRegression(fit_intercept=True)
model.fit(data[["X0", "X1"]], data[["Y"]])
LinearRegression()
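Before evaluating the fit, we can peek at the learned parameters (the exact values are not shown here and will depend on the data):
# Inspect the fitted intercept and the slope for each of X0 and X1.
print("intercept:", model.intercept_)
print("coefficients:", model.coef_)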
To track the performance of our models, we use the following plotting functions.
def make_surface(predict, grid_points=30, name="Linear Model"):
    """
    Constructs a 3d surface from a prediction function.
    """
    # Evaluate the prediction function on a grid covering the data range.
    u = np.linspace(data["X0"].min(), data["X0"].max(), grid_points)
    v = np.linspace(data["X1"].min(), data["X1"].max(), grid_points)
    xu, xv = np.meshgrid(u, v)
    X = np.vstack((xu.flatten(), xv.flatten())).transpose()
    z = predict(X)
    return go.Surface(x=xu, y=xv, z=z.reshape(xu.shape), opacity=0.8,
                      name=name, showscale=False)
def evaluate_model(predict, grid_points=30):
    """
    Visualizes the predict function and reports its error metrics.
    """
    # Compute Y and Yhat
    Y = data["Y"].to_numpy()
    Yhat = predict(data[["X0", "X1"]].to_numpy()).flatten()
    # Compute and print error metrics
    print("Mean Absolute Error:", mean_absolute_error(Y, Yhat))
    print("Root Mean Squared Error:", np.sqrt(mean_squared_error(Y, Yhat)))
    # Make side-by-side plots: the fitted surface and Y versus Yhat
    fig = make_subplots(rows=1, cols=2,
                        specs=[[{'type': 'scene'}, {'type': 'xy'}]])
    fig.add_trace(data_scatter, row=1, col=1)
    fig.add_trace(make_surface(predict, grid_points=grid_points), row=1, col=1)
    fig.update_layout(layout)
    fig.add_trace(go.Scatter(x=Yhat, y=Y, mode="markers"), row=1, col=2)
    # Add the y = yhat reference line spanning the observed range
    ymin = np.min([np.min(Y), np.min(Yhat)])
    ymax = np.max([np.max(Y), np.max(Yhat)])
    fig.add_trace(go.Scatter(x=[ymin, ymax], y=[ymin, ymax], name="y=yhat"),
                  row=1, col=2)
    fig.update_layout(xaxis_title="yhat", yaxis_title="Y", showlegend=False)
    fig.show()
Examining our latest model:
evaluate_model(model.predict, grid_points=5)
Mean Absolute Error: 1.5446935067491216
Root Mean Squared Error: 1.9430914626104163
Examining the above data, we see that there is some periodic structure as well as some curvature. Can we fit this data with a linear model?
Linear models are linear combinations of features. These models are therefore linear in the parameters but not necessarily in the underlying data. We can encode non-linearity in our data through the use of feature functions:

$$
f_\theta\left( x \right) = \phi(x)^T \theta = \sum_{j=0}^{p} \phi(x)_j \theta_j
$$

where $\phi$ is an arbitrary function from $x \in \mathbb{R}^d$ to $\phi(x) \in \mathbb{R}^{p+1}$. We could also write $\phi$ as a collection of separate feature functions $\phi_j$, each mapping $x \in \mathbb{R}^d$ to $\phi_j(x) \in \mathbb{R}$:

$$
\phi(x) = \left[\phi_0(x), \phi_1(x), \ldots, \phi_p(x) \right]
$$

The design of these feature functions plays a critical role in both how we capture prior knowledge and our ability to fit complicated data.
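For instance, the basic linear model above is just the identity feature map with an added constant column. A minimal sketch (the helper name phi_linear is ours, not part of the original notebook):
def phi_linear(X):
    # Identity feature map with a constant column: phi(x) = [1, x0, x1].
    # Fitting LinearRegression(fit_intercept=False) on these features
    # reproduces the basic model with an intercept term above.
    return np.hstack([np.ones((len(X), 1)), np.asarray(X)])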
In the following, we will add a few different sine functions at different frequencies and offsets.
$$
\sin\left(2 \pi \cdot \textbf{frequency} \cdot X + \textbf{phase}\right)
$$

Note that for this to remain a linear model, we cannot make the frequency or phase of the sine function a model parameter. These are instead hyperparameters of the model that would need to be tuned using either domain knowledge or other search procedures.
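For example, a frequency hyperparameter could be chosen by a simple grid search against held-out data. The sketch below is illustrative: the phi_sine helper, the train/validation split, and the candidate frequencies are our assumptions, not part of this notebook. Afterwards, we instead simply fix the frequencies and phases by hand.
# Illustrative sketch: tune a single frequency hyperparameter by grid search.
from sklearn.model_selection import train_test_split

def phi_sine(X, frequency):
    # One sine feature per input column at the given (fixed) frequency.
    return np.hstack([X, np.sin(2 * np.pi * frequency * X)])

X_tr, X_val, Y_tr, Y_val = train_test_split(
    data[["X0", "X1"]].to_numpy(), data["Y"].to_numpy(), random_state=42)

best_frequency, best_rmse = None, np.inf
for frequency in [0.25, 0.5, 1.0, 2.0]:  # candidate grid (an assumption)
    m = LinearRegression().fit(phi_sine(X_tr, frequency), Y_tr)
    rmse = np.sqrt(mean_squared_error(
        Y_val, m.predict(phi_sine(X_val, frequency))))
    if rmse < best_rmse:
        best_frequency, best_rmse = frequency, rmse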
def phi_periodic(X):
    # Augment the original features with sines at two frequencies and
    # two phase offsets; each block contributes one column per input feature.
    return np.hstack([
        X,
        np.sin(X),
        np.sin(10 * X),
        np.sin(X + 1),
        np.sin(10 * X + 1),
    ])
Phi = phi_periodic(data[["X0", "X1"]])
Phi
array([[-1.25459881,  4.50714306, -0.95042469, ..., -0.7004603 ,  0.85230815,  0.86864421],
       [ 2.31993942,  0.98658484,  0.7322727 , ...,  0.91479811, -0.80361643, -0.99159739],
       [-3.4398136 , -3.4400548 ,  0.29382014, ..., -0.64539314, -0.91655664, -0.9155894 ],
       ...,
       [-1.39026103, -4.08417927, -0.98374772, ..., -0.05738185, -0.32993969, -0.84088255],
       [ 4.17313575, -3.63181369, -0.85809239, ..., -0.48798433, -0.94928048,  0.68936906],
       [ 4.50237354, -0.53994227, -0.9780277 , ...,  0.44399983,  0.89127739,  0.95142449]])
Phi.shape
(500, 10)
Each of the five blocks in phi_periodic contributes one column per input feature, so the two inputs become $5 \times 2 = 10$ features.
model_phi = LinearRegression()
model_phi.fit(Phi, data["Y"])
LinearRegression()
Notice that to make predictions we need to apply the $\phi$ feature function to the data first.
evaluate_model(lambda X: model_phi.predict(phi_periodic(X)))
Mean Absolute Error: 1.2542207074699765
Root Mean Squared Error: 1.640910849605613
There is still some curvature. We can introduce additional polynomial terms to try to improve the fit of our model.
def phi_curved(X):
    # Add polynomial features: the squared terms, the pairwise
    # interaction X0 * X1, and the cubed terms.
    return np.hstack([
        X * X,
        np.expand_dims(np.prod(X, axis=1), 1),
        X ** 3,
    ])
Can you guess the new number of features?
phi_curved(data[["X0", "X1"]]).shape
(500, 5)
The two squared terms, one interaction term, and two cubic terms give 5 features in total.
Let's build a feature function that combines our features so far:
def phi_curved_and_periodic(X):
    # Stack the polynomial and periodic feature maps side by side.
    return np.hstack([phi_curved(X), phi_periodic(X)])
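Stacking the two maps should give $5 + 10 = 15$ features, which matches the number of coefficients reported below. A quick sanity check:
phi_curved_and_periodic(data[["X0", "X1"]]).shape  # (500, 15)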
crazy_model = LinearRegression()
crazy_model.fit(phi_curved_and_periodic(data[["X0", "X1"]]), data[["Y"]])
crazy_model.coef_
array([[ 1.91066267e-03, -2.11589768e-03,  1.97448326e-01,  7.62814553e-04,
         2.25383620e-03,  2.44286423e-01, -5.32931863e-01, -6.21987292e-01,
         1.09903384e+00, -1.01014457e-02,  3.50670902e-02,  1.19722668e+00,
        -6.20322912e-02,  2.83608334e-02, -3.23123574e-02]])
evaluate_model(lambda X: crazy_model.predict(phi_curved_and_periodic(X)))
Mean Absolute Error: 0.15052783249777388
Root Mean Squared Error: 0.18925011553598578