Lecture 27 – Data 100, Fall 2024¶


In [10]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_circles, make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import SGD, Adam

from ipywidgets import interact, IntSlider, FloatSlider, Dropdown, Checkbox
import plotly.graph_objects as go
import plotly.express as px

Binary Classification Task on Circles Dataset¶

Step 0: Getting the Data¶

First, let's generate some data. We will use Scikit-learn's make_circles to create a dataset that isn't linearly separable.

The dataset consists of two concentric circles where the color of each point represents which class it belongs to.

In [11]:
from sklearn.datasets import make_circles
X, y = make_circles(n_samples=1000, noise=0.2, factor=0.5, random_state=42)
data_fig = go.FigureWidget()
data_fig.add_trace(go.Scatter(x=X[y == 0, 0], y=X[y == 0, 1], 
                              mode='markers', marker=dict(color='red'), 
                              name='0'))   
data_fig.add_trace(go.Scatter(x=X[y == 1, 0], y=X[y == 1, 1], 
                              mode='markers', marker=dict(color='blue'), 
                              name='1'))  
data_fig.update_layout(width=500, height=500,
                       xaxis_range=[-2, 2], yaxis_range=[-2, 2],
                       xaxis_title='Feature 1',
                       yaxis_title='Feature 2',
                       title='make_circles Dataset')
data_fig.show()
print("The first 5 training datapoints:", X[:5])
print("The labels for the first 5 datapoints:", y[:5])
The first 5 training datapoints: [[ 0.36229708  0.28247097]
 [-0.27207715  0.23564621]
 [-0.64072517  0.54943623]
 [-0.56693828  0.24588771]
 [ 0.47106162 -0.88152647]]
The labels for the first 5 datapoints: [1 1 1 1 0]

Convert the data into PyTorch Tensor format and split it into training and test sets.

In [12]:
def make_tensors(X, y):
    from torch.utils.data import random_split, TensorDataset
    data = TensorDataset(torch.tensor(X, dtype=torch.float32), 
                         torch.tensor(y, dtype=torch.long))
    torch.manual_seed(140)
    train_data, test_data = random_split(data, [0.8, 0.2]) 
    return train_data, test_data

training_data, test_data = make_tensors(X, y)
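As a quick sanity check (a sketch, not part of the lecture pipeline), each element of these datasets is an (x, y) tensor pair, and the 0.8/0.2 split of our 1,000 points gives 800 training and 200 test examples:

In [ ]:
# Sketch: inspect one training example and the split sizes.
x0, y0 = training_data[0]
print(x0, y0)                               # a float32 feature pair and its long label
print(len(training_data), len(test_data))   # 800 and 200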

Step 1: Defining the Model¶

In [13]:
# Logistic Regression Model
class LogisticRegressionModel(nn.Module):
    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        p = torch.sigmoid(self.linear(x))
        return torch.cat([1 - p, p], dim=1)
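Before defining the loss, it can help to sanity-check the output format (a quick sketch): each input maps to two probabilities that sum to 1, one per class.

In [ ]:
# Sketch: pass a few raw points through an untrained model.
untrained = LogisticRegressionModel()
with torch.no_grad():
    print(untrained(torch.tensor(X[:3], dtype=torch.float32)))  # each row sums to 1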

Step 2: Define the Loss¶

Here we are using cross-entropy loss, since we made the model return a probability for each class. One subtlety worth noting: nn.CrossEntropyLoss expects raw logits and applies log-softmax internally, so passing it probabilities (as we do here) still yields a usable training objective, but not exactly the cross entropy of those probabilities. The sketch below makes this concrete.

In [14]:
loss_fn = nn.CrossEntropyLoss()
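As a minimal sketch (not part of the lecture pipeline), compare what nn.CrossEntropyLoss computes on a row of probabilities against the exact cross entropy -log p:

In [ ]:
# Sketch: CrossEntropyLoss re-applies log-softmax to whatever it is given.
probs = torch.tensor([[0.9, 0.1]])     # model "probabilities" for classes 0 and 1
target = torch.tensor([0])             # the true class is 0
print(loss_fn(probs, target).item())   # log-softmax applied again: about 0.371
print(-torch.log(probs[0, 0]).item())  # exact cross entropy -log(0.9): about 0.105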

Step 3: Optimize the Loss¶

In [15]:
def plot_decision_boundary_pytorch(model, num_points=100, probs=True):
    # Generate a grid of points
    xx, yy = torch.meshgrid(torch.linspace(-4, 4, num_points),
                            torch.linspace(-4, 4, num_points),
                            indexing='ij')
    grid = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1)], dim=1)
    with torch.no_grad():
        preds = model(grid) 
        num_classes = preds.shape[1] 
        if num_classes > 2: # support for multiclass
            preds = torch.argmax(preds, axis=1).reshape(xx.shape).T
            return go.Contour(x=xx[:, 0], y=yy[0], z=preds, 
                      colorscale=[px.colors.qualitative.Plotly[i] for i in range(num_classes)], 
                      opacity = 0.5, showscale=False)
        else: # Binary classification case (red/blue)
            if probs:
                preds = preds[:,1].reshape(xx.shape).T
            else:
                preds = (preds[:, 1] > 0.5).float().reshape(xx.shape).T
            return go.Contour(x=xx[:, 0], y=yy[0], z=preds, 
                              colorscale=[[0, 'red'], [1, 'blue']], 
                              opacity = 0.5, showscale=False)
In [16]:
def optimize_model(train_dataset, 
                   test_dataset,
                   model, loss_fn, 
                   pred_fig, loss_fig,
                   batch_size=64,
                   learning_rate=0.01,
                   nepochs=50,
                   sleep_time=0.2):
    import time
    
    from torch.utils.data import DataLoader
    # Create a dataloader for training
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define the optimizer (this is the update rule)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    test_loss_curve = []
    for epoch in range(nepochs):
        # Loop through all the batches
        for batch, (X, y) in enumerate(train_loader):
            # Zero the gradients to start the next step
            optimizer.zero_grad()
            # Compute prediction and loss
            pred = model(X)
            loss = loss_fn(pred, y)
            # Backpropagation (compute the gradient)
            loss.backward()
            # Update the parameters using the optimizer's update rule
            optimizer.step()
            

        # Evaluate the model on the test data
        # In practice, we often do this in batches too since the data is too big to fit in memory
        with torch.no_grad():
            test_loss_sum = 0.0
            for X_test, y_test in test_loader:
                test_pred = model(X_test)
                test_loss = loss_fn(test_pred, y_test)
                test_loss_sum += test_loss.item()
            num_test_batches = len(test_loader)
            test_loss_curve.append(test_loss_sum/num_test_batches)
            
            # Visualization Code
            boundary = plot_decision_boundary_pytorch(model, probs=True)
            pred_fig.data[-1].z = boundary.z
            loss_fig.data[0].x = np.arange(epoch+1)
            loss_fig.data[0].y = test_loss_curve
            if sleep_time > 0:
                time.sleep(sleep_time)
In [17]:
from ipywidgets import HBox
pred_fig = go.FigureWidget(data=data_fig.data, layout=data_fig.layout)
loss_fig = go.FigureWidget()
loss_fig.add_trace(go.Scatter(x=[], y=[], mode='lines', name='Test Loss'))
loss_fig.update_layout(title='Test Loss', xaxis_title='Epochs', yaxis_title='Test Loss (BCE)')
model = LogisticRegressionModel()
boundary = plot_decision_boundary_pytorch(model, probs=True)
pred_fig.add_trace(boundary)
display(HBox([pred_fig,loss_fig]))
optimize_model(training_data, test_data, model, loss_fn, pred_fig, loss_fig, nepochs=50)

Go back to Step 1: Building a neural network¶

It's apparent that our linear decision boundary won't cut it for this data. Let's try to build a simple neural network that can distinguish between these two classes.

In [18]:
class NeuralNetworkModel1(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(2, 8)
        self.hidden2 = nn.Linear(8, 8)
        self.output = nn.Linear(8, 1)

    def forward(self, x):
        x = torch.tanh(self.hidden1(x))
        x = torch.tanh(self.hidden2(x))
        p = torch.sigmoid(self.output(x))
        return torch.cat([1 - p, p], dim=1)

How we initialize the weights can have a big impact on how well our model trains. With logistic regression we can set all the weights to zero and be fine; with a neural network we cannot. Why? (Hint: if all weights start at zero, every neuron in a layer computes the same output and receives the same gradient, so the neurons never differentiate from one another.) Small random starting weights are generally a good idea. Xavier initialization is a popular method for initializing weights in a neural network: it draws each weight with mean 0 and variance 2/(number of input units + number of output units), using either a normal or a uniform distribution. PyTorch provides both variants in torch.nn.init; below we use the uniform one, xavier_uniform_.

In [19]:
def initialize_weights(model):
    # Use model.modules() rather than model.children() so we also reach
    # Linear layers nested inside containers like nn.ModuleList.
    for layer in model.modules():
        if isinstance(layer, nn.Linear):
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)
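As a quick check (a sketch), the empirical variance of Xavier-initialized weights should land near 2/(fan_in + fan_out); for an 8-by-8 layer that is 2/16 = 0.125:

In [ ]:
# Sketch: empirical variance of Xavier-uniform weights for a Linear(8, 8) layer.
layer = nn.Linear(8, 8)
nn.init.xavier_uniform_(layer.weight)
print(layer.weight.var().item())  # roughly 0.125 (noisy with only 64 weights)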
In [20]:
model = NeuralNetworkModel1()
initialize_weights(model)

Let's try running this model with the exact same optimization and visualization code we used before:

In [21]:
from ipywidgets import HBox

model = NeuralNetworkModel1()
initialize_weights(model)

pred_fig = go.FigureWidget(data=data_fig.data, layout=data_fig.layout)
loss_fig = go.FigureWidget()
loss_fig.add_trace(go.Scatter(x=[], y=[], mode='lines', name='Test Loss'))
loss_fig.update_layout(title='Test Loss', xaxis_title='Epochs', yaxis_title='Test Loss (BCE)')
boundary = plot_decision_boundary_pytorch(model, probs=True)
pred_fig.add_trace(boundary)
display(HBox([pred_fig,loss_fig]))
optimize_model(training_data, test_data, model, loss_fn, pred_fig, loss_fig, 
               batch_size=16, learning_rate=0.001, nepochs=100, sleep_time=0)

Step 4: A Custom Design¶

In [22]:
# Class to dynamically create a neural network from a list of layer sizes and activations
class CustomNeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size, layers, activation_functions):
        super().__init__()
        self.layers = nn.ModuleList()
        self.activations = []

        # Create the layers dynamically
        current_size = input_size
        for i, layer_size in enumerate(layers):
            self.layers.append(nn.Linear(current_size, layer_size))
            self.activations.append(activation_functions[i])
            current_size = layer_size

        # Add the final output layer
        self.layers.append(nn.Linear(current_size, output_size))
        self.activations.append('sigmoid')  # Output activation is sigmoid for binary classification

    def forward(self, x):
        for i, layer in enumerate(self.layers[:-1]):  # Apply activation for all but the last layer
            x = layer(x)
            x = self.apply_activation(x, self.activations[i])
        x = self.layers[-1](x)  # Final output layer
        p = torch.sigmoid(x)  # Apply sigmoid activation for output
        return torch.cat([1 - p, p], dim=1)

    @staticmethod
    def apply_activation(x, activation):
        if activation == 'relu':
            return torch.relu(x)
        elif activation == 'tanh':
            return torch.tanh(x)
        elif activation == 'sigmoid':
            return torch.sigmoid(x)
        else:
            return x
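Before wiring this class into the interactive widget, here is a quick hand-built example (a sketch) showing the modules it creates:

In [ ]:
# Sketch: a two-hidden-layer tanh network built with CustomNeuralNetwork.
net = CustomNeuralNetwork(input_size=2, output_size=1,
                          layers=[8, 8], activation_functions=['tanh', 'tanh'])
print(net)  # three Linear layers inside the ModuleList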
In [23]:
from ipywidgets import HBox

model = NeuralNetworkModel1()
initialize_weights(model)

pred_fig = go.FigureWidget(data=data_fig.data, layout=data_fig.layout)
loss_fig = go.FigureWidget()
loss_fig.add_trace(go.Scatter(x=[], y=[], mode='lines', name='Test Loss'))
loss_fig.update_layout(title='Test Loss', xaxis_title='Epochs', yaxis_title='Test Loss (BCE)')
boundary = plot_decision_boundary_pytorch(model, probs=True)
pred_fig.add_trace(boundary)
display(HBox([pred_fig,loss_fig]))

@interact(n_layers=IntSlider(min=1, max=5, step=1, value=2, description="Layers"),
         neurons_per_layer=IntSlider(min=4, max=64, step=4, value=8, description="Neurons/Layer"),
         activation_fn=Dropdown(options=['relu', 'tanh', 'sigmoid'], value='tanh', description="Activation"),
         learning_rate=FloatSlider(min=0.001, max=0.1, step=0.001, value=0.01, description="Learning Rate"),
         batch_size=IntSlider(min=1, max=128, step=8, value=32, description="Batch Size"),
         epochs=IntSlider(min=10, max=200, step=10, value=10, description="Epochs"))
def update_model(n_layers, neurons_per_layer, activation_fn, learning_rate, batch_size, epochs):
    # setup the model
    layers = [neurons_per_layer] * n_layers
    activation_functions = [activation_fn] * n_layers
    model = CustomNeuralNetwork(input_size=2, output_size=1, layers=layers, activation_functions=activation_functions)
    initialize_weights(model)
    optimize_model(training_data, test_data, model, loss_fn, pred_fig, loss_fig, 
                   learning_rate=learning_rate,
                   batch_size=batch_size, nepochs=epochs, sleep_time=0)

Multi-Class Classification on Blobs Dataset¶

Step 0: Loading and Preparing the Data¶

In [24]:
# Generate and normalize the dataset
def generate_blobs(n_samples, centers, std):
    from sklearn.datasets import make_blobs
    from sklearn.preprocessing import StandardScaler
    X, y = make_blobs(n_samples=n_samples, centers=centers, cluster_std=std, random_state=42)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y
In [25]:
X, y = generate_blobs(n_samples=100, centers=3, std=1)
blob_data_fig = go.FigureWidget()
blob_data_fig.update_layout(width=500, height=500,
                       xaxis_range=[-3, 3], yaxis_range=[-3, 3],
                       xaxis_title='Feature 1',
                       yaxis_title='Feature 2',
                       title='make_blobs Dataset')
for i in np.unique(y):
    blob_data_fig.add_trace(go.Scatter(x=X[y == i, 0], y=X[y == i, 1], 
                                  mode='markers', marker=dict(color=px.colors.qualitative.Plotly[i]), 
                                  name=str(i)))

blob_data_fig.show()

Step 1: Defining the Model¶

In [26]:
# Define the Neural Network with dynamic output size
class BlobNN(nn.Module):
    def __init__(self, input_dim, num_classes, hidden_dim = 16):
        super().__init__()
        self.hidden = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, num_classes)
    
    def forward(self, x):
        x = torch.relu(self.hidden(x))
        logits = self.output(x)
        return torch.softmax(logits, dim=1)
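A quick sanity check (a sketch): thanks to the softmax, each output row of even an untrained network is already a probability distribution over the classes.

In [ ]:
# Sketch: each softmax output row sums to 1.
net = BlobNN(input_dim=2, num_classes=3)
with torch.no_grad():
    out = net(torch.randn(2, 2))
print(out, out.sum(dim=1))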

Step 2: Training and Visualizing the Model¶

In [28]:
model = BlobNN(2, 3)
initialize_weights(model)

blob_pred_fig = go.FigureWidget(data=blob_data_fig.data, layout=blob_data_fig.layout)
blob_loss_fig = go.FigureWidget()
blob_loss_fig.add_trace(go.Scatter(x=[], y=[], mode='lines', name='Test Loss'))
blob_loss_fig.update_layout(title='Test Loss', xaxis_title='Epochs', yaxis_title='Test Loss (CE)')
boundary = plot_decision_boundary_pytorch(model)
blob_pred_fig.add_trace(boundary)
display(HBox([blob_pred_fig, blob_loss_fig]))

@interact(n_samples=IntSlider(min=100, max=1000, step=100, value=500, description="Samples"),
         centers=IntSlider(min=2, max=5, step=1, value=3, description="Centers"),
         std=FloatSlider(min=0.5, max=5.0, step=0.5, value=1.0, description="Std Dev"),
         epochs=IntSlider(min=10, max=200, step=2, value=20, description="Epochs"),
         learning_rate=FloatSlider(min=0.001, max=0.1, step=0.001, value=0.01, description="Learning Rate"))
def update_model2(n_samples, centers, std, epochs, learning_rate):
    X, y = generate_blobs(n_samples=n_samples, centers=centers, std=std)
    blob_pred_fig.data = []
    for i in np.unique(y):
        blob_pred_fig.add_trace(go.Scatter(x=X[y == i, 0], y=X[y == i, 1], 
                                mode='markers', marker=dict(color=px.colors.qualitative.Plotly[i]), 
                                name=str(i)))
    train_data, test_data = make_tensors(X, y)
    model = BlobNN(2, centers)
    initialize_weights(model)
    boundary = plot_decision_boundary_pytorch(model)
    blob_pred_fig.add_trace(boundary)
    optimize_model(train_data, test_data, model, loss_fn, blob_pred_fig, blob_loss_fig, 
                   nepochs=epochs, learning_rate=learning_rate, sleep_time=0)
    