import pandas as pd
import numpy as np
import scipy as sp
import plotly.express as px
import seaborn as sns

# Load seaborn's "mpg" example dataset, then discard every row that has a
# missing value so later steps only see complete rows.
mpg = sns.load_dataset("mpg")
mpg = mpg.dropna()
# Preview the first five rows.
mpg.head(5)

# One dimension: distribution of engine displacement.
px.histogram(data_frame=mpg, x="displacement")

# Two dimensions: displacement on x, horsepower on y.
px.scatter(data_frame=mpg, x="displacement", y="horsepower")

# Three dimensions: add weight as the z axis of an 800x800 3-D scatter.
fig = px.scatter_3d(
    data_frame=mpg,
    x="displacement",
    y="horsepower",
    z="weight",
    width=800,
    height=800,
)
# Shrink the markers to size 3.
fig.update_traces(marker={"size": 3})

# Four dimensions: same 3-D scatter, with model_year mapped to marker color
# and markers drawn at 70% opacity.
fig = px.scatter_3d(
    data_frame=mpg,
    x="displacement",
    y="horsepower",
    z="weight",
    color="model_year",
    width=800,
    height=800,
    opacity=0.7,
)
fig.update_traces(marker={"size": 5})

# Six dimensions: on top of the three spatial axes, map model_year to color,
# mpg to marker size, and origin to marker symbol.
fig = px.scatter_3d(
    data_frame=mpg,
    x="displacement",
    y="horsepower",
    z="weight",
    color="model_year",
    size="mpg",
    symbol="origin",
    width=900,
    height=800,
    opacity=0.7,
)
# Hide the continuous color scale and pin each scene axis to a fixed range.
fig.update_layout(
    coloraxis_showscale=False,
    scene={
        "xaxis_range": [50, 500],
        "yaxis_range": [40, 250],
        "zaxis_range": [1000, 5000],
    },
)

from sklearn.decomposition import PCA

# Two-component PCA model.
pca = PCA(n_components=2)

# Feature matrix: the six plotted columns, passed through get_dummies so the
# non-numeric `origin` column becomes indicator columns.
X = pd.get_dummies(
    mpg.loc[
        :,
        ["displacement", "horsepower", "weight", "model_year", "origin", "mpg"],
    ]
)

# Fit the model and project every row onto the two components.
zs = pca.fit_transform(X)

# Store each component as its own column on the original frame.
mpg["z1"] = zs[:, 0]
mpg["z2"] = zs[:, 1]
mpg.head(5)

# Plot the two PCA components, keeping model_year as color and origin as
# symbol; the original measurements and car name are exposed on hover.
fig = px.scatter(
    data_frame=mpg,
    x="z1",
    y="z2",
    color="model_year",
    symbol="origin",
    hover_data=["displacement", "horsepower", "weight", "name"],
)
# Position the legend at (0.92, 1) and fix both axis ranges.
fig.update_layout(
    legend={"x": 0.92, "y": 1},
    xaxis_range=[-1500, 2500],
    yaxis_range=[-200, 300],
)

	mpg	cylinders	displacement	horsepower	weight	acceleration	model_year	origin	name
0	18.0	8	307.0	130.0	3504	12.0	70	usa	chevrolet chevelle malibu
1	15.0	8	350.0	165.0	3693	11.5	70	usa	buick skylark 320
2	18.0	8	318.0	150.0	3436	11.0	70	usa	plymouth satellite
3	16.0	8	304.0	150.0	3433	12.0	70	usa	amc rebel sst
4	17.0	8	302.0	140.0	3449	10.5	70	usa	ford torino

	mpg	cylinders	displacement	horsepower	weight	acceleration	model_year	origin	name	z1	z2
0	18.0	8	307.0	130.0	3504	12.0	70	usa	chevrolet chevelle malibu	536.462765	50.770168
1	15.0	8	350.0	165.0	3693	11.5	70	usa	buick skylark 320	730.376262	79.103119
2	18.0	8	318.0	150.0	3436	11.0	70	usa	plymouth satellite	470.999791	75.360935
3	16.0	8	304.0	150.0	3433	12.0	70	usa	amc rebel sst	466.436304	62.509155
4	17.0	8	302.0	140.0	3449	10.5	70	usa	ford torino	481.692727	55.684400

Lecture 24 – Data 100, Fall 2024

Working with High Dimensional Data

Visualizing 1 Dimensional Data

Visualizing 2 Dimensional Data

Visualizing 3 Dimensional Data

Visualizing 4 Dimensional Data

Visualizing 6 Dimensional Data

Dimensionality Reduction