import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


%matplotlib inline


# Load the dataset that every later cell reads from.
births = pd.read_csv("baby.csv")


# Preview the first five rows of the table.
births.head(5)


# (number of rows, number of columns)
births.shape

(1174, 6)


# Tally how many rows fall into each 'Maternal Smoker' value.
births['Maternal Smoker'].value_counts()

False    715
True     459
Name: Maternal Smoker, dtype: int64


# Horizontal bar chart of the smoker counts via the `datascience` Table API.
#
# `value_counts().reset_index()` labels its two columns differently depending
# on the pandas version ('index' / 'Maternal Smoker' before 2.0,
# 'Maternal Smoker' / 'count' from 2.0 on). The labels are therefore pinned
# explicitly so the `barh` call below finds the columns it asks for.
from datascience import Table
t = Table.from_df(
    births['Maternal Smoker']
    .value_counts()
    .rename_axis('index')                  # category labels -> column 'index'
    .reset_index(name='Maternal Smoker')   # counts -> column 'Maternal Smoker'
)
t.barh("index", "Maternal Smoker")


# Bar chart of the category counts through pandas' plotting accessor.
births["Maternal Smoker"].value_counts().plot.bar();


# Bar chart of the smoker counts drawn directly with matplotlib.
#
# The index of `ms` holds booleans, which matplotlib does not treat as
# category labels. Converting the labels to strings gives a categorical
# x-axis with exactly one tick per category ("False", "True").
ms = births['Maternal Smoker'].value_counts()
plt.bar(ms.index.astype(str), ms);


# seaborn does the counting itself, so it is handed the raw table.
sns.countplot(x="Maternal Smoker", data=births);


# plotly version of the same chart, colored by category.
import plotly.express as px
px.histogram(births, x="Maternal Smoker", color="Maternal Smoker")


# countplot draws one bar per distinct value of the column.
sns.countplot(x="Maternal Pregnancy Weight", data=births);


# histplot groups the values into bins instead.
sns.histplot(x="Maternal Pregnancy Weight", data=births);


# plotly histogram of the same column.
px.histogram(births, x="Maternal Pregnancy Weight")


# Histogram with 20 bins, plus a red rug mark for each observation.
sns.histplot(x="Maternal Pregnancy Weight", data=births, bins=20);
sns.rugplot(x="Maternal Pregnancy Weight", data=births, color="red");


# Default binning with a KDE curve overlaid, plus the same red rug.
sns.histplot(x="Maternal Pregnancy Weight", data=births, kde=True);
sns.rugplot(x="Maternal Pregnancy Weight", data=births, color="red");


# Tall, narrow figure holding a single vertical box plot of Birth Weight.
plt.figure(figsize=(3, 6))
sns.boxplot(data=births, y="Birth Weight");


# Compute the quartiles of Birth Weight by hand, together with the
# bounds lying 1.5 * IQR below the first and above the third quartile.
bweights = births["Birth Weight"]
q1, q2, q3 = np.percentile(bweights, [25, 50, 75])
iqr = q3 - q1
whisk1, whisk2 = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# Display all five numbers in increasing order.
(whisk1, q1, q2, q3, whisk2)

(73.5, 108.0, 120.0, 131.0, 165.5)


# Violin plot of Birth Weight on a tall, narrow figure.
plt.figure(figsize=(3, 6))
sns.violinplot(y=births["Birth Weight"]);


# Box plots of Birth Weight, one per 'Maternal Smoker' value.
plt.figure(figsize=(5, 8))
sns.boxplot(x="Maternal Smoker", y="Birth Weight", data=births);


# Same grouping, drawn as violin plots.
plt.figure(figsize=(5, 8))
sns.violinplot(x="Maternal Smoker", y="Birth Weight", data=births);


# Look at the first five rows again before plotting pairs of columns.
births.head(5)


# matplotlib scatter, passing the two columns themselves.
plt.scatter(x=births["Maternal Height"], y=births["Birth Weight"]);
plt.xlabel("Maternal Height")
plt.ylabel("Birth Weight");


# matplotlib scatter, naming the columns and passing the table via `data`.
plt.scatter(x="Maternal Height", y="Birth Weight", data=births);
plt.xlabel("Maternal Height")
plt.ylabel("Birth Weight");


# seaborn scatter, colored by 'Maternal Smoker'.
sns.scatterplot(
    x="Maternal Height",
    y="Birth Weight",
    hue="Maternal Smoker",
    data=births,
);


# Store a copy of the heights with uniform noise in [-0.2, 0.2) added
# (presumably to reduce overplotting of identical heights), then plot it.
# The noise is unseeded, so the picture differs slightly on every run.
births["Maternal Height (jittered)"] = births["Maternal Height"] + np.random.uniform(
    low=-0.2, high=0.2, size=births.shape[0]
)
fig = sns.scatterplot(
    x="Maternal Height (jittered)",
    y="Birth Weight",
    hue="Maternal Smoker",
    data=births,
);


# Scatter plot with a fitted regression line for each 'Maternal Smoker' group.
# seaborn documents `ci` as an int in [0, 100] or None; None is the value
# that disables the confidence band, so it is used here instead of False.
sns.lmplot(data=births, x='Maternal Height', y='Birth Weight', ci=None, hue='Maternal Smoker');


# Joint plot with seaborn's default settings.
sns.jointplot(x="Maternal Height", y="Birth Weight", data=births);


# Same, colored by 'Maternal Smoker'.
sns.jointplot(x="Maternal Height", y="Birth Weight", hue="Maternal Smoker", data=births);


# Hexagonal-bin variant.
sns.jointplot(x="Maternal Height", y="Birth Weight", kind="hex", data=births);


# Filled KDE variant.
sns.jointplot(x="Maternal Height", y="Birth Weight", kind="kde", fill=True, data=births);


# KDE variant, colored by 'Maternal Smoker'.
sns.jointplot(x="Maternal Height", y="Birth Weight", kind="kde", hue="Maternal Smoker", data=births);


# pandas' default plot applied to the whole table, with no columns selected.
births.plot();

	Birth Weight	Gestational Days	Maternal Age	Maternal Height	Maternal Pregnancy Weight	Maternal Smoker
0	120	284	27	62	100	False
1	113	282	33	64	135	False
2	128	279	28	64	115	True
3	108	282	23	67	125	True
4	136	286	25	62	93	False

	Birth Weight	Gestational Days	Maternal Age	Maternal Height	Maternal Pregnancy Weight	Maternal Smoker
0	120	284	27	62	100	False
1	113	282	33	64	135	False
2	128	279	28	64	115	True
3	108	282	23	67	125	True
4	136	286	25	62	93	False

Lecture 7 – Data 100, Spring 2022

Bar Plots

Box Plots

Violin Plots

Side by side box plots and violin plots

Scatter plots

Hex plots and contour plots

Bonus