# by Josh Hug (Fall 2019)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random
# Load the iris table from a CSV expected in the working directory.
iris = pd.read_csv("iris.csv")
# Bare expression: the sampled rows are only visible in an interactive/notebook
# session; in a plain script the result is discarded.
iris.sample(10)
# Drop the "species" column so the remaining code works without it.
iris = iris.drop("species", axis = 1)
iris.sample(10)
# Scatter the two petal columns in black, with generic axis labels.
sns.scatterplot(data = iris, x = "petal_length", y= "petal_width", color="black")
plt.xlabel('x')
plt.ylabel('y');
#plt.savefig('2d_data_needing_clustering.png', dpi = 300, bbox_inches = "tight")
class Center():
    """A cluster center holding one coordinate per column of its seed data.

    Attributes:
        coordinates: the center's position; set to a float NumPy array by
            ``__init__`` and freely reassignable by callers.
    """

    def __init__(self, data):
        """Pick a random position inside the bounding box of ``data``.

        ``data`` is indexed as ``data[:, k]``, so it must be a 2-D array whose
        columns are the dimensions. One ``random.uniform`` draw is made per
        column, in column order, between that column's min and max.
        """
        picks = []
        for k in range(data.shape[1]):
            column = data[:, k]
            lowest = np.min(column)
            highest = np.max(column)
            picks.append(random.uniform(lowest, highest))
        self.coordinates = np.array(picks, dtype=float)

    def __str__(self):
        """Return the ``str`` form of the coordinates."""
        return f"{self.coordinates}"

    def __repr__(self):
        """Return the ``repr`` form of the coordinates."""
        return f"{self.coordinates!r}"

    def dist(self, data_point):
        """Return the Euclidean distance from this center to each row.

        The reduction runs along axis 1, so ``data_point`` must be 2-D
        (one point per row).
        """
        return np.sqrt(self.dist_sq(data_point))

    def dist_sq(self, data_point):
        """Return the squared Euclidean distance from this center to each row."""
        offsets = self.coordinates - data_point
        return np.sum(offsets ** 2, axis = 1)
# Build two centers from the petal columns; the constructor places each one at
# a random position inside the data's bounding box.
c1 = Center(iris.loc[:, ['petal_length', 'petal_width']].values)
c2 = Center(iris.loc[:, ['petal_length', 'petal_width']].values)
# force coordinates from the lecture demo
# (overwrites the random positions chosen above with fixed values)
c1.coordinates = np.array([2.52364007, 2.31040024])
c2.coordinates = np.array([6.53276402, 1.211463])
def plot_centers_and_black_data(iris, centers):
    """Scatter the petal columns of ``iris`` in black and overlay the centers.

    Each center is drawn as a star at its first two coordinates. The legend is
    labelled ``c1`` .. ``cN`` (one per center) followed by ``data``.
    """
    sns.scatterplot(data=iris, x="petal_length", y="petal_width", color="black")
    for c in centers:
        cx = c.coordinates[0]
        cy = c.coordinates[1]
        plt.plot(cx, cy, '*', markersize=10)
    plt.xlabel('petal_length')
    plt.ylabel('petal_width')
    labels = [f"c{k}" for k in range(1, len(centers) + 1)] + ['data']
    plt.legend(labels)
# Show the uncoloured data together with the two starting centers.
plot_centers_and_black_data(iris, (c1, c2))
#plt.savefig('2means_demo_initial_placement.png', dpi = 300, bbox_inches = "tight")
def get_cluster_number(dists):
    """Return the position of the smallest entry of ``dists``.

    When several entries tie for the minimum, the first position wins.
    """
    smallest = np.min(dists)
    matching_positions = np.where(dists == smallest)[0]
    return matching_positions[0]
# Distance from every row to each center, stored as new columns.
iris["dist1"] = c1.dist(iris[["petal_length", "petal_width"]])
iris["dist2"] = c2.dist(iris[["petal_length", "petal_width"]])
# Row-wise: 0 when dist1 is the smaller distance, 1 when dist2 is.
iris["cluster"] = iris[["dist1", "dist2"]].apply(get_cluster_number, axis = 1)
# Bare expressions below only display something in an interactive session.
iris.head(10)
iris["cluster"].value_counts()
def plot_centers_and_colorized_data(iris, centers):
    """Scatter the petal columns coloured by ``iris["cluster"]`` and overlay centers.

    The palette is the first ``len(centers)`` colours of seaborn's current
    palette. Each center is drawn as a star at its first two coordinates, and
    the legend is labelled ``c1`` .. ``cN``.
    """
    n_centers = len(centers)
    palette = sns.color_palette()[0:n_centers]
    sns.scatterplot(data=iris, x="petal_length", y="petal_width",
                    hue="cluster", palette=palette)
    for c in centers:
        cx = c.coordinates[0]
        cy = c.coordinates[1]
        plt.plot(cx, cy, '*', markersize=10)
    plt.xlabel('petal_length')
    plt.ylabel('petal_width')
    plt.legend([f"c{k}" for k in range(1, n_centers + 1)])
# Same starting centers, now with the points coloured by their assigned cluster.
plot_centers_and_colorized_data(iris, (c1, c2))
#plt.savefig('2means_demo_initial_placement_colored.png', dpi = 300, bbox_inches = "tight")
# Five hand-rolled k-means steps (center positions 2 through 6). Each step:
#   1. moves every center to the mean of the points currently assigned to it,
#   2. plots the moved centers over the uncoloured data,
#   3. reassigns every point to its nearest center,
#   4. plots the centers over the recoloured data.
# The last step originally skipped (3) and recomputed the centers a second
# time instead, so its colorized plot showed stale assignments; every step now
# follows the same pattern.
for step in range(2, 7):
    # Per-column Series.mean() is used instead of np.mean(DataFrame)[col]:
    # on pandas >= 2.0 np.mean(DataFrame) reduces over both axes and returns
    # a scalar, which cannot be indexed by column name.
    in_c1 = iris["cluster"] == 0
    average_c1_length = iris.loc[in_c1, "petal_length"].mean()
    average_c1_width = iris.loc[in_c1, "petal_width"].mean()
    c1.coordinates = (average_c1_length, average_c1_width)
    in_c2 = iris["cluster"] == 1
    average_c2_length = iris.loc[in_c2, "petal_length"].mean()
    average_c2_width = iris.loc[in_c2, "petal_width"].mean()
    c2.coordinates = (average_c2_length, average_c2_width)
    plot_centers_and_black_data(iris, (c1, c2))
    #plt.savefig(f'2means_demo_center_position_{step}.png', dpi = 300, bbox_inches = "tight")
    iris["dist1"] = c1.dist(iris[["petal_length", "petal_width"]])
    iris["dist2"] = c2.dist(iris[["petal_length", "petal_width"]])
    iris["cluster"] = iris[["dist1", "dist2"]].apply(get_cluster_number, axis = 1)
    plot_centers_and_colorized_data(iris, (c1, c2))
    #plt.savefig(f'2means_demo_center_position_{step}_colorized.png', dpi = 300, bbox_inches = "tight")
import copy
def compute_centers_after_N_iterations(data, column_names, centers, N):
    """Run ``N`` k-means iterations and return the updated centers.

    Args:
        data: DataFrame holding the feature columns. It is modified in place:
            a ``dist<k>`` column per center and a ``cluster`` column are
            (re)written on every iteration.
        column_names: names of the feature columns, in the same order as the
            centers' coordinates.
        centers: sequence of objects exposing ``dist(points)`` and a
            ``coordinates`` attribute. They are deep-copied, so the caller's
            objects are left untouched.
        N: number of assign/update iterations to run.

    Returns:
        The list of copied centers after ``N`` iterations. A center whose
        cluster is empty ends up with NaN coordinates and is then ignored when
        points are assigned.
    """
    centers = copy.deepcopy(centers)
    column_names = list(column_names)
    dist_names = ["dist" + str(center_num) for center_num in range(len(centers))]
    for _ in range(N):
        # recompute clusters
        for dist_name, center in zip(dist_names, centers):
            data[dist_name] = center.dist(data[column_names])
        # Vectorized nearest-center lookup: NaN distances are skipped and ties
        # go to the lowest center number.
        data["cluster"] = np.nanargmin(data[dist_names].to_numpy(), axis=1)
        # update centers
        for center_num, center in enumerate(centers):
            members = data.loc[data["cluster"] == center_num, column_names]
            # DataFrame.mean() (column-wise) rather than np.mean(DataFrame)[col]:
            # on pandas >= 2.0 the latter reduces over both axes to a scalar.
            center.coordinates = members.mean().to_numpy(dtype=float)
    return centers
# Recreate the two centers and pin them to the same fixed coordinates used
# for the first manual walkthrough.
c1 = Center(iris.loc[:, ['petal_length', 'petal_width']].values)
c2 = Center(iris.loc[:, ['petal_length', 'petal_width']].values)
c1.coordinates = np.array([2.52364007, 2.31040024])
c2.coordinates = np.array([6.53276402, 1.211463])
# Bare expression: only displays the frame in an interactive session.
iris
def inertia(data, centers, column_names=("petal_length", "petal_width")):
    """Return the inertia of a clustering: the sum of squared distances.

    Args:
        data: DataFrame with the feature columns and a ``cluster`` column whose
            value is the position of the row's center in ``centers``.
        centers: sequence of objects exposing ``dist_sq(points)``.
        column_names: feature columns to measure distances over; defaults to
            the two petal columns.

    Returns:
        The sum, over all clusters, of the squared distance from each member
        row to its cluster's center. Empty clusters contribute 0.
    """
    feature_cols = list(column_names)
    total_inertia = 0
    for center_num, center in enumerate(centers):
        members = data.loc[data["cluster"] == center_num, feature_cols]
        # Squared distances: summing plain distances understates the spread
        # and disagrees with the usual definition of inertia.
        total_inertia += np.sum(center.dist_sq(members))
    return total_inertia
def distortion(data, centers, column_names=("petal_length", "petal_width")):
    """Return the distortion of a clustering.

    For each cluster the squared distances from its member rows to its center
    are summed and divided by the number of members; the per-cluster values
    are then added up.

    Args:
        data: DataFrame with the feature columns and a ``cluster`` column whose
            value is the position of the row's center in ``centers``.
        centers: sequence of objects exposing ``dist_sq(points)``.
        column_names: feature columns to measure distances over; defaults to
            the two petal columns.

    Returns:
        The summed per-cluster mean squared distance. Clusters with no members
        are skipped so they cannot turn the total into NaN via 0/0.
    """
    feature_cols = list(column_names)
    total_distortion = 0
    for center_num, center in enumerate(centers):
        members = data.loc[data["cluster"] == center_num, feature_cols]
        if len(members) == 0:
            continue
        total_distortion += np.sum(center.dist_sq(members)) / len(members)
    return total_distortion
# Four k=4 runs that differ only in the seed used to place the starting
# centers. Each run prints its inertia and distortion and plots the result.
# After the loop c1..c4 and new_centers hold the values from the last seed.
petal_points = iris.loc[:, ['petal_length', 'petal_width']].values
for example_num, seed in enumerate((25, 29, 40, 75), start=1):
    random.seed(seed)
    c1 = Center(petal_points)
    c2 = Center(petal_points)
    c3 = Center(petal_points)
    c4 = Center(petal_points)
    new_centers = compute_centers_after_N_iterations(iris, ['petal_length', 'petal_width'], [c1, c2, c3, c4], 12)
    # The original message ended with an unbalanced ")" typo; removed here.
    print(f"inertia: {inertia(iris, new_centers)}, distortion: {distortion(iris, new_centers)}")
    plot_centers_and_colorized_data(iris, new_centers)
    # plt.savefig(f"k4_example{example_num}.png", bbox_inches = "tight", dpi=300)

# Small 7-row sample clustered with k=2. Both RNGs are seeded: `random` drives
# the center placement and NumPy's global state drives DataFrame.sample.
random.seed(20)
np.random.seed(20)
iris_small = iris.sample(7)
c1 = Center(iris_small.loc[:, ['petal_length', 'petal_width']].values)
c2 = Center(iris_small.loc[:, ['petal_length', 'petal_width']].values)
new_centers2 = compute_centers_after_N_iterations(iris_small, ['petal_length', 'petal_width'], [c1, c2], 12)
plot_centers_and_colorized_data(iris_small, new_centers2)
# plt.savefig("distortion_computation.png", dpi = 300, bbox_inches = "tight")
def print_distances_squared(data, centers):
    """Print, for each center, the squared distances of its member rows.

    A row is a member of center ``k`` when its ``cluster`` value equals ``k``.
    Distances are taken over the two petal columns.
    """
    feature_cols = ["petal_length", "petal_width"]
    for idx, center in enumerate(centers):
        members = data[data["cluster"] == idx]
        distances = center.dist(members[feature_cols])
        print(distances ** 2)
# Inspect the small-sample clustering; the bare expressions only display
# something in an interactive session.
print_distances_squared(iris_small, new_centers2)
inertia(iris_small, new_centers2)
distortion(iris_small, new_centers2)
iris_small

# Hand-placed centers for the "intuitive" two-cluster split of the full data.
c1.coordinates = [1.2, 0.15]
c2.coordinates = [4.906000000000001, 1.6760000000000006]
iris["dist1"] = c1.dist(iris[["petal_length", "petal_width"]])
iris["dist2"] = c2.dist(iris[["petal_length", "petal_width"]])
iris["cluster"] = iris[["dist1", "dist2"]].apply(get_cluster_number, axis = 1)
plot_centers_and_colorized_data(iris, (c1, c2))
#plt.savefig("intuitive_clustering.png", dpi=300, bbox_inches = "tight")
# The original message ended with an unbalanced ")" typo; removed here.
print(f"inertia: {inertia(iris, [c1, c2])}, distortion: {distortion(iris, [c1, c2])}")

# One more k-means step from the hand-placed centers.
# Per-column Series.mean() replaces np.mean(DataFrame)[col], which on
# pandas >= 2.0 reduces over both axes and returns an unindexable scalar.
in_c1 = iris["cluster"] == 0
average_c1_length = iris.loc[in_c1, "petal_length"].mean()
average_c1_width = iris.loc[in_c1, "petal_width"].mean()
c1.coordinates = (average_c1_length, average_c1_width)
in_c2 = iris["cluster"] == 1
average_c2_length = iris.loc[in_c2, "petal_length"].mean()
average_c2_width = iris.loc[in_c2, "petal_width"].mean()
c2.coordinates = (average_c2_length, average_c2_width)
plot_centers_and_black_data(iris, (c1, c2))
iris["dist1"] = c1.dist(iris[["petal_length", "petal_width"]])
iris["dist2"] = c2.dist(iris[["petal_length", "petal_width"]])
iris["cluster"] = iris[["dist1", "dist2"]].apply(get_cluster_number, axis = 1)
plot_centers_and_colorized_data(iris, (c1, c2))
print(f"inertia: {inertia(iris, [c1, c2])}, distortion: {distortion(iris, [c1, c2])}")

# Build a 12-row sample for the agglomerative demo: sample 13 rows, keep only
# the measurement columns, drop the row at position 8, and renumber from 0.
random.seed(42)
np.random.seed(42)
iris_small = iris.sample(13).loc[:, 'sepal_length':'petal_width'].reset_index(drop=True)
iris_small = iris_small.drop(8).reset_index(drop=True)
sns.scatterplot(data = iris_small, x = "petal_length", y= "petal_width", color="black");
# Start with every row in its own cluster, numbered by row position.
iris_small["cluster"] = np.array(range(0, len(iris_small)))
iris_small
def plot_clusters(data):
    """Scatter the petal columns and annotate every point with its cluster.

    Args:
        data: DataFrame with ``petal_length``, ``petal_width`` and ``cluster``
            columns. Any index is accepted.
    """
    p1 = sns.scatterplot(data = data, x = "petal_length", y= "petal_width")
    # Walk the column values directly. Looking rows up with Series[position]
    # is label-based and only works when the index happens to be 0..n-1.
    rows = zip(data["petal_length"], data["petal_width"], data["cluster"])
    for length, width, cluster in rows:
        # Small offsets keep the label from sitting on top of its marker.
        p1.text(length + 0.05, width - 0.03,
                cluster, horizontalalignment='left',
                size='medium', color='black', weight='semibold')
# Plot the small sample with each point labelled by its current cluster number.
plot_clusters(iris_small)
#plt.savefig("agglomerative_start.png", dpi=300, bbox_inches = "tight")
from scipy.spatial import distance
def dist_between_clusters(data, cnum1, cnum2):
    """Return the largest pairwise distance between two clusters' points.

    Distances are measured over the two petal columns between every row
    labelled ``cnum1`` and every row labelled ``cnum2``.
    """
    feature_cols = ["petal_length", "petal_width"]
    points_a = data.loc[data["cluster"] == cnum1, feature_cols]
    points_b = data.loc[data["cluster"] == cnum2, feature_cols]
    pairwise = distance.cdist(points_a, points_b)
    return pairwise.max()
def closest_clusters(data):
    """Return the pair of cluster numbers that are closest to each other.

    Closeness is measured by ``dist_between_clusters``. Clusters are taken in
    the order they first appear in ``data["cluster"]``; on ties the earliest
    pair in that order wins.

    Returns:
        ``[cnum1, cnum2]`` for the closest pair, or ``[-1, -1]`` when there are
        fewer than two distinct clusters.
    """
    from itertools import combinations

    cluster_values = data["cluster"].unique()
    smallest_distance = float("inf")
    best_pair = [-1, -1]
    # The distance is symmetric, so each unordered pair is evaluated once
    # rather than in both orders; this halves the number of cdist calls
    # without changing which pair is returned.
    for cnum1, cnum2 in combinations(cluster_values, 2):
        cur_dist = dist_between_clusters(data, cnum1, cnum2)
        if cur_dist < smallest_distance:
            best_pair = [cnum1, cnum2]
            smallest_distance = cur_dist
    return best_pair
def merge_clusters(data, cnum1, cnum2):
    """Relabel every row of cluster ``cnum2`` as ``cnum1``, in place."""
    absorbed = data["cluster"] == cnum2
    data.loc[absorbed, "cluster"] = cnum1
# Repeatedly merge the two closest clusters of the small sample until only two
# remain, plotting the labelling after every merge.
i = 0
# `> 2` rather than `!= 2`: with fewer than two clusters there is nothing left
# to merge, and `!= 2` would then spin forever.
while len(iris_small["cluster"].unique()) > 2:
    i += 1
    cnum1, cnum2 = closest_clusters(iris_small)
    merge_clusters(iris_small, cnum1, cnum2)
    plot_clusters(iris_small)
    # plt.savefig(f"agglomerative_merge{i}.png", dpi=300, bbox_inches = "tight")

# Restart from the full data set with every row in its own cluster.
iris_small = iris.copy()
iris_small["cluster"] = np.array(range(0, len(iris_small)))
plot_clusters(iris_small)
# My merge loop is too slow to run on the full data set, so it is left commented out below.
#i = 0
#while len(iris_small["cluster"].unique()) != 2:
# i += 1
# print(i)
# cnum1, cnum2 = closest_clusters(iris_small)
# merge_clusters(iris_small, cnum1, cnum2)
#plot_clusters(iris_small)
#plt.savefig(f"agglomerative_merge{i}.png", dpi=300, bbox_inches = "tight")
#plt.clf()
from sklearn.cluster import AgglomerativeClustering
# Fit scikit-learn's agglomerative clustering, with its default settings, on
# the two petal columns.
clustering = AgglomerativeClustering().fit(iris[["petal_length", "petal_width"]])
# Bare expression: only displays the labels in an interactive session.
clustering.labels_
# Overwrite the cluster column with the fitted labels.
iris["cluster"] = clustering.labels_
sns.scatterplot(data = iris, x = "petal_length", y= "petal_width", hue ="cluster", legend = None);
#plt.savefig("agglomerative_output.png", dpi = 300, bbox_inches = "tight")
# Same data again, this time with each point annotated by its cluster number.
plot_clusters(iris)
# plt.savefig("agglomerative_output_numbers.png", dpi = 300, bbox_inches = "tight")
#from https://github.com/scikit-learn/scikit-learn/blob/70cf4a676caa2d2dad2e3f6e4478d64bcb0506f7/examples/cluster/plot_hierarchical_clustering_dendrogram.py
from scipy.cluster.hierarchy import dendrogram
def plot_dendrogram(model, **kwargs):
    """Plot a dendrogram for a fitted hierarchical clustering model.

    Args:
        model: fitted estimator exposing ``children_`` (one row of two child
            node ids per merge) and ``labels_`` (one entry per sample).
        **kwargs: forwarded unchanged to ``scipy.cluster.hierarchy.dendrogram``.
    """
    # Children of hierarchical clustering
    children = model.children_
    n_merges = children.shape[0]
    n_samples = len(model.labels_)

    # Distances between each pair of children
    # Since we don't have this information, we can use a uniform one for plotting
    merge_heights = np.arange(n_merges)

    # The number of observations contained in each merged cluster. Ids below
    # n_samples are single samples; larger ids refer to the cluster created by
    # merge number (id - n_samples), whose size has already been counted.
    counts = np.zeros(n_merges)
    for merge_num, merged_pair in enumerate(children):
        size = 0
        for child in merged_pair:
            if child < n_samples:
                size += 1
            else:
                size += counts[child - n_samples]
        counts[merge_num] = size

    # Create linkage matrix and then plot the dendrogram
    linkage_matrix = np.column_stack([children, merge_heights, counts]).astype(float)

    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)
    plt.title('Hierarchical Clustering Dendrogram')
# Draw the dendrogram for the fitted model, passing each sample's cluster
# label as the `labels` argument.
plot_dendrogram(clustering, labels=clustering.labels_)
# plt.savefig("dendrogram.png", bbox_inches = "tight", dpi = 300)
from sklearn.cluster import KMeans
# Elbow method: fit KMeans for k = 1..9 and record, for each k, the mean
# distance from a point to its nearest center ("distortion" here) and the
# model's inertia_. mapping1 / mapping2 hold the same values keyed by k.
distortions = []
inertias = []
mapping1 = {}
mapping2 = {}
K = range(1,10)
X = iris[["petal_length", "petal_width"]]
for k in K:
    #Building and fitting the model
    kmeanModel = KMeans(n_clusters=k).fit(X)
    # Distance from every point to its nearest center; computed once and
    # reused for both the list and the mapping.
    nearest_center_dist = np.min(
        distance.cdist(X, kmeanModel.cluster_centers_, 'euclidean'), axis=1)
    mean_nearest_dist = sum(nearest_center_dist) / X.shape[0]
    distortions.append(mean_nearest_dist)
    inertias.append(kmeanModel.inertia_)
    mapping1[k] = mean_nearest_dist
    mapping2[k] = kmeanModel.inertia_
plt.plot(K, distortions, 'bx-')
plt.xlabel('Values of K')
plt.ylabel('Distortion')
plt.title('The Elbow Method using Distortion') ;
# plt.savefig("elbow.png", dpi=300, bbox_inches = "tight")
# Bare expression: only displays the matching rows in an interactive session.
X.query("petal_length < 3.2 and petal_length > 2")
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.cm as cm
def _plot_silhouette(X, cluster_labels, n_clusters):
    """Draw a silhouette plot for one labelling of ``X``.

    Prints the average silhouette score, draws one sorted band of per-sample
    silhouette values for each cluster ``0 .. n_clusters - 1``, and marks the
    average with a dashed red vertical line.

    Returns:
        ``(fig, ax1, silhouette_avg, sample_silhouette_values)``.
    """
    fig, ax1 = plt.subplots(1, 1)
    #fig.set_size_inches(18, 7)

    # The silhouette coefficient can range from -1, 1 but in this example all
    # lie within [-0.1, 1]
    ax1.set_xlim([-0.1, 1])
    # The (n_clusters+1)*10 is for inserting blank space between silhouette
    # plots of individual clusters, to demarcate them clearly.
    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])

    # The silhouette_score gives the average value for all the samples.
    # This gives a perspective into the density and separation of the formed
    # clusters
    silhouette_avg = silhouette_score(X, cluster_labels)
    print("For n_clusters =", n_clusters,
          "The average silhouette_score is :", silhouette_avg)

    # Compute the silhouette scores for each sample
    sample_silhouette_values = silhouette_samples(X, cluster_labels)

    y_lower = 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = \
            sample_silhouette_values[cluster_labels == i]
        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(i) / n_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0, ith_cluster_silhouette_values,
                          facecolor=color, edgecolor=color, alpha=0.7)

        # Label the silhouette plots with their cluster numbers at the middle
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10 for the 0 samples

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label")

    # The vertical line for average silhouette score of all the values
    ax1.axvline(x=silhouette_avg, color="red", linestyle="--");
    return fig, ax1, silhouette_avg, sample_silhouette_values


# Silhouette plot for the existing `clustering` model, treated as 2 clusters.
n_clusters = 2
cluster_labels = clustering.fit_predict(X)
fig, ax1, silhouette_avg, sample_silhouette_values = _plot_silhouette(X, cluster_labels, n_clusters)
# plt.savefig("silhoutte_plot.png", dpi=300, bbox_inches = "tight")

# Same plot for a 3-cluster agglomerative model.
from sklearn.cluster import AgglomerativeClustering
clustering = AgglomerativeClustering(n_clusters = 3).fit(iris[["petal_length", "petal_width"]])
n_clusters = 3
cluster_labels = clustering.fit_predict(X)
fig, ax1, silhouette_avg, sample_silhouette_values = _plot_silhouette(X, cluster_labels, n_clusters)
# plt.savefig("silhoutte_plot_k3.png", dpi=300, bbox_inches = "tight")

# Bare expression: only displays the worst silhouette value interactively.
min(sample_silhouette_values)
# Colour the scatter plot by the 3-cluster labels.
iris["cluster"] = cluster_labels
current_palette = sns.color_palette()[0:3]
sns.scatterplot(data = iris, x = "petal_length", y= "petal_width", hue="cluster", palette = current_palette);
# plt.savefig("iris_3_class_agglomerative.png", dpi = 300, bbox_inches = "tight")