# Dependency: pandas. Install it from a shell (or with ``%pip`` inside a
# notebook) before running this code:
#     pip install pandas
import pandas as pd

# Load the store data set; the clustering step further down reads its
# TURNOVER and SIZE columns.
df = pd.read_csv('/home/pi/notebooks/openSAP_ds3_STORES_US.csv')

# Preview the first rows. An explicit print() also produces output when the
# code runs as a plain script, where a bare expression result is discarded.
print(df.head())
# Dependencies: matplotlib and scikit-learn. Install them from a shell (or
# with ``%pip`` inside a notebook) before running this code:
#     pip install matplotlib scikit-learn
# The PyPI project is named "scikit-learn"; "sklearn" is only the import name.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Partition the rows into three clusters using the TURNOVER and SIZE columns.
# A fixed random_state keeps the result reproducible between runs.
kmeans = KMeans(n_clusters=3, random_state=0)
df['CLUSTER'] = kmeans.fit_predict(df[['TURNOVER', 'SIZE']])

# Scatter plot of TURNOVER against SIZE, coloured by assigned cluster label.
# Bracket access is used so a column name can never be confused with a
# DataFrame attribute (e.g. ``DataFrame.size``).
plt.scatter(df['TURNOVER'], df['SIZE'], c=df['CLUSTER'])
plt.xlabel('TURNOVER')
plt.ylabel('SIZE')
# Fitted cluster centres: one row per cluster, with columns in the order the
# features were passed to fit_predict (TURNOVER first, SIZE second).
centroids = kmeans.cluster_centers_
cen_x = [center[0] for center in centroids]
cen_y = [center[1] for center in centroids]

# Attach to every row the coordinates of its own cluster centre. Building the
# label -> coordinate lookup with enumerate() keeps this correct for any
# number of clusters instead of hard-coding labels 0, 1 and 2.
df['cen_x'] = df['CLUSTER'].map(dict(enumerate(cen_x)))
df['cen_y'] = df['CLUSTER'].map(dict(enumerate(cen_y)))

# Draw one line segment from each data point to its cluster centre. Zipping
# the four needed columns avoids building a Series per row as iterrows() does.
for turnover, size, center_x, center_y in zip(
    df['TURNOVER'], df['SIZE'], df['cen_x'], df['cen_y']
):
    plt.plot([turnover, center_x], [size, center_y])
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
8 | |
5 | |
5 | |
4 | |
4 | |
4 | |
4 | |
3 | |
3 | |
3 |