mardi 14 juin 2016

How do you perform k-means clustering on a list using Python? [on hold]

How do I perform k-means on the list b and display the clusters? - The ideal student path can be something like 1234, 12345, or 123. - I have tried to plot a graph using matplotlib.

import itertools
# Read the ideal student path (for example "1234") from the user.
n = input("Enter the Ideal Student Path : ")

# Every ordering of the path's characters, each joined back into a string.
# This list still contains the ideal path itself; it is filtered out below.
a = list(map("".join, itertools.permutations(n)))

# Only permutations that differ from the ideal path are listed and counted.
other_permutations = [d for d in a if d != n]
num_of_permutations = len(other_permutations)

# Write the report. Each line ends with a real newline ("\n"); the previous
# literal "n" appended the letter n and ran every entry together on one line.
# The with-statement closes the file even if a write fails.
with open('Permutations.txt', 'w') as c:
    c.write("The number being permutated : " + n + "\n")
    c.write("List of Permutations :" + "\n")
    c.writelines(d + "\n" for d in other_permutations)
    c.write("Total number of Permutations : " + str(num_of_permutations) + "\n")

print("Permutations generated successfully.")
# Compute the bubble-sort distance of every permutation in `a` (except the
# ideal path `n` itself), write each one to InversePermutation.txt, and collect
# the distances in `b` for clustering.
#
# The distance is the number of inversions: pairs of positions (j, i) with
# j < i whose characters are out of ascending order (x[i] < x[j]).
# NOTE(review): this measures distance from ascending character order, which
# equals the distance from `n` only when `n` is itself ascending (e.g. "1234")
# -- confirm that is the intended reference.
b = []
bubblesort_distance = 0
with open('InversePermutation.txt', 'w') as p:
    p.write("List of Inverse Permutations of" + " " + n + " " + "along with their respective bubblesort distance." + "\n")
    p.write("Total number of data points :" + " " + str(num_of_permutations) + "\n")
    for x in a:
        # The ideal path is not a data point.
        if x == n:
            continue
        bubblesort_distance = sum(
            1
            for i in range(1, len(x))
            for j in range(i)
            if x[i] < x[j]
        )
        # Record the result once per permutation, after counting is complete.
        # Previously this happened inside the inversion test for the last
        # adjacent pair only, so permutations whose final two characters were
        # already in order never reached the file or `b`.
        p.write(x + " " + "-" + " " + str(bubblesort_distance) + "\n")
        b.append(bubblesort_distance)

print ("File Updation Successful.")

import copy
# Independent copy of the distances, kept under the name `c` as before.
c = copy.deepcopy(b)

# Apply the k-means algorithm using the KMeans class from scikit-learn.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from sklearn.cluster import KMeans

style.use("ggplot")

# KMeans.fit expects a 2-D array of shape (n_samples, n_features). `b` is a
# flat list of distances, so reshape it into one single-feature sample per
# distance.
# NOTE: with n_clusters=2, fit() needs at least two samples in `b`.
X = np.array(b).reshape(-1, 1)
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)

centroids = kmeans.cluster_centers_
labels = kmeans.labels_

print(centroids)
print(labels)

# One marker style per cluster label (0 -> green dot, 1 -> red dot).
colors = ["g.", "r."]

# The data has a single feature, so there is no second coordinate to plot.
# Draw every distance along the x axis at y = 0, coloured by its cluster.
for distance, label in zip(X[:, 0], labels):
    plt.plot(distance, 0, colors[label], markersize=10)

# Mark the cluster centres on the same line.
plt.scatter(centroids[:, 0], np.zeros(len(centroids)), marker="x", s=150, linewidths=5, zorder=10)

plt.show()

Aucun commentaire:

Enregistrer un commentaire