diff python/ml.py @ 382:ba813f148ade

development for clustering
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Sun, 21 Jul 2013 10:23:15 -0400
parents adfd4f70ee1d
children 8bc632cb8344
line wrap: on
line diff
--- a/python/ml.py	Fri Jul 19 11:58:35 2013 -0400
+++ b/python/ml.py	Sun Jul 21 10:23:15 2013 -0400
@@ -54,7 +54,7 @@
         text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 
 
-def clustering(data, similar, initialCentroids = []):
+def assignCluster(data, similarFunc, initialCentroids = [], shuffleData = True):
     '''k-means algorithm with similarity function
     Two instances should be in the same cluster if the similarFunc function returns true for these two instances. It is assumed that the average centroid of a set of instances can be computed, using the function. 
     The number of clusters will be determined accordingly
@@ -65,14 +65,15 @@
     from random import shuffle
     from copy import copy, deepcopy
     localdata = copy(data) # shallow copy to avoid modifying data
-    shuffle(localdata)
+    if shuffleData:
+        shuffle(localdata)
     if initialCentroids:
         centroids = deepcopy(initialCentroids)
     else:
         centroids = [Centroid(localdata[0])]
     for instance in localdata[1:]:
         i = 0
-        while i<len(centroids) and not similar(centroids[i].instance, instance):
+        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
             i += 1
         if i == len(centroids):
             centroids.append(Centroid(instance))
@@ -81,6 +82,8 @@
 
     return centroids
 
+# TODO recompute the centroid of each cluster: choose the instance that minimizes some measure to all other elements of the cluster
+
 def spectralClustering(similarityMatrix, k, iter=20):
 	'''Spectral Clustering algorithm'''
 	n = len(similarityMatrix)