comparison python/ml.py @ 382:ba813f148ade

development for clustering
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Sun, 21 Jul 2013 10:23:15 -0400
parents adfd4f70ee1d
children 8bc632cb8344
comparison
equal deleted inserted replaced
381:387cc0142211 382:ba813f148ade
52 from matplotlib.pylab import text 52 from matplotlib.pylab import text
53 self.instance.draw(options) 53 self.instance.draw(options)
54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) 54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
55 55
56 56
57 def clustering(data, similar, initialCentroids = []): 57 def assignCluster(data, similarFunc, initialCentroids = [], shuffleData = True):
58 '''k-means algorithm with similarity function 58 '''k-means algorithm with similarity function
59 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. 59 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
60 The number of clusters will be determined accordingly 60 The number of clusters will be determined accordingly
61 61
62 data: list of instances 62 data: list of instances
63 averageCentroid: ''' 63 averageCentroid: '''
64 64
65 from random import shuffle 65 from random import shuffle
66 from copy import copy, deepcopy 66 from copy import copy, deepcopy
67 localdata = copy(data) # shallow copy to avoid modifying data 67 localdata = copy(data) # shallow copy to avoid modifying data
68 shuffle(localdata) 68 if shuffleData:
69 shuffle(localdata)
69 if initialCentroids: 70 if initialCentroids:
70 centroids = deepcopy(initialCentroids) 71 centroids = deepcopy(initialCentroids)
71 else: 72 else:
72 centroids = [Centroid(localdata[0])] 73 centroids = [Centroid(localdata[0])]
73 for instance in localdata[1:]: 74 for instance in localdata[1:]:
74 i = 0 75 i = 0
75 while i<len(centroids) and not similar(centroids[i].instance, instance): 76 while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
76 i += 1 77 i += 1
77 if i == len(centroids): 78 if i == len(centroids):
78 centroids.append(Centroid(instance)) 79 centroids.append(Centroid(instance))
79 else: 80 else:
80 centroids[i].add(instance) 81 centroids[i].add(instance)
81 82
82 return centroids 83 return centroids
84
85 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
83 86
84 def spectralClustering(similarityMatrix, k, iter=20): 87 def spectralClustering(similarityMatrix, k, iter=20):
85 '''Spectral Clustering algorithm''' 88 '''Spectral Clustering algorithm'''
86 n = len(similarityMatrix) 89 n = len(similarityMatrix)
87 # create Laplacian matrix 90 # create Laplacian matrix