Mercurial Hosting > traffic-intelligence
diff python/ml.py @ 382:ba813f148ade
development for clustering
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Sun, 21 Jul 2013 10:23:15 -0400 |
parents | adfd4f70ee1d |
children | 8bc632cb8344 |
line wrap: on
line diff
```diff
--- a/python/ml.py Fri Jul 19 11:58:35 2013 -0400
+++ b/python/ml.py Sun Jul 21 10:23:15 2013 -0400
@@ -54,7 +54,7 @@
         text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 
 
-def clustering(data, similar, initialCentroids = []):
+def assignCluster(data, similarFunc, initialCentroids = [], shuffleData = True):
     '''k-means algorithm with similarity function
     Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
     The number of clusters will be determined accordingly
@@ -65,14 +65,15 @@
     from random import shuffle
     from copy import copy, deepcopy
     localdata = copy(data) # shallow copy to avoid modifying data
-    shuffle(localdata)
+    if shuffleData:
+        shuffle(localdata)
     if initialCentroids:
         centroids = deepcopy(initialCentroids)
     else:
         centroids = [Centroid(localdata[0])]
     for instance in localdata[1:]:
         i = 0
-        while i<len(centroids) and not similar(centroids[i].instance, instance):
+        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
             i += 1
         if i == len(centroids):
             centroids.append(Centroid(instance))
@@ -81,6 +82,8 @@
 
     return centroids
 
+# TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
+
 def spectralClustering(similarityMatrix, k, iter=20):
     '''Spectral Clustering algorithm'''
     n = len(similarityMatrix)
```