Mercurial Hosting > traffic-intelligence
comparison python/ml.py @ 382:ba813f148ade
development for clustering
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Sun, 21 Jul 2013 10:23:15 -0400 |
parents | adfd4f70ee1d |
children | 8bc632cb8344 |
comparison
equal
deleted
inserted
replaced
381:387cc0142211 | 382:ba813f148ade |
---|---|
52 from matplotlib.pylab import text | 52 from matplotlib.pylab import text |
53 self.instance.draw(options) | 53 self.instance.draw(options) |
54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) | 54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) |
55 | 55 |
56 | 56 |
57 def clustering(data, similar, initialCentroids = []): | 57 def assignCluster(data, similarFunc, initialCentroids = [], shuffleData = True): |
58 '''k-means algorithm with similarity function | 58 '''k-means algorithm with similarity function |
59 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. | 59 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. |
60 The number of clusters will be determined accordingly | 60 The number of clusters will be determined accordingly |
61 | 61 |
62 data: list of instances | 62 data: list of instances |
63 averageCentroid: ''' | 63 averageCentroid: ''' |
64 | 64 |
65 from random import shuffle | 65 from random import shuffle |
66 from copy import copy, deepcopy | 66 from copy import copy, deepcopy |
67 localdata = copy(data) # shallow copy to avoid modifying data | 67 localdata = copy(data) # shallow copy to avoid modifying data |
68 shuffle(localdata) | 68 if shuffleData: |
| | 69 shuffle(localdata) |
69 if initialCentroids: | 70 if initialCentroids: |
70 centroids = deepcopy(initialCentroids) | 71 centroids = deepcopy(initialCentroids) |
71 else: | 72 else: |
72 centroids = [Centroid(localdata[0])] | 73 centroids = [Centroid(localdata[0])] |
73 for instance in localdata[1:]: | 74 for instance in localdata[1:]: |
74 i = 0 | 75 i = 0 |
75 while i<len(centroids) and not similar(centroids[i].instance, instance): | 76 while i<len(centroids) and not similarFunc(centroids[i].instance, instance): |
76 i += 1 | 77 i += 1 |
77 if i == len(centroids): | 78 if i == len(centroids): |
78 centroids.append(Centroid(instance)) | 79 centroids.append(Centroid(instance)) |
79 else: | 80 else: |
80 centroids[i].add(instance) | 81 centroids[i].add(instance) |
81 | 82 |
82 return centroids | 83 return centroids |
| | 84 |
| | 85 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements |
83 | 86 |
84 def spectralClustering(similarityMatrix, k, iter=20): | 87 def spectralClustering(similarityMatrix, k, iter=20): |
85 '''Spectral Clustering algorithm''' | 88 '''Spectral Clustering algorithm''' |
86 n = len(similarityMatrix) | 89 n = len(similarityMatrix) |
87 # create Laplacian matrix | 90 # create Laplacian matrix |