Mercurial Hosting > traffic-intelligence
comparison python/ml.py @ 952:a9b2beef0db4
loading and assigning motion patterns works
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 24 Jul 2017 21:22:18 -0400 |
parents | d6c1c05d11f5 |
children | 989917b1ed85 |
comparison
equal
deleted
inserted
replaced
951:2a4f174879dd | 952:a9b2beef0db4 |
---|---|
165 if assign: | 165 if assign: |
166 prototypeIndices.remove(smallestClusterIndex) | 166 prototypeIndices.remove(smallestClusterIndex) |
167 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] | 167 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] |
168 return prototypeIndices, labels | 168 return prototypeIndices, labels |
169 | 169 |
170 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, assign = True, initialPrototypeIndices = None): | 170 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, initialPrototypeIndices = None): |
171 '''Finds the exemplar (prototype) instance that represents each cluster | 171 '''Finds the exemplar (prototype) instance that represents each cluster |
172 Returns the prototype indices (in the instances list) and the cluster label of each instance | 172 Returns the prototype indices (in the instances list) |
173 | 173 |
174 the elements in the instances list must have a length (method __len__), or one can use the random initialization | 174 the elements in the instances list must have a length (method __len__), or one can use the random initialization |
175 the positions in the instances list correspond to the similarities | 175 the positions in the instances list correspond to the similarities |
176 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) | 176 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) |
177 similarities must still be allocated with the right size | 177 similarities must still be allocated with the right size |
234 clusterIndices = clusters[label] | 234 clusterIndices = clusters[label] |
235 clusterSimilarities = similarities[clusterIndices][:,clusterIndices] | 235 clusterSimilarities = similarities[clusterIndices][:,clusterIndices] |
236 newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()] | 236 newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()] |
237 if prototypeIndices[label] != newCentroidIdx: | 237 if prototypeIndices[label] != newCentroidIdx: |
238 prototypeIndices[label] = newCentroidIdx | 238 prototypeIndices[label] = newCentroidIdx |
239 elif randomInitialization: # replace prototype by current instance i if longer | 239 elif len(instances[prototypeIndices[label]]) < len(instances[i]): # replace prototype by current instance i if longer # otherwise, possible to test if randomInitialization or initialPrototypes is not None |
240 if len(instances[prototypeIndices[label]]) < len(instances[i]): | 240 prototypeIndices[label] = i |
241 prototypeIndices[label] = i | 241 return prototypeIndices |
242 | |
243 if assign: | |
244 return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize) | |
245 else: | |
246 return prototypeIndices, None | |
247 | 242 |
248 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | 243 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): |
249 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 244 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
250 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) | 245 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) |
251 return clusterSizes | 246 return clusterSizes |