repo/traffic-intelligence: python/ml.py comparison

comparison python/ml.py @ 731:b02431a8234c dev

made prototypecluster generic, in ml module, and added randominitialization

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Tue, 11 Aug 2015 11:38:05 -0400
parents	da1352b89d02
children	1d4dcb5c8708

comparison

equal deleted inserted replaced

-:a850a4f92735
+:b02431a8234c
 	features = whiten(features)
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma
-def motionPatterLearning(objects, maxDistance):
def prototypeCluster(instances, similarityMatrix, minSimilarity, minClusterSize = None, randomInitialization = False):
    '''Finds the exemplar (prototype) instances that represent each cluster

    Returns the prototype indices (positions in the instances list)
    and the cluster label of each instance (the index of its prototype)

    instances: list of elements to cluster; they must have a length (method __len__)
    unless randomInitialization is True, in which case they are not accessed
    similarityMatrix: square numpy array, where row/column i corresponds to instances[i]
    minSimilarity: an instance whose similarity to all existing prototypes
    is < minSimilarity becomes a new prototype
    minClusterSize: if not None, the clusters are refined by iteratively removing
    the smallest cluster and reassigning all instances, until no cluster is smaller
    than minClusterSize or only one prototype is left
    randomInitialization: if True, instances are visited in random order
    instead of by decreasing length

    Non-prototype instances are assigned to their most similar prototype
    An empty similarityMatrix yields ([], [])'''
    nInstances = similarityMatrix.shape[0]
    if nInstances == 0:
        return [], []

    # order in which instances are considered as prototype candidates
    if randomInitialization:
        indices = np.random.permutation(nInstances).tolist()
    else:
        # longest instances first; sorted is stable, so instances of equal length keep their index order
        indices = sorted(range(nInstances), key = lambda i: -len(instances[i]))

    # go through all instances: those too different from all current prototypes become prototypes
    prototypeIndices = [indices[0]]
    for i in indices[1:]:
        if similarityMatrix[i][prototypeIndices].max() < minSimilarity:
            prototypeIndices.append(i)

    # assignment, repeated after each removal of a too small cluster
    while True:
        prototypeSet = set(prototypeIndices)
        labels = [-1]*nInstances
        for i in range(nInstances):
            if i in prototypeSet:
                labels[i] = i
            else:
                labels[i] = prototypeIndices[similarityMatrix[i][prototypeIndices].argmax()]
        # no refinement requested, or nothing left to merge into:
        # removing the last prototype would leave instances without any cluster
        if minClusterSize is None or len(prototypeIndices) <= 1:
            break
        clusterSizes = dict.fromkeys(prototypeIndices, 0)
        for label in labels:
            clusterSizes[label] += 1
        # ties are broken in favour of the earliest created prototype
        smallestClusterIndex = min(prototypeIndices, key = lambda p: clusterSizes[p])
        if clusterSizes[smallestClusterIndex] >= minClusterSize:
            break
        # the removed prototype becomes a regular instance, reassigned in the next pass
        prototypeIndices.remove(smallestClusterIndex)
    return prototypeIndices, labels
+def motionPatternLearning(objects, maxDistance):
 '''
 Placeholder for motion pattern learning: not implemented yet, the body is empty and the arguments are unused
 Planned option: use only the (n?) longest features per object instead of all of them, for speed-up
 TODO'''
 pass
-def prototypeCluster():
-'''
-TODO'''
-pass

Mercurial Hosting > traffic-intelligence

comparison python/ml.py @ 731:b02431a8234c dev