diff python/ml.py @ 731:b02431a8234c dev

made prototypecluster generic, in ml module, and added randominitialization
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 11 Aug 2015 11:38:05 -0400
parents da1352b89d02
children 1d4dcb5c8708
line wrap: on
line diff
--- a/python/ml.py	Tue Aug 11 10:52:04 2015 -0400
+++ b/python/ml.py	Tue Aug 11 11:38:05 2015 -0400
@@ -111,13 +111,59 @@
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma	
 
-def motionPatterLearning(objects, maxDistance):
+def prototypeCluster(instances, similarityMatrix, minSimilarity, minClusterSize = None, randomInitialization = False):
+    '''Finds exemplar (prototype) instance that represent each cluster
+    Returns the prototype indices (in the instances list) and the cluster label of each instance
+
+    the elements in the instances list must have a length (method __len__), or one can use the random initialization
+    the positions in the instances list corresponds to the similarityMatrix
+
+    if an instance is different enough (<minSimilarity), 
+    it will become a new prototype. 
+    Non-prototype instances will be assigned to an existing prototype
+    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
+    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+
+    # sort instances based on length
+    indices = range(similarityMatrix.shape[0])
+    if randomInitialization:
+        indices = np.random.permutation(indices)
+    else:
+        def compare(i, j):
+            if len(instances[i]) > len(instances[j]):
+                return -1
+            elif len(instances[i]) == len(instances[j]):
+                return 0
+            else:
+                return 1
+        indices.sort(compare)
+    # go through all instances
+    prototypeIndices = [indices[0]]
+    for i in indices[1:]:
+        if similarityMatrix[i][prototypeIndices].max() < minSimilarity:
+             prototypeIndices.append(i)
+
+    # assignment
+    indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices]
+    assign = True
+    while assign:
+        labels = [-1]*similarityMatrix.shape[0]
+        for i in prototypeIndices:
+            labels[i] = i
+        for i in indices:
+            prototypeIndex = similarityMatrix[i][prototypeIndices].argmax()
+            labels[i] = prototypeIndices[prototypeIndex]
+        clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
+        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) 
+        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
+        if assign:
+            prototypeIndices.remove(smallestClusterIndex)
+            indices.append(smallestClusterIndex)
+
+    return prototypeIndices, labels
+
+def motionPatternLearning(objects, maxDistance):
     ''' 
     Option to use only the (n?) longest features per object instead of all for speed up
     TODO'''
     pass
-
-def prototypeCluster():
-    ''' 
-    TODO'''
-    pass