changeset 728:4e89341edd29 dev

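Added the capability to enforce a minimum cluster size: the smallest clusters are removed iteratively and their indicators are re-assigned to the remaining clusters until no cluster is smaller than the minimum.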
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 10 Aug 2015 17:51:49 -0400
parents 85af65b6d531
children dad99b86a104
files python/events.py
diffstat 1 files changed, 19 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/python/events.py	Sat Jul 25 23:28:52 2015 -0400
+++ b/python/events.py	Mon Aug 10 17:51:49 2015 -0400
@@ -295,13 +295,15 @@
         print('unknown type of point: '+pointType)
     return allPoints
 
-def prototypeCluster(interactions, similarityMatrix, alignmentMatrix, indicatorName, minSimilarity):
+def prototypeCluster(interactions, similarityMatrix, alignmentMatrix, indicatorName, minSimilarity, minClusterSize = None):
     '''Finds exemplar indicator time series for all interactions
     Returns the prototype indices (in the interaction list) and the label of each indicator (interaction)
 
     if an indicator profile (time series) is different enough (<minSimilarity), 
     it will become a new prototype. 
-    Non-prototype interactions will be assigned to an existing prototype'''
+    Non-prototype interactions will be assigned to an existing prototype
+    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
+    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
 
     # sort indicators based on length
     indices = range(similarityMatrix.shape[0])
@@ -320,13 +322,22 @@
              prototypeIndices.append(i)
 
     # assignment
-    labels = [-1]*similarityMatrix.shape[0]
     indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices]
-    for i in prototypeIndices:
-        labels[i] = i
-    for i in indices:
-        prototypeIndex = similarityMatrix[i][prototypeIndices].argmax()
-        labels[i] = prototypeIndices[prototypeIndex]
+    assign = True
+    while assign:
+        labels = [-1]*similarityMatrix.shape[0]
+        for i in prototypeIndices:
+            labels[i] = i
+        for i in indices:
+            prototypeIndex = similarityMatrix[i][prototypeIndices].argmax()
+            labels[i] = prototypeIndices[prototypeIndex]
+        clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
+        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) 
+        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
+        print prototypeIndices, smallestClusterIndex, clusterSizes[smallestClusterIndex]
+        if assign:
+            prototypeIndices.remove(smallestClusterIndex)
+            indices.append(smallestClusterIndex)
 
     return prototypeIndices, labels