changeset 735:0e875a7f5759 dev

modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 12 Aug 2015 00:24:06 -0400
parents 1d4dcb5c8708
children 967d244968a4
files python/events.py python/ml.py scripts/learn-motion-patterns.py
diffstat 3 files changed, 38 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/python/events.py	Tue Aug 11 12:55:09 2015 -0400
+++ b/python/events.py	Wed Aug 12 00:24:06 2015 -0400
@@ -295,8 +295,8 @@
         print('unknown type of point: '+pointType)
     return allPoints
 
-def prototypeCluster(interactions, similarityMatrix, indicatorName, minSimilarity, minClusterSize = None, randomInitialization = False):
-    return ml.prototypeCluster([inter.getIndicator(indicatorName) for inter in interactions], similarityMatrix, minSimilarity, minClusterSize, randomInitialization)
+def prototypeCluster(interactions, similarities, indicatorName, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
+    return ml.prototypeCluster([inter.getIndicator(indicatorName) for inter in interactions], similarities, minSimilarity, similarityFunc, minClusterSize, randomInitialization)
 
 class Crossing(moving.STObject):
     '''Class for the event of a street crossing
--- a/python/ml.py	Tue Aug 11 12:55:09 2015 -0400
+++ b/python/ml.py	Wed Aug 12 00:24:06 2015 -0400
@@ -111,12 +111,14 @@
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma	
 
-def prototypeCluster(instances, similarityMatrix, minSimilarity, minClusterSize = None, randomInitialization = False):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list) and the cluster label of each instance
 
     the elements in the instances list must have a length (method __len__), or one can use the random initialization
-    the positions in the instances list corresponds to the similarityMatrix
+    the positions in the instances list corresponds to the similarities
+    if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
+    similarities must still be allocated with the right size
 
     if an instance is different enough (<minSimilarity), 
     it will become a new prototype. 
@@ -125,7 +127,7 @@
     and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
 
     # sort instances based on length
-    indices = range(similarityMatrix.shape[0])
+    indices = range(len(instances))
     if randomInitialization:
         indices = np.random.permutation(indices)
     else:
@@ -140,21 +142,34 @@
     # go through all instances
     prototypeIndices = [indices[0]]
     for i in indices[1:]:
-        if similarityMatrix[i][prototypeIndices].max() < minSimilarity:
+        if similarityFunc is not None:
+            for j in prototypeIndices:
+                if similarities[i][j] < 0:
+                    similarities[i][j] = similarityFunc(instances[i], instances[j])
+                    similarities[j][i] = similarities[i][j]
+        if similarities[i][prototypeIndices].max() < minSimilarity:
              prototypeIndices.append(i)
 
     # assignment
-    indices = [i for i in range(similarityMatrix.shape[0]) if i not in prototypeIndices]
+    indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
     assign = True
     while assign:
-        labels = [-1]*similarityMatrix.shape[0]
+        labels = [-1]*similarities.shape[0]
         for i in prototypeIndices:
             labels[i] = i
         for i in indices:
-            prototypeIndex = similarityMatrix[i][prototypeIndices].argmax()
-            labels[i] = prototypeIndices[prototypeIndex]
+            if similarityFunc is not None:
+                for j in prototypeIndices:
+                    if similarities[i][j] < 0:
+                        similarities[i][j] = similarityFunc(instances[i], instances[j])
+                        similarities[j][i] = similarities[i][j]
+            prototypeIdx = similarities[i][prototypeIndices].argmax()
+            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
+                labels[i] = prototypeIndices[prototypeIdx]
+            else:
+                labels[i] = -1 # outlier
         clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
-        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) 
+        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
         assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
         if assign:
             prototypeIndices.remove(smallestClusterIndex)
--- a/scripts/learn-motion-patterns.py	Tue Aug 11 12:55:09 2015 -0400
+++ b/scripts/learn-motion-patterns.py	Wed Aug 12 00:24:06 2015 -0400
@@ -42,18 +42,23 @@
 lcss = utils.LCSS(metric = args.metric, epsilon = args.epsilon)
 nTrajectories = len(trajectories)
 
-similarities = np.zeros((nTrajectories, nTrajectories))
-for i in xrange(nTrajectories):
-    for j in xrange(i):
-        similarities[i,j] = lcss.computeNormalized(trajectories[i], trajectories[j])
-        similarities[j,i] = similarities[i,j]
+similarities = -np.ones((nTrajectories, nTrajectories))
+# for i in xrange(nTrajectories):
+#     for j in xrange(i):
+#         similarities[i,j] = lcss.computeNormalized(trajectories[i], trajectories[j])
+#         similarities[j,i] = similarities[i,j]
 
-prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, args.minClusterSize)
+prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize) # this line can be called again without reinitializing similarities
 
 if args.display:
+    from matplotlib.pyplot import figure
+    figure()
     for i,o in enumerate(objects):
         if i not in prototypeIndices:
-            o.plot(utils.colors[labels[i]])
+            if labels[i] < 0:
+                o.plot('kx')
+            else:
+                o.plot(utils.colors[labels[i]])
     for i in prototypeIndices:
             objects[i].plot(utils.colors[i]+'o')