Mercurial Hosting > traffic-intelligence

--- a/python/ml.py	Mon Jul 11 17:52:06 2016 -0400
+++ b/python/ml.py	Wed Jul 13 23:45:47 2016 -0400
@@ -145,7 +145,9 @@
     it will become a new prototype.
     Non-prototype instances will be assigned to an existing prototype
     if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
-    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
+
+    TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)'''

     # sort instances based on length
     indices = range(len(instances))
@@ -169,7 +171,11 @@
                     similarities[i][j] = similarityFunc(instances[i], instances[j])
                     similarities[j][i] = similarities[i][j]
         if similarities[i][prototypeIndices].max() < minSimilarity:
-             prototypeIndices.append(i)
+            prototypeIndices.append(i)
+        elif randomInitialization: # replace prototype by current instance i if longer
+            label = similarities[i][prototypeIndices].argmax()
+            if len(instances[prototypeIndices[label]]) < len(instances[i]):
+                prototypeIndices[label] = i

     # assignment
     indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
--- a/scripts/learn-motion-patterns.py	Mon Jul 11 17:52:06 2016 -0400
+++ b/scripts/learn-motion-patterns.py	Wed Jul 13 23:45:47 2016 -0400
@@ -17,8 +17,10 @@
 parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance
 parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True)
 parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = None)
+parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
 parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int, default = None)
-parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true') # default is manhattan distance
+parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true')
+parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true')

 args = parser.parse_args()

@@ -45,15 +47,14 @@
 nTrajectories = len(trajectories)

 similarities = -np.ones((nTrajectories, nTrajectories))
-# for i in xrange(nTrajectories):
-#     for j in xrange(i):
-#         similarities[i,j] = lcss.computeNormalized(trajectories[i], trajectories[j])
-#         similarities[j,i] = similarities[i,j]

-prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize) # this line can be called again without reinitializing similarities
+prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.randomInitialization) # this line can be called again without reinitializing similarities

 print(ml.computeClusterSizes(labels, prototypeIndices, -1))

+if args.saveSimilarities:
+    np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f') # fixed-point, 4 decimals: a bare '%.4' has no conversion type and is rejected when rows are formatted
+
 if args.display:
     from matplotlib.pyplot import figure, show
     figure()
@@ -67,4 +68,4 @@
             objects[i].plot(utils.colors[i]+'o')
     show()

-# TODO store the prototypes (if features, easy, if objects, info must be stored about the type)
+# TODO store the prototype trajectories, add option to store similarities (the most expensive stuff) with limited accuracy