changeset 952:a9b2beef0db4

loading and assigning motion patterns works
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 24 Jul 2017 21:22:18 -0400
parents 2a4f174879dd
children 989917b1ed85
files python/ml.py scripts/learn-motion-patterns.py tracking.cfg
diffstat 3 files changed, 66 insertions(+), 55 deletions(-)
--- a/python/ml.py	Mon Jul 24 00:28:52 2017 -0400
+++ b/python/ml.py	Mon Jul 24 21:22:18 2017 -0400
@@ -167,9 +167,9 @@
             indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
     return prototypeIndices, labels
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, assign = True, initialPrototypeIndices = None):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instances that represent each cluster
-    Returns the prototype indices (in the instances list) and the cluster label of each instance
+    Returns the prototype indices (in the instances list)
 
     the elements in the instances list must have a length (method __len__), or one can use the random initialization
     the positions in the instances list correspond to the similarities
@@ -236,14 +236,9 @@
                     newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()]
                     if prototypeIndices[label] != newCentroidIdx:
                         prototypeIndices[label] = newCentroidIdx
-            elif randomInitialization: # replace prototype by current instance i if longer
-                if len(instances[prototypeIndices[label]]) < len(instances[i]):
-                    prototypeIndices[label] = i
-                    
-    if assign:
-        return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
-    else:
-        return prototypeIndices, None
+            elif len(instances[prototypeIndices[label]]) < len(instances[i]): # replace prototype by current instance i if longer; alternatively, one could test whether randomInitialization or initialPrototypeIndices is not None
+                prototypeIndices[label] = i
+    return prototypeIndices
 
 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
     clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
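
The net effect of the ml.py change is that ml.prototypeCluster now only learns and returns the prototype indices; assigning instances to the prototypes becomes a separate, explicit call to ml.assignToPrototypeClusters. The following sketch illustrates the new two-step calling pattern; the toy trajectories, similarity function and threshold are illustrative assumptions, not project code.

import numpy as np
import ml  # ml.py from the project's python/ directory

# toy "trajectories": arrays of 2D positions, longest first; instances only need a length (__len__)
trajectories = [np.random.rand(n, 2) for n in (43, 40, 25, 22, 20)]
nTrajectories = len(trajectories)
similarities = -np.ones((nTrajectories, nTrajectories))  # filled lazily by the clustering code

def similarityFunc(x, y):
    'toy similarity in [0, 1] based only on lengths (the script uses normalized LCSS)'
    return min(len(x), len(y)) / float(max(len(x), len(y)))

# step 1, learning: prototypeCluster now returns only the prototype indices
prototypeIndices = ml.prototypeCluster(trajectories, similarities, 0.8, similarityFunc)

# step 2, assignment is now an explicit, separate call returning the kept prototypes and one label per instance
prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices,
                                                        similarities, 0.8, similarityFunc, 0)
print(ml.computeClusterSizes(labels, prototypeIndices, -1))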
--- a/scripts/learn-motion-patterns.py	Mon Jul 24 00:28:52 2017 -0400
+++ b/scripts/learn-motion-patterns.py	Mon Jul 24 21:22:18 2017 -0400
@@ -19,25 +19,26 @@
 parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance
 parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True)
 parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = None)
+parser.add_argument('--learn', dest = 'learn', help = 'learn the motion pattern prototypes', action = 'store_true')
 parser.add_argument('--optimize', dest = 'optimizeCentroid', help = 'recompute centroid at each assignment', action = 'store_true')
 parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
 parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int)
 parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true')
 parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true')
 parser.add_argument('--save-matches', dest = 'saveMatches', help = 'saves the assignments of the objects (not for features) to the prototypes', action = 'store_true')
-#parser.add_argument('--assign', dest = 'assign', help = 'saves the assignments of the objects (not for features) to the prototypes', action = 'store_true') # default is train, but one could want only to assign the objects to the loaded prototypes without learning
+parser.add_argument('--assign', dest = 'assign', help = 'assigns the objects to the prototypes and saves them (do not use min cluster size as it will discard prototypes at the beginning if the initial cluster is too small)', action = 'store_true')
 
 args = parser.parse_args()
 
 # use cases
-# 1. learn proto from one file, save in same or another (with traj)
+# 1. learn proto from one file, save in same or another
 # 2. load proto, load objects, update proto, save proto
 # 3. assign objects from one db to proto
 # 4. load objects from several files, save in another -> see metadata: site with view and times
 # 5. keep prototypes, with positions/velocities, in separate db (keep link to original data through filename, type and index)
 
 # TODO add possibility to cluster with velocities
-# TODO add possibility to start with saved prototypes so that one can incrementally learn from several databases
+# TODO add possibility to load all trajectories and use minClusterSize
 # save the objects that match the prototypes
 # write an assignment function for objects
 
@@ -61,7 +62,10 @@
 if args.inputPrototypeDatabaseFilename is not None:
     initialPrototypes = storage.loadPrototypesFromSqlite(args.inputPrototypeDatabaseFilename, True)
     trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]+trajectories
-    initialPrototypeIndices = range(len(initialPrototypes))
+    if len(initialPrototypes) > 0:
+        initialPrototypeIndices = range(len(initialPrototypes))
+    else:
+        initialPrototypeIndices = None
 else:
     initialPrototypes = []
     initialPrototypeIndices = None
@@ -70,49 +74,61 @@
 nTrajectories = len(trajectories)
 
 similarities = -np.ones((nTrajectories, nTrajectories))
+similarityFunc = lambda x,y : lcss.computeNormalized(x, y)
 # the next line can be called again without reinitializing similarities
-prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.optimizeCentroid, args.randomInitialization, args.inputPrototypeDatabaseFilename is not None, initialPrototypeIndices) # assignment is done only if working on the same database, otherwise the matchings will not compare and one has to to matchings on a large scale at once
+if args.learn:
+    prototypeIndices = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, similarityFunc, args.minClusterSize, args.optimizeCentroid, args.randomInitialization, initialPrototypeIndices)
+# assignment is done if explicitly requested as an argument or if working on the same database (starting prototypes from scratch and assigning the objects)
+# (otherwise the matchings will not be comparable and one has to do the matching on a large scale at once)
+
+if args.assign:
+    prototypeIndices, labels = ml.assignToPrototypeClusters(trajectories, prototypeIndices, similarities, args.minSimilarity, similarityFunc, args.minClusterSize)
+    clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
+    print(clusterSizes)
 
-clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
-print(clusterSizes)
+if args.learn or args.assign:
+    prototypes = []
+    for i in prototypeIndices:
+        if args.assign:
+            nMatchings = clusterSizes[i]
+        else:
+            nMatchings = 0
+        if i<len(initialPrototypes):
+            initialPrototypes[i].nMatchings += nMatchings
+            prototypes.append(initialPrototypes[i])
+        else:
+            prototypes.append(moving.Prototype(args.databaseFilename, objects[i-len(initialPrototypes)].getNum(), prototypeType, nMatchings))
 
-prototypes = []
-for i in prototypeIndices:
-    if i<len(initialPrototypes):
-        initialPrototypes[i].nMatchings = 0
-        prototypes.append(initialPrototypes[i])
+    if args.outputPrototypeDatabaseFilename is None:
+        outputPrototypeDatabaseFilename = args.databaseFilename
     else:
-        if args.inputPrototypeDatabaseFilename is None:
-            nmatchings = clusterSizes[i]
-        else:
-            nmatchings = 0
-        prototypes.append(moving.Prototype(args.databaseFilename, objects[i].getNum(), prototypeType, nmatchings))
+        outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename
+        if args.inputPrototypeDatabaseFilename == args.outputPrototypeDatabaseFilename:
+            storage.deleteFromSqlite(args.outputPrototypeDatabaseFilename, 'prototype')
+    storage.savePrototypesToSqlite(outputPrototypeDatabaseFilename, prototypes)
+
+    if args.saveSimilarities:
+        # todo save trajectories and prototypes
+        np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f')
 
-if args.outputPrototypeDatabaseFilename is None:
-    outputPrototypeDatabaseFilename = args.databaseFilename
+    labelsToProtoIndices = {protoId: i for i, protoId in enumerate(prototypeIndices)}
+    if args.assign and args.saveMatches:
+        # save in the db that originally contained the data
+        # remove the previous assignments first?
+        storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, objects, [labelsToProtoIndices[l] for l in labels], prototypes)
+
+    if args.display and args.assign:
+        from matplotlib.pyplot import figure, show, axis
+        figure()
+        for i,o in enumerate(objects):
+            if i not in prototypeIndices:
+                if labels[i] < 0:
+                    o.plot('kx')
+                else:
+                    o.plot(utils.colors[labels[i]])
+        for i in prototypeIndices:
+            objects[i].plot(utils.colors[i]+'o')
+        axis('equal')
+        show()
 else:
-    outputPrototypeDatabaseFilename = args.outputPrototypeDatabaseFilename
-storage.savePrototypesToSqlite(outputPrototypeDatabaseFilename, prototypes)
-
-if args.saveSimilarities:
-    np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f')
-
-labelsToProtoIndices = {protoId: i for i, protoId in enumerate(prototypeIndices)}
-if args.saveMatches: # or args.assign
-# save in the db that contained originally the data
-    # remove the previous assignments?
-    storage.savePrototypeAssignmentsToSqlite(args.databaseFilename, objects, [labelsToProtoIndices[l] for l in labels], prototypes)
-
-if args.display:
-    from matplotlib.pyplot import figure, show, axis
-    figure()
-    for i,o in enumerate(objects):
-        if i not in prototypeIndices:
-            if labels[i] < 0:
-                o.plot('kx')
-            else:
-                o.plot(utils.colors[labels[i]])
-    for i in prototypeIndices:
-            objects[i].plot(utils.colors[i]+'o')
-    axis('equal')
-    show()
+    print('Neither learning nor assigning: doing nothing')
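
With these script changes, learning and assignment are controlled by the new --learn and --assign flags, and prototypes loaded from an existing database seed the clustering instead of being relearned. The following sketch assembles use case 2 (incremental learning) from the pieces of the diff; the file name, the 0.5 threshold, newTrajectories and similarityFunc are illustrative placeholders, not project defaults.

import numpy as np
import ml, storage

# load previously saved prototypes and put their trajectories first in the instance list
initialPrototypes = storage.loadPrototypesFromSqlite('prototypes.sqlite', True)
trajectories = [p.getMovingObject().getPositions().asArray().T for p in initialPrototypes]
trajectories += newTrajectories  # trajectories from the current database, assumed already loaded
initialPrototypeIndices = range(len(initialPrototypes)) if len(initialPrototypes) > 0 else None

similarities = -np.ones((len(trajectories), len(trajectories)))
prototypeIndices = ml.prototypeCluster(trajectories, similarities, 0.5, similarityFunc,
                                       initialPrototypeIndices = initialPrototypeIndices)

# indices below len(initialPrototypes) point to reused prototypes (their nMatchings is accumulated);
# larger indices are new prototypes, offset by len(initialPrototypes) into the current objects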
--- a/tracking.cfg	Mon Jul 24 00:28:52 2017 -0400
+++ b/tracking.cfg	Mon Jul 24 21:22:18 2017 -0400
@@ -1,4 +1,4 @@
-# filename of the video to process
+# filename of the video to process (can be an image sequence, e.g. image%04d.png)
 video-filename = laurier.avi
 # filename of the database where results are saved
 database-filename = laurier.sqlite
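
The tracking.cfg comment now notes that video-filename also accepts a printf-style image sequence pattern. A quick way to check that such a pattern is readable, assuming the frames are read with OpenCV's VideoCapture (cv2 installed and the numbered images present in the working directory):

import cv2

# printf-style pattern, as in 'video-filename = image%04d.png'
capture = cv2.VideoCapture('image%04d.png')
ret, frame = capture.read()
print('first frame loaded:', ret, frame.shape if ret else None)
capture.release()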