changeset 907:9fd7b18f75b4

rearranged motion pattern learning
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 23 Jun 2017 23:50:02 -0400
parents a57e6fbcd8e3
children b297525b2cbf
files python/ml.py scripts/learn-motion-patterns.py
diffstat 2 files changed, 75 insertions(+), 54 deletions(-)
--- a/python/ml.py	Fri Jun 23 00:03:17 2017 -0400
+++ b/python/ml.py	Fri Jun 23 23:50:02 2017 -0400
@@ -114,25 +114,54 @@
 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
 
 def spectralClustering(similarityMatrix, k, iter=20):
-	'''Spectral Clustering algorithm'''
-	n = len(similarityMatrix)
-	# create Laplacian matrix
-	rowsum = np.sum(similarityMatrix,axis=0)
-	D = np.diag(1 / np.sqrt(rowsum))
-	I = np.identity(n)
-	L = I - np.dot(D,np.dot(similarityMatrix,D))
-	# compute eigenvectors of L
-	U,sigma,V = np.linalg.svd(L)
-	# create feature vector from k first eigenvectors
-	# by stacking eigenvectors as columns
-	features = np.array(V[:k]).T
-	# k-means
-	features = whiten(features)
-	centroids,distortion = kmeans(features,k, iter)
-	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
-	return code,sigma	
+    '''Spectral Clustering algorithm'''
+    n = len(similarityMatrix)
+    # create Laplacian matrix
+    rowsum = np.sum(similarityMatrix,axis=0)
+    D = np.diag(1 / np.sqrt(rowsum))
+    I = np.identity(n)
+    L = I - np.dot(D,np.dot(similarityMatrix,D))
+    # compute eigenvectors of L
+    U,sigma,V = np.linalg.svd(L)
+    # create feature vector from k first eigenvectors
+    # by stacking eigenvectors as columns
+    features = np.array(V[:k]).T
+    # k-means
+    features = whiten(features)
+    centroids,distortion = kmeans(features,k, iter)
+    code,distance = vq(features,centroids) # code from 0 (first cluster) to k-1 (last cluster)
+    return code,sigma
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
+def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None):
+    '''Assigns instances to prototypes
+    if minClusterSize is not None, the clusters are refined by iteratively removing the smallest cluster
+    and reassigning its elements until no cluster is smaller than minClusterSize'''
+    indices = [i for i in range(len(instances)) if i not in prototypeIndices]
+    labels = [-1]*len(instances)
+    assign = True
+    while assign:
+        for i in prototypeIndices:
+            labels[i] = i
+        for i in indices:
+            if similarityFunc is not None:
+                for j in prototypeIndices:
+                    if similarities[i][j] < 0:
+                        similarities[i][j] = similarityFunc(instances[i], instances[j])
+                        similarities[j][i] = similarities[i][j]
+            prototypeIdx = similarities[i][prototypeIndices].argmax()
+            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
+                labels[i] = prototypeIndices[prototypeIdx]
+            else:
+                labels[i] = -1 # outlier
+        clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
+        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
+        assign = (minClusterSize is not None and clusterSizes[smallestClusterIndex] < minClusterSize)
+        if assign:
+            prototypeIndices.remove(smallestClusterIndex)
+            indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
+    return prototypeIndices, labels
+
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, randomInitialization = False, assign = True, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list) and the cluster label of each instance
 
@@ -144,8 +173,6 @@
     if an instance is different enough (<minSimilarity), 
     it will become a new prototype. 
     Non-prototype instances will be assigned to an existing prototype
-    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
-    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
 
     TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)'''
 
@@ -167,7 +194,10 @@
                 return 1
         indices.sort(compare)
     # go through all instances
-    prototypeIndices = [indices[0]]
+    if initialPrototypeIndices is None:
+        prototypeIndices = [indices[0]]
+    else:
+        prototypeIndices = list(initialPrototypeIndices) # copy so that the caller's list is not modified
     for i in indices[1:]:
         if similarityFunc is not None:
             for j in prototypeIndices:
@@ -180,33 +210,10 @@
             label = similarities[i][prototypeIndices].argmax()
             if len(instances[prototypeIndices[label]]) < len(instances[i]):
                 prototypeIndices[label] = i
-
-    # assignment
-    indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
-    assign = True
-    while assign:
-        labels = [-1]*similarities.shape[0]
-        for i in prototypeIndices:
-            labels[i] = i
-        for i in indices:
-            if similarityFunc is not None:
-                for j in prototypeIndices:
-                    if similarities[i][j] < 0:
-                        similarities[i][j] = similarityFunc(instances[i], instances[j])
-                        similarities[j][i] = similarities[i][j]
-            prototypeIdx = similarities[i][prototypeIndices].argmax()
-            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
-                labels[i] = prototypeIndices[prototypeIdx]
-            else:
-                labels[i] = -1 # outlier
-        clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
-        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
-        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
-        if assign:
-            prototypeIndices.remove(smallestClusterIndex)
-            indices.append(smallestClusterIndex)
-
-    return prototypeIndices, labels
+    if assign:
+        return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
+    else:
+        return prototypeIndices, None
 
 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
     clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
@@ -232,7 +239,7 @@
         v, w = np.linalg.eigh(covariance)
         angle = np.arctan2(w[0][1], w[0][0])
         angle = 180*angle/np.pi  # convert to degrees                                             
-	v *= 4
+        v *= 4
         ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
         ell.set_clip_box(fig.bbox)
         ell.set_alpha(alpha)
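
For reference, a minimal sketch of how the refactored functions compose, with toy instances and a hypothetical similarity function (the script below passes lcss.computeNormalized instead); the matrix filled with -1 marks similarities as not yet computed, so they are evaluated lazily:

import numpy as np
import ml

instances = [[1., 1.1, 1.2], [1., 1.15, 1.2], [5., 5.1], [5., 5.05], [9.]] # toy data
def similarity(x, y): # hypothetical, any function returning values in [0, 1]
    return 1./(1.+abs(x[0]-y[0]))

similarities = -np.ones((len(instances), len(instances))) # negative entries mean not computed yet

# one call selects the prototypes and assigns all instances to them
prototypeIndices, labels = ml.prototypeCluster(instances, similarities, 0.5, similarity, minClusterSize = 2)

# with assign = False, only prototype selection runs (labels is None);
# the assignment step can then be run separately, e.g. on other instances
prototypeIndices, _ = ml.prototypeCluster(instances, similarities, 0.5, similarity, assign = False)
prototypeIndices, labels = ml.assignToPrototypeClusters(instances, prototypeIndices, similarities, 0.5, similarity, 2)
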
--- a/scripts/learn-motion-patterns.py	Fri Jun 23 00:03:17 2017 -0400
+++ b/scripts/learn-motion-patterns.py	Fri Jun 23 23:50:02 2017 -0400
@@ -18,14 +18,25 @@
 parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True)
 parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = None)
 parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
-parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int, default = None)
+parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int)
 parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true')
 parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true')
+#parser.add_argument('--save-matches', dest = 'saveMatches', help = 'save the matched prototype information', action = 'store_true')
 
 args = parser.parse_args()
 
-# TODO parameters (random init?) and what to learn from: objects, features, longest features from objects
+# use cases
+# 1. learn prototypes from one file, save them in the same or another file (with trajectories)
+# 2. load prototypes, load objects, update the prototypes, save the prototypes
+# 3. assign objects from one database to the prototypes
+# 4. load objects from several files, save them in another
+
 # TODO add possibility to cluster with velocities
+# TODO add possibility to start with saved prototypes so that one can incrementally learn from several databases
+# save prototypes with the database name; add an option to keep the trajectory alongside (no need if saved in the same database)
+# loading prototypes must load the moving objects
+# save the objects that match the prototypes
+# write an assignment function for objects
 
 trajectoryType = args.trajectoryType
 prototypeType = args.trajectoryType
@@ -49,7 +60,7 @@
 
 similarities = -np.ones((nTrajectories, nTrajectories))
 
-prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.randomInitialization) # this line can be called again without reinitializing similarities
+prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.randomInitialization, True, None) # this line can be called again without reinitializing similarities
 
 clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
 print(clusterSizes)
@@ -59,6 +70,11 @@
 if args.saveSimilarities:
     np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f')
 
+# if args.saveMatches:
+#     out = storage.openCheck(utils.removeExtension(args.databaseFilename)+'-prototype-matches.csv', 'w')
+#     for o in objects:
+#         out.write('')
+
 if args.display:
     from matplotlib.pyplot import figure, show, axis
     figure()
@@ -72,5 +88,3 @@
             objects[i].plot(utils.colors[i]+'o')
     axis('equal')
     show()
-
-# TODO store the prototypes trajectories, add option so store similarities (the most expensive stuff) with limited accuracy
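
The initialPrototypeIndices argument added above enables the incremental use case (2.) listed in the comments: a sketch under the assumption that previously learnt prototype trajectories (hypothetical prototypeTrajectories, loaded beforehand) are prepended to the newly loaded trajectories so that their indices are known:

# sketch only: prototypeTrajectories and newTrajectories are assumed to be
# loaded beforehand, e.g. with storage.loadTrajectoriesFromSqlite
trajectories = prototypeTrajectories + newTrajectories
similarities = -np.ones((len(trajectories), len(trajectories)))
prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity,
                                               lambda x,y : lcss.computeNormalized(x, y),
                                               args.minClusterSize, args.randomInitialization,
                                               initialPrototypeIndices = range(len(prototypeTrajectories)))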