traffic-intelligence: changeset 907:9fd7b18f75b4
rearranged motion pattern learning
author   | Nicolas Saunier <nicolas.saunier@polymtl.ca>
---------|---------------------------------------------
date     | Fri, 23 Jun 2017 23:50:02 -0400
parents  | a57e6fbcd8e3
children | b297525b2cbf
files    | python/ml.py, scripts/learn-motion-patterns.py
diffstat | 2 files changed, 75 insertions(+), 54 deletions(-)
--- a/python/ml.py	Fri Jun 23 00:03:17 2017 -0400
+++ b/python/ml.py	Fri Jun 23 23:50:02 2017 -0400
@@ -114,25 +114,54 @@
 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
 
 def spectralClustering(similarityMatrix, k, iter=20):
-    '''Spectral Clustering algorithm'''
-    n = len(similarityMatrix)
-    # create Laplacian matrix
-    rowsum = np.sum(similarityMatrix,axis=0)
-    D = np.diag(1 / np.sqrt(rowsum))
-    I = np.identity(n)
-    L = I - np.dot(D,np.dot(similarityMatrix,D))
-    # compute eigenvectors of L
-    U,sigma,V = np.linalg.svd(L)
-    # create feature vector from k first eigenvectors
-    # by stacking eigenvectors as columns
-    features = np.array(V[:k]).T
-    # k-means
-    features = whiten(features)
-    centroids,distortion = kmeans(features,k, iter)
-    code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
-    return code,sigma
+    '''Spectral Clustering algorithm'''
+    n = len(similarityMatrix)
+    # create Laplacian matrix
+    rowsum = np.sum(similarityMatrix,axis=0)
+    D = np.diag(1 / np.sqrt(rowsum))
+    I = np.identity(n)
+    L = I - np.dot(D,np.dot(similarityMatrix,D))
+    # compute eigenvectors of L
+    U,sigma,V = np.linalg.svd(L)
+    # create feature vector from k first eigenvectors
+    # by stacking eigenvectors as columns
+    features = np.array(V[:k]).T
+    # k-means
+    features = whiten(features)
+    centroids,distortion = kmeans(features,k, iter)
+    code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
+    return code,sigma
 
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
+def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None):
+    '''Assigns instances to prototypes
+    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
+    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+    indices = [i for i in range(len(instances)) if i not in prototypeIndices]
+    labels = [-1]*len(instances)
+    assign = True
+    while assign:
+        for i in prototypeIndices:
+            labels[i] = i
+        for i in indices:
+            if similarityFunc is not None:
+                for j in prototypeIndices:
+                    if similarities[i][j] < 0:
+                        similarities[i][j] = similarityFunc(instances[i], instances[j])
+                        similarities[j][i] = similarities[i][j]
+            prototypeIdx = similarities[i][prototypeIndices].argmax()
+            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
+                labels[i] = prototypeIndices[prototypeIdx]
+            else:
+                labels[i] = -1 # outlier
+        clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
+        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
+        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
+        if assign:
+            prototypeIndices.remove(smallestClusterIndex)
+            indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
+    return prototypeIndices, labels
+
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, randomInitialization = False, assign = True, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
 
     Returns the prototype indices (in the instances list) and the cluster label of each instance
@@ -144,8 +173,6 @@
     if an instance is different enough (<minSimilarity), it will become a new prototype.
     Non-prototype instances will be assigned to an existing prototype
-    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
-    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
 
     TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)'''
@@ -167,7 +194,10 @@
             return 1
         indices.sort(compare)
     # go through all instances
-    prototypeIndices = [indices[0]]
+    if initialPrototypeIndices is None:
+        prototypeIndices = [indices[0]]
+    else:
+        prototypeIndices = initialPrototypeIndices
     for i in indices[1:]:
         if similarityFunc is not None:
             for j in prototypeIndices:
@@ -180,33 +210,10 @@
             label = similarities[i][prototypeIndices].argmax()
             if len(instances[prototypeIndices[label]]) < len(instances[i]):
                 prototypeIndices[label] = i
-
-    # assignment
-    indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
-    assign = True
-    while assign:
-        labels = [-1]*similarities.shape[0]
-        for i in prototypeIndices:
-            labels[i] = i
-        for i in indices:
-            if similarityFunc is not None:
-                for j in prototypeIndices:
-                    if similarities[i][j] < 0:
-                        similarities[i][j] = similarityFunc(instances[i], instances[j])
-                        similarities[j][i] = similarities[i][j]
-            prototypeIdx = similarities[i][prototypeIndices].argmax()
-            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
-                labels[i] = prototypeIndices[prototypeIdx]
-            else:
-                labels[i] = -1 # outlier
-        clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
-        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
-        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
-        if assign:
-            prototypeIndices.remove(smallestClusterIndex)
-            indices.append(smallestClusterIndex)
-
-    return prototypeIndices, labels
+    if assign:
+        return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
+    else:
+        return prototypeIndices, None
 
 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
     clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
@@ -232,7 +239,7 @@
         v, w = np.linalg.eigh(covariance)
         angle = np.arctan2(w[0][1], w[0][0])
         angle = 180*angle/np.pi # convert to degrees
-        v *= 4
+        v *= 4
         ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
         ell.set_clip_box(fig.bbox)
         ell.set_alpha(alpha)
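To illustrate the new split between prototype learning and cluster assignment, here is a minimal sketch of how the two functions might be called after this change. It assumes python/ml.py is importable as ml and a Python 2 environment (the module still uses list.sort with a comparison function); the toy instances, the thresholds and the similarity function are hypothetical stand-ins for trajectories and the LCSS-based measure used by the script.

```python
import numpy as np
import ml  # python/ml.py from this repository, assumed importable

# toy 1D "instances"; the real inputs are trajectories compared with an LCSS similarity
instances = [np.array([0., 1., 2., 3.]),
             np.array([0., 1.1, 2.1, 3.2]),
             np.array([10., 11., 12., 13.]),
             np.array([10.1, 11.2, 12., 13.1])]
n = len(instances)

# pairwise similarities are cached in a matrix, -1 meaning 'not computed yet'
similarities = -np.ones((n, n))

# hypothetical similarity in [0, 1], standing in for lcss.computeNormalized
def similarity(x, y):
    return 1./(1. + np.abs(x - y).mean())

# step 1: learn the prototypes only (with assign = False, labels come back as None)
prototypeIndices, _ = ml.prototypeCluster(instances, similarities, 0.5,
                                          similarityFunc = similarity, assign = False)

# step 2: assign every instance to a prototype; clusters smaller than minClusterSize
# are iteratively removed and their members reassigned; label -1 marks outliers
prototypeIndices, labels = ml.assignToPrototypeClusters(instances, prototypeIndices,
                                                        similarities, 0.5,
                                                        similarityFunc = similarity,
                                                        minClusterSize = 1)
print(ml.computeClusterSizes(labels, prototypeIndices))
```

Returning (prototypeIndices, None) when assign is False lets a caller learn prototypes once and defer assignment, for example to objects loaded later from another database.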
--- a/scripts/learn-motion-patterns.py	Fri Jun 23 00:03:17 2017 -0400
+++ b/scripts/learn-motion-patterns.py	Fri Jun 23 23:50:02 2017 -0400
@@ -18,14 +18,25 @@
 parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True)
 parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = None)
 parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true')
-parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int, default = None)
+parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int)
 parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true')
 parser.add_argument('--save-similarities', dest = 'saveSimilarities', help = 'save computed similarities (in addition to prototypes)', action = 'store_true')
+#parser.add_argument('--save-matches', dest = 'saveMatches', help = 'save the matched prototype information', action = 'store_true')
 args = parser.parse_args()
 
-# TODO parameters (random init?) and what to learn from: objects, features, longest features from objects
+# use cases
+# 1. learn proto from one file, save in same or another (with traj)
+# 2. load proto, load objects, update proto, save proto
+# 3. assign objects from one db to proto
+# 4. load objects from several files, save in another
+
 # TODO add possibility to cluter with velocities
+# TODO add possibility to start with saved prototypes so that one can incrementally learn from several databases
+# save prototypes with database name, add option to keep trajectory along: if saved in same db, no need
+# load proto must load the movingobject
+# save the objects that match the prototypes
+# write an assignment function for objects
 
 trajectoryType = args.trajectoryType
 prototypeType = args.trajectoryType
@@ -49,7 +60,7 @@
 
 similarities = -np.ones((nTrajectories, nTrajectories))
 
-prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.randomInitialization) # this line can be called again without reinitializing similarities
+prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.randomInitialization, True, None) # this line can be called again without reinitializing similarities
 
 clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1)
 print(clusterSizes)
@@ -59,6 +70,11 @@
 if args.saveSimilarities:
     np.savetxt(utils.removeExtension(args.databaseFilename)+'-prototype-similarities.txt.gz', similarities, '%.4f')
 
+# if args.saveMatches:
+#     out = storage.openCheck(utils.removeExtension(args.databaseFilename)+'prototypes-matches.csv', 'w')
+#     for o in ojbects:
+#         out.write('')
+
 if args.display:
     from matplotlib.pyplot import figure, show, axis
     figure()
@@ -72,5 +88,3 @@
         objects[i].plot(utils.colors[i]+'o')
     axis('equal')
     show()
-
-# TODO store the prototypes trajectories, add option so store similarities (the most expensive stuff) with limited accuracy
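The new initialPrototypeIndices parameter is what supports the incremental use cases listed in the script's comments (e.g. use case 2, updating prototypes learned from a first database with objects from another). A hedged sketch of that use case, under the same assumptions as the previous example, with hypothetical trajectories and a stand-in similarity in place of lcss.computeNormalized:

```python
import numpy as np
import ml  # python/ml.py from this repository, assumed importable

# hypothetical trajectories as arrays of 2D positions; prototypes learned from a
# first database come first, so that their indices can seed the new clustering
def trajectory(x0, y0, n = 10):
    return np.column_stack((np.linspace(x0, x0 + 5., n), np.linspace(y0, y0 + 5., n)))

previousPrototypes = [trajectory(0., 0.), trajectory(50., 0.)]
newTrajectories = [trajectory(1., 0.5), trajectory(51., -0.5), trajectory(100., 20.)]
trajectories = previousPrototypes + newTrajectories

nTrajectories = len(trajectories)
similarities = -np.ones((nTrajectories, nTrajectories))

# stand-in similarity in [0, 1]; the script uses lcss.computeNormalized instead
similarity = lambda x, y: 1./(1. + np.linalg.norm(x.mean(axis = 0) - y.mean(axis = 0)))

# seed the clustering with the previously learned prototypes (use case 2 above);
# new trajectories either join an existing prototype or become new prototypes
prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, 0.2,
                                               similarityFunc = similarity,
                                               minClusterSize = 1,
                                               initialPrototypeIndices = list(range(len(previousPrototypes))))
print(ml.computeClusterSizes(labels, prototypeIndices))
```

Placing the previously learned prototypes at the start of the trajectory list keeps their indices valid as seeds; loading and saving prototype trajectories across databases is still listed as a TODO in the script.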