Mercurial Hosting > traffic-intelligence
changeset 908:b297525b2cbf
added options to the prototype cluster algorithm, work in progress
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 26 Jun 2017 00:10:35 -0400 |
parents | 9fd7b18f75b4 |
children | cd038493f8c6 |
files | python/ml.py python/storage.py scripts/learn-motion-patterns.py |
diffstat | 3 files changed, 69 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/python/ml.py Fri Jun 23 23:50:02 2017 -0400 +++ b/python/ml.py Mon Jun 26 00:10:35 2017 -0400 @@ -132,6 +132,12 @@ code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) return code,sigma +class Cluster: + 'Represents a cluster, with a prototype id and the list of instances in cluster' + def __init__(prototypeId, memberIndices = []): + self.prototypeId = prototypeId + self.memberIndices = memberIndices + def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None): '''Assigns instances to prototypes if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters @@ -161,7 +167,7 @@ indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] return prototypeIndices, labels -def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, randomInitialization = False, assign = True, initialPrototypeIndices = None): +def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, assign = True, initialPrototypeIndices = None): '''Finds exemplar (prototype) instance that represent each cluster Returns the prototype indices (in the instances list) and the cluster label of each instance @@ -174,15 +180,19 @@ it will become a new prototype. Non-prototype instances will be assigned to an existing prototype - TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)''' + if optimizeCentroid is True, each time an element is added, we recompute the centroid trajectory as the most similar to all in the cluster - # sort instances based on length + TODO: check how similarity evolves in clusters''' if len(instances) == 0: print('no instances to cluster (empty list)') return None + if similarityFunc is None: + print('similarityFunc is None') + return None + # sort instances based on length indices = range(len(instances)) - if randomInitialization: + if randomInitialization or optimizeCentroid: indices = np.random.permutation(indices) else: def compare(i, j): @@ -194,22 +204,39 @@ return 1 indices.sort(compare) # go through all instances + clusters = [] if initialPrototypeIndices is None: prototypeIndices = [indices[0]] else: - prototypeIndices = initialPrototypeIndices + prototypeIndices = initialPrototypeIndices # think of the format: if indices, have to be in instances + for i in prototypeIndices: + clusters.append([i]) for i in indices[1:]: - if similarityFunc is not None: - for j in prototypeIndices: - if similarities[i][j] < 0: - similarities[i][j] = similarityFunc(instances[i], instances[j]) - similarities[j][i] = similarities[i][j] - if similarities[i][prototypeIndices].max() < minSimilarity: + for j in prototypeIndices: + if similarities[i][j] < 0: + similarities[i][j] = similarityFunc(instances[i], instances[j]) + similarities[j][i] = similarities[i][j] + label = similarities[i][prototypeIndices].argmax() + if similarities[i][prototypeIndices[label]] < minSimilarity: prototypeIndices.append(i) - elif randomInitialization: # replace prototype by current instance i if longer - label = similarities[i][prototypeIndices].argmax() - if len(instances[prototypeIndices[label]]) < len(instances[i]): - prototypeIndices[label] = i + clusters.append([]) + else: + clusters[label].append(i) + if optimizeCentroid: + if len(clusters[label]) >= 2: # no point if only one element in cluster + for j in clusters[label][:-1]: + if similarities[i][j] < 0: + similarities[i][j] = similarityFunc(instances[i], instances[j]) + similarities[j][i] = similarities[i][j] + clusterIndices = clusters[label] + clusterSimilarities = similarities[clusterIndices][:,clusterIndices] + newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()] + if prototypeIndices[label] != newCentroidIdx: + prototypeIndices[label] = newCentroidIdx + elif randomInitialization: # replace prototype by current instance i if longer + if len(instances[prototypeIndices[label]]) < len(instances[i]): + prototypeIndices[label] = i + if assign: return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize) else: @@ -220,6 +247,24 @@ clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) return clusterSizes +def computeClusterStatistics(labels, prototypeIndices, instances, similarities, similarityFunc, clusters = None, outlierIndex = -1): + if clusters is None: + clusters = {protoId:[] for protoId in prototypeIndices+[-1]} + for i,l in enumerate(labels): + clusters[l].append(i) + clusters = [clusters[protoId] for protoId in prototypeIndices] + for i, cluster in enumerate(clusters): + n = len(cluster) + print('cluster {}: {} elements'.format(prototypeIndices[i], n)) + if n >=2: + for j,k in enumerate(cluster): + for l in cluster[:j]: + if similarities[k][l] < 0: + similarities[k][l] = similarityFunc(instances[k], instances[l]) + similarities[l][k] = similarities[k][l] + print('Mean similarity to prototype: {}'.format((similarities[prototypeIndices[i]][cluster].sum()+1)/(n-1))) + print('Mean overall similarity: {}'.format((similarities[cluster][:,cluster].sum()+n)/(n*(n-1)))) + # Gaussian Mixture Models def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nUnitsPerPixel = 1., alpha = 0.3): '''plot the ellipse corresponding to the Gaussians
--- a/python/storage.py Fri Jun 23 23:50:02 2017 -0400 +++ b/python/storage.py Mon Jun 26 00:10:35 2017 -0400 @@ -19,6 +19,10 @@ 'car':2, 'truck':3} +tableNames = {'feature':'positions', + 'object': 'objects', + 'objectfeatures': 'positions'} + ######################### # Sqlite ######################### @@ -411,6 +415,9 @@ else: dbfn = filename cursor.execute('INSERT INTO prototypes (id, dbfilename, trajectory_type, nMatchings) VALUES ({},\"{}\",\"{}\",{})'.format(protoId, dbfn, trajectoryType, n)) + cursor.execute('SELECT * from sqlite_master WHERE type = \"table\" and name = \"{}\"'.format(tableNames[trajectoryType])) + if len(cursor.fetchall) == 0: + pass # save prototype trajectory data except sqlite3.OperationalError as error: printDBError(error) connection.commit()
--- a/scripts/learn-motion-patterns.py Fri Jun 23 23:50:02 2017 -0400 +++ b/scripts/learn-motion-patterns.py Mon Jun 26 00:10:35 2017 -0400 @@ -17,6 +17,7 @@ parser.add_argument('--metric', dest = 'metric', help = 'metric for the similarity of trajectory points', default = 'cityblock') # default is manhattan distance parser.add_argument('-s', dest = 'minSimilarity', help = 'minimum similarity to put a trajectory in a cluster', type = float, required = True) parser.add_argument('-c', dest = 'minClusterSize', help = 'minimum cluster size', type = int, default = None) +parser.add_argument('--optimize', dest = 'optimizeCentroid', help = 'recompute centroid at each assignment', action = 'store_true') parser.add_argument('--random', dest = 'randomInitialization', help = 'random initialization of clustering algorithm', action = 'store_true') parser.add_argument('--subsample', dest = 'positionSubsamplingRate', help = 'rate of position subsampling (1 every n positions)', type = int) parser.add_argument('--display', dest = 'display', help = 'display trajectories', action = 'store_true') @@ -60,7 +61,7 @@ similarities = -np.ones((nTrajectories, nTrajectories)) -prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.randomInitialization, True, None) # this line can be called again without reinitializing similarities +prototypeIndices, labels = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, lambda x,y : lcss.computeNormalized(x, y), args.minClusterSize, args.optimizeCentroid, args.randomInitialization, True, None) # this line can be called again without reinitializing similarities clusterSizes = ml.computeClusterSizes(labels, prototypeIndices, -1) print(clusterSizes)