Mercurial Hosting > traffic-intelligence
changeset 786:1f2b2d1f4fbf dev
added script and code to learn POIs
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 11 Mar 2016 17:38:48 -0500 |
parents | 3aa6102ccc12 |
children | 0a428b449b80 |
files | python/ml.py scripts/learn-poi.py |
diffstat | 2 files changed, 84 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/python/ml.py Thu Mar 03 17:01:30 2016 -0500 +++ b/python/ml.py Fri Mar 11 17:38:48 2016 -0500 @@ -1,13 +1,26 @@ #! /usr/bin/env python '''Libraries for machine learning algorithms''' +from os import path +from random import shuffle +from copy import copy, deepcopy + import numpy as np +from matplotlib.pylab import text +import matplotlib as mpl +import matplotlib.pyplot as plt +from scipy.cluster.vq import kmeans, whiten, vq +from sklearn import mixture +import utils + +##################### +# OpenCV ML models +##################### class Model(object): '''Abstract class for loading/saving model''' def load(self, filename): - from os import path if path.exists(filename): self.model.load(filename) else: @@ -31,6 +44,10 @@ return self.model.predict(hog) +##################### +# Clustering +##################### + class Centroid(object): 'Wrapper around instances to add a counter' @@ -52,7 +69,6 @@ return Centroid(inst, self.nInstances+instance.nInstances) def plot(self, options = ''): - from matplotlib.pylab import text self.instance.plot(options) text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) @@ -68,9 +84,6 @@ data: list of instances averageCentroid: ''' - - from random import shuffle - from copy import copy, deepcopy localdata = copy(data) # shallow copy to avoid modifying data if shuffleData: shuffle(localdata) @@ -105,7 +118,6 @@ # by stacking eigenvectors as columns features = np.array(V[:k]).T # k-means - from scipy.cluster.vq import kmeans, whiten, vq features = whiten(features) centroids,distortion = kmeans(features,k, iter) code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) @@ -179,5 +191,27 @@ def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} - clusterSizes['outlier'] = sum(np.array(labels) == -1) + clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) 
# (diff context carried over from the previous hunk: last statement of computeClusterSizes)
#     return clusterSizes

# Gaussian Mixture Models
def plotGMMClusters(model, dataset = None, colors = utils.colors):
    '''Plots the Gaussian components of a fitted mixture model as ellipses
    and, if a dataset is provided, its instances colored by predicted component

    model: fitted mixture model, of which n_components, means_, covars_
    and predict are used (covars_[i] is passed to np.linalg.eigh and must therefore
    be a square matrix: presumably a model with 'full' covariance, to be confirmed)
    dataset: optional array indexed as dataset[mask, column], of which columns 0 and 1 are plotted;
    if None, only the ellipses and the component indices are drawn
    colors: indexable by component index, each element is passed as a matplotlib color

    A new figure is created and becomes the current figure; nothing is returned'''
    fig = plt.figure()
    # gca creates the axes if the figure has none:
    # the ellipses can then be added even if nothing was drawn before
    ax = fig.gca()
    # the classes can be predicted only if instances are provided
    if dataset is not None:
        labels = model.predict(dataset)
    else:
        labels = None
    for i in range(model.n_components):
        mean = model.means_[i]
        if labels is not None:
            # instances assigned to component i
            selected = (labels == i)
            plt.scatter(dataset[selected, 0], dataset[selected, 1], .8, color=colors[i])
        # index of the component, written next to its mean
        plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))

        # Plot an ellipse to show the Gaussian component
        v, w = np.linalg.eigh(model.covars_[i])
        angle = np.arctan2(w[0][1], w[0][0])
        angle = 180*angle/np.pi # convert to degrees
        v *= 4 # the scaled eigenvalues are used as width and height of the ellipse
        # angle is passed by keyword so that it cannot be mistaken for another parameter
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180+angle, color=colors[i])
        ell.set_clip_box(fig.bbox)
        ell.set_alpha(.5)
        ax.add_artist(ell)
#! /usr/bin/env python
# scripts/learn-poi.py
'''Learns and displays Gaussian mixture models fit to the first and last positions
of the trajectories stored in a Sqlite database'''

import argparse

import numpy as np
from sklearn import mixture
import matplotlib.pyplot as plt

import storage
import ml


def loadExtremities(databaseFilename, trajectoryType):
    '''Loads the trajectories and returns two numpy arrays
    (first positions, last positions), with one row per trajectory'''
    beginnings = []
    ends = []
    for o in storage.loadTrajectoriesFromSqlite(databaseFilename, trajectoryType):
        beginnings.append(o.getPositionAt(0).aslist())
        ends.append(o.getPositionAt(int(o.length())-1).aslist())
    return np.array(beginnings), np.array(ends)


def fitGMM(points, nClusters, covarianceType):
    '''Returns a new Gaussian mixture model with nClusters components fit to points'''
    # NOTE(review): mixture.GMM is the legacy scikit-learn class;
    # it is kept because ml.plotGMMClusters reads its covars_ attribute
    gmm = mixture.GMM(n_components=nClusters, covariance_type = covarianceType)
    return gmm.fit(points)


def displayClusters(model, points, title, name, nClusters):
    '''Plots the model and the points in a new figure entitled title
    and prints the number of points predicted in each of the nClusters clusters'''
    ml.plotGMMClusters(model, points)
    plt.axis('equal')
    plt.title(title)
    print('{} Clusters:\n{}'.format(name, ml.computeClusterSizes(model.predict(points), range(nClusters))))


def main():
    '''Parses the command line, learns the origin and destination models and displays them'''
    parser = argparse.ArgumentParser(description='The program learns and displays Gaussians fit to beginnings and ends of object trajectories (based on Mohamed Gomaa Mohamed 2015 PhD). TODO: save the data')
    parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file', required = True)
    parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to display', choices = ['feature', 'object'], default = 'object')
    parser.add_argument('-n', dest = 'nClusters', help = 'number of point clusters', required = True, type = int)
    parser.add_argument('--covariance-type', dest = 'covarianceType', help = 'type of covariance of Gaussian model', default = "full")
    args = parser.parse_args()

    beginnings, ends = loadExtremities(args.databaseFilename, args.trajectoryType)

    # both models are fit before anything is plotted
    beginningModel = fitGMM(beginnings, args.nClusters, args.covarianceType)
    endModel = fitGMM(ends, args.nClusters, args.covarianceType)

    displayClusters(beginningModel, beginnings, 'Origins', 'Origin', args.nClusters)
    displayClusters(endModel, ends, 'Destinations', 'Destination', args.nClusters)

    # without this call, the figures are created but the script exits
    # without displaying them (unless matplotlib is in interactive mode)
    plt.show()


if __name__ == '__main__':
    main()