Mercurial Hosting > traffic-intelligence
diff python/ml.py @ 786:1f2b2d1f4fbf dev
added script and code to learn POIs
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 11 Mar 2016 17:38:48 -0500 |
parents | 2472b4d59aea |
children | 0a428b449b80 |
line wrap: on
line diff
--- a/python/ml.py Thu Mar 03 17:01:30 2016 -0500 +++ b/python/ml.py Fri Mar 11 17:38:48 2016 -0500 @@ -1,13 +1,26 @@ #! /usr/bin/env python '''Libraries for machine learning algorithms''' +from os import path +from random import shuffle +from copy import copy, deepcopy + import numpy as np +from matplotlib.pylab import text +import matplotlib as mpl +import matplotlib.pyplot as plt +from scipy.cluster.vq import kmeans, whiten, vq +from sklearn import mixture +import utils + +##################### +# OpenCV ML models +##################### class Model(object): '''Abstract class for loading/saving model''' def load(self, filename): - from os import path if path.exists(filename): self.model.load(filename) else: @@ -31,6 +44,10 @@ return self.model.predict(hog) +##################### +# Clustering +##################### + class Centroid(object): 'Wrapper around instances to add a counter' @@ -52,7 +69,6 @@ return Centroid(inst, self.nInstances+instance.nInstances) def plot(self, options = ''): - from matplotlib.pylab import text self.instance.plot(options) text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) @@ -68,9 +84,6 @@ data: list of instances averageCentroid: ''' - - from random import shuffle - from copy import copy, deepcopy localdata = copy(data) # shallow copy to avoid modifying data if shuffleData: shuffle(localdata) @@ -105,7 +118,6 @@ # by stacking eigenvectors as columns features = np.array(V[:k]).T # k-means - from scipy.cluster.vq import kmeans, whiten, vq features = whiten(features) centroids,distortion = kmeans(features,k, iter) code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) @@ -179,5 +191,27 @@ def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} - clusterSizes['outlier'] = sum(np.array(labels) == -1) + clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) return clusterSizes + +# Gaussian Mixture Models +def plotGMMClusters(model, dataset = None, colors = utils.colors): + '''plot the ellipse corresponding to the Gaussians + and the predicted classes of the instances in the dataset''' + fig = plt.figure() + labels = model.predict(dataset) + for i in xrange(model.n_components): + mean = model.means_[i] + if dataset is not None: + plt.scatter(dataset[labels == i, 0], dataset[labels == i, 1], .8, color=colors[i]) + plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1)) + + # Plot an ellipse to show the Gaussian component + v, w = np.linalg.eigh(model.covars_[i]) + angle = np.arctan2(w[0][1], w[0][0]) + angle = 180*angle/np.pi # convert to degrees + v *= 4 + ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) + ell.set_clip_box(fig.bbox) + ell.set_alpha(.5) + fig.axes[0].add_artist(ell)