repo/traffic-intelligence: python/ml.py comparison

comparison python/ml.py @ 786:1f2b2d1f4fbf dev

added script and code to learn POIs

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Fri, 11 Mar 2016 17:38:48 -0500
parents	2472b4d59aea
children	0a428b449b80

comparison

equal deleted inserted replaced

-:3aa6102ccc12
+:1f2b2d1f4fbf
 #! /usr/bin/env python
 '''Libraries for machine learning algorithms'''
+from os import path
+from random import shuffle
+from copy import copy, deepcopy
 import numpy as np
+from matplotlib.pylab import text
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+from scipy.cluster.vq import kmeans, whiten, vq
+from sklearn import mixture
+import utils
+#####################
+# OpenCV ML models
+#####################
 class Model(object):
     '''Abstract base for model wrappers: loading and prediction are forwarded to self.model

     self.model is not assigned in the code visible here
     (presumably set by the concrete subclasses -- to confirm)'''

     def load(self, filename):
         '''Loads the wrapped model from filename if that path exists

         If the path does not exist, a message is printed,
         no exception is raised and self.model is not accessed'''
         if not path.exists(filename):
             print('Provided filename {} does not exist: model not loaded!'.format(filename))
             return
         self.model.load(filename)

     def predict(self, hog):
         '''Returns the result of self.model.predict for hog

         NOTE(review): hog is presumably a HOG feature vector, and this method
         may belong to a subclass hidden by the collapsed diff view -- to confirm'''
         prediction = self.model.predict(hog)
         return prediction
+#####################
+# Clustering
+#####################
 class Centroid(object):
 'Wrapper around instances to add a counter'
 def __init__(self, instance, nInstances = 1):
 self.instance = instance
 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
 inst.multiply(1/(self.nInstances+instance.nInstances))
 return Centroid(inst, self.nInstances+instance.nInstances)
 def plot(self, options = ''):
-from matplotlib.pylab import text
 self.instance.plot(options)
 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
 '''Algorithm that clusters any dataset based on a similarity matrix
 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
 The number of clusters will be determined accordingly
 data: list of instances
 averageCentroid: '''
-from random import shuffle
-from copy import copy, deepcopy
 localdata = copy(data) # shallow copy to avoid modifying data
 if shuffleData:
 shuffle(localdata)
 if initialCentroids is None:
 centroids = [Centroid(localdata[0])]
 	U,sigma,V = np.linalg.svd(L)
 	# create feature vector from k first eigenvectors
 	# by stacking eigenvectors as columns
 	features = np.array(V[:k]).T
 	# k-means
-	from scipy.cluster.vq import kmeans, whiten, vq
 	features = whiten(features)
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma
 return prototypeIndices, labels
 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
     '''Counts the instances assigned to each prototype, and the outliers

     labels: sequence with one cluster label per instance
     prototypeIndices: iterable of the labels to count (they become the keys of the result)
     outlierIndex: label that marks an outlier, counted under the key 'outlier'

     Returns the dictionary {label: number of instances with that label, 'outlier': number of outliers}'''
     # convert once, instead of rebuilding the array for every prototype
     labelArray = np.asarray(labels)
     clusterSizes = {i: int(np.count_nonzero(labelArray == i)) for i in prototypeIndices}
     clusterSizes['outlier'] = int(np.count_nonzero(labelArray == outlierIndex))
     return clusterSizes
+# Gaussian Mixture Models
def plotGMMClusters(model, dataset = None, colors = utils.colors):
    '''Plots, in a new figure, one ellipse per Gaussian component of model
    and, if dataset is provided, its instances colored by predicted component

    model: fitted Gaussian mixture providing n_components, means_, predict and
    the component covariances (covars_, or covariances_ in recent scikit-learn);
    2D data with one full covariance matrix per component is assumed -- to confirm
    dataset: optional array-like of instances, one row per instance
    (only the first two columns are plotted)
    colors: indexable by component number, gives the color used for each component'''
    fig = plt.figure()
    # axes are created up front so that the ellipses always have axes to be added to
    ax = fig.gca()
    # recent scikit-learn exposes covariances_, the legacy GMM class exposes covars_
    covariances = getattr(model, 'covariances_', None)
    if covariances is None:
        covariances = model.covars_
    if dataset is not None:
        # labels are only needed (and predict only works) when instances are given
        dataset = np.asarray(dataset)
        labels = model.predict(dataset)
    for i in range(model.n_components):
        mean = model.means_[i]
        if dataset is not None:
            plt.scatter(dataset[labels == i, 0], dataset[labels == i, 1], .8, color=colors[i])
        plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
        # Plot an ellipse to show the Gaussian component
        v, w = np.linalg.eigh(covariances[i])
        angle = np.arctan2(w[0][1], w[0][0])
        angle = 180*angle/np.pi  # convert to degrees
        v *= 4 # the scaled eigenvalues are used as the ellipse width and height
        # angle is passed by keyword: it is keyword-only in recent matplotlib
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180+angle, color=colors[i])
        ell.set_clip_box(fig.bbox)
        ell.set_alpha(.5)
        ax.add_artist(ell)

Mercurial Hosting > traffic-intelligence

comparison python/ml.py @ 786:1f2b2d1f4fbf dev