Mercurial Hosting > traffic-intelligence
comparison python/ml.py @ 786:1f2b2d1f4fbf dev
added script and code to learn POIs
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 11 Mar 2016 17:38:48 -0500 |
parents | 2472b4d59aea |
children | 0a428b449b80 |
comparison
equal
deleted
inserted
replaced
785:3aa6102ccc12 | 786:1f2b2d1f4fbf |
---|---|
1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
2 '''Libraries for machine learning algorithms''' | 2 '''Libraries for machine learning algorithms''' |
3 | 3 |
4 from os import path | |
5 from random import shuffle | |
6 from copy import copy, deepcopy | |
7 | |
4 import numpy as np | 8 import numpy as np |
5 | 9 from matplotlib.pylab import text |
10 import matplotlib as mpl | |
11 import matplotlib.pyplot as plt | |
12 from scipy.cluster.vq import kmeans, whiten, vq | |
13 from sklearn import mixture | |
14 | |
15 import utils | |
16 | |
17 ##################### | |
18 # OpenCV ML models | |
19 ##################### | |
6 | 20 |
7 class Model(object): | 21 class Model(object): |
8 '''Abstract class for loading/saving model''' | 22 '''Abstract class for loading/saving model''' |
9 def load(self, filename): | 23 def load(self, filename): |
10 from os import path | |
11 if path.exists(filename): | 24 if path.exists(filename): |
12 self.model.load(filename) | 25 self.model.load(filename) |
13 else: | 26 else: |
14 print('Provided filename {} does not exist: model not loaded!'.format(filename)) | 27 print('Provided filename {} does not exist: model not loaded!'.format(filename)) |
15 | 28 |
29 | 42 |
30 def predict(self, hog): | 43 def predict(self, hog): |
31 return self.model.predict(hog) | 44 return self.model.predict(hog) |
32 | 45 |
33 | 46 |
47 ##################### | |
48 # Clustering | |
49 ##################### | |
50 | |
34 class Centroid(object): | 51 class Centroid(object): |
35 'Wrapper around instances to add a counter' | 52 'Wrapper around instances to add a counter' |
36 | 53 |
37 def __init__(self, instance, nInstances = 1): | 54 def __init__(self, instance, nInstances = 1): |
38 self.instance = instance | 55 self.instance = instance |
50 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) | 67 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) |
51 inst.multiply(1/(self.nInstances+instance.nInstances)) | 68 inst.multiply(1/(self.nInstances+instance.nInstances)) |
52 return Centroid(inst, self.nInstances+instance.nInstances) | 69 return Centroid(inst, self.nInstances+instance.nInstances) |
53 | 70 |
54 def plot(self, options = ''): | 71 def plot(self, options = ''): |
55 from matplotlib.pylab import text | |
56 self.instance.plot(options) | 72 self.instance.plot(options) |
57 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) | 73 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) |
58 | 74 |
59 def kMedoids(similarityMatrix, initialCentroids = None, k = None): | 75 def kMedoids(similarityMatrix, initialCentroids = None, k = None): |
60 '''Algorithm that clusters any dataset based on a similarity matrix | 76 '''Algorithm that clusters any dataset based on a similarity matrix |
66 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. | 82 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. |
67 The number of clusters will be determined accordingly | 83 The number of clusters will be determined accordingly |
68 | 84 |
69 data: list of instances | 85 data: list of instances |
70 averageCentroid: ''' | 86 averageCentroid: ''' |
71 | |
72 from random import shuffle | |
73 from copy import copy, deepcopy | |
74 localdata = copy(data) # shallow copy to avoid modifying data | 87 localdata = copy(data) # shallow copy to avoid modifying data |
75 if shuffleData: | 88 if shuffleData: |
76 shuffle(localdata) | 89 shuffle(localdata) |
77 if initialCentroids is None: | 90 if initialCentroids is None: |
78 centroids = [Centroid(localdata[0])] | 91 centroids = [Centroid(localdata[0])] |
103 U,sigma,V = np.linalg.svd(L) | 116 U,sigma,V = np.linalg.svd(L) |
104 # create feature vector from k first eigenvectors | 117 # create feature vector from k first eigenvectors |
105 # by stacking eigenvectors as columns | 118 # by stacking eigenvectors as columns |
106 features = np.array(V[:k]).T | 119 features = np.array(V[:k]).T |
107 # k-means | 120 # k-means |
108 from scipy.cluster.vq import kmeans, whiten, vq | |
109 features = whiten(features) | 121 features = whiten(features) |
110 centroids,distortion = kmeans(features,k, iter) | 122 centroids,distortion = kmeans(features,k, iter) |
111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 123 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
112 return code,sigma | 124 return code,sigma |
113 | 125 |
177 | 189 |
178 return prototypeIndices, labels | 190 return prototypeIndices, labels |
179 | 191 |
180 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | 192 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): |
181 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 193 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
182 clusterSizes['outlier'] = sum(np.array(labels) == -1) | 194 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) |
183 return clusterSizes | 195 return clusterSizes |
196 | |
197 # Gaussian Mixture Models | |
198 def plotGMMClusters(model, dataset = None, colors = utils.colors): | |
199 '''plot the ellipse corresponding to the Gaussians | |
200 and the predicted classes of the instances in the dataset''' | |
201 fig = plt.figure() | |
202 labels = model.predict(dataset) | |
203 for i in xrange(model.n_components): | |
204 mean = model.means_[i] | |
205 if dataset is not None: | |
206 plt.scatter(dataset[labels == i, 0], dataset[labels == i, 1], .8, color=colors[i]) | |
207 plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1)) | |
208 | |
209 # Plot an ellipse to show the Gaussian component | |
210 v, w = np.linalg.eigh(model.covars_[i]) | |
211 angle = np.arctan2(w[0][1], w[0][0]) | |
212 angle = 180*angle/np.pi # convert to degrees | |
213 v *= 4 | |
214 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) | |
215 ell.set_clip_box(fig.bbox) | |
216 ell.set_alpha(.5) | |
217 fig.axes[0].add_artist(ell) |