comparison python/ml.py @ 786:1f2b2d1f4fbf dev

added script and code to learn POIs
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 11 Mar 2016 17:38:48 -0500
parents 2472b4d59aea
children 0a428b449b80
comparison
equal deleted inserted replaced
785:3aa6102ccc12 786:1f2b2d1f4fbf
1 #! /usr/bin/env python 1 #! /usr/bin/env python
2 '''Libraries for machine learning algorithms''' 2 '''Libraries for machine learning algorithms'''
3 3
4 from os import path
5 from random import shuffle
6 from copy import copy, deepcopy
7
4 import numpy as np 8 import numpy as np
5 9 from matplotlib.pylab import text
10 import matplotlib as mpl
11 import matplotlib.pyplot as plt
12 from scipy.cluster.vq import kmeans, whiten, vq
13 from sklearn import mixture
14
15 import utils
16
17 #####################
18 # OpenCV ML models
19 #####################
6 20
7 class Model(object): 21 class Model(object):
8 '''Abstract class for loading/saving model''' 22 '''Abstract class for loading/saving model'''
def load(self, filename):
    '''Loads the wrapped model from filename if that file exists

    Otherwise a message is printed and self.model is not touched'''
    if not path.exists(filename):
        print('Provided filename {} does not exist: model not loaded!'.format(filename))
        return
    self.model.load(filename)
15 28
29 42
def predict(self, hog):
    '''Returns the prediction of the wrapped model for hog

    hog is handed unchanged to self.model.predict'''
    prediction = self.model.predict(hog)
    return prediction
32 45
33 46
47 #####################
48 # Clustering
49 #####################
50
34 class Centroid(object): 51 class Centroid(object):
35 'Wrapper around instances to add a counter' 52 'Wrapper around instances to add a counter'
36 53
37 def __init__(self, instance, nInstances = 1): 54 def __init__(self, instance, nInstances = 1):
38 self.instance = instance 55 self.instance = instance
50 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) 67 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
51 inst.multiply(1/(self.nInstances+instance.nInstances)) 68 inst.multiply(1/(self.nInstances+instance.nInstances))
52 return Centroid(inst, self.nInstances+instance.nInstances) 69 return Centroid(inst, self.nInstances+instance.nInstances)
53 70
def plot(self, options = ''):
    '''Plots the wrapped instance and writes the number of instances next to it

    options is passed as is to the plot method of the instance
    the label is offset by 1 in x and y from the position of the instance'''
    self.instance.plot(options)
    position = self.instance.position
    label = str(self.nInstances)
    text(position.x+1, position.y+1, label)
58 74
59 def kMedoids(similarityMatrix, initialCentroids = None, k = None): 75 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
60 '''Algorithm that clusters any dataset based on a similarity matrix 76 '''Algorithm that clusters any dataset based on a similarity matrix
66 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. 82 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
67 The number of clusters will be determined accordingly 83 The number of clusters will be determined accordingly
68 84
69 data: list of instances 85 data: list of instances
70 averageCentroid: ''' 86 averageCentroid: '''
71
72 from random import shuffle
73 from copy import copy, deepcopy
74 localdata = copy(data) # shallow copy to avoid modifying data 87 localdata = copy(data) # shallow copy to avoid modifying data
75 if shuffleData: 88 if shuffleData:
76 shuffle(localdata) 89 shuffle(localdata)
77 if initialCentroids is None: 90 if initialCentroids is None:
78 centroids = [Centroid(localdata[0])] 91 centroids = [Centroid(localdata[0])]
103 U,sigma,V = np.linalg.svd(L) 116 U,sigma,V = np.linalg.svd(L)
104 # create feature vector from k first eigenvectors 117 # create feature vector from k first eigenvectors
105 # by stacking eigenvectors as columns 118 # by stacking eigenvectors as columns
106 features = np.array(V[:k]).T 119 features = np.array(V[:k]).T
107 # k-means 120 # k-means
108 from scipy.cluster.vq import kmeans, whiten, vq
109 features = whiten(features) 121 features = whiten(features)
110 centroids,distortion = kmeans(features,k, iter) 122 centroids,distortion = kmeans(features,k, iter)
111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) 123 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
112 return code,sigma 124 return code,sigma
113 125
177 189
178 return prototypeIndices, labels 190 return prototypeIndices, labels
179 191
def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
    '''Counts the number of instances assigned to each prototype

    labels: sequence of the label of each instance
    prototypeIndices: labels for which a count is returned
    outlierIndex: label whose count is stored under the key 'outlier'

    Returns a dictionary of counts (plain integers), with the elements
    of prototypeIndices and 'outlier' as keys'''
    labelArray = np.asarray(labels) # converted once, not once per prototype
    clusterSizes = {i: int(np.count_nonzero(labelArray == i)) for i in prototypeIndices}
    clusterSizes['outlier'] = int(np.count_nonzero(labelArray == outlierIndex))
    return clusterSizes
196
197 # Gaussian Mixture Models
def plotGMMClusters(model, dataset = None, colors = utils.colors):
    '''Plots the ellipses corresponding to the Gaussians of model
    and, if dataset is provided, the predicted classes of its instances

    model: mixture model providing n_components, means_, covars_ and predict
    dataset: optional array of instances, of which the first two columns are plotted
    colors: sequence of colors indexed by the component number

    A new figure is created for the plot'''
    fig = plt.figure()
    # explicit axes: they must exist for the ellipses even if nothing else was drawn
    axes = fig.gca()
    # classes can be predicted only if instances are provided
    labels = model.predict(dataset) if dataset is not None else None
    for i in range(model.n_components):
        mean = model.means_[i]
        if labels is not None:
            inComponent = labels == i
            axes.scatter(dataset[inComponent, 0], dataset[inComponent, 1], .8, color=colors[i])
        axes.annotate(str(i), xy=(mean[0]+1, mean[1]+1))

        # Plot an ellipse to show the Gaussian component
        v, w = np.linalg.eigh(model.covars_[i])
        angle = 180*np.arctan2(w[0][1], w[0][0])/np.pi # converted to degrees
        v *= 4
        # angle passed by keyword: recent matplotlib versions do not accept it positionally
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180+angle, color=colors[i])
        ell.set_clip_box(fig.bbox)
        ell.set_alpha(.5)
        axes.add_artist(ell)