annotate python/ml.py @ 788:5b970a5bc233 dev

updated classifying code to OpenCV 3.x (bug in function to load classification models)
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 24 Mar 2016 16:37:37 -0400
parents 0a428b449b80
children 1158a6e2d28e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
1 #! /usr/bin/env python
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
2 '''Libraries for machine learning algorithms'''
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
3
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
4 from os import path
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
5 from random import shuffle
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
6 from copy import copy, deepcopy
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
7
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
8 import numpy as np
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
9 from matplotlib.pylab import text
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
10 import matplotlib as mpl
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
11 import matplotlib.pyplot as plt
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
12 from scipy.cluster.vq import kmeans, whiten, vq
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
13 from sklearn import mixture
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
14 import cv2
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
15
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
16 import utils
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
17
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
18 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
19 # OpenCV ML models
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
20 #####################
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
21
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
22 class StatModel(object):
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
23 '''Abstract class for loading/saving model'''
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
24 def load(self, filename):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
25 if path.exists(filename):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
26 self.model.load(filename)
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
27 else:
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
28 print('Provided filename {} does not exist: model not loaded!'.format(filename))
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
29
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
30 def save(self, filename):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
31 self.model.save(filename)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
32
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
33 class SVM(StatModel):
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
34 '''wrapper for OpenCV SimpleVectorMachine algorithm'''
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
35 def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
36 self.model = cv2.ml.SVM_create()
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
37 self.model.setType(svmType)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
38 self.model.setKernel(kernelType)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
39 self.model.setDegree(degree)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
40 self.model.setGamma(gamma)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
41 self.model.setCoef0(coef0)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
42 self.model.setC(Cvalue)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
43 self.model.setNu(nu)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
44 self.model.setP(p)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
45
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
46 def train(self, samples, layout, responses):
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
47 self.model.train(samples, layout, responses)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
48
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
49 def predict(self, hog):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
50 return self.model.predict(hog)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
51
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
52
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
53 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
54 # Clustering
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
55 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
56
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 636
diff changeset
57 class Centroid(object):
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
58 'Wrapper around instances to add a counter'
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
59
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
60 def __init__(self, instance, nInstances = 1):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
61 self.instance = instance
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
62 self.nInstances = nInstances
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
63
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
64 # def similar(instance2):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
65 # return self.instance.similar(instance2)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
66
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
67 def add(self, instance2):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
68 self.instance = self.instance.multiply(self.nInstances)+instance2
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
69 self.nInstances += 1
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
70 self.instance = self.instance.multiply(1/float(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
71
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
72 def average(c):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
73 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
74 inst.multiply(1/(self.nInstances+instance.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
75 return Centroid(inst, self.nInstances+instance.nInstances)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
76
515
727e3c529519 renamed all draw functions to plot for consistency
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 501
diff changeset
77 def plot(self, options = ''):
727e3c529519 renamed all draw functions to plot for consistency
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 501
diff changeset
78 self.instance.plot(options)
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
79 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
80
386
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
81 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
82 '''Algorithm that clusters any dataset based on a similarity matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
83 Either the initialCentroids or k are passed'''
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
84 pass
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
85
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
86 def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
87 '''k-means algorithm with similarity function
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
88 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
89 The number of clusters will be determined accordingly
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
90
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
91 data: list of instances
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
92 averageCentroid: '''
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
93 localdata = copy(data) # shallow copy to avoid modifying data
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
94 if shuffleData:
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
95 shuffle(localdata)
636
3058e00887bc removed all issues because of tests with None, using is instead of == or !=
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 563
diff changeset
96 if initialCentroids is None:
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
97 centroids = [Centroid(localdata[0])]
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
98 else:
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
99 centroids = deepcopy(initialCentroids)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
100 for instance in localdata[1:]:
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
101 i = 0
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
102 while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
103 i += 1
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
104 if i == len(centroids):
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
105 centroids.append(Centroid(instance))
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
106 else:
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
107 centroids[i].add(instance)
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
108
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
109 return centroids
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
110
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
111 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
112
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
113 def spectralClustering(similarityMatrix, k, iter=20):
285
5957aa1d69e1 Integrating Mohamed's changes
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 184
diff changeset
114 '''Spectral Clustering algorithm'''
5957aa1d69e1 Integrating Mohamed's changes
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 184
diff changeset
115 n = len(similarityMatrix)
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
116 # create Laplacian matrix
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
117 rowsum = np.sum(similarityMatrix,axis=0)
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
118 D = np.diag(1 / np.sqrt(rowsum))
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
119 I = np.identity(n)
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
120 L = I - np.dot(D,np.dot(similarityMatrix,D))
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
121 # compute eigenvectors of L
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
122 U,sigma,V = np.linalg.svd(L)
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
123 # create feature vector from k first eigenvectors
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
124 # by stacking eigenvectors as columns
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
125 features = np.array(V[:k]).T
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
126 # k-means
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
127 features = whiten(features)
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
128 centroids,distortion = kmeans(features,k, iter)
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
129 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
309
80cbafd69109 Added spectral clustering function
Mohamed Gomaa
parents: 308
diff changeset
130 return code,sigma
563
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
131
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
132 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
133 '''Finds exemplar (prototype) instance that represent each cluster
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
134 Returns the prototype indices (in the instances list) and the cluster label of each instance
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
135
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
136 the elements in the instances list must have a length (method __len__), or one can use the random initialization
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
137 the positions in the instances list corresponds to the similarities
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
138 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
139 similarities must still be allocated with the right size
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
140
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
141 if an instance is different enough (<minSimilarity),
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
142 it will become a new prototype.
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
143 Non-prototype instances will be assigned to an existing prototype
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
144 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
145 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
146
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
147 # sort instances based on length
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
148 indices = range(len(instances))
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
149 if randomInitialization:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
150 indices = np.random.permutation(indices)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
151 else:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
152 def compare(i, j):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
153 if len(instances[i]) > len(instances[j]):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
154 return -1
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
155 elif len(instances[i]) == len(instances[j]):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
156 return 0
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
157 else:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
158 return 1
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
159 indices.sort(compare)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
160 # go through all instances
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
161 prototypeIndices = [indices[0]]
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
162 for i in indices[1:]:
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
163 if similarityFunc is not None:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
164 for j in prototypeIndices:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
165 if similarities[i][j] < 0:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
166 similarities[i][j] = similarityFunc(instances[i], instances[j])
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
167 similarities[j][i] = similarities[i][j]
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
168 if similarities[i][prototypeIndices].max() < minSimilarity:
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
169 prototypeIndices.append(i)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
170
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
171 # assignment
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
172 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
173 assign = True
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
174 while assign:
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
175 labels = [-1]*similarities.shape[0]
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
176 for i in prototypeIndices:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
177 labels[i] = i
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
178 for i in indices:
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
179 if similarityFunc is not None:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
180 for j in prototypeIndices:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
181 if similarities[i][j] < 0:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
182 similarities[i][j] = similarityFunc(instances[i], instances[j])
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
183 similarities[j][i] = similarities[i][j]
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
184 prototypeIdx = similarities[i][prototypeIndices].argmax()
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
185 if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
186 labels[i] = prototypeIndices[prototypeIdx]
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
187 else:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
188 labels[i] = -1 # outlier
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
189 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
190 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
191 assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
192 if assign:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
193 prototypeIndices.remove(smallestClusterIndex)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
194 indices.append(smallestClusterIndex)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
195
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
196 return prototypeIndices, labels
738
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
197
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
198 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
199 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
200 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex)
738
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
201 return clusterSizes
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
202
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
203 # Gaussian Mixture Models
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
204 def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nPixelsPerUnit = 1., alpha = 0.3):
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
205 '''plot the ellipse corresponding to the Gaussians
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
206 and the predicted classes of the instances in the dataset'''
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
207 if fig is None:
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
208 fig = plt.figure()
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
209 labels = model.predict(dataset)
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
210 tmpDataset = nPixelsPerUnit*dataset
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
211 for i in xrange(model.n_components):
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
212 mean = nPixelsPerUnit*model.means_[i]
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
213 covariance = nPixelsPerUnit*model.covars_[i]
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
214 if dataset is not None:
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
215 plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
216 plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
217
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
218 # Plot an ellipse to show the Gaussian component
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
219 v, w = np.linalg.eigh(covariance)
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
220 angle = np.arctan2(w[0][1], w[0][0])
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
221 angle = 180*angle/np.pi # convert to degrees
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
222 v *= 4
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
223 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
224 ell.set_clip_box(fig.bbox)
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
225 ell.set_alpha(alpha)
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
226 fig.axes[0].add_artist(ell)