annotate python/ml.py @ 805:180b6b0231c0

added saving/loading points of interests
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 09 Jun 2016 15:36:21 -0400
parents 1158a6e2d28e
children 52aa03260f03
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
1 #! /usr/bin/env python
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
2 '''Libraries for machine learning algorithms'''
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
3
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
4 from os import path
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
5 from random import shuffle
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
6 from copy import copy, deepcopy
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
7
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
8 import numpy as np
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
9 from matplotlib.pylab import text
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
10 import matplotlib as mpl
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
11 import matplotlib.pyplot as plt
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
12 from scipy.cluster.vq import kmeans, whiten, vq
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
13 from sklearn import mixture
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
14 import cv2
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
15
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
16 import utils
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
17
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
18 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
19 # OpenCV ML models
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
20 #####################
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
21
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
22 class StatModel(object):
791
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
23 '''Abstract class for loading/saving model
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
24
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
25 Issues with OpenCV, does not seem to work'''
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
26 def load(self, filename):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
27 if path.exists(filename):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
28 self.model.load(filename)
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
29 else:
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
30 print('Provided filename {} does not exist: model not loaded!'.format(filename))
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
31
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
32 def save(self, filename):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
33 self.model.save(filename)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
34
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
35 class SVM(StatModel):
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
36 '''wrapper for OpenCV SimpleVectorMachine algorithm'''
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
37 def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
38 self.model = cv2.ml.SVM_create()
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
39 self.model.setType(svmType)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
40 self.model.setKernel(kernelType)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
41 self.model.setDegree(degree)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
42 self.model.setGamma(gamma)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
43 self.model.setCoef0(coef0)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
44 self.model.setC(Cvalue)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
45 self.model.setNu(nu)
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
46 self.model.setP(p)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
47
791
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
48 def load(self, filename):
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
49 if path.exists(filename):
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
50 cv2.ml.SVM_load(filename)
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
51 else:
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
52 print('Provided filename {} does not exist: model not loaded!'.format(filename))
1158a6e2d28e temporary solution for classification, with corrected svm.cpp and ml.hpp for loading saved classifiers
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 788
diff changeset
53
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
54 def train(self, samples, layout, responses):
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
55 self.model.train(samples, layout, responses)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
56
680
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
57 def predict(self, hog):
da1352b89d02 classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
58 return self.model.predict(hog)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
59
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
60
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
61 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
62 # Clustering
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
63 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
64
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 636
diff changeset
65 class Centroid(object):
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
66 'Wrapper around instances to add a counter'
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
67
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
68 def __init__(self, instance, nInstances = 1):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
69 self.instance = instance
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
70 self.nInstances = nInstances
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
71
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
72 # def similar(instance2):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
73 # return self.instance.similar(instance2)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
74
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
75 def add(self, instance2):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
76 self.instance = self.instance.multiply(self.nInstances)+instance2
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
77 self.nInstances += 1
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
78 self.instance = self.instance.multiply(1/float(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
79
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
80 def average(c):
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
81 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
82 inst.multiply(1/(self.nInstances+instance.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
83 return Centroid(inst, self.nInstances+instance.nInstances)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
84
515
727e3c529519 renamed all draw functions to plot for consistency
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 501
diff changeset
85 def plot(self, options = ''):
727e3c529519 renamed all draw functions to plot for consistency
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 501
diff changeset
86 self.instance.plot(options)
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
87 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
88
386
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
89 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
90 '''Algorithm that clusters any dataset based on a similarity matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
91 Either the initialCentroids or k are passed'''
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
92 pass
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
93
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
94 def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
95 '''k-means algorithm with similarity function
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
96 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
97 The number of clusters will be determined accordingly
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
98
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
99 data: list of instances
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
100 averageCentroid: '''
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
101 localdata = copy(data) # shallow copy to avoid modifying data
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
102 if shuffleData:
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
103 shuffle(localdata)
636
3058e00887bc removed all issues because of tests with None, using is instead of == or !=
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 563
diff changeset
104 if initialCentroids is None:
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
105 centroids = [Centroid(localdata[0])]
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
106 else:
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
107 centroids = deepcopy(initialCentroids)
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
108 for instance in localdata[1:]:
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
109 i = 0
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
110 while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
111 i += 1
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
112 if i == len(centroids):
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
113 centroids.append(Centroid(instance))
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
114 else:
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
115 centroids[i].add(instance)
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
116
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
117 return centroids
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
118
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
119 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
120
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
121 def spectralClustering(similarityMatrix, k, iter=20):
285
5957aa1d69e1 Integrating Mohamed's changes
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 184
diff changeset
122 '''Spectral Clustering algorithm'''
5957aa1d69e1 Integrating Mohamed's changes
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 184
diff changeset
123 n = len(similarityMatrix)
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
124 # create Laplacian matrix
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
125 rowsum = np.sum(similarityMatrix,axis=0)
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
126 D = np.diag(1 / np.sqrt(rowsum))
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
127 I = np.identity(n)
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
128 L = I - np.dot(D,np.dot(similarityMatrix,D))
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
129 # compute eigenvectors of L
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
130 U,sigma,V = np.linalg.svd(L)
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
131 # create feature vector from k first eigenvectors
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
132 # by stacking eigenvectors as columns
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
133 features = np.array(V[:k]).T
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
134 # k-means
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
135 features = whiten(features)
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
136 centroids,distortion = kmeans(features,k, iter)
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
137 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
309
80cbafd69109 Added spectral clustering function
Mohamed Gomaa
parents: 308
diff changeset
138 return code,sigma
563
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
139
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
140 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
141 '''Finds exemplar (prototype) instance that represent each cluster
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
142 Returns the prototype indices (in the instances list) and the cluster label of each instance
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
143
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
144 the elements in the instances list must have a length (method __len__), or one can use the random initialization
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
145 the positions in the instances list corresponds to the similarities
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
146 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
147 similarities must still be allocated with the right size
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
148
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
149 if an instance is different enough (<minSimilarity),
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
150 it will become a new prototype.
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
151 Non-prototype instances will be assigned to an existing prototype
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
152 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
153 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
154
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
155 # sort instances based on length
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
156 indices = range(len(instances))
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
157 if randomInitialization:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
158 indices = np.random.permutation(indices)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
159 else:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
160 def compare(i, j):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
161 if len(instances[i]) > len(instances[j]):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
162 return -1
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
163 elif len(instances[i]) == len(instances[j]):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
164 return 0
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
165 else:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
166 return 1
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
167 indices.sort(compare)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
168 # go through all instances
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
169 prototypeIndices = [indices[0]]
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
170 for i in indices[1:]:
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
171 if similarityFunc is not None:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
172 for j in prototypeIndices:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
173 if similarities[i][j] < 0:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
174 similarities[i][j] = similarityFunc(instances[i], instances[j])
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
175 similarities[j][i] = similarities[i][j]
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
176 if similarities[i][prototypeIndices].max() < minSimilarity:
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
177 prototypeIndices.append(i)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
178
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
179 # assignment
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
180 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
181 assign = True
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
182 while assign:
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
183 labels = [-1]*similarities.shape[0]
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
184 for i in prototypeIndices:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
185 labels[i] = i
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
186 for i in indices:
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
187 if similarityFunc is not None:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
188 for j in prototypeIndices:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
189 if similarities[i][j] < 0:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
190 similarities[i][j] = similarityFunc(instances[i], instances[j])
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
191 similarities[j][i] = similarities[i][j]
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
192 prototypeIdx = similarities[i][prototypeIndices].argmax()
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
193 if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
194 labels[i] = prototypeIndices[prototypeIdx]
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
195 else:
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
196 labels[i] = -1 # outlier
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
197 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
198 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
199 assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
200 if assign:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
201 prototypeIndices.remove(smallestClusterIndex)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
202 indices.append(smallestClusterIndex)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
203
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
204 return prototypeIndices, labels
738
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
205
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
206 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
207 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
208 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex)
738
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
209 return clusterSizes
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
210
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
211 # Gaussian Mixture Models
805
180b6b0231c0 added saving/loading points of interests
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 791
diff changeset
212 def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nUnitsPerPixel = 1., alpha = 0.3):
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
213 '''plot the ellipse corresponding to the Gaussians
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
214 and the predicted classes of the instances in the dataset'''
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
215 if fig is None:
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
216 fig = plt.figure()
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
217 labels = model.predict(dataset)
805
180b6b0231c0 added saving/loading points of interests
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 791
diff changeset
218 tmpDataset = dataset/nUnitsPerPixel
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
219 for i in xrange(model.n_components):
805
180b6b0231c0 added saving/loading points of interests
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 791
diff changeset
220 mean = model.means_[i]/nUnitsPerPixel
180b6b0231c0 added saving/loading points of interests
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 791
diff changeset
221 covariance = model.covars_[i]/nUnitsPerPixel
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
222 if dataset is not None:
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
223 plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
224 plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
225
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
226 # Plot an ellipse to show the Gaussian component
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
227 v, w = np.linalg.eigh(covariance)
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
228 angle = np.arctan2(w[0][1], w[0][0])
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
229 angle = 180*angle/np.pi # convert to degrees
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
230 v *= 4
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
231 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
232 ell.set_clip_box(fig.bbox)
787
0a428b449b80 improved script to display over world image
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 786
diff changeset
233 ell.set_alpha(alpha)
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
234 fig.axes[0].add_artist(ell)
805
180b6b0231c0 added saving/loading points of interests
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 791
diff changeset
235 return labels