annotate python/ml.py @ 917:89cc05867c4c

reorg and work in progress
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 04 Jul 2017 18:00:01 -0400
parents 7345f0d51faa
children d6c1c05d11f5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
1 #! /usr/bin/env python
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
2 '''Libraries for machine learning algorithms'''
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
3
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
4 from os import path
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
5 from random import shuffle
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
6 from copy import copy, deepcopy
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
7
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
8 import numpy as np
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
9 from matplotlib.pylab import text
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
10 import matplotlib as mpl
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
11 import matplotlib.pyplot as plt
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
12 from scipy.cluster.vq import kmeans, whiten, vq
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
13 from sklearn import mixture
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
14 import cv2
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
15
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
16 import utils
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
17
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
18 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
19 # OpenCV ML models
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
20 #####################
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
21
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
class StatModel(object):
    '''Base class adding file persistence to a wrapped model

    Subclasses are expected to store in self.model an object
    that provides load(filename) and save(filename)'''

    def load(self, filename):
        '''Loads the wrapped model from filename

        If the file does not exist, a message is printed and nothing is loaded'''
        if not path.exists(filename):
            print('Provided filename {} does not exist: model not loaded!'.format(filename))
            return
        self.model.load(filename)

    def save(self, filename):
        'Saves the wrapped model to filename'
        self.model.save(filename)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
32
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
class SVM(StatModel):
    '''Wrapper for the OpenCV support vector machine algorithm (cv2.SVM)

    The training parameters are stored in self.params
    and passed to the model when train is called'''

    def __init__(self, svmType = cv2.SVM_C_SVC, kernelType = cv2.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        self.model = cv2.SVM()
        self.params = {'svm_type': svmType,
                       'kernel_type': kernelType,
                       'degree': degree,
                       'gamma': gamma,
                       'coef0': coef0,
                       'Cvalue': Cvalue,
                       'nu': nu,
                       'p': p}
        # NOTE: the OpenCV 3 variant of this code did not use a params dict,
        # it configured the model through its setters instead:
        # setType, setKernel, setDegree, setGamma, setCoef0, setC, setNu, setP

    def train(self, samples, responses):
        'Trains the model on samples and responses with the stored parameters'
        self.model.train(samples, responses, params = self.params)

    def predict(self, hog):
        'Returns the prediction of the model for the feature vector hog'
        return self.model.predict(hog)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
54
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
55
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
56 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
57 # Clustering
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
58 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
59
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 636
diff changeset
class Centroid(object):
    '''Wrapper around instances to add a counter

    self.instance is the mean of the nInstances instances merged so far
    The wrapped instances must provide multiply(scalar), returning the scaled instance,
    and support the addition of two instances (operator +)
    plot also requires the instances to have a plot method and a position with x and y'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    def add(self, instance2):
        'Adds instance2 to the centroid: updates the mean instance and the counter'
        total = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        # float to avoid an integer division
        self.instance = total.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid, mean of this centroid and centroid c,
        each weighted by its number of instances

        Neither self nor c is modified'''
        nInstances = self.nInstances+c.nInstances
        total = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # multiply returns the scaled instance: its result must be kept
        return Centroid(total.multiply(1/float(nInstances)), nInstances)

    def plot(self, options = ''):
        'Plots the mean instance and writes the number of instances next to its position'
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
83
386
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Placeholder for an algorithm clustering any dataset based on a similarity matrix
    Either the initialCentroids or k are passed

    Not implemented yet: the arguments are ignored and None is returned'''
    return None
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
88
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Single-pass clustering with a similarity function

    Each instance is added to the first centroid for which
    similarFunc(centroid.instance, instance) returns true;
    if there is none, the instance becomes a new centroid.
    The number of clusters is determined accordingly.

    data: list of instances (the list is not modified)
    similarFunc: function of two instances returning true if they should be in the same cluster
    initialCentroids: optional list of centroids to start from (deep-copied, not modified);
    if None, the first instance (after shuffling) is used as first centroid
    shuffleData: if true, the instances are processed in random order

    The centroids must provide the attribute instance and the method add(instance)

    Returns the list of centroids (empty if there is no data and no initial centroid)'''
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        if len(localdata) == 0:
            return []
        centroids = [Centroid(localdata[0])]
        remaining = localdata[1:]
    else:
        centroids = deepcopy(initialCentroids)
        # no instance was used as a seed: all of them must be assigned
        remaining = localdata
    for instance in remaining:
        i = 0
        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
            i += 1
        if i == len(centroids):
            centroids.append(Centroid(instance))
        else:
            centroids[i].add(instance)

    return centroids
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
113
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
115
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
def spectralClustering(similarityMatrix, k, iter=20):
    '''Clusters elements with the spectral clustering algorithm

    similarityMatrix: square matrix of the similarities between elements
    k: number of vectors kept to build the features, and number of centroids asked to k-means
    iter: number of runs of k-means (passed to scipy.cluster.vq.kmeans)

    Returns the cluster code of each element (starting from 0)
    and the singular values of the Laplacian matrix'''
    size = len(similarityMatrix)
    # Laplacian matrix: identity - D.S.D, where D is diagonal
    # with the inverse square roots of the sums of the similarities
    similaritySums = np.sum(similarityMatrix, axis=0)
    scaling = np.diag(1 / np.sqrt(similaritySums))
    laplacian = np.identity(size) - np.dot(scaling, np.dot(similarityMatrix, scaling))
    # singular value decomposition of the Laplacian matrix
    _, sigma, vh = np.linalg.svd(laplacian)
    # the first k rows of the last factor are stacked as columns to make the features,
    # which are normalized before k-means
    features = whiten(np.array(vh[:k]).T)
    centroids, _ = kmeans(features, k, iter)
    # code of the closest centroid for each element
    code, _ = vq(features, centroids)
    return code, sigma
563
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
134
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
class Cluster(object):
    'Represents a cluster, with a prototype id and the list of instances in cluster'
    def __init__(self, prototypeId, memberIndices = None):
        '''prototypeId: id of the prototype of the cluster
        memberIndices: list of the indices of the instances in the cluster
        (a new empty list is created for each cluster if not provided)'''
        self.prototypeId = prototypeId
        # None as default: a list as default value would be shared by all clusters
        if memberIndices is None:
            self.memberIndices = []
        else:
            self.memberIndices = memberIndices
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
140
907
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None):
    '''Assigns instances to prototypes
    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize

    instances: list of instances
    prototypeIndices: list of the indices of the prototypes in instances
    (modified in place when prototypes are removed, and returned)
    similarities: numpy array of the similarities between instances;
    if similarityFunc is not None, negative values are considered as not computed,
    they are computed as needed and stored symmetrically in similarities
    minSimilarity: minimum similarity to the most similar prototype
    for an instance to be assigned to it, otherwise it is an outlier

    Returns the prototype indices and the list of labels of the instances:
    index of the prototype of the cluster, or -1 for outliers'''
    nInstances = len(instances)
    indices = [i for i in range(nInstances) if i not in prototypeIndices]
    labels = [-1]*nInstances
    assign = True
    while assign:
        if len(prototypeIndices) == 0:
            # no prototype (left): the instances to assign can only be outliers
            for i in indices:
                labels[i] = -1
            break
        for i in prototypeIndices:
            labels[i] = i
        for i in indices:
            if similarityFunc is not None:
                for j in prototypeIndices:
                    if similarities[i][j] < 0:
                        similarities[i][j] = similarityFunc(instances[i], instances[j])
                        similarities[j][i] = similarities[i][j]
            prototypeIdx = similarities[i][prototypeIndices].argmax()
            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
                labels[i] = prototypeIndices[prototypeIdx]
            else:
                labels[i] = -1 # outlier
        if minClusterSize is None:
            # no refinement requested (None cannot be compared to a size)
            assign = False
        else:
            clusterSizes = {i: labels.count(i) for i in prototypeIndices}
            smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
            assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
            if assign:
                prototypeIndices.remove(smallestClusterIndex)
                # elements of the removed cluster (including its prototype) are reassigned
                indices = [i for i in range(nInstances) if labels[i] == smallestClusterIndex]
    return prototypeIndices, labels
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
169
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
170 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, assign = True, initialPrototypeIndices = None):
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
171 '''Finds exemplar (prototype) instance that represent each cluster
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
172 Returns the prototype indices (in the instances list) and the cluster label of each instance
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
173
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
174 the elements in the instances list must have a length (method __len__), or one can use the random initialization
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
175 the positions in the instances list corresponds to the similarities
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
176 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
177 similarities must still be allocated with the right size
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
178
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
179 if an instance is different enough (<minSimilarity),
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
180 it will become a new prototype.
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
181 Non-prototype instances will be assigned to an existing prototype
843
5dc7a507353e updated to learn prototypes
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 807
diff changeset
182
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
183 if optimizeCentroid is True, each time an element is added, we recompute the centroid trajectory as the most similar to all in the cluster
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
184
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
185 TODO: check how similarity evolves in clusters'''
878
8e8ec4ece66e minor + bug corrected in motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 843
diff changeset
186 if len(instances) == 0:
8e8ec4ece66e minor + bug corrected in motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 843
diff changeset
187 print('no instances to cluster (empty list)')
8e8ec4ece66e minor + bug corrected in motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 843
diff changeset
188 return None
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
189 if similarityFunc is None:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
190 print('similarityFunc is None')
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
191 return None
878
8e8ec4ece66e minor + bug corrected in motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 843
diff changeset
192
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
193 # sort instances based on length
735
0e875a7f5759 modified prototypeCluster algorithm to enforce similarity when re-assigning and to compute only the necessary similarities
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 734
diff changeset
194 indices = range(len(instances))
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
195 if randomInitialization or optimizeCentroid:
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
196 indices = np.random.permutation(indices)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
197 else:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
198 def compare(i, j):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
199 if len(instances[i]) > len(instances[j]):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
200 return -1
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
201 elif len(instances[i]) == len(instances[j]):
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
202 return 0
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
203 else:
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
204 return 1
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
205 indices.sort(compare)
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
206 # go through all instances
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
207 clusters = []
907
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
208 if initialPrototypeIndices is None:
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
209 prototypeIndices = [indices[0]]
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
210 else:
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
211 prototypeIndices = initialPrototypeIndices # think of the format: if indices, have to be in instances
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
212 for i in prototypeIndices:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
213 clusters.append([i])
731
b02431a8234c made prototypecluster generic, in ml module, and added randominitialization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 680
diff changeset
214 for i in indices[1:]:
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
215 for j in prototypeIndices:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
216 if similarities[i][j] < 0:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
217 similarities[i][j] = similarityFunc(instances[i], instances[j])
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
218 similarities[j][i] = similarities[i][j]
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
219 label = similarities[i][prototypeIndices].argmax()
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
220 if similarities[i][prototypeIndices[label]] < minSimilarity:
843
5dc7a507353e updated to learn prototypes
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 807
diff changeset
221 prototypeIndices.append(i)
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
222 clusters.append([])
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
223 else:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
224 clusters[label].append(i)
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
225 if optimizeCentroid:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
226 if len(clusters[label]) >= 2: # no point if only one element in cluster
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
227 for j in clusters[label][:-1]:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
228 if similarities[i][j] < 0:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
229 similarities[i][j] = similarityFunc(instances[i], instances[j])
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
230 similarities[j][i] = similarities[i][j]
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
231 clusterIndices = clusters[label]
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
232 clusterSimilarities = similarities[clusterIndices][:,clusterIndices]
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
233 newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()]
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
234 if prototypeIndices[label] != newCentroidIdx:
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
235 prototypeIndices[label] = newCentroidIdx
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
236 elif randomInitialization: # replace prototype by current instance i if longer
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
237 if len(instances[prototypeIndices[label]]) < len(instances[i]):
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
238 prototypeIndices[label] = i
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
239
907
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
240 if assign:
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
241 return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize)
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
242 else:
9fd7b18f75b4 re arranged motion pattern learning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 878
diff changeset
243 return prototypeIndices, None
738
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
244
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
    '''Counts the number of instances assigned to each prototype and the number of outliers

    labels is the sequence of the labels of the instances
    (they are compared to the elements of prototypeIndices and to outlierIndex)

    Returns a dictionary mapping each element of prototypeIndices
    to the number of labels equal to it,
    plus the key 'outlier' mapped to the number of labels equal to outlierIndex'''
    labels = np.asarray(labels) # converted once, instead of once per prototype
    clusterSizes = {i: int(np.count_nonzero(labels == i)) for i in prototypeIndices}
    clusterSizes['outlier'] = int(np.count_nonzero(labels == outlierIndex))
    return clusterSizes
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
249
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
def computeClusterStatistics(labels, prototypeIndices, instances, similarities, similarityFunc, clusters = None, outlierIndex = -1):
    '''Prints the size of each prototype cluster and, for the clusters of at least 2 elements,
    the mean similarity of the elements to the prototype and the mean pairwise similarity

    labels[i] is the label of instance i: an element of prototypeIndices or outlierIndex
    similarities is a square numpy array in which negative values mean 'not computed':
    the missing similarities are computed with similarityFunc and stored (symmetrically) in similarities
    clusters, if provided, is the list of the instance indices of each cluster,
    in the same order as prototypeIndices (it is built from labels otherwise)

    The self-similarities (diagonal of similarities) are excluded from the means,
    whatever their value'''
    if clusters is None:
        # group the instance indices by label
        # the outlier label needs a key too so that outliers do not raise a KeyError
        clustersByLabel = {protoId: [] for protoId in list(prototypeIndices)+[outlierIndex]}
        for instanceIdx, label in enumerate(labels):
            clustersByLabel[label].append(instanceIdx)
        clusters = [clustersByLabel[protoId] for protoId in prototypeIndices]
    for i, cluster in enumerate(clusters):
        n = len(cluster)
        protoId = prototypeIndices[i]
        print('cluster {}: {} elements'.format(protoId, n))
        if n >= 2:
            # compute the missing pairwise similarities in the cluster
            for j, k in enumerate(cluster):
                for l in cluster[:j]:
                    if similarities[k][l] < 0:
                        similarities[k][l] = similarityFunc(instances[k], instances[l])
                        similarities[l][k] = similarities[k][l]
            # similarity to the prototype, excluding the prototype itself
            members = [k for k in cluster if k != protoId]
            for k in members: # nothing left to compute if the prototype belongs to its cluster
                if similarities[protoId][k] < 0:
                    similarities[protoId][k] = similarityFunc(instances[protoId], instances[k])
                    similarities[k][protoId] = similarities[protoId][k]
            print('Mean similarity to prototype: {}'.format(similarities[protoId][members].sum()/len(members)))
            # mean over the n*(n-1) ordered pairs of distinct elements
            clusterSimilarities = similarities[cluster][:,cluster]
            print('Mean overall similarity: {}'.format((clusterSimilarities.sum()-np.trace(clusterSimilarities))/(n*(n-1))))
915
13434f5017dd work to save trajectory assignment to origin and destinations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 914
diff changeset
267
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
268 # Gaussian Mixture Models
917
89cc05867c4c reorg and work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 916
diff changeset
def plotGMM(mean, covariance, gmmId, fig, color, alpha = 0.3):
    '''Plots a Gaussian component on the first axes of fig:
    an ellipse centered on mean, derived from the eigen-decomposition of covariance,
    a 'x' marker at mean and the text of gmmId next to it

    fig must already have axes (fig.axes[0] is used)
    color is used both as the ellipse color and in the marker format string ('x'+color),
    eg a one-letter matplotlib color code
    alpha is the transparency of the ellipse'''
    eigenValues, eigenVectors = np.linalg.eigh(covariance)
    angle = 180*np.arctan2(eigenVectors[0][1], eigenVectors[0][0])/np.pi
    # NOTE(review): the ellipse axes are proportional to the eigenvalues (variances),
    # not to their square roots (standard deviations) - confirm this is intended
    axesLengths = 4*eigenValues
    # angle is passed as a keyword since it is keyword-only in recent matplotlib versions
    ell = mpl.patches.Ellipse(mean, axesLengths[0], axesLengths[1], angle = 180+angle, color = color)
    ell.set_clip_box(fig.bbox)
    ell.set_alpha(alpha)
    # everything is drawn on the same axes, even if fig is not the current pyplot figure
    ax = fig.axes[0]
    ax.add_artist(ell)
    ax.plot([mean[0]], [mean[1]], 'x'+color)
    ax.annotate(str(gmmId), xy=(mean[0]+1, mean[1]+1))
916
7345f0d51faa added display of paths
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 915
diff changeset
279
915
13434f5017dd work to save trajectory assignment to origin and destinations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 914
diff changeset
def plotGMMClusters(model, labels = None, dataset = None, fig = None, colors = utils.colors, nUnitsPerPixel = 1., alpha = 0.3):
    '''plot the ellipse corresponding to the Gaussians
    and the predicted classes of the instances in the dataset

    model must have the attributes n_components, means_ and covariances_
    labels is compared to each component index to select the rows of dataset to plot with colors[i]
    all coordinates are divided by nUnitsPerPixel before plotting
    a new figure is created if fig is None, and axes are added to fig if it has none
    (the first axes of fig are used for plotting)'''
    if fig is None:
        fig = plt.figure()
    if len(fig.get_axes()) == 0:
        fig.add_subplot(111)
    ax = fig.axes[0] # same axes as the ones used by plotGMM for the ellipses
    if dataset is not None:
        tmpDataset = dataset/nUnitsPerPixel # scaled once for all components
    for i in range(model.n_components):
        mean = model.means_[i]/nUnitsPerPixel
        # NOTE(review): covariances are scaled like lengths here, not like squared lengths - confirm
        covariance = model.covariances_[i]/nUnitsPerPixel
        # plot points
        if dataset is not None:
            ax.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
        # plot an ellipse to show the Gaussian component
        plotGMM(mean, covariance, i, fig, colors[i], alpha)
    if dataset is None: # to address issues without points, the axes limits are not redrawn
        # the limits must be in the same (scaled) coordinates as the plotted components
        scaledMeans = model.means_/nUnitsPerPixel
        minima = scaledMeans.min(0)
        maxima = scaledMeans.max(0)
        xwidth = 0.5*(maxima[0]-minima[0])
        ywidth = 0.5*(maxima[1]-minima[1])
        ax.set_xlim(minima[0]-xwidth, maxima[0]+xwidth)
        ax.set_ylim(minima[1]-ywidth, maxima[1]+ywidth)