annotate python/ml.py @ 978:184f1dd307f9

corrected print and exception statements for Python 3
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 08 Feb 2018 05:53:50 -0500
parents ec1682ed999f
children 23f98ebb113f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
1 #! /usr/bin/env python
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
2 '''Libraries for machine learning algorithms'''
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
3
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
4 from os import path
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
5 from random import shuffle
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
6 from copy import copy, deepcopy
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
7
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
8 import numpy as np
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
9 from matplotlib.pylab import text
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
10 import matplotlib as mpl
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
11 import matplotlib.pyplot as plt
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
12 from scipy.cluster.vq import kmeans, whiten, vq
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
13 from sklearn import mixture
978
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
14 try:
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
15 import cv2
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
16 opencvAvailable = True
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
17 except ImportError:
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
18 print('OpenCV library could not be loaded (video replay functions will not be available)') # TODO change to logging module
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
19 opencvAvailable = False
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
20
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
21 import utils
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
22
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
23 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
24 # OpenCV ML models
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
25 #####################
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
26
961
ec1682ed999f added computation of confusion matrix and improved default parameter for block normalization for SVM classification
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 953
diff changeset
def computeConfusionMatrix(model, samples, responses):
    '''Counts the (true response, predicted response) pairs of a classifier

    model must provide predict(sample); samples and responses are iterated in parallel
    Returns a dictionary mapping (response, prediction) tuples to their number of occurrences'''
    confusion = {}
    for sample, truth in zip(samples, responses):
        outcome = (truth, model.predict(sample))
        if outcome in confusion:
            confusion[outcome] += 1
        else:
            confusion[outcome] = 1
    return confusion
ec1682ed999f added computation of confusion matrix and improved default parameter for block normalization for SVM classification
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 953
diff changeset
34
788
5b970a5bc233 updated classifying code to OpenCV 3.x (bug in function to load classification models)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 787
diff changeset
class StatModel(object):
    '''Base class that delegates loading/saving to the wrapped self.model

    Subclasses are expected to set self.model to an object with load and save methods'''

    def load(self, filename):
        'Loads the model from filename if it exists, otherwise prints a message and does nothing'
        if not path.exists(filename):
            print('Provided filename {} does not exist: model not loaded!'.format(filename))
            return
        self.model.load(filename)

    def save(self, filename):
        'Saves the wrapped model to filename'
        self.model.save(filename)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
45
978
184f1dd307f9 corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 961
diff changeset
if opencvAvailable:
    class SVM(StatModel):
        '''Wrapper for the OpenCV Support Vector Machine (cv2.SVM)

        Only defined when the cv2 module could be imported'''

        def __init__(self, svmType = cv2.SVM_C_SVC, kernelType = cv2.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
            self.model = cv2.SVM()
            # parameters are stored and passed to the model at training time
            self.params = {'svm_type': svmType,
                           'kernel_type': kernelType,
                           'degree': degree,
                           'gamma': gamma,
                           'coef0': coef0,
                           'Cvalue': Cvalue,
                           'nu': nu,
                           'p': p}
            # OpenCV3
            # self.model = cv2.SVM()
            # self.model.setType(svmType)
            # self.model.setKernel(kernelType)
            # self.model.setDegree(degree)
            # self.model.setGamma(gamma)
            # self.model.setCoef0(coef0)
            # self.model.setC(Cvalue)
            # self.model.setNu(nu)
            # self.model.setP(p)

        def train(self, samples, responses, computePerformance = False):
            '''Trains the model on samples and responses

            Returns the confusion matrix computed on the training data if computePerformance is True, None otherwise'''
            self.model.train(samples, responses, params = self.params)
            if not computePerformance:
                return None
            return computeConfusionMatrix(self, samples, responses)

        def predict(self, hog):
            'Returns the prediction of the wrapped model for the feature vector hog'
            return self.model.predict(hog)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
70
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
71
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
72 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
73 # Clustering
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
74 #####################
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
75
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 636
diff changeset
class Centroid(object):
    '''Wrapper around an instance that keeps a running mean and the number of averaged instances

    The wrapped instance must provide multiply(scalar), returning the scaled instance,
    and support the + operator with another instance (both are used by add and average)'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    def add(self, instance2):
        'Updates the centroid in place so that it is the mean of its previous instances and instance2'
        total = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        # float() so that the division is not truncated under Python 2
        self.instance = total.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid that is the weighted mean of self and the Centroid c

        Neither self nor c is modified; the weights are the respective numbers of instances'''
        nTotal = self.nInstances+c.nInstances
        total = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # multiply returns the scaled instance: its result must be kept
        return Centroid(total.multiply(1/float(nTotal)), nTotal)

    def plot(self, options = ''):
        'Plots the wrapped instance and writes the number of instances next to its position'
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
99
386
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Placeholder for a k-medoids algorithm clustering any dataset from its similarity matrix

    Either initialCentroids or k is meant to be passed
    Not implemented yet: the arguments are ignored and None is returned'''
    return None
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
104
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Single-pass clustering with a similarity function
    Each instance is added to the first centroid for which similarFunc(centroid.instance, instance)
    returns true; if there is none, the instance starts a new cluster.
    The number of clusters is therefore determined by the data and similarFunc

    data: list of instances (it is not modified, a shallow copy is shuffled)
    similarFunc: function of two instances returning true if they belong to the same cluster
    initialCentroids: optional list of Centroid objects to start from (deep-copied, not modified)
    shuffleData: whether the instances are processed in random order

    Returns the list of centroids (empty if there is no data and no initial centroid)'''
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        centroids = []
    else:
        centroids = deepcopy(initialCentroids)
    # all instances are processed: without initial centroids, the first one finds no
    # similar centroid and seeds the first cluster
    for instance in localdata:
        for centroid in centroids:
            if similarFunc(centroid.instance, instance):
                centroid.add(instance)
                break
        else: # no similar centroid was found
            centroids.append(Centroid(instance))

    return centroids
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
129
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
130 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
131
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral Clustering algorithm

    Builds the matrix L = I - D.S.D, where S is the similarity matrix and D the diagonal matrix
    of the inverse square roots of the sums of S along axis 0, decomposes it with a SVD,
    and clusters with k-means (iter runs) the whitened features made of the first k rows of V
    NOTE(review): presumably similarityMatrix is square and symmetric - confirm with callers

    Returns the cluster code of each instance and the singular values of L'''
    nInstances = len(similarityMatrix)
    # create Laplacian matrix
    sums = np.sum(similarityMatrix, axis=0)
    invSqrtSums = np.diag(1 / np.sqrt(sums))
    normalizedSimilarities = np.dot(invSqrtSums, np.dot(similarityMatrix, invSqrtSums))
    laplacian = np.identity(nInstances) - normalizedSimilarities
    # decompose the Laplacian
    _, sigma, vh = np.linalg.svd(laplacian)
    # one feature vector per instance: the first k rows of vh stacked as columns
    features = whiten(np.array(vh[:k]).T)
    # k-means
    centroids, _ = kmeans(features, k, iter)
    # code starting from 0 (represent first cluster) to k-1 (last cluster)
    code, _ = vq(features, centroids)
    return code, sigma
563
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
150
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
class Cluster(object):
    'Represents a cluster, with a prototype id and the list of instances in cluster'

    def __init__(self, prototypeId, memberIndices = None):
        '''prototypeId: identifier of the prototype of the cluster
        memberIndices: list of the indices of the instances in the cluster
        (stored as is if provided, otherwise a new empty list is created for each cluster)'''
        self.prototypeId = prototypeId
        # None instead of [] as default: a default list would be shared by all clusters
        self.memberIndices = [] if memberIndices is None else memberIndices
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
156
953
989917b1ed85 assign and learn work
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 952
diff changeset
def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
    '''Assigns instances to prototypes
    Each instance that is not a prototype is labeled with the index of its most similar prototype
    if their similarity is at least minSimilarity, and with -1 (outlier) otherwise.
    The clusters are then refined by iteratively removing the smallest cluster
    and reassigning all its elements until no cluster is smaller than minClusterSize
    (with the default value 0, no cluster is ever removed)

    instances: list of instances
    prototypeIndices: list of the indices of the prototypes in instances
    (modified in place when clusters are removed)
    similarities: square numpy array of similarities, indexed like instances
    if similarityFunc is provided, the negative similarities are considered as not computed
    and are computed as needed (and stored symmetrically in similarities)

    Returns the remaining prototype indices and the list of labels (one per instance)
    If no prototype remains, all instances are labeled -1'''
    indices = [i for i in range(len(instances)) if i not in prototypeIndices]
    labels = [-1]*len(instances)
    assign = True
    while assign:
        if len(prototypeIndices) == 0:
            # no prototype (left): the elements waiting for (re)assignment are outliers
            for i in indices:
                labels[i] = -1
            break
        for i in prototypeIndices:
            labels[i] = i
        for i in indices:
            if similarityFunc is not None:
                for j in prototypeIndices:
                    if similarities[i][j] < 0: # not computed yet
                        similarities[i][j] = similarityFunc(instances[i], instances[j])
                        similarities[j][i] = similarities[i][j]
            # position of the most similar prototype in prototypeIndices
            prototypeIdx = similarities[i][prototypeIndices].argmax()
            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
                labels[i] = prototypeIndices[prototypeIdx]
            else:
                labels[i] = -1 # outlier
        labelArray = np.array(labels) # built once for all the prototypes
        clusterSizes = {i: int(np.sum(labelArray == i)) for i in prototypeIndices}
        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
        if assign:
            prototypeIndices.remove(smallestClusterIndex)
            # only the elements of the removed cluster (including its prototype) are reassigned
            indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
    return prototypeIndices, labels
953
989917b1ed85 assign and learn work
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 952
diff changeset
def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = True, randomInitialization = False, initialPrototypeIndices = None):
    '''Finds exemplar (prototype) instances that represent each cluster
    Returns the list of prototype indices (in the instances list),
    or None if instances is empty or similarityFunc is None

    instances: the elements to cluster
    they must have a length (method __len__) if optimizeCentroid is False
    (the length is used to order the instances and to choose the prototypes)
    similarities: square numpy array, allocated with the right size,
    whose positions correspond to the positions in the instances list
    it is filled in place: negative values mean "not computed yet"
    and are replaced as needed by the result of similarityFunc (symmetrically)
    minSimilarity: if an instance is different enough from all prototypes (<minSimilarity),
    it becomes a new prototype
    otherwise it is assigned to the most similar existing prototype
    similarityFunc: function taking two instances and returning their similarity

    if optimizeCentroid is True, each time an element is added to a cluster,
    the prototype is recomputed as the element most similar to all in the cluster
    otherwise, the prototype is replaced by the added element if it is longer

    the instances are visited in random order if randomInitialization or optimizeCentroid is True,
    by decreasing length otherwise

    initialPrototypeIndices are indices in instances (the argument is not modified)

    TODO: check how similarity evolves in clusters'''
    if len(instances) == 0:
        print('no instances to cluster (empty list)')
        return None
    if similarityFunc is None:
        print('similarityFunc is None')
        return None

    def updateSimilarities(i, otherIndices):
        'Computes the missing (negative) similarities between instance i and the others'
        for j in otherIndices:
            if similarities[i][j] < 0:
                similarities[i][j] = similarityFunc(instances[i], instances[j])
                similarities[j][i] = similarities[i][j]

    # order in which instances are visited: a real list is needed
    # (sort and remove do not exist for range objects or numpy arrays)
    indices = list(range(len(instances)))
    if randomInitialization or optimizeCentroid:
        indices = np.random.permutation(indices).tolist()
    else:
        # longest instances first (stable sort, ties keep their original order)
        indices.sort(key = lambda i: len(instances[i]), reverse = True)

    if initialPrototypeIndices is None or len(initialPrototypeIndices) == 0:
        prototypeIndices = [indices[0]]
    else:
        # copy so that the caller's list is not modified
        prototypeIndices = list(initialPrototypeIndices)
    # each prototype starts its own cluster and is not visited again
    clusters = []
    for i in prototypeIndices:
        clusters.append([i])
        indices.remove(i)

    # go through all remaining instances
    for i in indices:
        updateSimilarities(i, prototypeIndices)
        label = similarities[i][prototypeIndices].argmax()
        if similarities[i][prototypeIndices[label]] < minSimilarity:
            # too different from all prototypes: new cluster, containing its prototype
            prototypeIndices.append(i)
            clusters.append([i])
        else:
            clusters[label].append(i)
            if optimizeCentroid:
                if len(clusters[label]) >= 2: # no point if only one element in cluster
                    updateSimilarities(i, clusters[label][:-1])
                    clusterIndices = clusters[label]
                    clusterSimilarities = similarities[clusterIndices][:,clusterIndices]
                    # new centroid: element with the largest total similarity to the cluster
                    newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()]
                    if prototypeIndices[label] != newCentroidIdx:
                        prototypeIndices[label] = newCentroidIdx
            elif len(instances[prototypeIndices[label]]) < len(instances[i]): # replace prototype by current instance i if longer # otherwise, possible to test if randomInitialization or initialPrototypes is not None
                prototypeIndices[label] = i
    return prototypeIndices
738
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
257
2472b4d59aea small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 735
diff changeset
def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
    '''Counts the number of labels equal to each prototype index
    Returns a dictionary {prototype index: number of elements},
    with the additional key 'outlier' for the number of labels equal to outlierIndex'''
    labelArray = np.array(labels)

    def countLabel(value):
        'Number of labels equal to value'
        return sum(labelArray == value)

    clusterSizes = {}
    for protoId in prototypeIndices:
        clusterSizes[protoId] = countLabel(protoId)
    clusterSizes['outlier'] = countLabel(outlierIndex)
    return clusterSizes
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
262
908
b297525b2cbf added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 907
diff changeset
def computeClusterStatistics(labels, prototypeIndices, instances, similarities, similarityFunc, clusters = None, outlierIndex = -1):
    '''Prints, for each cluster, its number of elements and, if it has at least two elements,
    the mean similarity to its prototype and the mean similarity between all its elements

    labels: label of each instance, either a prototype index or outlierIndex
    (any other value raises a KeyError)
    prototypeIndices: indices of the prototypes in instances
    similarities: square numpy array filled in place: the missing (negative) similarities
    between elements of a same cluster are computed with similarityFunc (symmetrically)
    clusters: if None, built from labels as the list of instance indices for each prototype
    (the outliers are not reported)
    if provided, it is assumed to be a list of lists of instance indices,
    in the same order as prototypeIndices - TODO confirm with callers

    the means assume that the similarity of an element to itself is stored as -1 (not computed),
    hence the corrections +1 and +n to the sums'''
    if clusters is None:
        clusters = {protoId: [] for protoId in prototypeIndices}
        # bucket for the outliers, using the label given by the caller
        clusters.setdefault(outlierIndex, [])
        for i, label in enumerate(labels):
            clusters[label].append(i)
        clusters = [clusters[protoId] for protoId in prototypeIndices]
    for i, cluster in enumerate(clusters):
        n = len(cluster)
        print('cluster {}: {} elements'.format(prototypeIndices[i], n))
        if n >= 2: # the means are not defined for a single element
            # compute the missing similarities for all pairs in the cluster
            for j, k in enumerate(cluster):
                for m in cluster[:j]:
                    if similarities[k][m] < 0:
                        similarities[k][m] = similarityFunc(instances[k], instances[m])
                        similarities[m][k] = similarities[k][m]
            print('Mean similarity to prototype: {}'.format((similarities[prototypeIndices[i]][cluster].sum()+1)/(n-1)))
            print('Mean overall similarity: {}'.format((similarities[cluster][:,cluster].sum()+n)/(n*(n-1))))
915
13434f5017dd work to save trajectory assignment to origin and destinations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 914
diff changeset
280
786
1f2b2d1f4fbf added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 738
diff changeset
281 # Gaussian Mixture Models
917
89cc05867c4c reorg and work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 916
diff changeset
def plotGMM(mean, covariance, gmmId, fig, color, alpha = 0.3):
    '''Plots a Gaussian component as an ellipse, with a cross and its id at the mean

    mean: center of the Gaussian (the first two coordinates are used)
    covariance: covariance matrix of the Gaussian
    gmmId: identifier written next to the mean
    fig: matplotlib figure, the ellipse is added to its first axes
    (the cross and the id are drawn with pyplot, ie in the current axes)
    color: color of the ellipse and of the cross: since it is appended to the marker 'x',
    it has to be usable in a matplotlib format string (eg 'r')
    alpha: transparency of the ellipse'''
    # v: eigenvalues, w: eigenvectors of the covariance matrix
    v, w = np.linalg.eigh(covariance)
    angle = 180*np.arctan2(w[0][1], w[0][0])/np.pi # in degrees
    v *= 4 # the ellipse width and height are 4 times the eigenvalues
    # angle is passed by keyword: it is keyword-only in recent matplotlib versions
    ell = mpl.patches.Ellipse(mean, v[0], v[1], angle = 180+angle, color = color)
    ell.set_clip_box(fig.bbox)
    ell.set_alpha(alpha)
    fig.axes[0].add_artist(ell)
    plt.plot([mean[0]], [mean[1]], 'x'+color)
    plt.annotate(str(gmmId), xy=(mean[0]+1, mean[1]+1))
916
7345f0d51faa added display of paths
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 915
diff changeset
292
915
13434f5017dd work to save trajectory assignment to origin and destinations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 914
diff changeset
def plotGMMClusters(model, labels = None, dataset = None, fig = None, colors = utils.colors, nUnitsPerPixel = 1., alpha = 0.3):
    '''plot the ellipse corresponding to the Gaussians
    and the predicted classes of the instances in the dataset

    model: Gaussian mixture model with attributes n_components, means_ and covariances_
    labels: component index of each instance of dataset (used only if dataset is provided)
    dataset: array of the instances (the first two columns are plotted)
    fig: figure to plot in (a new one is created if None, axes are added if it has none)
    colors: colors indexed by component
    nUnitsPerPixel: the means, covariances and dataset are divided by this value before plotting
    alpha: transparency of the ellipses'''
    if fig is None:
        fig = plt.figure()
    if len(fig.get_axes()) == 0:
        fig.add_subplot(111)
    if dataset is not None:
        # scaled once for all components
        tmpDataset = dataset/nUnitsPerPixel
        if labels is not None:
            # a list compared to an integer is a single boolean, not a mask
            labels = np.asarray(labels)
    for i in range(model.n_components):
        mean = model.means_[i]/nUnitsPerPixel
        covariance = model.covariances_[i]/nUnitsPerPixel
        # plot points
        if dataset is not None:
            plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
        # plot an ellipse to show the Gaussian component
        plotGMM(mean, covariance, i, fig, colors[i], alpha)
    if dataset is None: # to address issues without points, the axes limits are not redrawn
        # same scaling as the plotted ellipses
        means = model.means_/nUnitsPerPixel
        minima = means.min(0)
        maxima = means.max(0)
        # margin of half the extent of the means on each side
        xwidth = 0.5*(maxima[0]-minima[0])
        ywidth = 0.5*(maxima[1]-minima[1])
        plt.xlim(minima[0]-xwidth,maxima[0]+xwidth)
        plt.ylim(minima[1]-ywidth,maxima[1]+ywidth)