Mercurial Hosting > traffic-intelligence
annotate trafficintelligence/ml.py @ 1273:655a1646f0d5
updated setRoadUsers to force the new objects
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 18 Jun 2024 15:29:28 -0400 |
parents | ab4c72b9475c |
children |
rev | line source |
---|---|
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
1 #! /usr/bin/env python |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
2 '''Libraries for machine learning algorithms''' |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
3 |
786
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
4 from os import path |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
5 from random import shuffle |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
6 from copy import copy, deepcopy |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
7 |
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
8 import numpy as np |
786
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
9 from matplotlib.pylab import text |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
10 import matplotlib as mpl |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
11 import matplotlib.pyplot as plt |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
12 from scipy.cluster.vq import kmeans, whiten, vq |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
13 from sklearn import mixture |
978
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
14 try: |
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
15 import cv2 |
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
16 opencvAvailable = True |
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
17 except ImportError: |
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
18 print('OpenCV library could not be loaded (video replay functions will not be available)') # TODO change to logging module |
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
19 opencvAvailable = False |
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
20 |
1028
cc5cb04b04b0
major update using the trafficintelligence package name and install through pip
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1025
diff
changeset
|
21 from trafficintelligence import utils |
786
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
22 |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
23 ##################### |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
24 # OpenCV ML models |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
25 ##################### |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
26 |
961
ec1682ed999f
added computation of confusion matrix and improved default parameter for block normalization for SVM classification
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
953
diff
changeset
|
def computeConfusionMatrix(model, samples, responses):
    '''Computes the confusion matrix of the classifier (model)

    samples should be n samples by m variables
    responses are the n expected classes (flat or n by 1 array-like)

    model must provide a predict(samples) method returning the predicted
    classes (a sequence, or a single value if there is only one sample)

    Returns a dictionary whose keys are the tuples (expected, predicted)
    and whose values are the number of samples in that case'''
    # flatten to plain Python scalars: a scalar prediction cannot be zipped
    # and the rows of a n by 1 array are not hashable
    predictions = np.atleast_1d(model.predict(samples)).ravel().tolist()
    expected = np.atleast_1d(responses).ravel().tolist()
    classifications = {}
    for y, predicted in zip(expected, predictions):
        key = (y, predicted)
        classifications[key] = classifications.get(key, 0)+1
    return classifications
ec1682ed999f
added computation of confusion matrix and improved default parameter for block normalization for SVM classification
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
953
diff
changeset
|
36 |
978
184f1dd307f9
corrected print and exception statements for Python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
961
diff
changeset
|
if opencvAvailable:
    class SVM(object):
        '''Wrapper for the OpenCV Support Vector Machine algorithm (cv2.ml.SVM)'''
        def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
            '''Creates the OpenCV model and forwards each parameter to the corresponding setter'''
            self.model = cv2.ml.SVM_create()
            self.model.setType(svmType)
            self.model.setKernel(kernelType)
            self.model.setDegree(degree)
            self.model.setGamma(gamma)
            self.model.setCoef0(coef0)
            self.model.setC(Cvalue)
            self.model.setNu(nu)
            self.model.setP(p)

        def save(self, filename):
            '''Saves the underlying OpenCV model to filename'''
            self.model.save(filename)

        def train(self, samples, layout, responses, computePerformance = False):
            '''Trains the model (arguments are passed unchanged to the OpenCV train method)

            if computePerformance is True, returns the confusion matrix
            computed on the training samples, otherwise returns None'''
            self.model.train(samples, layout, responses)
            if computePerformance:
                return computeConfusionMatrix(self, samples, responses)

        def predict(self, hog):
            '''Predicts the class(es) of the sample(s) in hog (one sample per row)

            Returns a single value if hog has only one row (as stored in the OpenCV result),
            otherwise the list of the predicted classes converted to int'''
            retval, predictions = self.model.predict(hog)
            if hog.shape[0] == 1:
                return predictions[0][0]
            else:
                # np.int was only an alias of the builtin int and no longer exists in NumPy >= 1.24
                return np.asarray(predictions, dtype = int).ravel().tolist()

    def SVM_load(filename):
        '''Loads a SVM saved in filename

        Returns None (after printing a message) if the file does not exist'''
        if path.exists(filename):
            svm = SVM()
            # replaces the default model created by the constructor
            svm.model = cv2.ml.SVM_load(filename)
            return svm
        else:
            print('Provided filename {} does not exist: model not loaded!'.format(filename))
            return None
993
e8eabef7857c
update to OpenCV3 for python
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
980
diff
changeset
|
74 |
786
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
75 ##################### |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
76 # Clustering |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
77 ##################### |
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
78 |
665
15e244d2a1b5
corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
636
diff
changeset
|
class Centroid(object):
    '''Wrapper around instances to add a counter

    The wrapped instance must provide a multiply(scalar) method returning
    the scaled instance and support the addition of two instances'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    # def similar(instance2):
    #     return self.instance.similar(instance2)

    def add(self, instance2):
        '''Updates the centroid in place to the average including instance2'''
        # back to the sum of the instances, add the new one, then average again
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new centroid, average of self and centroid c weighted by their numbers of instances

        Neither self nor c is modified'''
        nInstances = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # multiply returns the scaled instance (see add): its result must be kept
        return Centroid(inst.multiply(1/float(nInstances)), nInstances)

    def plot(self, options = ''):
        '''Plots the instance and writes the number of instances next to its position'''
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
102 |
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Algorithm that clusters any dataset based on a similarity matrix
    Either the initialCentroids or k are passed

    Placeholder: not implemented yet, the function does nothing and returns None'''
    pass
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
107 |
526
21bdeb29f855
corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
515
diff
changeset
|
def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Sequential clustering algorithm based on a similarity function
    Each instance is added to the first centroid for which
    similarFunc(centroid.instance, instance) returns true;
    if there is none, the instance becomes a new centroid.
    It is supposed that the average centroid of a set of instances can be computed (see Centroid.add).
    The number of clusters will be determined accordingly

    data: list of instances (not modified, a shallow copy is used)
    similarFunc: function of two instances returning true if they belong to the same cluster
    initialCentroids: optional list of centroids to start from (not modified, a deep copy is used)
    shuffleData: if true, the instances are processed in random order

    Returns the list of centroids (empty if there is neither data nor initial centroids)'''
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        if len(localdata) == 0:
            return []
        # the first instance seeds the first centroid
        centroids = [Centroid(localdata[0])]
        remainingData = localdata[1:]
    else:
        centroids = deepcopy(initialCentroids)
        # no instance was used as seed: all of them must be assigned
        remainingData = localdata
    for instance in remainingData:
        i = 0
        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
            i += 1
        if i == len(centroids):
            centroids.append(Centroid(instance))
        else:
            centroids[i].add(instance)

    return centroids
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
132 |
382
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
133 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements |
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
134 |
293
ee3302528cdc
rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
285
diff
changeset
|
def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral Clustering algorithm

    similarityMatrix: square matrix of the similarities between the instances
    k: number of singular vectors kept as features and number of clusters requested from k-means
    iter: passed as third argument to scipy.cluster.vq.kmeans

    Returns the cluster code of each instance and the singular values of the Laplacian matrix'''
    nInstances = len(similarityMatrix)
    # Laplacian matrix I - D.S.D, where D is diagonal with the inverse square roots of the sums over axis 0
    inverseSqrtSums = np.diag(1 / np.sqrt(np.sum(similarityMatrix, axis=0)))
    laplacian = np.identity(nInstances) - np.dot(inverseSqrtSums, np.dot(similarityMatrix, inverseSqrtSums))
    # only the singular values and the right singular vectors are used
    _, sigma, rightVectors = np.linalg.svd(laplacian)
    # one row per instance, one column per vector among the k first ones, whitened for k-means
    features = whiten(np.array(rightVectors[:k]).T)
    centroids, _ = kmeans(features, k, iter)
    # code starting from 0 (represent first cluster) to k-1 (last cluster)
    code, _ = vq(features, centroids)
    return code, sigma
563
39de5c532559
place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
526
diff
changeset
|
153 |
1044
75a6ad604cc5
work on motion patterns
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1033
diff
changeset
|
def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0):
    '''Assigns instances to prototypes
    if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters
    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize

    similarities is a square numpy array indexed like instances: negative values mean
    "not computed yet", they are computed with similarityFunc when needed and
    stored (symmetrically) in the array, which is therefore modified in place

    An instance is assigned to its most similar prototype if the similarity is at least minSimilarity,
    otherwise it is an outlier (label -1)

    Returns the remaining prototype indices (initialPrototypeIndices is not modified) and the labels
    labels are indices in the prototypeIndices
    If no prototype is given or left after refinement, all the labels are -1'''
    prototypeIndices = list(initialPrototypeIndices)
    nInstances = len(instances)
    initialPrototypes = set(prototypeIndices)
    indices = [i for i in range(nInstances) if i not in initialPrototypes]
    labels = [-1]*nInstances
    assign = True
    while assign:
        if len(prototypeIndices) == 0:
            # nothing to assign to: every instance is an outlier
            return prototypeIndices, [-1]*nInstances
        for i in prototypeIndices:
            labels[i] = i
        for i in indices:
            # compute only the missing similarities to the current prototypes
            for j in prototypeIndices:
                if similarities[i][j] < 0:
                    similarities[i][j] = similarityFunc(instances[i], instances[j])
                    similarities[j][i] = similarities[i][j]
            label = similarities[i][prototypeIndices].argmax()
            if similarities[i][prototypeIndices[label]] >= minSimilarity:
                labels[i] = prototypeIndices[label]
            else:
                labels[i] = -1 # outlier
        clusterSizes = {i: labels.count(i) for i in prototypeIndices}
        smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
        assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
        if assign:
            prototypeIndices.remove(smallestClusterIndex)
            # the members of the removed cluster (including its former prototype) are reassigned
            indices = [i for i in range(nInstances) if labels[i] == smallestClusterIndex]
    return prototypeIndices, labels
980
23f98ebb113f
first tests for clustering algo
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
978
diff
changeset
|
184 |
1044
75a6ad604cc5
work on motion patterns
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1033
diff
changeset
|
def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
    '''Finds the exemplar (prototype) instances that represent each cluster
    Returns the list of prototype indices (in the instances list),
    or None (after printing a message) if instances is empty

    instances: list of the elements to cluster; they must have a length (method __len__),
    unless optimizeCentroid or randomInitialization is used (lengths are then not used)
    similarities: square array indexed like instances (numpy-style indexing is used),
    a negative value means that the similarity has not been computed yet
    similarityFunc: called as similarityFunc(instances[i], instances[j]) to compute the missing similarities as needed (this is faster),
    the results are stored symmetrically in similarities, which must therefore still be allocated with the right size
    minSimilarity: if an instance is different enough from all prototypes (best similarity < minSimilarity),
    it will become a new prototype. Other instances are assigned to the most similar existing prototype

    if optimizeCentroid is True, each time an element is added to a cluster,
    the prototype is recomputed as the element most similar to all the elements in the cluster
    otherwise, the prototype is replaced by the added element if this element is longer

    instances are processed in random order if randomInitialization or optimizeCentroid is True,
    from the shortest to the longest otherwise

    initialPrototypeIndices are indices in instances (the provided list is not modified)

    TODO: check how similarity evolves in clusters'''
    if len(instances) == 0:
        print('no instances to cluster (empty list)')
        return None

    def ensureSimilarity(a, b):
        'Computes and stores (symmetrically) the similarity of a and b if not available yet'
        if similarities[a][b] < 0:
            similarities[a][b] = similarityFunc(instances[a], instances[b])
            similarities[b][a] = similarities[a][b]

    # order in which the instances are processed
    indices = list(range(len(instances)))
    if randomInitialization or optimizeCentroid:
        indices = np.random.permutation(indices).tolist()
    else:
        indices.sort(key=lambda i: len(instances[i]))
    # initialize clusters
    if initialPrototypeIndices is None:
        prototypeIndices = [indices[0]]
    else:
        # copy: the list is extended and modified below, the caller's list must not be
        prototypeIndices = list(initialPrototypeIndices)
    clusters = []
    for i in prototypeIndices:
        clusters.append([i])
        indices.remove(i)
    # go through all remaining instances
    for i in indices:
        for j in prototypeIndices:
            ensureSimilarity(i, j)
        label = similarities[i][prototypeIndices].argmax() # index in prototypeIndices
        if similarities[i][prototypeIndices[label]] < minSimilarity:
            prototypeIndices.append(i)
            # a prototype belongs to its own cluster, as for the initial prototypes,
            # so that it is taken into account when optimizing the centroid
            clusters.append([i])
        else:
            clusters[label].append(i)
            if optimizeCentroid:
                if len(clusters[label]) >= 2: # no point if only one element in cluster
                    for j in clusters[label][:-1]:
                        ensureSimilarity(i, j)
                    clusterIndices = clusters[label]
                    clusterSimilarities = similarities[clusterIndices][:,clusterIndices]
                    # the new centroid has the largest summed similarity to the cluster elements
                    prototypeIndices[label] = clusterIndices[clusterSimilarities.sum(0).argmax()]
            elif len(instances[prototypeIndices[label]]) < len(instances[i]): # replace prototype by current instance i if longer # otherwise, possible to test if randomInitialization or initialPrototypes is not None
                prototypeIndices[label] = i
    return prototypeIndices
738
2472b4d59aea
small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
735
diff
changeset
|
248 |
2472b4d59aea
small function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
735
diff
changeset
|
def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
    '''Counts the number of instances assigned to each prototype and the number of outliers

    labels: sequence of the cluster label of each instance
    prototypeIndices: the labels to count
    outlierIndex: the label marking the outliers

    Returns a dictionary with the count for each element of prototypeIndices
    and the count of labels equal to outlierIndex under the key 'outlier' '''
    # convert the labels only once, not for each prototype
    labelArray = np.asarray(labels)
    clusterSizes = {i: int(np.count_nonzero(labelArray == i)) for i in prototypeIndices}
    clusterSizes['outlier'] = int(np.count_nonzero(labelArray == outlierIndex))
    return clusterSizes
786
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
253 |
908
b297525b2cbf
added options to the prototype cluster algorithm, work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
907
diff
changeset
|
def computeClusterStatistics(labels, prototypeIndices, instances, similarities, similarityFunc, clusters = None, outlierIndex = -1):
    '''Prints the size of each cluster and, for the clusters with at least two elements,
    the mean similarity to the prototype and the mean similarity between the elements

    labels: label of each instance, either an element of prototypeIndices or outlierIndex
    (used only if clusters is None)
    prototypeIndices: indices of the prototypes in instances
    similarities: square array indexed like instances (numpy-style indexing is used),
    the negative values are computed with similarityFunc and stored symmetrically
    clusters: optional list of the lists of instance indices of each cluster,
    in the same order as prototypeIndices; built from labels if None
    outlierIndex: label of the instances that do not belong to any cluster

    NOTE: the means add 1 (resp. n) to the summed similarities, which presumably cancels
    self-similarities stored as -1 (not computed) for a prototype in its cluster - to be confirmed'''
    if clusters is None:
        # group the instance indices by label; the outliers get their own list so that their label is a valid key
        members = {protoId: [] for protoId in list(prototypeIndices)+[outlierIndex]}
        for i, label in enumerate(labels):
            members[label].append(i)
        clusters = [members[protoId] for protoId in prototypeIndices]
    for i, cluster in enumerate(clusters):
        n = len(cluster)
        print('cluster {}: {} elements'.format(prototypeIndices[i], n))
        if n >= 2:
            # make sure all the pairwise similarities within the cluster are available
            for j, k in enumerate(cluster):
                for m in cluster[:j]:
                    if similarities[k][m] < 0:
                        similarities[k][m] = similarityFunc(instances[k], instances[m])
                        similarities[m][k] = similarities[k][m]
            print('Mean similarity to prototype: {}'.format((similarities[prototypeIndices[i]][cluster].sum()+1)/(n-1)))
            print('Mean overall similarity: {}'.format((similarities[cluster][:,cluster].sum()+n)/(n*(n-1))))
915
13434f5017dd
work to save trajectory assignment to origin and destinations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
914
diff
changeset
|
271 |
786
1f2b2d1f4fbf
added script and code to learn POIs
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
738
diff
changeset
|
272 # Gaussian Mixture Models |
917
89cc05867c4c
reorg and work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
916
diff
changeset
|
def plotGMM(mean, covariance, gmmId, fig, color, alpha = 0.3):
    '''Plots a Gaussian component as an ellipse in the first axes of fig,
    with a cross at its mean and gmmId written next to it

    mean: center of the component (the first two coordinates are used)
    covariance: covariance matrix, its eigenvalues (times 4) are the ellipse width and height
    and the ellipse is rotated by the angle derived from the first row of the eigenvector matrix
    color: used for the ellipse and appended to the 'x' marker format (eg 'b')
    alpha: transparency of the ellipse'''
    v, w = np.linalg.eigh(covariance)
    angle = 180*np.arctan2(w[0][1], w[0][0])/np.pi
    v *= 4
    # angle passed by keyword: it is keyword-only in recent matplotlib versions
    ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180+angle, color=color)
    ell.set_clip_box(fig.bbox)
    ell.set_alpha(alpha)
    fig.axes[0].add_artist(ell)
    plt.plot([mean[0]], [mean[1]], 'x'+color)
    plt.annotate(str(gmmId), xy=(mean[0]+1, mean[1]+1))
916
7345f0d51faa
added display of paths
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
915
diff
changeset
|
283 |
915
13434f5017dd
work to save trajectory assignment to origin and destinations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
914
diff
changeset
|
def plotGMMClusters(model, labels = None, dataset = None, fig = None, colors = utils.colors, nUnitsPerPixel = 1., alpha = 0.3):
    '''plot the ellipse corresponding to the Gaussians
    and the predicted classes of the instances in the dataset

    model: mixture model with the attributes n_components, means_ and covariances_
    labels: component index of each row of dataset (compared to the component index to select the points)
    dataset: optional array of points (first two columns plotted), one row per instance
    fig: figure to plot in, a new one is created if None (axes are added if it has none)
    colors: color for each component, indexed by the component index
    nUnitsPerPixel: means, covariances and dataset are divided by this factor before plotting
    alpha: transparency of the ellipses'''
    if fig is None:
        fig = plt.figure()
    if len(fig.get_axes()) == 0:
        fig.add_subplot(111)
    # the scaled dataset is the same for all components
    scaledDataset = None if dataset is None else dataset/nUnitsPerPixel
    for i in range(model.n_components):
        mean = model.means_[i]/nUnitsPerPixel
        covariance = model.covariances_[i]/nUnitsPerPixel
        # plot points
        if scaledDataset is not None:
            plt.scatter(scaledDataset[labels == i, 0], scaledDataset[labels == i, 1], .8, color=colors[i])
        # plot an ellipse to show the Gaussian component
        plotGMM(mean, covariance, i, fig, colors[i], alpha)
    if dataset is None: # to address issues without points, the axes limits are not redrawn
        # limits computed in the same (scaled) coordinates as the plotted ellipses
        scaledMeans = model.means_/nUnitsPerPixel
        minima = scaledMeans.min(0)
        maxima = scaledMeans.max(0)
        xwidth = 0.5*(maxima[0]-minima[0])
        ywidth = 0.5*(maxima[1]-minima[1])
        plt.xlim(minima[0]-xwidth,maxima[0]+xwidth)
        plt.ylim(minima[1]-ywidth,maxima[1]+ywidth)
980
23f98ebb113f
first tests for clustering algo
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
978
diff
changeset
|
307 |
23f98ebb113f
first tests for clustering algo
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
978
diff
changeset
|
if __name__ == "__main__":
    # run the doctests stored in tests/ml.txt with the unittest text runner
    import doctest
    import unittest
    suite = doctest.DocFileSuite('tests/ml.txt')
    runner = unittest.TextTestRunner()
    runner.run(suite)
    # alternatives kept for reference
    # #doctest.testmod()
    # #doctest.testfile("example.txt")