annotate python/ml.py @ 680:da1352b89d02 dev

classification is working
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 05 Jun 2015 02:25:30 +0200
parents 15e244d2a1b5
children b02431a8234c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
1 #! /usr/bin/env python
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
2 '''Libraries for machine learning algorithms'''
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
3
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
4 import numpy as np
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
5
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
6
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
class Model(object):
    '''Base class forwarding load/save requests to the wrapped self.model

    The class itself never sets self.model: subclasses are expected to assign
    an object providing load(filename) and save(filename)'''

    def load(self, filename):
        '''Loads the wrapped model from filename if that path exists

        Otherwise prints a message and leaves the wrapped model untouched'''
        import os.path
        if not os.path.exists(filename):
            print('Provided filename {} does not exist: model not loaded!'.format(filename))
            return
        self.model.load(filename)

    def save(self, filename):
        '''Saves the wrapped model to filename'''
        self.model.save(filename)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
18
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
class SVM(Model):
    '''Wrapper around the OpenCV Support Vector Machine (cv2.SVM)'''

    def __init__(self):
        # cv2 is imported lazily so that the module can be used without OpenCV
        import cv2
        self.model = cv2.SVM()

    def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Trains the wrapped SVM on samples and their responses

        The SVM settings are kept in self.params and handed to the OpenCV
        train method through its params argument'''
        self.params = {'svm_type': svm_type,
                       'kernel_type': kernel_type,
                       'degree': degree,
                       'gamma': gamma,
                       'coef0': coef0,
                       'Cvalue': Cvalue,
                       'nu': nu,
                       'p': p}
        self.model.train(samples, responses, params = self.params)

    def predict(self, hog):
        '''Returns the prediction of the wrapped SVM for the feature vector hog'''
        return self.model.predict(hog)
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
32
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
33
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 636
diff changeset
class Centroid(object):
    '''Wrapper around instances to add a counter

    The wrapped instance must provide multiply(scalar), returning a new
    instance, and support + with another instance
    (plot additionally needs instance.plot(options) and instance.position.x/.y)'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    def add(self, instance2):
        '''Updates the centroid in place to the running mean including instance2'''
        total = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        # float conversion avoids integer division under Python 2
        self.instance = total.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid, the mean of self and centroid c weighted by their counts

        Neither self nor c is modified'''
        nTotal = self.nInstances+c.nInstances
        weightedSum = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # multiply returns a new instance (see add): its result has to be kept
        return Centroid(weightedSum.multiply(1/float(nTotal)), nTotal)

    def plot(self, options = ''):
        '''Plots the wrapped instance and writes the number of instances next to its position'''
        from matplotlib.pylab import text
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
58
386
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Algorithm that clusters any dataset based on a similarity matrix
    Either the initialCentroids or k are passed

    Placeholder: not implemented yet, the arguments are ignored and None is returned'''
    return None
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
63
526
21bdeb29f855 corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 515
diff changeset
def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Incremental clustering driven by a similarity function
    The number of clusters is not given: it is determined by the data

    Each instance is added to the first centroid for which
    similarFunc(centroid.instance, instance) is true;
    if there is none, the instance starts a new centroid

    data: list of instances (not modified, a shallow copy is used)
    similarFunc: function of two instances returning true if they belong to the same cluster
    initialCentroids: optional list of centroids to start from (deep-copied, not modified)
    shuffleData: if true, the instances are processed in random order

    Returns the list of centroids (empty if there is no data and no initial centroid)'''
    from random import shuffle
    from copy import copy, deepcopy

    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        if len(localdata) == 0: # nothing to cluster
            return []
        # the first instance seeds the first cluster
        centroids = [Centroid(localdata[0])]
        remaining = localdata[1:]
    else:
        centroids = deepcopy(initialCentroids)
        # no instance was used as a seed: all of them have to be assigned
        remaining = localdata
    for instance in remaining:
        for centroid in centroids:
            if similarFunc(centroid.instance, instance):
                centroid.add(instance)
                break
        else: # no similar centroid was found
            centroids.append(Centroid(instance))

    return centroids
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
91
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
92 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
93
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral clustering of items described by their pairwise similarities

    similarityMatrix: matrix of similarities, combined with an identity matrix of size len(similarityMatrix)
    k: number of singular vectors used as features, also passed to k-means as its second argument
    iter: passed to scipy.cluster.vq.kmeans as its third positional argument

    Returns (code, sigma): code is the index of the closest k-means centroid for each item,
    from 0 to k-1, sigma holds the singular values of the Laplacian matrix

    NOTE(review): the sums are taken along axis 0, which equals the row sums only for a
    symmetric matrix; a zero sum leads to a division by zero. The first k rows of V
    correspond to the largest singular values - confirm this is the intended end of the spectrum'''
    nItems = len(similarityMatrix)
    # Laplacian matrix I - D.S.D, with D the diagonal matrix of the inverse square roots of the sums
    sums = np.sum(similarityMatrix,axis=0)
    invSqrtSums = np.diag(1 / np.sqrt(sums))
    laplacian = np.identity(nItems) - np.dot(invSqrtSums,np.dot(similarityMatrix,invSqrtSums))
    # the decomposition of the Laplacian is obtained through an SVD
    _, sigma, V = np.linalg.svd(laplacian)
    # k-means on the whitened features: the first k rows of V become the columns, one row per item
    from scipy.cluster.vq import kmeans, whiten, vq
    features = whiten(np.array(V[:k]).T)
    centroids, _ = kmeans(features, k, iter)
    code, _ = vq(features, centroids)
    return code, sigma
563
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
113
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
def motionPatterLearning(objects, maxDistance):
    '''Placeholder: not implemented yet, the arguments are ignored and None is returned

    Planned option: use only the (n?) longest features per object instead of all, for speed up
    TODO'''
    return None
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
119
39de5c532559 place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 526
diff changeset
def prototypeCluster():
    '''Placeholder: not implemented yet, returns None
    TODO'''
    return None