annotate python/ml.py @ 398:3399bd48cb40

Ajout d'une méthode pour obtenir le nombre de FPS. Méthode de capture des trames vidéo plus résistante aux erreurs. Utilisation d'un dictionnaire pour les fichiers de configuration afin de garder le nom des sections.
author Jean-Philippe Jodoin <jpjodoin@gmail.com>
date Mon, 29 Jul 2013 13:46:07 -0400
parents 8bc632cb8344
children c81cbd6953fb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
1 #! /usr/bin/env python
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
2 '''Libraries for machine learning algorithms'''
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
3
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
4 import numpy as np
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
5
183
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
6 __metaclass__ = type
ed944ff45e8c first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff changeset
7
380
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
class Model(object):
    '''Abstract base class for loading/saving a model

    Subclasses are expected to set self.model to a backend object
    that provides load(filename) and save(filename); this class only
    forwards the calls to that object.'''

    def load(self, fn):
        'Loads the model from the file fn (delegated to self.model)'
        self.model.load(fn)

    def save(self, fn):
        'Saves the model to the file fn (delegated to self.model)'
        self.model.save(fn)
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
15
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
class SVM(Model):
    '''Wrapper for the OpenCV Support Vector Machine algorithm (cv2.SVM)'''

    def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Creates the underlying cv2.SVM object and stores the training parameters

        All arguments are stored untouched in self.params and handed to
        cv2.SVM.train when train() is called'''
        # imported here so that the module can be used without OpenCV
        # as long as no SVM is instantiated
        import cv2
        self.model = cv2.SVM()
        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)

    def train(self, samples, responses):
        'Trains the SVM on samples/responses using the parameters given at construction'
        self.model.train(samples, responses, params = self.params)

    def predict(self, sample):
        'Returns the prediction of the SVM for sample, converted to numpy.float32'
        # the argument used to be referred to as the undefined name s
        return np.float32(self.model.predict(sample))
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
29
adfd4f70ee1d added SVM
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 312
diff changeset
30
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
class Centroid(object):
    '''Wrapper around instances to add a counter

    The centroid keeps the running average of the instances added to it
    (self.instance) and how many were averaged (self.nInstances).
    The wrapped instances must provide multiply(scalar), whose result is
    used as the scaled instance, and support + between instances.'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    # def similar(instance2):
    #     return self.instance.similar(instance2)

    def add(self, instance2):
        'Updates the average with the new instance2 and increments the counter'
        # back to the sum of instances, add the new one, then average again
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid that is the weighted average of self and
        the centroid c (weights are the numbers of instances)

        Neither self nor c is modified by this method itself.'''
        total = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # float division (integer division would give 0 in Python 2)
        # and the result of multiply is kept, as in add
        inst = inst.multiply(1/float(total))
        return Centroid(inst, total)

    def draw(self, options = ''):
        'Draws the instance and writes the number of instances next to its position'
        from matplotlib.pylab import text
        self.instance.draw(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
55
386
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 382
diff changeset
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Algorithm that clusters any dataset based on a similarity matrix
    Either the initialCentroids or k are passed

    Not implemented yet: this is a placeholder that does nothing
    and returns None'''
    pass
184
d70e9b36889c initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 183
diff changeset
60
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Sequential clustering (k-means like) with a similarity function

    Each instance is added to the first centroid for which
    similarFunc(centroid.instance, instance) returns true;
    if there is none, the instance starts a new centroid.
    The number of clusters is therefore determined by the data.
    It is supposed that the average of instances can be computed
    (see Centroid.add).

    data: list of instances (not modified)
    similarFunc: function of two instances returning true if they are similar
    initialCentroids: optional list of centroids to start from
    (a deep copy is used, the argument is not modified)
    shuffleData: if true, the instances are processed in random order

    Returns the list of centroids (empty if there is no data and
    no initial centroid)'''
    from random import shuffle
    from copy import deepcopy

    localdata = list(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids:
        centroids = deepcopy(initialCentroids)
        # no instance was used to seed the centroids: all must be assigned
        remaining = localdata
    elif len(localdata) > 0:
        centroids = [Centroid(localdata[0])]
        remaining = localdata[1:]
    else:
        return []

    for instance in remaining:
        for centroid in centroids:
            if similarFunc(centroid.instance, instance):
                centroid.add(instance)
                break
        else: # no similar centroid was found
            centroids.append(Centroid(instance))

    return centroids
308
8bafd054cda4 Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents: 184
diff changeset
88
382
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
89 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
ba813f148ade development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 380
diff changeset
90
293
ee3302528cdc rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 285
diff changeset
def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral Clustering algorithm

    similarityMatrix: square matrix of similarities between the n elements
    (supposed symmetric, the sum of similarities of each element
    must be strictly positive)
    k: number of clusters
    iter: number of runs of k-means (the best one is kept)

    Returns code, sigma where code is the array of the cluster index
    of each element (from 0 to k-1) and sigma the singular values
    of the normalized Laplacian, in decreasing order

    Raises ValueError if an element has a non-positive sum of similarities'''
    from scipy.cluster.vq import kmeans, whiten, vq

    similarityMatrix = np.asarray(similarityMatrix, dtype = float)
    n = len(similarityMatrix)
    # create Laplacian matrix
    degrees = np.sum(similarityMatrix,axis=0)
    if np.any(degrees <= 0):
        # 1/sqrt would silently produce inf or nan
        raise ValueError('each element must have a strictly positive sum of similarities')
    D = np.diag(1 / np.sqrt(degrees))
    I = np.identity(n)
    L = I - np.dot(D,np.dot(similarityMatrix,D))
    # compute eigenvectors of L
    # (L is symmetric: the rows of V are eigenvectors of L)
    U,sigma,V = np.linalg.svd(L)
    # create feature vector from the k eigenvectors of the smallest eigenvalues
    # by stacking eigenvectors as columns
    # svd sorts the singular values in decreasing order: these are the last k rows of V
    features = np.array(V[::-1][:k]).T
    # k-means
    features = whiten(features)
    centroids,distortion = kmeans(features,k, iter)
    code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
    return code,sigma