comparison python/ml.py @ 614:5e09583275a4

Merged Nicolas/trafficintelligence into default
author Mohamed Gomaa <eng.m.gom3a@gmail.com>
date Fri, 05 Dec 2014 12:13:53 -0500
parents 39de5c532559
children 3058e00887bc
comparison
equal deleted inserted replaced
598:11f96bd08552 614:5e09583275a4
2 '''Libraries for machine learning algorithms''' 2 '''Libraries for machine learning algorithms'''
3 3
4 import numpy as np 4 import numpy as np
5 5
6 __metaclass__ = type 6 __metaclass__ = type
7
8 class Model(object):
9 '''Abstract class for loading/saving model'''
10 def load(self, fn):
11 self.model.load(fn)
12
13 def save(self, fn):
14 self.model.save(fn)
15
16 class SVM(Model):
17 '''wrapper for OpenCV SimpleVectorMachine algorithm'''
18
19 def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
20 import cv2
21 self.model = cv2.SVM()
22 self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)
23
24 def train(self, samples, responses):
25 self.model.train(samples, responses, params = self.params)
26
27 def predict(self, samples):
28 return np.float32([self.model.predict(s) for s in samples])
29
7 30
8 class Centroid: 31 class Centroid:
9 'Wrapper around instances to add a counter' 32 'Wrapper around instances to add a counter'
10 33
11 def __init__(self, instance, nInstances = 1): 34 def __init__(self, instance, nInstances = 1):
23 def average(c): 46 def average(c):
24 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) 47 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
25 inst.multiply(1/(self.nInstances+instance.nInstances)) 48 inst.multiply(1/(self.nInstances+instance.nInstances))
26 return Centroid(inst, self.nInstances+instance.nInstances) 49 return Centroid(inst, self.nInstances+instance.nInstances)
27 50
28 def draw(self, options = ''): 51 def plot(self, options = ''):
29 from matplotlib.pylab import text 52 from matplotlib.pylab import text
30 self.instance.draw(options) 53 self.instance.plot(options)
31 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) 54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
32 55
56 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
57 '''Algorithm that clusters any dataset based on a similarity matrix
58 Either the initialCentroids or k are passed'''
59 pass
33 60
34 def clustering(data, similar, initialCentroids = []): 61 def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
35 '''k-means algorithm with similarity function 62 '''k-means algorithm with similarity function
36 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. 63 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
37 The number of clusters will be determined accordingly 64 The number of clusters will be determined accordingly
38 65
39 data: list of instances 66 data: list of instances
40 averageCentroid: ''' 67 averageCentroid: '''
41 68
42 from random import shuffle 69 from random import shuffle
43 from copy import copy, deepcopy 70 from copy import copy, deepcopy
44 localdata = copy(data) # shallow copy to avoid modifying data 71 localdata = copy(data) # shallow copy to avoid modifying data
45 shuffle(localdata) 72 if shuffleData:
46 if initialCentroids: 73 shuffle(localdata)
74 if initialCentroids == None:
75 centroids = [Centroid(localdata[0])]
76 else:
47 centroids = deepcopy(initialCentroids) 77 centroids = deepcopy(initialCentroids)
48 else:
49 centroids = [Centroid(localdata[0])]
50 for instance in localdata[1:]: 78 for instance in localdata[1:]:
51 i = 0 79 i = 0
52 while i<len(centroids) and not similar(centroids[i].instance, instance): 80 while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
53 i += 1 81 i += 1
54 if i == len(centroids): 82 if i == len(centroids):
55 centroids.append(Centroid(instance)) 83 centroids.append(Centroid(instance))
56 else: 84 else:
57 centroids[i].add(instance) 85 centroids[i].add(instance)
58 86
59 return centroids 87 return centroids
88
89 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
60 90
61 def spectralClustering(similarityMatrix, k, iter=20): 91 def spectralClustering(similarityMatrix, k, iter=20):
62 '''Spectral Clustering algorithm''' 92 '''Spectral Clustering algorithm'''
63 n = len(similarityMatrix) 93 n = len(similarityMatrix)
64 # create Laplacian matrix 94 # create Laplacian matrix
75 from scipy.cluster.vq import kmeans, whiten, vq 105 from scipy.cluster.vq import kmeans, whiten, vq
76 features = whiten(features) 106 features = whiten(features)
77 centroids,distortion = kmeans(features,k, iter) 107 centroids,distortion = kmeans(features,k, iter)
78 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) 108 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
79 return code,sigma 109 return code,sigma
110
111 def motionPatterLearning(objects, maxDistance):
112 '''
113 Option to use only the (n?) longest features per object instead of all for speed up
114 TODO'''
115 pass
116
117 def prototypeCluster():
118 '''
119 TODO'''
120 pass