Mercurial Hosting > traffic-intelligence
comparison python/ml.py @ 614:5e09583275a4
Merged Nicolas/trafficintelligence into default
author | Mohamed Gomaa <eng.m.gom3a@gmail.com> |
---|---|
date | Fri, 05 Dec 2014 12:13:53 -0500 |
parents | 39de5c532559 |
children | 3058e00887bc |
comparison
equal
deleted
inserted
replaced
598:11f96bd08552 | 614:5e09583275a4 |
---|---|
2 '''Libraries for machine learning algorithms''' | 2 '''Libraries for machine learning algorithms''' |
3 | 3 |
4 import numpy as np | 4 import numpy as np |
5 | 5 |
6 __metaclass__ = type | 6 __metaclass__ = type |
7 | |
8 class Model(object): | |
9 '''Abstract class for loading/saving model''' | |
10 def load(self, fn): | |
11 self.model.load(fn) | |
12 | |
13 def save(self, fn): | |
14 self.model.save(fn) | |
15 | |
16 class SVM(Model): | |
17 '''wrapper for OpenCV Support Vector Machine algorithm''' | |
18 | |
19 def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): | |
20 import cv2 | |
21 self.model = cv2.SVM() | |
22 self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p) | |
23 | |
24 def train(self, samples, responses): | |
25 self.model.train(samples, responses, params = self.params) | |
26 | |
27 def predict(self, samples): | |
28 return np.float32([self.model.predict(s) for s in samples]) | |
29 | |
7 | 30 |
8 class Centroid: | 31 class Centroid: |
9 'Wrapper around instances to add a counter' | 32 'Wrapper around instances to add a counter' |
10 | 33 |
11 def __init__(self, instance, nInstances = 1): | 34 def __init__(self, instance, nInstances = 1): |
23 def average(c): | 46 def average(c): |
24 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) | 47 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) |
25 inst.multiply(1/(self.nInstances+instance.nInstances)) | 48 inst.multiply(1/(self.nInstances+instance.nInstances)) |
26 return Centroid(inst, self.nInstances+instance.nInstances) | 49 return Centroid(inst, self.nInstances+instance.nInstances) |
27 | 50 |
28 def draw(self, options = ''): | 51 def plot(self, options = ''): |
29 from matplotlib.pylab import text | 52 from matplotlib.pylab import text |
30 self.instance.draw(options) | 53 self.instance.plot(options) |
31 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) | 54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) |
32 | 55 |
56 def kMedoids(similarityMatrix, initialCentroids = None, k = None): | |
57 '''Algorithm that clusters any dataset based on a similarity matrix | |
58 Either initialCentroids or k is passed''' | |
59 pass | |
33 | 60 |
34 def clustering(data, similar, initialCentroids = []): | 61 def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True): |
35 '''k-means algorithm with similarity function | 62 '''k-means algorithm with similarity function |
36 Two instances should be in the same cluster if the similarity function returns true for them. It is assumed that the average centroid of a set of instances can be computed, using the function. | 63 Two instances should be in the same cluster if the similarity function returns true for them. It is assumed that the average centroid of a set of instances can be computed, using the function. |
37 The number of clusters will be determined accordingly | 64 The number of clusters will be determined accordingly |
38 | 65 |
39 data: list of instances | 66 data: list of instances |
40 averageCentroid: ''' | 67 averageCentroid: ''' |
41 | 68 |
42 from random import shuffle | 69 from random import shuffle |
43 from copy import copy, deepcopy | 70 from copy import copy, deepcopy |
44 localdata = copy(data) # shallow copy to avoid modifying data | 71 localdata = copy(data) # shallow copy to avoid modifying data |
45 shuffle(localdata) | 72 if shuffleData: |
46 if initialCentroids: | 73 shuffle(localdata) |
74 if initialCentroids == None: | |
75 centroids = [Centroid(localdata[0])] | |
76 else: | |
47 centroids = deepcopy(initialCentroids) | 77 centroids = deepcopy(initialCentroids) |
48 else: | |
49 centroids = [Centroid(localdata[0])] | |
50 for instance in localdata[1:]: | 78 for instance in localdata[1:]: |
51 i = 0 | 79 i = 0 |
52 while i<len(centroids) and not similar(centroids[i].instance, instance): | 80 while i<len(centroids) and not similarFunc(centroids[i].instance, instance): |
53 i += 1 | 81 i += 1 |
54 if i == len(centroids): | 82 if i == len(centroids): |
55 centroids.append(Centroid(instance)) | 83 centroids.append(Centroid(instance)) |
56 else: | 84 else: |
57 centroids[i].add(instance) | 85 centroids[i].add(instance) |
58 | 86 |
59 return centroids | 87 return centroids |
88 | |
89 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements | |
60 | 90 |
61 def spectralClustering(similarityMatrix, k, iter=20): | 91 def spectralClustering(similarityMatrix, k, iter=20): |
62 '''Spectral Clustering algorithm''' | 92 '''Spectral Clustering algorithm''' |
63 n = len(similarityMatrix) | 93 n = len(similarityMatrix) |
64 # create Laplacian matrix | 94 # create Laplacian matrix |
75 from scipy.cluster.vq import kmeans, whiten, vq | 105 from scipy.cluster.vq import kmeans, whiten, vq |
76 features = whiten(features) | 106 features = whiten(features) |
77 centroids,distortion = kmeans(features,k, iter) | 107 centroids,distortion = kmeans(features,k, iter) |
78 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 108 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
79 return code,sigma | 109 return code,sigma |
110 | |
111 def motionPatterLearning(objects, maxDistance): | |
112 ''' | |
113 Option to use only the (n?) longest features per object instead of all for speed up | |
114 TODO''' | |
115 pass | |
116 | |
117 def prototypeCluster(): | |
118 ''' | |
119 TODO''' | |
120 pass |