Mercurial Hosting > traffic-intelligence
diff python/ml.py @ 614:5e09583275a4
Merged Nicolas/trafficintelligence into default
author | Mohamed Gomaa <eng.m.gom3a@gmail.com> |
---|---|
date | Fri, 05 Dec 2014 12:13:53 -0500 |
parents | 39de5c532559 |
children | 3058e00887bc |
line wrap: on
line diff
--- a/python/ml.py Thu Apr 18 15:29:33 2013 -0400
+++ b/python/ml.py Fri Dec 05 12:13:53 2014 -0500
@@ -5,6 +5,29 @@
 
 __metaclass__ = type
 
+class Model(object):
+    '''Abstract class for loading/saving model'''
+    def load(self, fn):
+        self.model.load(fn)
+
+    def save(self, fn):
+        self.model.save(fn)
+
+class SVM(Model):
+    '''wrapper for OpenCV SimpleVectorMachine algorithm'''
+
+    def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
+        import cv2
+        self.model = cv2.SVM()
+        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)
+
+    def train(self, samples, responses):
+        self.model.train(samples, responses, params = self.params)
+
+    def predict(self, samples):
+        return np.float32([self.model.predict(s) for s in samples])
+
+
 class Centroid:
     'Wrapper around instances to add a counter'
 
@@ -25,13 +48,17 @@
         inst.multiply(1/(self.nInstances+instance.nInstances))
         return Centroid(inst, self.nInstances+instance.nInstances)
 
-    def draw(self, options = ''):
+    def plot(self, options = ''):
         from matplotlib.pylab import text
-        self.instance.draw(options)
+        self.instance.plot(options)
         text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 
+def kMedoids(similarityMatrix, initialCentroids = None, k = None):
+    '''Algorithm that clusters any dataset based on a similarity matrix
+    Either the initialCentroids or k are passed'''
+    pass
 
-def clustering(data, similar, initialCentroids = []):
+def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
     '''k-means algorithm with similarity function
     Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. 
     The number of clusters will be determined accordingly
@@ -42,14 +69,15 @@
     from random import shuffle
     from copy import copy, deepcopy
     localdata = copy(data) # shallow copy to avoid modifying data
-    shuffle(localdata)
-    if initialCentroids:
+    if shuffleData:
+        shuffle(localdata)
+    if initialCentroids == None:
+        centroids = [Centroid(localdata[0])]
+    else:
         centroids = deepcopy(initialCentroids)
-    else:
-        centroids = [Centroid(localdata[0])]
     for instance in localdata[1:]:
         i = 0
-        while i<len(centroids) and not similar(centroids[i].instance, instance):
+        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
             i += 1
         if i == len(centroids):
             centroids.append(Centroid(instance))
@@ -58,6 +86,8 @@
 
     return centroids
 
+# TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements
+
 def spectralClustering(similarityMatrix, k, iter=20):
     '''Spectral Clustering algorithm'''
     n = len(similarityMatrix)
@@ -77,3 +107,14 @@
     centroids,distortion = kmeans(features,k, iter)
     code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
     return code,sigma
+
+def motionPatterLearning(objects, maxDistance):
+    '''
+    Option to use only the (n?) longest features per object instead of all for speed up
+    TODO'''
+    pass
+
+def prototypeCluster():
+    '''
+    TODO'''
+    pass