diff python/ml.py @ 614:5e09583275a4

Merged Nicolas/trafficintelligence into default
author Mohamed Gomaa <eng.m.gom3a@gmail.com>
date Fri, 05 Dec 2014 12:13:53 -0500
parents 39de5c532559
children 3058e00887bc
line wrap: on
line diff
--- a/python/ml.py	Thu Apr 18 15:29:33 2013 -0400
+++ b/python/ml.py	Fri Dec 05 12:13:53 2014 -0500
@@ -5,6 +5,29 @@
 
 __metaclass__ = type
 
+class Model(object):
+    '''Abstract base class: load/save are delegated to the wrapped self.model'''
+    def load(self, fn):
+        self.model.load(fn)
+
+    def save(self, fn):
+        self.model.save(fn)
+
+class SVM(Model):
+    '''Wrapper for the OpenCV Support Vector Machine (SVM) algorithm'''
+
+    def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
+        import cv2
+        self.model = cv2.SVM()
+        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)
+
+    def train(self, samples, responses):
+        self.model.train(samples, responses, params = self.params)
+
+    def predict(self, samples):
+        return np.float32([self.model.predict(s) for s in samples])
+
+
 class Centroid:
     'Wrapper around instances to add a counter'
 
@@ -25,13 +48,17 @@
         inst.multiply(1/(self.nInstances+instance.nInstances))
         return Centroid(inst, self.nInstances+instance.nInstances)
 
-    def draw(self, options = ''):
+    def plot(self, options = ''):
         from matplotlib.pylab import text
-        self.instance.draw(options)
+        self.instance.plot(options)
         text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 
+def kMedoids(similarityMatrix, initialCentroids = None, k = None):
+    '''Algorithm that clusters any dataset based on a similarity matrix
+    Either initialCentroids or k must be passed (TODO: not implemented yet)'''
+    pass
 
-def clustering(data, similar, initialCentroids = []):
+def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
     '''k-means algorithm with similarity function
     Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. 
     The number of clusters will be determined accordingly
@@ -42,14 +69,15 @@
     from random import shuffle
     from copy import copy, deepcopy
     localdata = copy(data) # shallow copy to avoid modifying data
-    shuffle(localdata)
-    if initialCentroids:
+    if shuffleData:
+        shuffle(localdata)
+    if initialCentroids == None:
+        centroids = [Centroid(localdata[0])]
+    else:
         centroids = deepcopy(initialCentroids)
-    else:
-        centroids = [Centroid(localdata[0])]
     for instance in localdata[1:]:
         i = 0
-        while i<len(centroids) and not similar(centroids[i].instance, instance):
+        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
             i += 1
         if i == len(centroids):
             centroids.append(Centroid(instance))
@@ -58,6 +86,8 @@
 
     return centroids
 
+# TODO recompute the centroid of each cluster: the instance that minimizes some measure to all other elements of the cluster
+
 def spectralClustering(similarityMatrix, k, iter=20):
 	'''Spectral Clustering algorithm'''
 	n = len(similarityMatrix)
@@ -77,3 +107,14 @@
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma	
+
+def motionPatterLearning(objects, maxDistance):
+    '''TODO: not implemented yet
+    Planned option: use only the (n?) longest features per object instead of all, to speed up
+    '''
+    pass
+
+def prototypeCluster():
+    '''
+    TODO: not implemented yet'''
+    pass