view python/ml.py @ 398:3399bd48cb40

Ajout d'une méthode pour obtenir le nombre de FPS. Méthode de capture des trames vidéo plus résistante aux erreurs. Utilisation d'un dictionnaire pour les fichiers de configuration afin de garder le nom des sections.
author Jean-Philippe Jodoin <jpjodoin@gmail.com>
date Mon, 29 Jul 2013 13:46:07 -0400
parents 8bc632cb8344
children c81cbd6953fb
line wrap: on
line source

#! /usr/bin/env python
'''Libraries for machine learning algorithms'''

import numpy as np

__metaclass__ = type

class Model(object):
    '''Base class delegating persistence to a wrapped model object

    This class never assigns self.model itself: it must be set (for
    example by a subclass constructor) to an object providing
    load(fn) and save(fn) before these methods are called.'''

    def load(self, fn):
        '''Calls load(fn) on the wrapped model (fn is presumably a file name)'''
        wrapped = self.model
        wrapped.load(fn)

    def save(self, fn):
        '''Calls save(fn) on the wrapped model (fn is presumably a file name)'''
        wrapped = self.model
        wrapped.save(fn)

class SVM(Model):
    '''Wrapper for the OpenCV Support Vector Machine algorithm (cv2.SVM)

    The underlying cv2.SVM object is stored in self.model, so that
    load and save inherited from Model operate on it.'''

    def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Creates the cv2.SVM object and stores the training parameters

        All arguments are stored unchanged in the self.params dictionary,
        which is handed to cv2.SVM.train by the train method.'''
        import cv2 # cv2 is only imported when an SVM is instantiated
        self.model = cv2.SVM()
        # NOTE(review): the OpenCV samples use the key 'C' for the cost parameter; confirm that 'Cvalue' is taken into account by cv2.SVM.train
        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)

    def train(self, samples, responses):
        '''Trains the wrapped model on samples and responses using self.params'''
        self.model.train(samples, responses, params = self.params)

    def predict(self, sample):
        '''Returns the prediction of the wrapped model for sample, converted to numpy.float32'''
        # the original code passed an undefined name (s) instead of the argument
        return np.float32(self.model.predict(sample))


class Centroid:
    '''Wrapper around instances to add a counter

    self.instance is the current average instance and self.nInstances the
    number of instances it represents. The wrapped instances must provide
    multiply(scalar), returning the scaled instance, and the + operator
    (draw additionally requires draw(options) and position.x/position.y).'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    # def similar(instance2):
    #     return self.instance.similar(instance2)

    def add(self, instance2):
        '''Adds instance2 to the centroid, updating the running average and the counter'''
        # go back to the sum of the instances before adding the new one
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid that is the average of self and the centroid c,
        each weighted by its number of instances (self and c are not modified)'''
        total = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # multiply returns the scaled instance (see add): keep its result
        # float() avoids the integer division of Python 2
        inst = inst.multiply(1/float(total))
        return Centroid(inst, total)

    def draw(self, options = ''):
        '''Draws the wrapped instance and writes the number of instances next to its position'''
        from matplotlib.pylab import text
        self.instance.draw(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))

def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Placeholder for a clustering algorithm working on any dataset
    described by a similarity matrix

    Either initialCentroids or k is meant to be passed.
    Not implemented yet: the arguments are ignored and None is returned'''
    return None

def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Single-pass clustering driven by a similarity function

    Each instance is added to the first centroid whose instance is similar
    to it (similarFunc returns true), otherwise it starts a new cluster.
    The number of clusters is therefore determined by the data.
    It is supposed that the average of a set of instances can be computed
    (see Centroid.add).

    data: list of instances (not modified)
    similarFunc: function taking (centroid instance, instance) and returning
    true if the instance belongs to the cluster of the centroid
    initialCentroids: optional list of centroids to start from (deep-copied,
    the argument is not modified); if None or empty, the first instance
    becomes the first centroid
    shuffleData: if true, the instances are processed in random order

    Returns the list of centroids (empty if there is neither data nor initial centroids)'''

    from random import shuffle
    from copy import copy, deepcopy
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids:
        centroids = deepcopy(initialCentroids)
        # every instance must be assigned: none was used to seed a centroid
        remaining = localdata
    elif len(localdata) > 0:
        centroids = [Centroid(localdata[0])]
        remaining = localdata[1:]
    else:
        return []
    for instance in remaining:
        for centroid in centroids:
            if similarFunc(centroid.instance, instance):
                centroid.add(instance)
                break
        else: # no similar centroid: the instance starts a new cluster
            centroids.append(Centroid(instance))

    return centroids

# TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements

def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral clustering of the elements described by similarityMatrix

    similarityMatrix: square matrix (one row and one column per element)
    k: number of singular vectors used as features and number of clusters
    requested from k-means
    iter: passed to scipy.cluster.vq.kmeans as its iter argument

    Returns (code, sigma): the cluster index of each element (starting
    from 0) and the singular values of the normalized Laplacian'''
    size = len(similarityMatrix)
    # normalized Laplacian: I - D^(-1/2) S D^(-1/2), the degrees being the sums over axis 0
    degrees = np.sum(similarityMatrix, axis=0)
    invSqrtDegrees = np.diag(1 / np.sqrt(degrees))
    normalized = np.dot(invSqrtDegrees, np.dot(similarityMatrix, invSqrtDegrees))
    laplacian = np.identity(size) - normalized
    # singular value decomposition of the Laplacian
    U, sigma, V = np.linalg.svd(laplacian)
    # the first k rows of V become the columns of the feature matrix
    # NOTE(review): numpy returns the singular values in descending order, so these are the vectors of the k largest singular values; confirm this is the intended choice
    features = np.array(V[:k]).T
    # k-means on the whitened features
    import scipy.cluster.vq as vqmodule
    features = vqmodule.whiten(features)
    centroids, distortion = vqmodule.kmeans(features, k, iter)
    # code goes from 0 (first cluster) to k-1 (last cluster)
    code, distance = vqmodule.vq(features, centroids)
    return code, sigma