Mercurial Hosting > traffic-intelligence
view python/ml.py @ 693:5ee22bf7e4d5 dev
corrected bug when loading indicator time intervals and updated how queries are created for better legibility
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 30 Jun 2015 15:46:31 -0400 |
parents | da1352b89d02 |
children | b02431a8234c |
line wrap: on
line source
#! /usr/bin/env python
'''Libraries for machine learning algorithms'''

import numpy as np


class Model(object):
    '''Abstract class for loading/saving a wrapped model.

    Subclasses must set self.model to an object exposing load(filename)
    and save(filename).'''

    def load(self, filename):
        '''Loads the model from filename if the file exists;
        otherwise prints a warning and leaves the model unchanged'''
        from os import path
        if path.exists(filename):
            self.model.load(filename)
        else:
            print('Provided filename {} does not exist: model not loaded!'.format(filename))

    def save(self, filename):
        '''Saves the model to filename'''
        self.model.save(filename)


class SVM(Model):
    '''Wrapper for the OpenCV Support Vector Machine algorithm (cv2.SVM)'''

    def __init__(self):
        import cv2  # imported lazily so the module loads without OpenCV installed
        self.model = cv2.SVM()

    def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Trains the SVM on samples/responses; the remaining arguments are the
        cv2.SVM training parameters (kept in self.params for inspection)'''
        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)
        self.model.train(samples, responses, params = self.params)

    def predict(self, hog):
        '''Returns the SVM prediction for the feature vector hog'''
        return self.model.predict(hog)


class Centroid(object):
    '''Wrapper around an instance, keeping a running average and a counter.

    The wrapped instance must support multiply(scalar) and the + operator,
    both returning a new instance.'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    def add(self, instance2):
        '''Updates the centroid in place to the average including instance2'''
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid averaging self and c, weighted by their
        instance counts (self and c are left unmodified).

        Fixed: the previous version was missing the self parameter,
        referenced an undefined name instead of c, discarded the result of
        the final multiply and used integer division.'''
        nTotal = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        inst = inst.multiply(1/float(nTotal))
        return Centroid(inst, nTotal)

    def plot(self, options = ''):
        '''Plots the centroid instance and writes its instance count next to it
        (assumes the instance has a position with x and y attributes)'''
        from matplotlib.pylab import text
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))


def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Algorithm that clusters any dataset based on a similarity matrix
    Either the initialCentroids or k are passed
    TODO not implemented'''
    pass


def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Greedy k-means-like algorithm with a similarity function.

    Two instances belong to the same cluster if similarFunc returns True for
    them; each instance is added to the first centroid it is similar to, or
    starts a new cluster otherwise, so the number of clusters is determined
    by the data. Instances must support the operations required by Centroid
    (multiply(scalar) and +).

    data: list of instances
    similarFunc: boolean function of two instances
    initialCentroids: optional list of Centroid objects to start from
    shuffleData: if True, the data is processed in random order'''
    from random import shuffle
    from copy import copy, deepcopy
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        centroids = [Centroid(localdata[0])]
    else:
        centroids = deepcopy(initialCentroids)
    for instance in localdata[1:]:
        i = 0
        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
            i += 1
        if i == len(centroids):
            centroids.append(Centroid(instance))
        else:
            centroids[i].add(instance)

    return centroids

# TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements


def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral Clustering algorithm: embeds the data using the first k
    eigenvectors of the normalized Laplacian, then runs k-means.

    Returns (code, sigma) where code[i] is the cluster index (0 to k-1) of
    point i and sigma are the singular values of the Laplacian.
    NOTE: iter (number of k-means restarts) shadows the builtin, kept for
    backward compatibility of keyword calls.'''
    n = len(similarityMatrix)
    # create Laplacian matrix L = I - D^-1/2 W D^-1/2
    rowsum = np.sum(similarityMatrix,axis=0)
    D = np.diag(1 / np.sqrt(rowsum))
    I = np.identity(n)
    L = I - np.dot(D,np.dot(similarityMatrix,D))
    # compute eigenvectors of L
    U,sigma,V = np.linalg.svd(L)
    # create feature vector from k first eigenvectors
    # by stacking eigenvectors as columns
    features = np.array(V[:k]).T
    # k-means on the embedded points
    from scipy.cluster.vq import kmeans, whiten, vq
    features = whiten(features)
    centroids,distortion = kmeans(features,k, iter)
    code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
    return code,sigma


def motionPatterLearning(objects, maxDistance):
    '''Learns motion patterns from the objects.

    Option to use only the (n?) longest features per object instead of all for speed up
    TODO not implemented (NOTE: name has a typo, "Patter"; kept for backward compatibility)'''
    pass


def prototypeCluster():
    '''TODO not implemented'''
    pass