Mercurial Hosting > traffic-intelligence
view python/ml.py @ 693:5ee22bf7e4d5 dev
corrected bug when loading indicator time intervals and updated how queries are created for better legibility
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 30 Jun 2015 15:46:31 -0400 |
parents | da1352b89d02 |
children | b02431a8234c |
line wrap: on
line source
#! /usr/bin/env python
'''Libraries for machine learning algorithms'''

import numpy as np


class Model(object):
    '''Abstract class for loading/saving a wrapped model.

    Subclasses must set self.model to an object exposing load(filename)
    and save(filename).'''

    def load(self, filename):
        '''Loads the model from filename if the file exists;
        otherwise prints a warning and leaves the model unchanged'''
        from os import path
        if path.exists(filename):
            self.model.load(filename)
        else:
            print('Provided filename {} does not exist: model not loaded!'.format(filename))

    def save(self, filename):
        '''Saves the model to filename'''
        self.model.save(filename)


class SVM(Model):
    '''Wrapper for the OpenCV Support Vector Machine algorithm (cv2.SVM)'''

    def __init__(self):
        import cv2  # imported lazily so the module loads without OpenCV installed
        self.model = cv2.SVM()

    def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Trains the SVM on samples/responses; the remaining arguments are the
        cv2.SVM training parameters (kept in self.params for inspection)'''
        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)
        self.model.train(samples, responses, params = self.params)

    def predict(self, hog):
        '''Returns the SVM prediction for the feature vector hog'''
        return self.model.predict(hog)


class Centroid(object):
    '''Wrapper around an instance, keeping a running average and a counter.

    The wrapped instance must support multiply(scalar) and the + operator,
    both returning a new instance.'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    def add(self, instance2):
        '''Updates the centroid in place to the average including instance2'''
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid averaging self and c, weighted by their
        instance counts (self and c are left unmodified).

        Fixed: the previous version was missing the self parameter,
        referenced an undefined name instead of c, discarded the result of
        the final multiply and used integer division.'''
        nTotal = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        inst = inst.multiply(1/float(nTotal))
        return Centroid(inst, nTotal)

    def plot(self, options = ''):
        '''Plots the centroid instance and writes its instance count next to it
        (assumes the instance has a position with x and y attributes)'''
        from matplotlib.pylab import text
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))


def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Algorithm that clusters any dataset based on a similarity matrix
    Either the initialCentroids or k are passed
    TODO not implemented'''
    pass


def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Greedy k-means-like algorithm with a similarity function.

    Two instances belong to the same cluster if similarFunc returns True for
    them; each instance is added to the first centroid it is similar to, or
    starts a new cluster otherwise, so the number of clusters is determined
    by the data. Instances must support the operations required by Centroid
    (multiply(scalar) and +).

    data: list of instances
    similarFunc: boolean function of two instances
    initialCentroids: optional list of Centroid objects to start from
    shuffleData: if True, the data is processed in random order'''
    from random import shuffle
    from copy import copy, deepcopy
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        centroids = [Centroid(localdata[0])]
    else:
        centroids = deepcopy(initialCentroids)
    for instance in localdata[1:]:
        i = 0
        while i<len(centroids) and not similarFunc(centroids[i].instance, instance):
            i += 1
        if i == len(centroids):
            centroids.append(Centroid(instance))
        else:
            centroids[i].add(instance)

    return centroids

# TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements


def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral Clustering algorithm: embeds the data using the first k
    eigenvectors of the normalized Laplacian, then runs k-means.

    Returns (code, sigma) where code[i] is the cluster index (0 to k-1) of
    point i and sigma are the singular values of the Laplacian.
    NOTE: iter (number of k-means restarts) shadows the builtin, kept for
    backward compatibility of keyword calls.'''
    n = len(similarityMatrix)
    # create Laplacian matrix L = I - D^-1/2 W D^-1/2
    rowsum = np.sum(similarityMatrix,axis=0)
    D = np.diag(1 / np.sqrt(rowsum))
    I = np.identity(n)
    L = I - np.dot(D,np.dot(similarityMatrix,D))
    # compute eigenvectors of L
    U,sigma,V = np.linalg.svd(L)
    # create feature vector from k first eigenvectors
    # by stacking eigenvectors as columns
    features = np.array(V[:k]).T
    # k-means on the embedded points
    from scipy.cluster.vq import kmeans, whiten, vq
    features = whiten(features)
    centroids,distortion = kmeans(features,k, iter)
    code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
    return code,sigma


def motionPatterLearning(objects, maxDistance):
    '''Learns motion patterns from the objects.

    Option to use only the (n?) longest features per object instead of all for speed up
    TODO not implemented (NOTE: name has a typo, "Patter"; kept for backward compatibility)'''
    pass


def prototypeCluster():
    '''TODO not implemented'''
    pass