Mercurial Hosting > traffic-intelligence
diff python/ml.py @ 308:8bafd054cda4
Added a function to compute LCSS distance between two indicators
author | Mohamed Gomaa |
---|---|
date | Tue, 25 Dec 2012 02:20:25 -0500 |
parents | d70e9b36889c |
children | 80cbafd69109 |
line wrap: on
line diff
import numpy as np


def spectralClustering(similarityMatrix, k):
    '''Cluster items into k groups by normalized spectral clustering.

    Steps:
      1. build the normalized Laplacian L = I - D^-1/2 * W * D^-1/2, where W
         is the similarity matrix and D the diagonal matrix of its column sums;
      2. embed every item using the k singular vectors of L that belong to
         its k SMALLEST singular values;
      3. whiten the embedding and run k-means on it.

    similarityMatrix: square (n x n) array-like of pairwise similarities;
        every column must have a strictly positive sum.
    k: number of clusters requested.

    Returns (code, sigma):
      code  -- array of n cluster indices, numbered from 0 (first cluster)
               up to at most k-1 (last cluster);
      sigma -- the singular values of L, in descending order, as returned
               by np.linalg.svd.

    Raises ValueError if a column of similarityMatrix does not have a
    strictly positive sum (the normalization would divide by zero).
    '''
    # scipy is only needed by this function, so it is imported here rather
    # than at module level.
    from scipy.cluster.vq import kmeans, whiten, vq

    weights = np.asarray(similarityMatrix, dtype=float)
    n = len(weights)

    # create Laplacian matrix
    degree = np.sum(weights, axis=0)
    if not np.all(degree > 0):
        raise ValueError('every column of similarityMatrix must have a '
                         'strictly positive sum')
    invSqrtDegree = np.diag(1 / np.sqrt(degree))
    laplacian = np.identity(n) - np.dot(invSqrtDegree,
                                        np.dot(weights, invSqrtDegree))

    # compute singular vectors of L; np.linalg.svd sorts the singular values
    # in descending order and returns the right singular vectors as rows of V
    _, sigma, V = np.linalg.svd(laplacian)

    # The cluster structure of a similarity graph lives in the vectors with
    # the smallest values of L, i.e. the LAST rows of V. Reversing before
    # slicing keeps k == 0 and k > n behaving like a plain slice.
    # Stack the selected vectors as columns: one feature row per item.
    features = np.array(V[::-1][:k]).T

    # k-means on the whitened (unit-variance) embedding
    features = whiten(features)
    centroids, _ = kmeans(features, k, iter=20)  # default iter = 20
    code, _ = vq(features, centroids)
    return code, sigma