diff python/ml.py @ 308:8bafd054cda4

Added a function to compute LCSS distance between two indicators
author Mohamed Gomaa
date Tue, 25 Dec 2012 02:20:25 -0500
parents d70e9b36889c
children 80cbafd69109
line wrap: on
line diff
--- a/python/ml.py	Tue Dec 25 02:16:10 2012 -0500
+++ b/python/ml.py	Tue Dec 25 02:20:25 2012 -0500
@@ -1,6 +1,8 @@
 #! /usr/bin/env python
 '''Libraries for machine learning algorithms'''
 
+import numpy as np
+
 __metaclass__ = type
 
 class Centroid:
@@ -55,3 +57,53 @@
             centroids[i].add(instance)
 
     return centroids
+
+def spectralClustering(similarityMatrix, k):
+    '''Clusters n items into k groups using normalized spectral clustering
+
+    The normalized Laplacian L = I - D^(-1/2).S.D^(-1/2) is built from the
+    similarity matrix S, where D is the diagonal matrix of the column sums
+    of S. The items are embedded using the singular vectors of L associated
+    with its k smallest singular values, then clustered with k-means.
+
+    similarityMatrix: square (n x n) array-like of pairwise similarities,
+    expected to be symmetric, whose columns each sum to a positive value
+    k: number of clusters, between 1 and n
+
+    Returns the tuple (code, sigma)
+    code: array of the cluster index of each item, from 0 (first cluster)
+    to k-1 (last cluster)
+    sigma: singular values of L, in decreasing order
+
+    Raises ValueError if similarityMatrix is not square, if k is not
+    between 1 and n, or if a column does not sum to a positive value'''
+    from scipy.cluster.vq import kmeans, whiten, vq
+
+    similarityMatrix = np.asarray(similarityMatrix, dtype=float)
+    if similarityMatrix.ndim != 2 or similarityMatrix.shape[0] != similarityMatrix.shape[1]:
+        raise ValueError('similarityMatrix must be a square matrix')
+    n = len(similarityMatrix)
+    if not 1 <= k <= n:
+        raise ValueError('k must be between 1 and the number of items')
+    degrees = np.sum(similarityMatrix, axis=0)
+    if not np.all(degrees > 0):
+        # 1/sqrt(degree) would be infinite or undefined and corrupt L
+        raise ValueError('each column of similarityMatrix must sum to a positive value')
+
+    # create normalized Laplacian matrix: scaling row i and column j by
+    # 1/sqrt(degree) is the product by D^(-1/2) on both sides
+    invSqrtDegrees = 1 / np.sqrt(degrees)
+    L = np.identity(n) - invSqrtDegrees[:, np.newaxis] * similarityMatrix * invSqrtDegrees
+    # compute singular vectors of L (its eigenvectors if L is symmetric
+    # positive semi-definite), returned as the rows of V
+    _, sigma, V = np.linalg.svd(L)
+    # create feature vector by stacking as columns the k last rows of V:
+    # svd sorts singular values in decreasing order and the clusters are
+    # described by the vectors with the smallest values, not the largest
+    features = V[-k:].T
+    # k-means
+    features = whiten(features)
+    centroids, _ = kmeans(features, k, iter=20) # default iter = 20
+    code, _ = vq(features, centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
+    return code, sigma
+