changeset 183:ed944ff45e8c

first simple clustering algorithm implementation
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 24 Nov 2011 19:20:07 -0500
parents d3f6de6c3918
children d70e9b36889c
files python/ml.py
diffstat 1 files changed, 25 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/ml.py	Thu Nov 24 19:20:07 2011 -0500
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+'''Libraries for machine learning algorithms'''
+
+__metaclass__ = type  # Python 2: classes defined in this module without explicit bases become new-style
+
+def kMeansFixedDistance(data, sameCluster, centroid):
+    '''Greedy single-pass clustering driven by a similarity predicate.
+
+    Each instance joins the first cluster whose centroid it matches according to sameCluster; otherwise it starts a new cluster, so the number of clusters is determined by the data.
+
+    data: list of instances (an empty list yields an empty result)
+    sameCluster: function(instance, centroid) returning true if the instance belongs to that centroid's cluster
+    centroid: function(currentCentroid, instance) returning the updated centroid once the instance is merged in
+    returns the list of cluster centroids, in order of creation'''
+
+    # todo randomize input
+    centroids = []  # start empty: seeding with data[0] failed on empty data and merged the first instance with itself
+    for instance in data:
+        for i, c in enumerate(centroids):
+            if sameCluster(instance, c):
+                centroids[i] = centroid(c, instance)
+                break
+        else:  # no existing centroid matched: the instance seeds a new cluster
+            centroids.append(instance)
+    return centroids