Mercurial Hosting > traffic-intelligence

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/ml.py	Thu Nov 24 19:20:07 2011 -0500
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+'''Libraries for machine learning algorithms'''
+
+__metaclass__ = type  # Python 2 idiom: classes in this module without explicit bases become new-style (no effect in Python 3)
+
+def kMeansFixedDistance(data, sameCluster, centroid):
+    '''Greedy single-pass clustering driven by a similarity predicate (k-means-like, number of clusters not fixed)
+    Each instance joins the first cluster whose current centroid satisfies sameCluster(instance, centroid); otherwise it starts a new cluster, so the number of clusters is determined by the data.
+
+    data: list (or any iterable) of instances, may be empty
+    sameCluster: function (instance, centroid) returning true if the instance belongs to that centroid's cluster
+    centroid: function (currentCentroid, instance) returning the updated centroid
+    returns: list of centroids in order of creation ([] for empty data)'''
+
+    # todo randomize input
+    # start empty: the loop seeds the first cluster, so data[0] is not merged with itself and empty data is fine
+    centroids = []
+    for instance in data:
+        for i, current in enumerate(centroids):
+            if sameCluster(instance, current):
+                centroids[i] = centroid(current, instance)
+                break
+        else:  # no existing centroid matched: the instance starts a new cluster
+            centroids.append(instance)
+    return centroids