comparison python/ml.py @ 183:ed944ff45e8c

first simple clustering algorithm implementation
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 24 Nov 2011 19:20:07 -0500
parents
children d70e9b36889c
comparison
equal deleted inserted replaced
182:d3f6de6c3918 183:ed944ff45e8c
1 #! /usr/bin/env python
2 '''Libraries for machine learning algorithms'''
3
4 __metaclass__ = type
5
def kMeansFixedDistance(data, sameCluster, centroid):
    '''Single-pass clustering driven by a similarity function

    Despite the name, the number of clusters is not an input: it is
    determined by the data. Each instance is merged into the first existing
    centroid for which sameCluster returns true; if there is none, the
    instance becomes the centroid of a new cluster.

    data: iterable of instances
    sameCluster: function(instance, centroid) returning true if the instance
        should belong to the cluster represented by that centroid
    centroid: function(centroid, instance) returning the updated centroid
        of the cluster once the instance has been added to it

    Returns the list of centroids in order of creation (empty if data is
    empty).'''

    # todo randomize input (the result depends on the order of the instances)
    # Start with no centroid: the first instance then seeds the first cluster
    # exactly once, instead of also being merged with itself, and empty input
    # simply yields an empty list.
    centroids = []
    for instance in data:
        for i, current in enumerate(centroids):
            if sameCluster(instance, current):
                centroids[i] = centroid(current, instance)
                break
        else:
            # no existing centroid accepted the instance: new cluster
            centroids.append(instance)

    return centroids