view python/ml.py @ 183:ed944ff45e8c

first simple clustering algorithm implementation
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 24 Nov 2011 19:20:07 -0500
parents
children d70e9b36889c
line wrap: on
line source

#! /usr/bin/env python
'''Libraries for machine learning algorithms'''

# Python 2 idiom: classes defined in this module without explicit bases
# become new-style classes (the assignment has no effect under Python 3).
__metaclass__ = type

def kMeansFixedDistance(data, sameCluster, centroid):
    '''Single-pass clustering driven by a similarity predicate.

    The number of clusters is not fixed in advance: each instance is
    merged into the first existing cluster whose current centroid it is
    similar to, and otherwise starts a new cluster of its own. The result
    therefore depends on the order of the instances in data.

    data: iterable of instances (may be empty)
    sameCluster: function called as sameCluster(instance, centroid),
        returning a true value if the instance belongs to the cluster
        represented by that centroid
    centroid: function called as centroid(currentCentroid, instance),
        returning the updated centroid of the cluster once the instance
        has been merged into it

    Returns the list of cluster centroids, in order of cluster creation
    (an empty list if data is empty).'''

    # todo randomize input
    # Start with no cluster at all: the first instance creates the first
    # cluster through the loop below, so it is neither merged with itself
    # nor required to exist (empty input simply yields no cluster).
    centroids = []
    for instance in data:
        for i, current in enumerate(centroids):
            if sameCluster(instance, current):
                centroids[i] = centroid(current, instance)
                break
        else:
            # no existing cluster accepted the instance
            centroids.append(instance)

    return centroids