Mercurial Hosting > traffic-intelligence

#! /usr/bin/env python
'''Libraries for machine learning algorithms'''

# Under Python 2, makes classes defined in this module without an explicit
# base new-style by default (this assignment has no effect under Python 3).
__metaclass__ = type

def kMeansFixedDistance(data, sameCluster, centroid):
    '''Single-pass clustering driven by a similarity predicate.

    Each instance is compared, in order, to the current cluster centroids.
    It is merged into the first centroid for which sameCluster returns
    true; if none matches, the instance becomes the centroid of a new
    cluster. The number of clusters is therefore determined by the data
    and by sameCluster rather than fixed in advance.

    data: iterable of instances (may be empty)
    sameCluster: function (instance, centroid) -> bool, true if the
        instance belongs to the cluster represented by the centroid
    centroid: function (currentCentroid, instance) -> new centroid of the
        cluster once the instance has been added to it

    Returns the list of centroids (empty if data is empty). The result
    depends on the order of the instances in data.'''

    # todo randomize input
    # Start with no cluster: the first instance seeds the first centroid
    # through the normal "no match" path, so it is neither merged with
    # itself nor required to exist (empty input simply yields []).
    centroids = []
    for instance in data:
        for i, current in enumerate(centroids):
            if sameCluster(instance, current):
                centroids[i] = centroid(current, instance)
                break
        else:
            # no existing centroid accepted the instance: open a new cluster
            centroids.append(instance)

    return centroids
author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Thu, 24 Nov 2011 19:20:07 -0500
parents
children	d70e9b36889c