Mercurial Hosting > traffic-intelligence
changeset 184:d70e9b36889c
initial work on flow vectors and clustering algorithms
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Fri, 25 Nov 2011 18:38:54 -0500 |
parents | ed944ff45e8c |
children | c06379f25ab8 |
files | include/learning.hpp python/ml.py python/moving.py |
diffstat | 3 files changed, 73 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/learning.hpp Fri Nov 25 18:38:54 2011 -0500 @@ -0,0 +1,6 @@ +#ifndef LEARNING_HPP +#define LEARNING_HPP + +// todo search max-cut algorithm to maximize homogeneity of clusters and minimize weak connections + +#endif
--- a/python/ml.py Thu Nov 24 19:20:07 2011 -0500 +++ b/python/ml.py Fri Nov 25 18:38:54 2011 -0500 @@ -3,23 +3,55 @@ __metaclass__ = type -def kMeansFixedDistance(data, sameCluster, centroid): +class Centroid: + 'Wrapper around instances to add a counter' + + def __init__(self, instance, nInstances = 1): + self.instance = instance + self.nInstances = nInstances + + # def similar(instance2): + # return self.instance.similar(instance2) + + def add(self, instance2): + self.instance = self.instance.multiply(self.nInstances)+instance2 + self.nInstances += 1 + self.instance = self.instance.multiply(1/float(self.nInstances)) + + def average(c): + inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) + inst.multiply(1/(self.nInstances+instance.nInstances)) + return Centroid(inst, self.nInstances+instance.nInstances) + + def draw(self, options = ''): + from matplotlib.pylab import text + self.instance.draw(options) + text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) + + +def clustering(data, similar, initialCentroids = []): '''k-means algorithm with similarity function - Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the centroid of a set of instances can be computed, using the function. + Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. The number of clusters will be determined accordingly data: list of instances - centroid: ''' + averageCentroid: ''' - # todo randomize input - centroids = [data[0]] - for instance in data: + from random import shuffle + from copy import copy, deepcopy + localdata = copy(data) # shallow copy to avoid modifying data + shuffle(localdata) + if initialCentroids: + centroids = deepcopy(initialCentroids) + else: + centroids = [Centroid(localdata[0])] + for instance in localdata[1:]: i = 0 - while i<len(centroids) and not sameCluster(instance, centroids[i]): + while i<len(centroids) and not similar(centroids[i].instance, instance): i += 1 if i == len(centroids): - centroids.append(instance) + centroids.append(Centroid(instance)) else: - centroids[i] = centroid(centroids[i], instance) + centroids[i].add(instance) return centroids
--- a/python/moving.py Thu Nov 24 19:20:07 2011 -0500 +++ b/python/moving.py Fri Nov 25 18:38:54 2011 -0500 @@ -129,6 +129,9 @@ def __repr__(self): return self.__str__() + def __add__(self, other): + return Point(self.x+other.x, self.y+other.y) + def __sub__(self, other): return Point(self.x-other.x, self.y-other.y) @@ -199,6 +202,29 @@ from matplotlib.pyplot import scatter scatter([p.x for p in points],[p.y for p in points], c=color) +class FlowVector: + '''Class to represent 4-D flow vectors, + ie a position and a velocity''' + def __init__(self, position, velocity): + 'position and velocity should be Point instances' + self.position = position + self.velocity = velocity + + def __add__(self, other): + return FlowVector(self.position+other.position, self.velocity+other.velocity) + + def multiply(self, alpha): + return FlowVector(self.position.multiply(alpha), self.velocity.multiply(alpha)) + + def draw(self, options = ''): + from matplotlib.pylab import plot + plot([self.position.x, self.position.x+self.velocity.x], [self.position.y, self.position.y+self.velocity.y], options) + self.position.draw(options+'x') + + @staticmethod + def similar(f1, f2, maxDistance2, maxDeltavelocity2): + return (f1.position-f2.position).norm2Squared()<maxDistance2 and (f1.velocity-f2.velocity).norm2Squared()<maxDeltavelocity2 + def segmentIntersection(p1, p2, p3, p4): '''Returns the intersecting point of the segments [p1, p2] and [p3, p4], None otherwise''' from numpy import matrix