changeset 184:d70e9b36889c

initial work on flow vectors and clustering algorithms
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Fri, 25 Nov 2011 18:38:54 -0500
parents ed944ff45e8c
children c06379f25ab8
files include/learning.hpp python/ml.py python/moving.py
diffstat 3 files changed, 73 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/learning.hpp	Fri Nov 25 18:38:54 2011 -0500
@@ -0,0 +1,6 @@
+#ifndef LEARNING_HPP
+#define LEARNING_HPP
+
+// todo search max-cut algorithm to maximize homogeneity of clusters and minimize weak connections
+
+#endif
--- a/python/ml.py	Thu Nov 24 19:20:07 2011 -0500
+++ b/python/ml.py	Fri Nov 25 18:38:54 2011 -0500
@@ -3,23 +3,55 @@
 
 __metaclass__ = type
 
-def kMeansFixedDistance(data, sameCluster, centroid):
+class Centroid:
+    'Wrapper around instances to add a counter'
+
+    def __init__(self, instance, nInstances = 1):
+        self.instance = instance
+        self.nInstances = nInstances
+
+    # def similar(instance2):
+    #     return self.instance.similar(instance2)
+
+    def add(self, instance2):
+        self.instance = self.instance.multiply(self.nInstances)+instance2
+        self.nInstances += 1
+        self.instance = self.instance.multiply(1/float(self.nInstances))
+
+    def average(c):
+        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
+        inst.multiply(1/(self.nInstances+instance.nInstances))
+        return Centroid(inst, self.nInstances+instance.nInstances)
+
+    def draw(self, options = ''):
+        from matplotlib.pylab import text
+        self.instance.draw(options)
+        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
+
+
+def clustering(data, similar, initialCentroids = []):
     '''k-means algorithm with similarity function
-    Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the centroid of a set of instances can be computed, using the function. 
+    Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. 
     The number of clusters will be determined accordingly
 
     data: list of instances
-    centroid: '''
+    averageCentroid: '''
 
-    # todo randomize input
-    centroids = [data[0]]
-    for instance in data:
+    from random import shuffle
+    from copy import copy, deepcopy
+    localdata = copy(data) # shallow copy to avoid modifying data
+    shuffle(localdata)
+    if initialCentroids:
+        centroids = deepcopy(initialCentroids)
+    else:
+        centroids = [Centroid(localdata[0])]
+    for instance in localdata[1:]:
         i = 0
-        while i<len(centroids) and not sameCluster(instance, centroids[i]):
+        while i<len(centroids) and not similar(centroids[i].instance, instance):
             i += 1
         if i == len(centroids):
-            centroids.append(instance)
+            centroids.append(Centroid(instance))
         else:
-            centroids[i] = centroid(centroids[i], instance)
+            centroids[i].add(instance)
 
     return centroids
--- a/python/moving.py	Thu Nov 24 19:20:07 2011 -0500
+++ b/python/moving.py	Fri Nov 25 18:38:54 2011 -0500
@@ -129,6 +129,9 @@
     def __repr__(self):
         return self.__str__()
 
+    def __add__(self, other):
+        return Point(self.x+other.x, self.y+other.y)
+
     def __sub__(self, other):
         return Point(self.x-other.x, self.y-other.y)
 
@@ -199,6 +202,29 @@
         from matplotlib.pyplot import scatter
         scatter([p.x for p in points],[p.y for p in points], c=color)
 
+class FlowVector:
+    '''Class to represent 4-D flow vectors,
+    ie a position and a velocity'''
+    def __init__(self, position, velocity):
+        'position and velocity should be Point instances'
+        self.position = position
+        self.velocity = velocity
+
+    def __add__(self, other):
+        return FlowVector(self.position+other.position, self.velocity+other.velocity)
+
+    def multiply(self, alpha):
+        return FlowVector(self.position.multiply(alpha), self.velocity.multiply(alpha))
+
+    def draw(self, options = ''):
+        from matplotlib.pylab import plot
+        plot([self.position.x, self.position.x+self.velocity.x], [self.position.y, self.position.y+self.velocity.y], options)
+        self.position.draw(options+'x')
+    
+    @staticmethod
+    def similar(f1, f2, maxDistance2, maxDeltavelocity2):
+        return (f1.position-f2.position).norm2Squared()<maxDistance2 and (f1.velocity-f2.velocity).norm2Squared()<maxDeltavelocity2
+
 def segmentIntersection(p1, p2, p3, p4):
     '''Returns the intersecting point of the segments [p1, p2] and [p3, p4], None otherwise'''
     from numpy import matrix