Mercurial Hosting > traffic-intelligence
annotate python/ml.py @ 251:4978b5baf8f1
Win32 version for traffic intelligence. Use the README-Win32.txt to know how to
fetch the dependencies. A batch file is included to automate most of the process
author | Jeep-Tour@Jeep-Tour-PC |
---|---|
date | Mon, 23 Jul 2012 12:27:50 -0400 |
parents | d70e9b36889c |
children | 5957aa1d69e1 8bafd054cda4 |
rev | line source |
---|---|
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
1 #! /usr/bin/env python |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
2 '''Libraries for machine learning algorithms''' |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
3 |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
4 __metaclass__ = type |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
5 |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
6 class Centroid: |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
7 'Wrapper around instances to add a counter' |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
8 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
9 def __init__(self, instance, nInstances = 1): |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
10 self.instance = instance |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
11 self.nInstances = nInstances |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
12 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
13 # def similar(instance2): |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
14 # return self.instance.similar(instance2) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
15 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
16 def add(self, instance2): |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
17 self.instance = self.instance.multiply(self.nInstances)+instance2 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
18 self.nInstances += 1 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
19 self.instance = self.instance.multiply(1/float(self.nInstances)) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
20 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
21 def average(c): |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
22 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
23 inst.multiply(1/(self.nInstances+instance.nInstances)) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
24 return Centroid(inst, self.nInstances+instance.nInstances) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
25 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
26 def draw(self, options = ''): |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
27 from matplotlib.pylab import text |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
28 self.instance.draw(options) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
29 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
30 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
31 |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
32 def clustering(data, similar, initialCentroids = []): |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
33 '''k-means algorithm with similarity function |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
34 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
35 The number of clusters will be determined accordingly |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
36 |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
37 data: list of instances |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
38 averageCentroid: ''' |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
39 |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
40 from random import shuffle |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
41 from copy import copy, deepcopy |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
42 localdata = copy(data) # shallow copy to avoid modifying data |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
43 shuffle(localdata) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
44 if initialCentroids: |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
45 centroids = deepcopy(initialCentroids) |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
46 else: |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
47 centroids = [Centroid(localdata[0])] |
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
48 for instance in localdata[1:]: |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
49 i = 0 |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
50 while i<len(centroids) and not similar(centroids[i].instance, instance): |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
51 i += 1 |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
52 if i == len(centroids): |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
53 centroids.append(Centroid(instance)) |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
54 else: |
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
55 centroids[i].add(instance) |
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
56 |
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
57 return centroids |