Mercurial Hosting > traffic-intelligence

--- a/python/ml.py	Thu Mar 03 17:01:30 2016 -0500
+++ b/python/ml.py	Fri Mar 11 17:38:48 2016 -0500
@@ -1,13 +1,26 @@
 #! /usr/bin/env python
 '''Libraries for machine learning algorithms'''

+from os import path
+from random import shuffle
+from copy import copy, deepcopy
+
 import numpy as np
+from matplotlib.pylab import text
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+from scipy.cluster.vq import kmeans, whiten, vq
+from sklearn import mixture

+import utils
+
+#####################
+# OpenCV ML models
+#####################

 class Model(object):
     '''Abstract class for loading/saving model'''
     def load(self, filename):
-        from os import path
         if path.exists(filename):
             self.model.load(filename)
         else:
@@ -31,6 +44,10 @@
         return self.model.predict(hog)


+#####################
+# Clustering
+#####################
+
 class Centroid(object):
     'Wrapper around instances to add a counter'

@@ -52,7 +69,6 @@
         return Centroid(inst, self.nInstances+instance.nInstances)

     def plot(self, options = ''):
-        from matplotlib.pylab import text
         self.instance.plot(options)
         text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))

@@ -68,9 +84,6 @@

     data: list of instances
     averageCentroid: '''
-
-    from random import shuffle
-    from copy import copy, deepcopy
     localdata = copy(data) # shallow copy to avoid modifying data
     if shuffleData:
         shuffle(localdata)
@@ -105,7 +118,6 @@
 	# by stacking eigenvectors as columns
 	features = np.array(V[:k]).T
 	# k-means
-	from scipy.cluster.vq import kmeans, whiten, vq
 	features = whiten(features)
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
@@ -179,5 +191,38 @@

 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
     clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
-    clusterSizes['outlier'] = sum(np.array(labels) == -1)
+    clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex)
     return clusterSizes
+
+# Gaussian Mixture Models
+def plotGMMClusters(model, dataset = None, colors = utils.colors):
+    '''Plots the Gaussian components of a mixture model in a new figure
+
+    For each component, draws an ellipse derived from its covariance matrix
+    and annotates the component index next to its mean
+    If dataset is provided, also plots its instances
+    in the color of their predicted component
+
+    model: fitted model providing n_components, means_, covars_ and predict
+    dataset: optional array of instances (rows), the first two columns are plotted
+    colors: colors indexed by component index'''
+    fig = plt.figure()
+    # dataset defaults to None: predict classes only if there are instances to plot
+    labels = model.predict(dataset) if dataset is not None else None
+    for i in range(model.n_components):
+        mean = model.means_[i]
+        if labels is not None:
+            members = labels == i # boolean mask of the instances assigned to component i
+            plt.scatter(dataset[members, 0], dataset[members, 1], .8, color=colors[i])
+        plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
+
+        # Plot an ellipse to show the Gaussian component
+        # NOTE(review): assumes model.covars_[i] is a square matrix (as for covariance_type 'full') - confirm for other types
+        v, w = np.linalg.eigh(model.covars_[i])
+        angle = np.arctan2(w[0][1], w[0][0])
+        angle = 180*angle/np.pi  # convert to degrees
+        v *= 4 # eigenvalues scaled by a fixed factor to get the ellipse width and height
+        ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
+        ell.set_clip_box(fig.bbox)
+        ell.set_alpha(.5)
+        fig.axes[0].add_artist(ell)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/learn-poi.py	Fri Mar 11 17:38:48 2016 -0500
@@ -0,0 +1,44 @@
+#! /usr/bin/env python
+
+import argparse
+
+import numpy as np
+from sklearn import mixture
+import matplotlib.pyplot as plt
+
+import storage, ml
+
+parser = argparse.ArgumentParser(description='The program learns and displays Gaussians fit to beginnings and ends of object trajectories (based on Mohamed Gomaa Mohamed 2015 PhD). TODO: save the data')
+parser.add_argument('-d', dest = 'databaseFilename', help = 'name of the Sqlite database file', required = True)
+parser.add_argument('-t', dest = 'trajectoryType', help = 'type of trajectories to display', choices = ['feature', 'object'], default = 'object')
+parser.add_argument('-n', dest = 'nClusters', help = 'number of point clusters', required = True, type = int)
+parser.add_argument('--covariance-type', dest = 'covarianceType', help = 'type of covariance of Gaussian model', default = "full")
+
+args = parser.parse_args()
+
+objects = storage.loadTrajectoriesFromSqlite(args.databaseFilename, args.trajectoryType)
+
+# positions of each object at index 0 and at its last index (length-1)
+beginnings = []
+ends = []
+for o in objects:
+    beginnings.append(o.getPositionAt(0).aslist())
+    ends.append(o.getPositionAt(int(o.length())-1).aslist())
+
+# the mixture models cannot be fit without any point: stop with an explicit message
+if len(beginnings) == 0:
+    raise SystemExit('No trajectory loaded from {}'.format(args.databaseFilename))
+
+beginnings = np.array(beginnings)
+ends = np.array(ends)
+
+# one mixture model is fit and plotted (in its own figure) for each set of points
+for name, title, points in [('Origin', 'Origins', beginnings), ('Destination', 'Destinations', ends)]:
+    model = mixture.GMM(n_components=args.nClusters, covariance_type = args.covarianceType).fit(points)
+    ml.plotGMMClusters(model, points)
+    plt.axis('equal')
+    plt.title(title)
+    print('{} Clusters:\n{}'.format(name, ml.computeClusterSizes(model.predict(points), range(args.nClusters))))
+
+# the figures are only displayed once show is called when running as a script
+plt.show()