Mercurial Hosting > traffic-intelligence

--- a/python/tests/utils.txt	Fri Jun 26 23:49:44 2015 -0400
+++ b/python/tests/utils.txt	Mon Jun 29 08:35:27 2015 -0400
@@ -50,7 +50,7 @@
 >>> mostCommon([range(2), range(4), range(2)])
 [0, 1]

->>> lcss = LCSS(lambda x,y: abs(x-y) <= 0.1)
+>>> lcss = LCSS(similarityFunc = lambda x,y: abs(x-y) <= 0.1)
 >>> lcss.compute(range(5), range(5))
 5
 >>> lcss.compute(range(1,5), range(5))
@@ -59,8 +59,6 @@
 0
 >>> lcss.compute(range(5), range(10))
 5
->>> lcss.compute(range(5), range(10), 2)
-5
 >>> lcss.similarityFunc = lambda x,y: x == y
 >>> lcss.compute(['a','b','c'], ['a','b','c', 'd'])
 3
@@ -117,3 +115,14 @@
 8
 >>> lcss.subSequenceIndices
 [(2, 0), (4, 1), (6, 2), (7, 3), (8, 4), (9, 5), (11, 6), (13, 7)]
+
+>>> lcss = LCSS(metric = 'cityblock', epsilon = 0.1)
+>>> lcss.compute([[i] for i in range(5)], [[i] for i in range(5)])
+5
+>>> lcss.compute([[i] for i in range(1,5)], [[i] for i in range(5)])
+4
+>>> lcss.compute([[i] for i in range(5,10)], [[i] for i in range(5)])
+0
+>>> lcss.compute([[i] for i in range(5)], [[i] for i in range(10)])
+5
+
--- a/python/utils.py	Fri Jun 26 23:49:44 2015 -0400
+++ b/python/utils.py	Mon Jun 29 08:35:27 2015 -0400
@@ -6,6 +6,7 @@
 from datetime import time, datetime
 from math import sqrt, ceil, floor
 from scipy.stats import kruskal, shapiro
+from scipy.spatial import distance
 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve,  dtype, isnan, NaN, mean, ma


@@ -631,23 +632,50 @@
     the methods with names starting with _ are not to be shadowed
     in child classes, who will shadow the other methods,
     ie compute and computeXX methods'''
-    def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
-        self.similarityFunc = similarityFunc
-        self.aligned = aligned
-        self.delta = delta
-        self.lengthFunc = lengthFunc
-        self.subSequenceIndices = [(0,0)]
+    def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
+        '''One should provide either a similarity function
+        that indicates (return bool) whether elements in the compares lists are similar
+
+        eg distance(p1, p2) < epsilon
+
+        or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
+        if similarityFunc is None and metric is None:
+            print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
+            import sys
+            sys.exit()
+        elif metric is not None and epsilon is None:
+            print("Please provide a value for epsilon if using a cdist metric. Exiting")
+            import sys
+            sys.exit()
+        else:
+            self.similarityFunc = similarityFunc
+            self.metric = metric
+            self.epsilon = epsilon
+            self.aligned = aligned
+            self.delta = delta
+            self.lengthFunc = lengthFunc
+            self.subSequenceIndices = [(0,0)]

     def similarities(self, l1, l2, jshift=0):
         n1 = len(l1)
         n2 = len(l2)
         self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
-        for i in xrange(1,n1+1):
-            for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
-                if self.similarityFunc(l1[i-1], l2[j-1]):
-                    self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
-                else:
-                    self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+        if self.similarityFunc is not None:
+            for i in xrange(1,n1+1):
+                for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+                    if self.similarityFunc(l1[i-1], l2[j-1]):
+                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                    else:
+                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+        elif self.metric is not None:
+            similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
+            for i in xrange(1,n1+1):
+                for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+                    if similarElements[i-1, j-1]:
+                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                    else:
+                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+

     def subSequence(self, i, j):
         '''Returns the subsequence of two sequences
@@ -663,12 +691,11 @@

     def _compute(self, _l1, _l2, computeSubSequence = False):
         '''returns the longest common subsequence similarity
-        based on the threshold on distance between two elements of lists l1, l2
-        similarityFunc returns True or False whether the two points are considered similar
+        l1 and l2 should be the right format
+        eg list of tuple points for cdist
+        or elements that can be compare using similarityFunc

         if aligned, returns the best matching if using a finite delta by shifting the series alignments
-
-        eg distance(p1, p2) < epsilon
         '''
         if len(_l2) < len(_l1): # l1 is the shortest
             l1 = _l2