diff python/utils.py @ 689:9990ef119bce dev

added version of LCSS with cdist computations
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 29 Jun 2015 08:35:27 -0400
parents de278c5e65f6
children 8d99a9e16644
line wrap: on
line diff
--- a/python/utils.py	Fri Jun 26 23:49:44 2015 -0400
+++ b/python/utils.py	Mon Jun 29 08:35:27 2015 -0400
@@ -6,6 +6,7 @@
 from datetime import time, datetime
 from math import sqrt, ceil, floor
 from scipy.stats import kruskal, shapiro
+from scipy.spatial import distance
 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve,  dtype, isnan, NaN, mean, ma
 
 
@@ -631,23 +632,50 @@
     the methods with names starting with _ are not to be shadowed
     in child classes, who will shadow the other methods, 
     ie compute and computeXX methods'''
-    def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
-        self.similarityFunc = similarityFunc
-        self.aligned = aligned
-        self.delta = delta
-        self.lengthFunc = lengthFunc
-        self.subSequenceIndices = [(0,0)]
+    def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
+        '''One should provide either a similarity function
+        that indicates (returns bool) whether elements in the compared lists are similar
+
+        eg distance(p1, p2) < epsilon
+        
+        or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
+        if similarityFunc is None and metric is None:
+            print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
+            import sys
+            sys.exit()
+        elif metric is not None and epsilon is None:
+            print("Please provide a value for epsilon if using a cdist metric. Exiting")
+            import sys
+            sys.exit()
+        else:
+            self.similarityFunc = similarityFunc
+            self.metric = metric
+            self.epsilon = epsilon
+            self.aligned = aligned
+            self.delta = delta
+            self.lengthFunc = lengthFunc
+            self.subSequenceIndices = [(0,0)]
 
     def similarities(self, l1, l2, jshift=0):
         n1 = len(l1)
         n2 = len(l2)
         self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
-        for i in xrange(1,n1+1):
-            for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
-                if self.similarityFunc(l1[i-1], l2[j-1]):
-                    self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
-                else:
-                    self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+        if self.similarityFunc is not None:
+            for i in xrange(1,n1+1):
+                for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+                    if self.similarityFunc(l1[i-1], l2[j-1]):
+                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                    else:
+                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+        elif self.metric is not None:
+            similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
+            for i in xrange(1,n1+1):
+                for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+                    if similarElements[i-1, j-1]:
+                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                    else:
+                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+            
 
     def subSequence(self, i, j):
         '''Returns the subsequence of two sequences
@@ -663,12 +691,11 @@
 
     def _compute(self, _l1, _l2, computeSubSequence = False):
         '''returns the longest common subsequence similarity
-        based on the threshold on distance between two elements of lists l1, l2
-        similarityFunc returns True or False whether the two points are considered similar
+        l1 and l2 should be in the right format
+        eg lists of tuple points for cdist
+        or elements that can be compared using similarityFunc
 
         if aligned, returns the best matching if using a finite delta by shifting the series alignments
-
-        eg distance(p1, p2) < epsilon
         '''
         if len(_l2) < len(_l1): # l1 is the shortest
             l1 = _l2