changeset 368:2db4e76599a1

implemented subsequence extraction and rearranged arguments
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 15 Jul 2013 15:08:53 -0400
parents d44eba0db517
children 027e254f0b53
files python/indicators.py python/moving.py python/tests/utils.txt python/utils.py
diffstat 4 files changed, 73 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/python/indicators.py	Mon Jul 15 12:13:08 2013 -0400
+++ b/python/indicators.py	Mon Jul 15 15:08:53 2013 -0400
@@ -96,21 +96,6 @@
             values.append(self.values[key]) 
         return values
 
-    @staticmethod
-    def getDLCSS(indic1, indic2, threshold, delta = float('inf') , method ='min' ):
-	''' compute the distance between two indicators using LCSS
-	two common methods are used: min or mean of the indicators length'''
-        print('Deprecated: this is not appropriate method for indicator comparison')
-        l1 = indic1.valueSorted
-        l2 = indic2.valueSorted
-        DLCSS = None
-        if method == 'min':
-            DLCSS = 1- (LCSS(l1,l2, threshold, delta, distance))/min(len(l1),len(l2))
-        if method == 'mean':
-            average = len(l1)+len(l2)/2
-            DLCSS = 1- ((LCSS(l1,l2, threshold, delta, distance))/average)
-        return DLCSS
-		
 
 def distanceForLCSS(x, y): # lambda x,y:abs(x-y)
     if x == None or y == None:
@@ -123,7 +108,7 @@
     ''' compute the LCSS between two indicators using LCSS'''
     from utils import LCSS
     if indicator1 and indicator2:
-        return LCSS(indicator1.getValues(), indicator2.getValues(), threshold, distanceForLCSS, delta)
+        return LCSS(indicator1.getValues(), indicator2.getValues(), lambda x,y: (distanceForLCSS(x,y) <= threshold), delta)
     else:
         return 0
 
@@ -132,21 +117,24 @@
     ie, the LCSS divided by the min or mean of the indicator lengths'''
     from utils import normalizedLCSS
     if indicator1 and indicator2:
-        return normalizedLCSS(indicator1.getValues(), indicator2.getValues(), threshold, distanceForLCSS, delta, method)
+        return normalizedLCSS(indicator1.getValues(), indicator2.getValues(), lambda x,y: (distanceForLCSS(x,y) <= threshold), delta, method)
     else:
         return 0.
 
 def computeDLCSS(indicator1, indicator2, threshold, delta = float('inf'), method = min):
     ''' compute the LCSS distance between two indicators using LCSS'''
     from utils import DLCSS
-    return DLCSS(indicator1.getValues(), indicator2.getValues(), threshold, distanceForLCSS, delta, method)
+    if indicator1 and indicator2:
+        return DLCSS(indicator1.getValues(), indicator2.getValues(), lambda x,y: (distanceForLCSS(x,y) <= threshold), delta, method)
+    else:
+        return 1.
 
 # aligned LCSS computations
 def computeAlignedLCSS(indicator1, indicator2, threshold, delta = float('inf')):
     ''' compute the aligned LCSS between two indicators using LCSS'''
     from utils import alignedLCSS
     if indicator1 and indicator2:
-        return alignedLCSS(indicator1.getValues(), indicator2.getValues(), threshold, distanceForLCSS, delta)
+        return alignedLCSS(indicator1.getValues(), indicator2.getValues(), lambda x,y: (distanceForLCSS(x,y) <= threshold), delta)
     else:
         return 0
 
@@ -155,14 +143,17 @@
     ie, the LCSS divided by the min or mean of the indicator lengths'''
     from utils import normalizedAlignedLCSS
     if indicator1 and indicator2:
-        return normalizedAlignedLCSS(indicator1.getValues(), indicator2.getValues(), threshold, distanceForLCSS, delta, method)
+        return normalizedAlignedLCSS(indicator1.getValues(), indicator2.getValues(), lambda x,y: (distanceForLCSS(x,y) <= threshold), delta, method)
     else:
         return 0.
 
 def computeAlignedDLCSS(indicator1, indicator2, threshold, delta = float('inf'), method = min):
     ''' compute the aligned LCSS distance between two indicators using LCSS'''
     from utils import alignedDLCSS
-    return alignedDLCSS(indicator1.getValues(), indicator2.getValues(), threshold, distanceForLCSS, delta, method)
+    if indicator1 and indicator2:
+        return alignedDLCSS(indicator1.getValues(), indicator2.getValues(), lambda x,y: (distanceForLCSS(x,y) <= threshold), delta, method)
+    else:
+        return 1.
 
 class SeverityIndicator(TemporalIndicator):
     '''Class for severity indicators 
--- a/python/moving.py	Mon Jul 15 12:13:08 2013 -0400
+++ b/python/moving.py	Mon Jul 15 15:08:53 2013 -0400
@@ -540,11 +540,11 @@
 
     @staticmethod
     def norm2LCSS(t1, t2, threshold):
-        return utils.LCSS(t1, t2, threshold, Point.distanceNorm2)
+        return utils.LCSS(t1, t2, lambda x,y: Point.distanceNorm2(x,y) <= threshold)
 
     @staticmethod
     def normMaxLCSS(t1, t2, threshold):
-        return utils.LCSS(t1, t2, threshold, lambda p1, p2: (p1-p2).normMax())
+        return utils.LCSS(t1, t2, lambda p1, p2: (p1-p2).normMax() <= threshold)
 
 class CurvilinearTrajectory(Trajectory):
     '''Sub class of trajectory for trajectories with curvilinear coordinates and lane assignements
--- a/python/tests/utils.txt	Mon Jul 15 12:13:08 2013 -0400
+++ b/python/tests/utils.txt	Mon Jul 15 15:08:53 2013 -0400
@@ -62,6 +62,15 @@
 3
 >>> LCSS(['a','b','c','x','d'], ['a','b','c','d','x'], lambda x,y: x == y)
 4
+>>> LCSS(['a','b','c'], ['a','b','x','x','c'], lambda x,y: x == y, 1)
+2
+
+>>> LCSS(['a','b','c'], ['a','b','c', 'd'], lambda x,y: x == y, returnSubSequence = True)
+(3, [(0, 0), (1, 1), (2, 2)])
+>>> LCSS(['a','b','c'], ['x','a','b','c'], lambda x,y: x == y, returnSubSequence = True)
+(3, [(0, 1), (1, 2), (2, 3)])
+>>> LCSS(['a','g','b','c'], ['a','b','c', 'd'], lambda x,y: x == y, returnSubSequence = True)
+(3, [(0, 0), (2, 1), (3, 2)])
 
 >>> alignedLCSS(range(5), range(5), lambda x,y:(abs(x-y) <= 0.1), 2)
 5
--- a/python/utils.py	Mon Jul 15 12:13:08 2013 -0400
+++ b/python/utils.py	Mon Jul 15 15:08:53 2013 -0400
@@ -141,34 +141,62 @@
 # sequence section
 #########################
 
-def LCSS(l1, l2, similarityFunc, delta = float('inf')):
+def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')):
+    from numpy import zeros, int as npint
+    n1 = len(l1)
+    n2 = len(l2)
+    similarities = zeros((n1+1,n2+1), dtype = npint)
+    for i in xrange(1,n1+1):
+        for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)):
+            if similarityFunc(l1[i-1], l2[j-1]):
+                similarities[i,j] = similarities[i-1,j-1]+1
+            else:
+                similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1])
+    return similarities
+
+def subSequence(similarities, i, j):
+    '''Returns the subsequence of two sequences
+    http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
+    if i == 0 or j == 0:
+        return []
+    elif similarities[i][j] == similarities[i][j-1]:
+        return subSequence(similarities, i, j-1)
+    elif similarities[i][j] == similarities[i-1][j]:
+        return subSequence(similarities, i-1, j)
+    else:
+        return subSequence(similarities, i-1, j-1) + [(i-1,j-1)]
+
+def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False):
     '''returns the longest common subsequence similarity
     based on the threshold on distance between two elements of lists l1, l2
     similarityFunc returns True or False whether the two points are considered similar
 
     eg distance(p1, p2) < epsilon
     '''
-    from numpy import zeros, int as npint
-    n1 = len(l1)
-    n2 = len(l2)
-    similarity = zeros((n1+1,n2+1), dtype = npint)
-    for i in xrange(1,n1+1):
-        for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)):
-            if similarityFunc(l1[i-1], l2[j-1]):
-                similarity[i][j] = similarity[i-1][j-1]+1
-            else:
-                similarity[i][j] = max(similarity[i-1][j], similarity[i][j-1])
-    return max(max(similarity[:,-1]), max(similarity[-1,:]))
+    from numpy import argmax
+    similarities = sequenceSimilarities(l1, l2, similarityFunc, delta)
+    imax = argmax(similarities[:,-1])
+    jmax = argmax(similarities[-1,:])
+    if returnSubSequence:
+        if similarities[imax, -1] > similarities[-1, jmax]:
+            lcss = similarities[imax, -1]
+            subseq = subSequence(similarities, imax, len(l2))
+        else:
+            lcss = similarities[-1, jmax]
+            subseq = subSequence(similarities, len(l1), jmax)
+        return lcss, subseq
+    else:
+        return max(similarities[imax, -1], similarities[-1, jmax])
 
-def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min):
+def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min):
     ''' compute the normalized LCSS
-    ie, the LCSS divided by the min or mean of the indicator lengths (using lengthMethod)
-    lengthMethod = lambda x,y:float(x,y)/2'''
-    return float(LCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2))
+    ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
+    lengthFunc = lambda x,y:float(x,y)/2'''
+    return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
 
-def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min):
+def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min):
     ''' compute the LCSS distance'''
-    return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthMethod)
+    return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
 
 def alignedLCSS(_l1, _l2, similarityFunc, delta):
     '''returns the best matching if using a finite delta by shiftinig the series alignments'''
@@ -187,11 +215,11 @@
     lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))]
     return max(lcss)
 
-def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod = min):
-    return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2))
+def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
+    return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
 
-def alignedDLCSS(l1, l2, similarityFunc, delta, lengthMethod = min):
-    return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod)
+def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
+    return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
 
 #########################
 # maths section