diff python/utils.py @ 368:2db4e76599a1

implemented subsequence extraction and rearranged arguments
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 15 Jul 2013 15:08:53 -0400
parents 90bdabc06e9f
children 027e254f0b53
line wrap: on
line diff
--- a/python/utils.py	Mon Jul 15 12:13:08 2013 -0400
+++ b/python/utils.py	Mon Jul 15 15:08:53 2013 -0400
@@ -141,34 +141,62 @@
 # sequence section
 #########################
 
-def LCSS(l1, l2, similarityFunc, delta = float('inf')):
+def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')):
+    from numpy import zeros, int as npint
+    n1 = len(l1)
+    n2 = len(l2)
+    similarities = zeros((n1+1,n2+1), dtype = npint)
+    for i in xrange(1,n1+1):
+        for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)):
+            if similarityFunc(l1[i-1], l2[j-1]):
+                similarities[i,j] = similarities[i-1,j-1]+1
+            else:
+                similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1])
+    return similarities
+
+def subSequence(similarities, i, j):
+    '''Returns the subsequence of two sequences
+    http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
+    if i == 0 or j == 0:
+        return []
+    elif similarities[i][j] == similarities[i][j-1]:
+        return subSequence(similarities, i, j-1)
+    elif similarities[i][j] == similarities[i-1][j]:
+        return subSequence(similarities, i-1, j)
+    else:
+        return subSequence(similarities, i-1, j-1) + [(i-1,j-1)]
+
+def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False):
     '''returns the longest common subsequence similarity
     based on the threshold on distance between two elements of lists l1, l2
     similarityFunc returns True or False whether the two points are considered similar
 
     eg distance(p1, p2) < epsilon
     '''
-    from numpy import zeros, int as npint
-    n1 = len(l1)
-    n2 = len(l2)
-    similarity = zeros((n1+1,n2+1), dtype = npint)
-    for i in xrange(1,n1+1):
-        for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)):
-            if similarityFunc(l1[i-1], l2[j-1]):
-                similarity[i][j] = similarity[i-1][j-1]+1
-            else:
-                similarity[i][j] = max(similarity[i-1][j], similarity[i][j-1])
-    return max(max(similarity[:,-1]), max(similarity[-1,:]))
+    from numpy import argmax
+    similarities = sequenceSimilarities(l1, l2, similarityFunc, delta)
+    imax = argmax(similarities[:,-1])
+    jmax = argmax(similarities[-1,:])
+    if returnSubSequence:
+        if similarities[imax, -1] > similarities[-1, jmax]:
+            lcss = similarities[imax, -1]
+            subseq = subSequence(similarities, imax, len(l2))
+        else:
+            lcss = similarities[-1, jmax]
+            subseq = subSequence(similarities, len(l1), jmax)
+        return lcss, subseq
+    else:
+        return max(similarities[imax, -1], similarities[-1, jmax])
 
-def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min):
+def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min):
     ''' compute the normalized LCSS
-    ie, the LCSS divided by the min or mean of the indicator lengths (using lengthMethod)
-    lengthMethod = lambda x,y:float(x,y)/2'''
-    return float(LCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2))
+    ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
+    lengthFunc = lambda x,y:float(x,y)/2'''
+    return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
 
-def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min):
+def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min):
     ''' compute the LCSS distance'''
-    return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthMethod)
+    return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
 
 def alignedLCSS(_l1, _l2, similarityFunc, delta):
     '''returns the best matching if using a finite delta by shiftinig the series alignments'''
@@ -187,11 +215,11 @@
     lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))]
     return max(lcss)
 
-def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod = min):
-    return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2))
+def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
+    return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
 
-def alignedDLCSS(l1, l2, similarityFunc, delta, lengthMethod = min):
-    return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod)
+def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
+    return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
 
 #########################
 # maths section