changeset 370:97e8fa0ee9a1

work in progress for complete alignment
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 15 Jul 2013 18:18:44 -0400
parents 027e254f0b53
children 924e38c9f70e
files python/indicators.py python/utils.py
diffstat 2 files changed, 125 insertions(+), 109 deletions(-) [+]
line wrap: on
line diff
--- a/python/indicators.py	Mon Jul 15 16:47:09 2013 -0400
+++ b/python/indicators.py	Mon Jul 15 18:18:44 2013 -0400
@@ -110,21 +110,21 @@
     def __init__(self, threshold, delta = float('inf'), aligned = False, lengthFunc = min):
         utilsLCSS.__init__(self, lambda x,y: (distanceForLCSS(x,y) <= threshold), delta, aligned, lengthFunc)
 
-    def get(self, indicator1, indicator2):
+    def compute(self, indicator1, indicator2):
         if indicator1 and indicator2:
-            return self.compute(indicator1.getValues(), indicator2.getValues())
+            return self._compute(indicator1.getValues(), indicator2.getValues())
         else:
             return 0
 
-    def getNormalized(self, indicator1, indicator2):
+    def computeNormalized(self, indicator1, indicator2):
         if indicator1 and indicator2:
-            return self.computeNormalized(indicator1.getValues(), indicator2.getValues())
+            return self._computeNormalized(indicator1.getValues(), indicator2.getValues())
         else:
             return 0.
 
-    def getDistance(self, indicator1, indicator2):
+    def computeDistance(self, indicator1, indicator2):
         if indicator1 and indicator2:
-            return self.computeDistance(indicator1.getValues(), indicator2.getValues())
+            return self._computeDistance(indicator1.getValues(), indicator2.getValues())
         else:
             return 1.
         
--- a/python/utils.py	Mon Jul 15 16:47:09 2013 -0400
+++ b/python/utils.py	Mon Jul 15 18:18:44 2013 -0400
@@ -138,111 +138,11 @@
 
 
 #########################
-# sequence section
+# maths section
 #########################
 
-class LCSS:
-    '''Class that keeps the LCSS parameters
-    and puts together the various computations'''
-    def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
-        self.similarityFunc = similarityFunc
-        self.aligned = aligned
-        self.delta = delta
-        self.lengthFunc = lengthFunc
-
-    def similarities(self, l1, l2):
-        from numpy import zeros, int as npint
-        n1 = len(l1)
-        n2 = len(l2)
-        self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
-        for i in xrange(1,n1+1):
-            for j in xrange(max(1,i-self.delta),min(n2+1,i+self.delta+1)):
-                if self.similarityFunc(l1[i-1], l2[j-1]):
-                    self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
-                else:
-                    self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
-
-    def subSequence(self, i, j):
-        '''Returns the subsequence of two sequences
-        http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
-        if i == 0 or j == 0:
-            return []
-        elif self.similarityTable[i][j] == self.similarityTable[i][j-1]:
-            return self.subSequence(i, j-1)
-        elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
-            return self.subSequence(i-1, j)
-        else:
-            return self.subSequence(i-1, j-1) + [(i-1,j-1)]
-
-    def computeLCSS(self, l1, l2, computeSubSequence = False):
-        '''returns the longest common subsequence similarity
-        based on the threshold on distance between two elements of lists l1, l2
-        similarityFunc returns True or False whether the two points are considered similar
-
-        eg distance(p1, p2) < epsilon
-        '''
-        from numpy import argmax
-        self.similarities(l1, l2)
-        imax = argmax(self.similarityTable[:,-1])
-        jmax = argmax(self.similarityTable[-1,:])
-        if computeSubSequence:
-            if self.similarityTable[imax, -1] > self.similarityTable[-1, jmax]:
-                lcss = self.similarityTable[imax, -1]
-                self.subSequenceIndices = self.subSequence(imax, len(l2))
-            else:
-                lcss = self.similarityTable[imax, -1]
-                self.subSequenceIndices = self.subSequence(len(l1), jmax)
-            return lcss
-        else:
-            return max(self.similarityTable[imax, -1], self.similarityTable[-1, jmax])
-
-    def compute(self, _l1, _l2, computeSubSequence = False):
-        '''returns the best matching if using a finite delta by shiftinig the series alignments'''
-        if self.aligned:
-            if len(_l2) < len(_l1): # l1 is the shortest
-                l1 = _l2
-                l2 = _l1
-            else:
-                l1 = _l1
-                l2 = _l2
-            n1 = len(l1)
-            n2 = len(l2)
-            # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2
-            #     print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
-            #     print l2[max(0,i-n1):]
-            #     print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
-            lcss = [self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1))]
-            return max(lcss)
-        else:
-            return self.computeLCSS(_l1, _l2, computeSubSequence)
-
-    def get(self, l1, l2, computeSubSequence = False):
-        '''get methods are to be shadowed in child classes '''
-        return self.compute(l1, l2, computeSubSequence)
-
-    def computeAlignement(self):
-        from numpy import mean
-        return mean([j-i for i,j in self.subSequenceIndices])
-
-    def computeNormalized(self, l1, l2):
-        ''' compute the normalized LCSS
-        ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
-        lengthFunc = lambda x,y:float(x,y)/2'''
-        return float(self.compute(l1, l2))/self.lengthFunc(len(l1), len(l2))
-
-    def getNormalized(self, l1, l2):
-        return self.computeNormalized(l1, l2)
-
-    def computeDistance(self, l1, l2):
-        ''' compute the LCSS distance'''
-        return 1-self.computeNormalized(l1, l2)
-
-    def getDistance(self, l1, l2):
-        return self.computeDistance(l1, l2)
-    
-#########################
-# maths section
-#########################
+def argMaxDict(d):
+    return max(d.iterkeys(), key=(lambda key: d[key]))
 
 def framesToTime(nFrames, frameRate, initialTime = (0.,0.,0.)):
     'returns hour, minutes and seconds'
@@ -305,6 +205,122 @@
     return coef
 
 #########################
+# sequence section
+#########################
+
+class LCSS:
+    '''Class that keeps the LCSS parameters
+    and puts together the various computations'''
+    def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
+        self.similarityFunc = similarityFunc
+        self.aligned = aligned
+        self.delta = delta
+        self.lengthFunc = lengthFunc
+        self.alignmentShift = 0
+
+    def similarities(self, l1, l2):
+        from numpy import zeros, int as npint
+        n1 = len(l1)
+        n2 = len(l2)
+        self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
+        for i in xrange(1,n1+1):
+            for j in xrange(max(1,i-self.delta),min(n2+1,i+self.delta+1)):
+                if self.similarityFunc(l1[i-1], l2[j-1]):
+                    self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                else:
+                    self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+
+    def subSequence(self, i, j):
+        '''Returns the subsequence of two sequences
+        http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
+        if i == 0 or j == 0:
+            return []
+        elif self.similarityTable[i][j] == self.similarityTable[i][j-1]:
+            return self.subSequence(i, j-1)
+        elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
+            return self.subSequence(i-1, j)
+        else:
+            return self.subSequence(i-1, j-1) + [(i-1,j-1)]
+
+    def computeLCSS(self, l1, l2, computeSubSequence = False):
+        '''returns the longest common subsequence similarity
+        based on the threshold on distance between two elements of lists l1, l2
+        similarityFunc returns True or False whether the two points are considered similar
+
+        eg distance(p1, p2) < epsilon
+        '''
+        from numpy import argmax
+        self.similarities(l1, l2)
+        imax = argmax(self.similarityTable[:,-1])
+        jmax = argmax(self.similarityTable[-1,:])
+        if computeSubSequence:
+            if self.similarityTable[imax, -1] > self.similarityTable[-1, jmax]:
+                self.similarityTable = self.similarityTable[:imax+1, :]
+                self.subSequenceIndices = self.subSequence(imax, len(l2))
+            else:
+                self.similarityTable = self.similarityTable[:, :jmax+1]
+                self.subSequenceIndices = self.subSequence(len(l1), jmax)
+            return self.similarityTable[-1,-1]
+        else:
+            return max(self.similarityTable[imax, -1], self.similarityTable[-1, jmax])
+
+    def _compute(self, _l1, _l2, computeSubSequence = False):
+        '''returns the best matching if using a finite delta by shiftinig the series alignments'''
+        if self.aligned:
+            from numpy import argmax
+            if len(_l2) < len(_l1): # l1 is the shortest
+                l1 = _l2
+                l2 = _l1
+            else:
+                l1 = _l1
+                l2 = _l2
+            n1 = len(l1)
+            n2 = len(l2)
+            # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2
+            #     print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
+            #     print l2[max(0,i-n1):]
+            #     print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
+            lcssValues = {}
+            similarityTables = {}
+            for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1), max(1,self.delta)):
+                print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
+                print l2[max(0,i-n1):]
+                lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):])
+                print i, lcssValues[i]
+                similarityTables[i] = self.similarityTable
+            imax = argMaxDict(lcssValues)
+            self.similarityTable = similarityTables[imax]
+            self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
+            self.alignmentShift = max(0,imax-n1)-min(-imax-1,n1)
+            return lcssValues[imax]
+        else:
+            return self.computeLCSS(_l1, _l2, computeSubSequence)
+
+    def compute(self, l1, l2, computeSubSequence = False):
+        '''get methods are to be shadowed in child classes '''
+        return self._compute(l1, l2, computeSubSequence)
+
+    def computeAlignement(self):
+        from numpy import mean
+        return mean([j-i for i,j in self.subSequenceIndices])+self.alignmentShift
+
+    def _computeNormalized(self, l1, l2):
+        ''' compute the normalized LCSS
+        ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
+        lengthFunc = lambda x,y:float(x,y)/2'''
+        return float(self.compute(l1, l2))/self.lengthFunc(len(l1), len(l2))
+
+    def computeNormalized(self, l1, l2):
+        return self._computeNormalized(l1, l2)
+
+    def _computeDistance(self, l1, l2):
+        ''' compute the LCSS distance'''
+        return 1-self.computeNormalized(l1, l2)
+
+    def computeDistance(self, l1, l2):
+        return self._computeDistance(l1, l2)
+    
+#########################
 # plotting section
 #########################