Mercurial Hosting > traffic-intelligence

--- a/python/tests/utils.txt	Tue Jul 16 17:45:07 2013 -0400
+++ b/python/tests/utils.txt	Wed Jul 17 00:50:44 2013 -0400
@@ -106,3 +106,8 @@
 >>> lcss.delta = 10
 >>> alignedLcss.compute(range(1,7), range(6))
 5
+>>> lcss = LCSS(lambda x,y: x == y, delta = 2, aligned = True)
+>>> lcss.compute(range(20), [2,4,6,7,8,9,11,13], True)
+8
+>>> lcss.subSequenceIndices
+[(2, 0), (4, 1), (6, 2), (7, 3), (8, 4), (9, 5), (11, 6), (13, 7)]
--- a/python/utils.py	Tue Jul 16 17:45:07 2013 -0400
+++ b/python/utils.py	Wed Jul 17 00:50:44 2013 -0400
@@ -224,8 +224,7 @@
         n2 = len(l2)
         self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
         for i in xrange(1,n1+1):
-            for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)):
-                print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)
+            for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
                 if self.similarityFunc(l1[i-1], l2[j-1]):
                     self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
                 else:
@@ -243,20 +242,6 @@
         else:
             return self.subSequence(i-1, j-1) + [(i-1,j-1)]

-    # def computeLCSS(self, l1, l2, computeSubSequence = False):
-    #     '''returns the longest common subsequence similarity
-    #     based on the threshold on distance between two elements of lists l1, l2
-    #     similarityFunc returns True or False whether the two points are considered similar
-
-    #     eg distance(p1, p2) < epsilon
-    #     '''
-    #     #from numpy import argmax
-    #     self.similarities(l1, l2)
-    #     #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
-    #     if computeSubSequence:
-    #         self.subSequenceIndices = self.subSequence(len(l1), len(l2))
-    #     return self.similarityTable.max()
-
     def _compute(self, _l1, _l2, computeSubSequence = False):
         '''returns the longest common subsequence similarity
         based on the threshold on distance between two elements of lists l1, l2
@@ -280,29 +265,24 @@
         if self.aligned:
             lcssValues = {}
             similarityTables = {}
-            for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval
-                #print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
-                #print l2[max(0,i-n1):]
-                #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):])
-                #print i, lcssValues[i]
+            for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
                 self.similarities(l1, l2, i)
                 lcssValues[i] = self.similarityTable.max()
                 similarityTables[i] = self.similarityTable
-                print self.similarityTable
+                #print self.similarityTable
             self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
             self.similarityTable = similarityTables[self.alignmentShift]
         else:
             self.alignmentShift = 0
             self.similarities(l1, l2)

-        self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1]
+        # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
+        self.similarityTable = self.similarityTable[:min(n1, n2+self.alignmentShift+self.delta)+1, :min(n2, n1-self.alignmentShift+self.delta)+1]

         if computeSubSequence:
             self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
             if revertIndices:
-                self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices]
-            else:
-                self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices]
+                self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices]
         return self.similarityTable[-1,-1]

     def compute(self, l1, l2, computeSubSequence = False):
@@ -311,7 +291,7 @@

     def computeAlignement(self):
         from numpy import mean
-        return mean([j-i for i,j in self.subSequenceIndices])+self.alignmentShift
+        return mean([j-i for i,j in self.subSequenceIndices])

     def _computeNormalized(self, l1, l2):
         ''' compute the normalized LCSS