Mercurial Hosting > traffic-intelligence

--- a/python/utils.py	Tue Jul 16 17:00:17 2013 -0400
+++ b/python/utils.py	Tue Jul 16 17:45:07 2013 -0400
@@ -218,14 +218,14 @@
         self.lengthFunc = lengthFunc
         self.alignmentShift = 0

-    def similarities(self, l1, l2, shift=0):
+    def similarities(self, l1, l2, jshift=0):
         from numpy import zeros, int as npint
         n1 = len(l1)
         n2 = len(l2)
         self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
         for i in xrange(1,n1+1):
-            for j in xrange(max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1)):
-                #print max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1)
+            for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)):
+                print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)
                 if self.similarityFunc(l1[i-1], l2[j-1]):
                     self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
                 else:
@@ -243,22 +243,29 @@
         else:
             return self.subSequence(i-1, j-1) + [(i-1,j-1)]

-    def computeLCSS(self, l1, l2, computeSubSequence = False):
+    # def computeLCSS(self, l1, l2, computeSubSequence = False):
+    #     '''returns the longest common subsequence similarity
+    #     based on the threshold on distance between two elements of lists l1, l2
+    #     similarityFunc returns True or False whether the two points are considered similar
+
+    #     eg distance(p1, p2) < epsilon
+    #     '''
+    #     #from numpy import argmax
+    #     self.similarities(l1, l2)
+    #     #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
+    #     if computeSubSequence:
+    #         self.subSequenceIndices = self.subSequence(len(l1), len(l2))
+    #     return self.similarityTable.max()
+
+    def _compute(self, _l1, _l2, computeSubSequence = False):
         '''returns the longest common subsequence similarity
         based on the threshold on distance between two elements of lists l1, l2
         similarityFunc returns True or False whether the two points are considered similar

+        if aligned, returns the best matching if using a finite delta by shiftinig the series alignments
+
         eg distance(p1, p2) < epsilon
         '''
-        #from numpy import argmax
-        self.similarities(l1, l2)
-        #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
-        if computeSubSequence:
-            self.subSequenceIndices = self.subSequence(len(l1), len(l2))
-        return self.similarityTable.max()
-
-    def _compute(self, _l1, _l2, computeSubSequence = False):
-        '''returns the best matching if using a finite delta by shiftinig the series alignments'''
         if len(_l2) < len(_l1): # l1 is the shortest
             l1 = _l2
             l2 = _l1
@@ -271,39 +278,31 @@
         n2 = len(l2)

         if self.aligned:
-            # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2
-            #     print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
-            #     print l2[max(0,i-n1):]
-            #     print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
             lcssValues = {}
             similarityTables = {}
-            #for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1)):
-            for i in xrange(-max(n1,n2)-self.delta, +max(n1,n2)+self.delta):
+            for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval
                 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
                 #print l2[max(0,i-n1):]
                 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):])
                 #print i, lcssValues[i]
-                lcssValues[i] = self.similarities(l1, l2, i)
+                self.similarities(l1, l2, i)
+                lcssValues[i] = self.similarityTable.max()
                 similarityTables[i] = self.similarityTable
-                #print i
                 print self.similarityTable
-            self.alignmentShift = argMaxDict(lcssValues)
+            self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
             self.similarityTable = similarityTables[self.alignmentShift]
-            # do the subsequence computation here, once similarityTable is set
-            #self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
-            #lcss = lcssValues[imax]
         else:
             self.alignmentShift = 0
             self.similarities(l1, l2)
-        self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
+
+        self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1]
+
         if computeSubSequence:
             self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
             if revertIndices:
                 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices]
-            #self.alignmentShift = imax-n1
             else:
                 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices]
-            #self.alignmentShift = n1-imax
         return self.similarityTable[-1,-1]

     def compute(self, l1, l2, computeSubSequence = False):