repo/traffic-intelligence: python/utils.py comparison

comparison python/utils.py @ 374:a7af3519687e

finished implementation of aligned LCSS with matching sequence decoded

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Wed, 17 Jul 2013 00:50:44 -0400
parents	d0b86ed50f32
children	2ea8584aa80a

comparison

equal deleted inserted replaced

-:d0b86ed50f32
+:a7af3519687e
 from numpy import zeros, int as npint
 n1 = len(l1)
 n2 = len(l2)
 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
 for i in xrange(1,n1+1):
-for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)):
+for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
-print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)
 if self.similarityFunc(l1[i-1], l2[j-1]):
 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
 else:
 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
 return self.subSequence(i-1, j)
 else:
 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
-# def computeLCSS(self, l1, l2, computeSubSequence = False):
-#     '''returns the longest common subsequence similarity
-#     based on the threshold on distance between two elements of lists l1, l2
-#     similarityFunc returns True or False whether the two points are considered similar
-#     eg distance(p1, p2) < epsilon
-#     '''
-#     #from numpy import argmax
-#     self.similarities(l1, l2)
-#     #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
-#     if computeSubSequence:
-#         self.subSequenceIndices = self.subSequence(len(l1), len(l2))
-#     return self.similarityTable.max()
 def _compute(self, _l1, _l2, computeSubSequence = False):
 '''returns the longest common subsequence similarity
 based on the threshold on distance between two elements of lists l1, l2
 similarityFunc returns True or False whether the two points are considered similar
 n2 = len(l2)
 if self.aligned:
 lcssValues = {}
 similarityTables = {}
-for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval
+for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
-#print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
-#print l2[max(0,i-n1):]
-#lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):])
-#print i, lcssValues[i]
 self.similarities(l1, l2, i)
 lcssValues[i] = self.similarityTable.max()
 similarityTables[i] = self.similarityTable
-print self.similarityTable
+#print self.similarityTable
 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
 self.similarityTable = similarityTables[self.alignmentShift]
 else:
 self.alignmentShift = 0
 self.similarities(l1, l2)
-self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1]
+# threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
+self.similarityTable = self.similarityTable[:min(n1, n2+self.alignmentShift+self.delta)+1, :min(n2, n1-self.alignmentShift+self.delta)+1]
 if computeSubSequence:
 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
 if revertIndices:
-self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices]
+self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices]
-else:
-self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices]
 return self.similarityTable[-1,-1]
 def compute(self, l1, l2, computeSubSequence = False):
 '''get methods are to be shadowed in child classes '''
 return self._compute(l1, l2, computeSubSequence)
 def computeAlignement(self):
 from numpy import mean
-return mean([j-i for i,j in self.subSequenceIndices])+self.alignmentShift
+return mean([j-i for i,j in self.subSequenceIndices])
 def _computeNormalized(self, l1, l2):
 ''' compute the normalized LCSS
 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
 lengthFunc = lambda x,y:float(x,y)/2'''

Mercurial Hosting > traffic-intelligence

comparison python/utils.py @ 374:a7af3519687e