Mercurial Hosting > traffic-intelligence
changeset 374:a7af3519687e
finished implementation of aligned LCSS with matching sequence decoded
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 17 Jul 2013 00:50:44 -0400 |
parents | d0b86ed50f32 |
children | 2ea8584aa80a |
files | python/tests/utils.txt python/utils.py |
diffstat | 2 files changed, 12 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/python/tests/utils.txt Tue Jul 16 17:45:07 2013 -0400 +++ b/python/tests/utils.txt Wed Jul 17 00:50:44 2013 -0400 @@ -106,3 +106,8 @@ >>> lcss.delta = 10 >>> alignedLcss.compute(range(1,7), range(6)) 5 +>>> lcss = LCSS(lambda x,y: x == y, delta = 2, aligned = True) +>>> lcss.compute(range(20), [2,4,6,7,8,9,11,13], True) +8 +>>> lcss.subSequenceIndices +[(2, 0), (4, 1), (6, 2), (7, 3), (8, 4), (9, 5), (11, 6), (13, 7)]
--- a/python/utils.py Tue Jul 16 17:45:07 2013 -0400 +++ b/python/utils.py Wed Jul 17 00:50:44 2013 -0400 @@ -224,8 +224,7 @@ n2 = len(l2) self.similarityTable = zeros((n1+1,n2+1), dtype = npint) for i in xrange(1,n1+1): - for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)): - print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1) + for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): if self.similarityFunc(l1[i-1], l2[j-1]): self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 else: @@ -243,20 +242,6 @@ else: return self.subSequence(i-1, j-1) + [(i-1,j-1)] - # def computeLCSS(self, l1, l2, computeSubSequence = False): - # '''returns the longest common subsequence similarity - # based on the threshold on distance between two elements of lists l1, l2 - # similarityFunc returns True or False whether the two points are considered similar - - # eg distance(p1, p2) < epsilon - # ''' - # #from numpy import argmax - # self.similarities(l1, l2) - # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] - # if computeSubSequence: - # self.subSequenceIndices = self.subSequence(len(l1), len(l2)) - # return self.similarityTable.max() - def _compute(self, _l1, _l2, computeSubSequence = False): '''returns the longest common subsequence similarity based on the threshold on distance between two elements of lists l1, l2 @@ -280,29 +265,24 @@ if self.aligned: lcssValues = {} similarityTables = {} - for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval - #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) - #print l2[max(0,i-n1):] - #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) - #print i, lcssValues[i] + for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2 self.similarities(l1, l2, i) lcssValues[i] = self.similarityTable.max() similarityTables[i] = self.similarityTable - print self.similarityTable + #print self.similarityTable self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance self.similarityTable = similarityTables[self.alignmentShift] else: self.alignmentShift = 0 self.similarities(l1, l2) - self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1] + # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta + self.similarityTable = self.similarityTable[:min(n1, n2+self.alignmentShift+self.delta)+1, :min(n2, n1-self.alignmentShift+self.delta)+1] if computeSubSequence: self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) if revertIndices: - self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] - else: - self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] + self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices] return self.similarityTable[-1,-1] def compute(self, l1, l2, computeSubSequence = False): @@ -311,7 +291,7 @@ def computeAlignement(self): from numpy import mean - return mean([j-i for i,j in self.subSequenceIndices])+self.alignmentShift + return mean([j-i for i,j in self.subSequenceIndices]) def _computeNormalized(self, l1, l2): ''' compute the normalized LCSS