Mercurial Hosting > traffic-intelligence
changeset 373:d0b86ed50f32
work in progress on LCSS
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 16 Jul 2013 17:45:07 -0400 |
parents | 349eb1e09f45 |
children | a7af3519687e |
files | python/utils.py |
diffstat | 1 files changed, 27 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/python/utils.py Tue Jul 16 17:00:17 2013 -0400 +++ b/python/utils.py Tue Jul 16 17:45:07 2013 -0400 @@ -218,14 +218,14 @@ self.lengthFunc = lengthFunc self.alignmentShift = 0 - def similarities(self, l1, l2, shift=0): + def similarities(self, l1, l2, jshift=0): from numpy import zeros, int as npint n1 = len(l1) n2 = len(l2) self.similarityTable = zeros((n1+1,n2+1), dtype = npint) for i in xrange(1,n1+1): - for j in xrange(max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1)): - #print max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1) + for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)): + print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1) if self.similarityFunc(l1[i-1], l2[j-1]): self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 else: @@ -243,22 +243,29 @@ else: return self.subSequence(i-1, j-1) + [(i-1,j-1)] - def computeLCSS(self, l1, l2, computeSubSequence = False): + # def computeLCSS(self, l1, l2, computeSubSequence = False): + # '''returns the longest common subsequence similarity + # based on the threshold on distance between two elements of lists l1, l2 + # similarityFunc returns True or False whether the two points are considered similar + + # eg distance(p1, p2) < epsilon + # ''' + # #from numpy import argmax + # self.similarities(l1, l2) + # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] + # if computeSubSequence: + # self.subSequenceIndices = self.subSequence(len(l1), len(l2)) + # return self.similarityTable.max() + + def _compute(self, _l1, _l2, computeSubSequence = False): '''returns the longest common subsequence similarity based on the threshold on distance between two elements of lists l1, l2 similarityFunc returns True or False whether the two points are considered similar + if aligned, returns the best matching if using a finite delta by shiftinig the series alignments + eg distance(p1, p2) < epsilon ''' - #from numpy import argmax - self.similarities(l1, l2) - #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] - if computeSubSequence: - self.subSequenceIndices = self.subSequence(len(l1), len(l2)) - return self.similarityTable.max() - - def _compute(self, _l1, _l2, computeSubSequence = False): - '''returns the best matching if using a finite delta by shiftinig the series alignments''' if len(_l2) < len(_l1): # l1 is the shortest l1 = _l2 l2 = _l1 @@ -271,39 +278,31 @@ n2 = len(l2) if self.aligned: - # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 - # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) - # print l2[max(0,i-n1):] - # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) lcssValues = {} similarityTables = {} - #for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1)): - for i in xrange(-max(n1,n2)-self.delta, +max(n1,n2)+self.delta): + for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) #print l2[max(0,i-n1):] #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) #print i, lcssValues[i] - lcssValues[i] = self.similarities(l1, l2, i) + self.similarities(l1, l2, i) + lcssValues[i] = self.similarityTable.max() similarityTables[i] = self.similarityTable - #print i print self.similarityTable - self.alignmentShift = argMaxDict(lcssValues) + self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance self.similarityTable = similarityTables[self.alignmentShift] - # do the subsequence computation here, once similarityTable is set - #self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) - #lcss = lcssValues[imax] else: self.alignmentShift = 0 self.similarities(l1, l2) - self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] + + self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1] + if computeSubSequence: self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) if revertIndices: self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] - #self.alignmentShift = imax-n1 else: self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] - #self.alignmentShift = n1-imax return self.similarityTable[-1,-1] def compute(self, l1, l2, computeSubSequence = False):