Mercurial Hosting > traffic-intelligence
diff python/utils.py @ 369:027e254f0b53
lcss subclass for indicators
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 15 Jul 2013 16:47:09 -0400 |
parents | 2db4e76599a1 |
children | 97e8fa0ee9a1 |
line wrap: on
line diff
--- a/python/utils.py Mon Jul 15 15:08:53 2013 -0400 +++ b/python/utils.py Mon Jul 15 16:47:09 2013 -0400 @@ -141,86 +141,105 @@ # sequence section ######################### -def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')): - from numpy import zeros, int as npint - n1 = len(l1) - n2 = len(l2) - similarities = zeros((n1+1,n2+1), dtype = npint) - for i in xrange(1,n1+1): - for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): - if similarityFunc(l1[i-1], l2[j-1]): - similarities[i,j] = similarities[i-1,j-1]+1 - else: - similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1]) - return similarities +class LCSS: + '''Class that keeps the LCSS parameters + and puts together the various computations''' + def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): + self.similarityFunc = similarityFunc + self.aligned = aligned + self.delta = delta + self.lengthFunc = lengthFunc -def subSequence(similarities, i, j): - '''Returns the subsequence of two sequences - http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' - if i == 0 or j == 0: - return [] - elif similarities[i][j] == similarities[i][j-1]: - return subSequence(similarities, i, j-1) - elif similarities[i][j] == similarities[i-1][j]: - return subSequence(similarities, i-1, j) - else: - return subSequence(similarities, i-1, j-1) + [(i-1,j-1)] + def similarities(self, l1, l2): + from numpy import zeros, int as npint + n1 = len(l1) + n2 = len(l2) + self.similarityTable = zeros((n1+1,n2+1), dtype = npint) + for i in xrange(1,n1+1): + for j in xrange(max(1,i-self.delta),min(n2+1,i+self.delta+1)): + if self.similarityFunc(l1[i-1], l2[j-1]): + self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 + else: + self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) -def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False): - '''returns the longest common subsequence similarity - based on the threshold on distance between two elements of lists l1, l2 - similarityFunc returns True or False whether the two points are considered similar + def subSequence(self, i, j): + '''Returns the subsequence of two sequences + http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' + if i == 0 or j == 0: + return [] + elif self.similarityTable[i][j] == self.similarityTable[i][j-1]: + return self.subSequence(i, j-1) + elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: + return self.subSequence(i-1, j) + else: + return self.subSequence(i-1, j-1) + [(i-1,j-1)] + + def computeLCSS(self, l1, l2, computeSubSequence = False): + '''returns the longest common subsequence similarity + based on the threshold on distance between two elements of lists l1, l2 + similarityFunc returns True or False whether the two points are considered similar - eg distance(p1, p2) < epsilon - ''' - from numpy import argmax - similarities = sequenceSimilarities(l1, l2, similarityFunc, delta) - imax = argmax(similarities[:,-1]) - jmax = argmax(similarities[-1,:]) - if returnSubSequence: - if similarities[imax, -1] > similarities[-1, jmax]: - lcss = similarities[imax, -1] - subseq = subSequence(similarities, imax, len(l2)) + eg distance(p1, p2) < epsilon + ''' + from numpy import argmax + self.similarities(l1, l2) + imax = argmax(self.similarityTable[:,-1]) + jmax = argmax(self.similarityTable[-1,:]) + if computeSubSequence: + if self.similarityTable[imax, -1] > self.similarityTable[-1, jmax]: + lcss = self.similarityTable[imax, -1] + self.subSequenceIndices = self.subSequence(imax, len(l2)) + else: + lcss = self.similarityTable[imax, -1] + self.subSequenceIndices = self.subSequence(len(l1), jmax) + return lcss else: - lcss = similarities[-1, jmax] - subseq = subSequence(similarities, len(l1), jmax) - return lcss, subseq - else: - return max(similarities[imax, -1], similarities[-1, jmax]) - -def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): - ''' compute the normalized LCSS - ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) - lengthFunc = lambda x,y:float(x,y)/2''' - return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) + return max(self.similarityTable[imax, -1], self.similarityTable[-1, jmax]) -def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): - ''' compute the LCSS distance''' - return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc) + def compute(self, _l1, _l2, computeSubSequence = False): + '''returns the best matching if using a finite delta by shiftinig the series alignments''' + if self.aligned: + if len(_l2) < len(_l1): # l1 is the shortest + l1 = _l2 + l2 = _l1 + else: + l1 = _l1 + l2 = _l2 + n1 = len(l1) + n2 = len(l2) + # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 + # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) + # print l2[max(0,i-n1):] + # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) + lcss = [self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1))] + return max(lcss) + else: + return self.computeLCSS(_l1, _l2, computeSubSequence) -def alignedLCSS(_l1, _l2, similarityFunc, delta): - '''returns the best matching if using a finite delta by shiftinig the series alignments''' - if len(_l2) < len(_l1): # l1 is the shortest - l1 = _l2 - l2 = _l1 - else: - l1 = _l1 - l2 = _l2 - n1 = len(l1) - n2 = len(l2) - # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 - # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) - # print l2[max(0,i-n1):] - # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) - lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] - return max(lcss) + def get(self, l1, l2, computeSubSequence = False): + '''get methods are to be shadowed in child classes ''' + return self.compute(l1, l2, computeSubSequence) + + def computeAlignement(self): + from numpy import mean + return mean([j-i for i,j in self.subSequenceIndices]) -def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): - return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) + def computeNormalized(self, l1, l2): + ''' compute the normalized LCSS + ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) + lengthFunc = lambda x,y:float(x,y)/2''' + return float(self.compute(l1, l2))/self.lengthFunc(len(l1), len(l2)) -def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): - return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc) + def getNormalized(self, l1, l2): + return self.computeNormalized(l1, l2) + def computeDistance(self, l1, l2): + ''' compute the LCSS distance''' + return 1-self.computeNormalized(l1, l2) + + def getDistance(self, l1, l2): + return self.computeDistance(l1, l2) + ######################### # maths section #########################