comparison python/utils.py @ 368:2db4e76599a1

implemented subsequence extraction and rearranged arguments
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 15 Jul 2013 15:08:53 -0400
parents 90bdabc06e9f
children 027e254f0b53
comparison
equal deleted inserted replaced
367:d44eba0db517 368:2db4e76599a1
139 139
140 ######################### 140 #########################
141 # sequence section 141 # sequence section
142 ######################### 142 #########################
143 143
144 def LCSS(l1, l2, similarityFunc, delta = float('inf')): 144 def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')):
145 from numpy import zeros, int as npint
146 n1 = len(l1)
147 n2 = len(l2)
148 similarities = zeros((n1+1,n2+1), dtype = npint)
149 for i in xrange(1,n1+1):
150 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)):
151 if similarityFunc(l1[i-1], l2[j-1]):
152 similarities[i,j] = similarities[i-1,j-1]+1
153 else:
154 similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1])
155 return similarities
156
157 def subSequence(similarities, i, j):
158 '''Returns the subsequence of two sequences
159 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
160 if i == 0 or j == 0:
161 return []
162 elif similarities[i][j] == similarities[i][j-1]:
163 return subSequence(similarities, i, j-1)
164 elif similarities[i][j] == similarities[i-1][j]:
165 return subSequence(similarities, i-1, j)
166 else:
167 return subSequence(similarities, i-1, j-1) + [(i-1,j-1)]
168
169 def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False):
145 '''returns the longest common subsequence similarity 170 '''returns the longest common subsequence similarity
146 based on the threshold on distance between two elements of lists l1, l2 171 based on the threshold on distance between two elements of lists l1, l2
147 similarityFunc returns True or False whether the two points are considered similar 172 similarityFunc returns True or False whether the two points are considered similar
148 173
149 eg distance(p1, p2) < epsilon 174 eg distance(p1, p2) < epsilon
150 ''' 175 '''
151 from numpy import zeros, int as npint 176 from numpy import argmax
152 n1 = len(l1) 177 similarities = sequenceSimilarities(l1, l2, similarityFunc, delta)
153 n2 = len(l2) 178 imax = argmax(similarities[:,-1])
154 similarity = zeros((n1+1,n2+1), dtype = npint) 179 jmax = argmax(similarities[-1,:])
155 for i in xrange(1,n1+1): 180 if returnSubSequence:
156 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): 181 if similarities[imax, -1] > similarities[-1, jmax]:
157 if similarityFunc(l1[i-1], l2[j-1]): 182 lcss = similarities[imax, -1]
158 similarity[i][j] = similarity[i-1][j-1]+1 183 subseq = subSequence(similarities, imax, len(l2))
159 else: 184 else:
160 similarity[i][j] = max(similarity[i-1][j], similarity[i][j-1]) 185 lcss = similarities[-1, jmax]
161 return max(max(similarity[:,-1]), max(similarity[-1,:])) 186 subseq = subSequence(similarities, len(l1), jmax)
162 187 return lcss, subseq
163 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min): 188 else:
189 return max(similarities[imax, -1], similarities[-1, jmax])
190
191 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min):
164 ''' compute the normalized LCSS 192 ''' compute the normalized LCSS
165 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthMethod) 193 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
166 lengthMethod = lambda x,y:float(x,y)/2''' 194 lengthFunc = lambda x,y:float(x,y)/2'''
167 return float(LCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2)) 195 return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
168 196
169 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min): 197 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min):
170 ''' compute the LCSS distance''' 198 ''' compute the LCSS distance'''
171 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthMethod) 199 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
172 200
173 def alignedLCSS(_l1, _l2, similarityFunc, delta): 201 def alignedLCSS(_l1, _l2, similarityFunc, delta):
174 '''returns the best matching if using a finite delta by shiftinig the series alignments''' 202 '''returns the best matching if using a finite delta by shiftinig the series alignments'''
175 if len(_l2) < len(_l1): # l1 is the shortest 203 if len(_l2) < len(_l1): # l1 is the shortest
176 l1 = _l2 204 l1 = _l2
185 # print l2[max(0,i-n1):] 213 # print l2[max(0,i-n1):]
186 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) 214 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
187 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] 215 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))]
188 return max(lcss) 216 return max(lcss)
189 217
190 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod = min): 218 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
191 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2)) 219 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
192 220
193 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthMethod = min): 221 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
194 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod) 222 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
195 223
196 ######################### 224 #########################
197 # maths section 225 # maths section
198 ######################### 226 #########################
199 227