comparison python/utils.py @ 369:027e254f0b53

lcss subclass for indicators
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 15 Jul 2013 16:47:09 -0400
parents 2db4e76599a1
children 97e8fa0ee9a1
comparison
equal deleted inserted replaced
368:2db4e76599a1 369:027e254f0b53
139 139
140 ######################### 140 #########################
141 # sequence section 141 # sequence section
142 ######################### 142 #########################
143 143
144 def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')): 144 class LCSS:
145 from numpy import zeros, int as npint 145 '''Class that keeps the LCSS parameters
146 n1 = len(l1) 146 and puts together the various computations'''
147 n2 = len(l2) 147 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
148 similarities = zeros((n1+1,n2+1), dtype = npint) 148 self.similarityFunc = similarityFunc
149 for i in xrange(1,n1+1): 149 self.aligned = aligned
150 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): 150 self.delta = delta
151 if similarityFunc(l1[i-1], l2[j-1]): 151 self.lengthFunc = lengthFunc
152 similarities[i,j] = similarities[i-1,j-1]+1 152
153 def similarities(self, l1, l2):
154 from numpy import zeros, int as npint
155 n1 = len(l1)
156 n2 = len(l2)
157 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
158 for i in xrange(1,n1+1):
159 for j in xrange(max(1,i-self.delta),min(n2+1,i+self.delta+1)):
160 if self.similarityFunc(l1[i-1], l2[j-1]):
161 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
162 else:
163 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
164
165 def subSequence(self, i, j):
166 '''Returns the subsequence of two sequences
167 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
168 if i == 0 or j == 0:
169 return []
170 elif self.similarityTable[i][j] == self.similarityTable[i][j-1]:
171 return self.subSequence(i, j-1)
172 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
173 return self.subSequence(i-1, j)
174 else:
175 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
176
177 def computeLCSS(self, l1, l2, computeSubSequence = False):
178 '''returns the longest common subsequence similarity
179 based on the threshold on distance between two elements of lists l1, l2
180 similarityFunc returns True or False whether the two points are considered similar
181
182 eg distance(p1, p2) < epsilon
183 '''
184 from numpy import argmax
185 self.similarities(l1, l2)
186 imax = argmax(self.similarityTable[:,-1])
187 jmax = argmax(self.similarityTable[-1,:])
188 if computeSubSequence:
189 if self.similarityTable[imax, -1] > self.similarityTable[-1, jmax]:
190 lcss = self.similarityTable[imax, -1]
191 self.subSequenceIndices = self.subSequence(imax, len(l2))
153 else: 192 else:
154 similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1]) 193 lcss = self.similarityTable[imax, -1]
155 return similarities 194 self.subSequenceIndices = self.subSequence(len(l1), jmax)
156 195 return lcss
157 def subSequence(similarities, i, j): 196 else:
158 '''Returns the subsequence of two sequences 197 return max(self.similarityTable[imax, -1], self.similarityTable[-1, jmax])
159 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' 198
160 if i == 0 or j == 0: 199 def compute(self, _l1, _l2, computeSubSequence = False):
161 return [] 200 '''returns the best matching if using a finite delta by shiftinig the series alignments'''
162 elif similarities[i][j] == similarities[i][j-1]: 201 if self.aligned:
163 return subSequence(similarities, i, j-1) 202 if len(_l2) < len(_l1): # l1 is the shortest
164 elif similarities[i][j] == similarities[i-1][j]: 203 l1 = _l2
165 return subSequence(similarities, i-1, j) 204 l2 = _l1
166 else: 205 else:
167 return subSequence(similarities, i-1, j-1) + [(i-1,j-1)] 206 l1 = _l1
168 207 l2 = _l2
169 def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False): 208 n1 = len(l1)
170 '''returns the longest common subsequence similarity 209 n2 = len(l2)
171 based on the threshold on distance between two elements of lists l1, l2 210 # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2
172 similarityFunc returns True or False whether the two points are considered similar 211 # print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
173 212 # print l2[max(0,i-n1):]
174 eg distance(p1, p2) < epsilon 213 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
175 ''' 214 lcss = [self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1))]
176 from numpy import argmax 215 return max(lcss)
177 similarities = sequenceSimilarities(l1, l2, similarityFunc, delta) 216 else:
178 imax = argmax(similarities[:,-1]) 217 return self.computeLCSS(_l1, _l2, computeSubSequence)
179 jmax = argmax(similarities[-1,:]) 218
180 if returnSubSequence: 219 def get(self, l1, l2, computeSubSequence = False):
181 if similarities[imax, -1] > similarities[-1, jmax]: 220 '''get methods are to be shadowed in child classes '''
182 lcss = similarities[imax, -1] 221 return self.compute(l1, l2, computeSubSequence)
183 subseq = subSequence(similarities, imax, len(l2)) 222
184 else: 223 def computeAlignement(self):
185 lcss = similarities[-1, jmax] 224 from numpy import mean
186 subseq = subSequence(similarities, len(l1), jmax) 225 return mean([j-i for i,j in self.subSequenceIndices])
187 return lcss, subseq 226
188 else: 227 def computeNormalized(self, l1, l2):
189 return max(similarities[imax, -1], similarities[-1, jmax]) 228 ''' compute the normalized LCSS
190 229 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
191 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): 230 lengthFunc = lambda x,y:float(x,y)/2'''
192 ''' compute the normalized LCSS 231 return float(self.compute(l1, l2))/self.lengthFunc(len(l1), len(l2))
193 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) 232
194 lengthFunc = lambda x,y:float(x,y)/2''' 233 def getNormalized(self, l1, l2):
195 return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) 234 return self.computeNormalized(l1, l2)
196 235
197 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): 236 def computeDistance(self, l1, l2):
198 ''' compute the LCSS distance''' 237 ''' compute the LCSS distance'''
199 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc) 238 return 1-self.computeNormalized(l1, l2)
200 239
201 def alignedLCSS(_l1, _l2, similarityFunc, delta): 240 def getDistance(self, l1, l2):
202 '''returns the best matching if using a finite delta by shiftinig the series alignments''' 241 return self.computeDistance(l1, l2)
203 if len(_l2) < len(_l1): # l1 is the shortest 242
204 l1 = _l2
205 l2 = _l1
206 else:
207 l1 = _l1
208 l2 = _l2
209 n1 = len(l1)
210 n2 = len(l2)
211 # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2
212 # print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
213 # print l2[max(0,i-n1):]
214 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
215 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))]
216 return max(lcss)
217
218 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
219 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2))
220
221 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min):
222 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc)
223
224 ######################### 243 #########################
225 # maths section 244 # maths section
226 ######################### 245 #########################
227 246
228 def framesToTime(nFrames, frameRate, initialTime = (0.,0.,0.)): 247 def framesToTime(nFrames, frameRate, initialTime = (0.,0.,0.)):