comparison python/utils.py @ 373:d0b86ed50f32

work in progress on LCSS
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Tue, 16 Jul 2013 17:45:07 -0400
parents 349eb1e09f45
children a7af3519687e
comparison
equal deleted inserted replaced
372:349eb1e09f45 373:d0b86ed50f32
216 self.aligned = aligned 216 self.aligned = aligned
217 self.delta = delta 217 self.delta = delta
218 self.lengthFunc = lengthFunc 218 self.lengthFunc = lengthFunc
219 self.alignmentShift = 0 219 self.alignmentShift = 0
220 220
221 def similarities(self, l1, l2, shift=0): 221 def similarities(self, l1, l2, jshift=0):
222 from numpy import zeros, int as npint 222 from numpy import zeros, int as npint
223 n1 = len(l1) 223 n1 = len(l1)
224 n2 = len(l2) 224 n2 = len(l2)
225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) 225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
226 for i in xrange(1,n1+1): 226 for i in xrange(1,n1+1):
227 for j in xrange(max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1)): 227 for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)):
228 #print max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1) 228 print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)
229 if self.similarityFunc(l1[i-1], l2[j-1]): 229 if self.similarityFunc(l1[i-1], l2[j-1]):
230 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 230 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
231 else: 231 else:
232 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) 232 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
233 233
241 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: 241 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
242 return self.subSequence(i-1, j) 242 return self.subSequence(i-1, j)
243 else: 243 else:
244 return self.subSequence(i-1, j-1) + [(i-1,j-1)] 244 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
245 245
246 def computeLCSS(self, l1, l2, computeSubSequence = False): 246 # def computeLCSS(self, l1, l2, computeSubSequence = False):
247 # '''returns the longest common subsequence similarity
248 # based on the threshold on distance between two elements of lists l1, l2
249 # similarityFunc returns True or False whether the two points are considered similar
250
251 # eg distance(p1, p2) < epsilon
252 # '''
253 # #from numpy import argmax
254 # self.similarities(l1, l2)
255 # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
256 # if computeSubSequence:
257 # self.subSequenceIndices = self.subSequence(len(l1), len(l2))
258 # return self.similarityTable.max()
259
260 def _compute(self, _l1, _l2, computeSubSequence = False):
247 '''returns the longest common subsequence similarity 261 '''returns the longest common subsequence similarity
248 based on the threshold on distance between two elements of lists l1, l2 262 based on the threshold on distance between two elements of lists l1, l2
249 similarityFunc returns True or False whether the two points are considered similar 263 similarityFunc returns True or False whether the two points are considered similar
250 264
265 if aligned, returns the best matching if using a finite delta by shiftinig the series alignments
266
251 eg distance(p1, p2) < epsilon 267 eg distance(p1, p2) < epsilon
252 ''' 268 '''
253 #from numpy import argmax
254 self.similarities(l1, l2)
255 #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
256 if computeSubSequence:
257 self.subSequenceIndices = self.subSequence(len(l1), len(l2))
258 return self.similarityTable.max()
259
260 def _compute(self, _l1, _l2, computeSubSequence = False):
261 '''returns the best matching if using a finite delta by shiftinig the series alignments'''
262 if len(_l2) < len(_l1): # l1 is the shortest 269 if len(_l2) < len(_l1): # l1 is the shortest
263 l1 = _l2 270 l1 = _l2
264 l2 = _l1 271 l2 = _l1
265 revertIndices = True 272 revertIndices = True
266 else: 273 else:
269 revertIndices = False 276 revertIndices = False
270 n1 = len(l1) 277 n1 = len(l1)
271 n2 = len(l2) 278 n2 = len(l2)
272 279
273 if self.aligned: 280 if self.aligned:
274 # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2
275 # print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
276 # print l2[max(0,i-n1):]
277 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta)
278 lcssValues = {} 281 lcssValues = {}
279 similarityTables = {} 282 similarityTables = {}
280 #for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1)): 283 for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval
281 for i in xrange(-max(n1,n2)-self.delta, +max(n1,n2)+self.delta):
282 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) 284 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
283 #print l2[max(0,i-n1):] 285 #print l2[max(0,i-n1):]
284 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) 286 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):])
285 #print i, lcssValues[i] 287 #print i, lcssValues[i]
286 lcssValues[i] = self.similarities(l1, l2, i) 288 self.similarities(l1, l2, i)
289 lcssValues[i] = self.similarityTable.max()
287 similarityTables[i] = self.similarityTable 290 similarityTables[i] = self.similarityTable
288 #print i
289 print self.similarityTable 291 print self.similarityTable
290 self.alignmentShift = argMaxDict(lcssValues) 292 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
291 self.similarityTable = similarityTables[self.alignmentShift] 293 self.similarityTable = similarityTables[self.alignmentShift]
292 # do the subsequence computation here, once similarityTable is set
293 #self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
294 #lcss = lcssValues[imax]
295 else: 294 else:
296 self.alignmentShift = 0 295 self.alignmentShift = 0
297 self.similarities(l1, l2) 296 self.similarities(l1, l2)
298 self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] 297
298 self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1]
299
299 if computeSubSequence: 300 if computeSubSequence:
300 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) 301 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
301 if revertIndices: 302 if revertIndices:
302 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] 303 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices]
303 #self.alignmentShift = imax-n1
304 else: 304 else:
305 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] 305 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices]
306 #self.alignmentShift = n1-imax
307 return self.similarityTable[-1,-1] 306 return self.similarityTable[-1,-1]
308 307
309 def compute(self, l1, l2, computeSubSequence = False): 308 def compute(self, l1, l2, computeSubSequence = False):
310 '''get methods are to be shadowed in child classes ''' 309 '''get methods are to be shadowed in child classes '''
311 return self._compute(l1, l2, computeSubSequence) 310 return self._compute(l1, l2, computeSubSequence)