comparison python/utils.py @ 374:a7af3519687e

finished implementation of aligned LCSS with matching sequence decoded
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 17 Jul 2013 00:50:44 -0400
parents d0b86ed50f32
children 2ea8584aa80a
comparison
equal deleted inserted replaced
373:d0b86ed50f32 374:a7af3519687e
222 from numpy import zeros, int as npint 222 from numpy import zeros, int as npint
223 n1 = len(l1) 223 n1 = len(l1)
224 n2 = len(l2) 224 n2 = len(l2)
225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) 225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
226 for i in xrange(1,n1+1): 226 for i in xrange(1,n1+1):
227 for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)): 227 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
228 print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)
229 if self.similarityFunc(l1[i-1], l2[j-1]): 228 if self.similarityFunc(l1[i-1], l2[j-1]):
230 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 229 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
231 else: 230 else:
232 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) 231 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
233 232
241 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: 240 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]:
242 return self.subSequence(i-1, j) 241 return self.subSequence(i-1, j)
243 else: 242 else:
244 return self.subSequence(i-1, j-1) + [(i-1,j-1)] 243 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
245 244
246 # def computeLCSS(self, l1, l2, computeSubSequence = False):
247 # '''returns the longest common subsequence similarity
248 # based on the threshold on distance between two elements of lists l1, l2
249 # similarityFunc returns True or False whether the two points are considered similar
250
251 # eg distance(p1, p2) < epsilon
252 # '''
253 # #from numpy import argmax
254 # self.similarities(l1, l2)
255 # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1]
256 # if computeSubSequence:
257 # self.subSequenceIndices = self.subSequence(len(l1), len(l2))
258 # return self.similarityTable.max()
259
260 def _compute(self, _l1, _l2, computeSubSequence = False): 245 def _compute(self, _l1, _l2, computeSubSequence = False):
261 '''returns the longest common subsequence similarity 246 '''returns the longest common subsequence similarity
262 based on the threshold on distance between two elements of lists l1, l2 247 based on the threshold on distance between two elements of lists l1, l2
263 similarityFunc returns True or False whether the two points are considered similar 248 similarityFunc returns True or False whether the two points are considered similar
264 249
278 n2 = len(l2) 263 n2 = len(l2)
279 264
280 if self.aligned: 265 if self.aligned:
281 lcssValues = {} 266 lcssValues = {}
282 similarityTables = {} 267 similarityTables = {}
283 for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval 268 for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
284 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1)
285 #print l2[max(0,i-n1):]
286 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):])
287 #print i, lcssValues[i]
288 self.similarities(l1, l2, i) 269 self.similarities(l1, l2, i)
289 lcssValues[i] = self.similarityTable.max() 270 lcssValues[i] = self.similarityTable.max()
290 similarityTables[i] = self.similarityTable 271 similarityTables[i] = self.similarityTable
291 print self.similarityTable 272 #print self.similarityTable
292 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance 273 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
293 self.similarityTable = similarityTables[self.alignmentShift] 274 self.similarityTable = similarityTables[self.alignmentShift]
294 else: 275 else:
295 self.alignmentShift = 0 276 self.alignmentShift = 0
296 self.similarities(l1, l2) 277 self.similarities(l1, l2)
297 278
298 self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1] 279 # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
280 self.similarityTable = self.similarityTable[:min(n1, n2+self.alignmentShift+self.delta)+1, :min(n2, n1-self.alignmentShift+self.delta)+1]
299 281
300 if computeSubSequence: 282 if computeSubSequence:
301 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) 283 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
302 if revertIndices: 284 if revertIndices:
303 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] 285 self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices]
304 else:
305 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices]
306 return self.similarityTable[-1,-1] 286 return self.similarityTable[-1,-1]
307 287
308 def compute(self, l1, l2, computeSubSequence = False): 288 def compute(self, l1, l2, computeSubSequence = False):
309 '''get methods are to be shadowed in child classes ''' 289 '''get methods are to be shadowed in child classes '''
310 return self._compute(l1, l2, computeSubSequence) 290 return self._compute(l1, l2, computeSubSequence)
311 291
312 def computeAlignement(self): 292 def computeAlignement(self):
313 from numpy import mean 293 from numpy import mean
314 return mean([j-i for i,j in self.subSequenceIndices])+self.alignmentShift 294 return mean([j-i for i,j in self.subSequenceIndices])
315 295
316 def _computeNormalized(self, l1, l2): 296 def _computeNormalized(self, l1, l2):
317 ''' compute the normalized LCSS 297 ''' compute the normalized LCSS
318 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) 298 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
319 lengthFunc = lambda x,y:float(x,y)/2''' 299 lengthFunc = lambda x,y:float(x,y)/2'''