Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 374:a7af3519687e
finished implementation of aligned LCSS with matching sequence decoded
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Wed, 17 Jul 2013 00:50:44 -0400 |
parents | d0b86ed50f32 |
children | 2ea8584aa80a |
comparison
equal
deleted
inserted
replaced
373:d0b86ed50f32 | 374:a7af3519687e |
---|---|
222 from numpy import zeros, int as npint | 222 from numpy import zeros, int as npint |
223 n1 = len(l1) | 223 n1 = len(l1) |
224 n2 = len(l2) | 224 n2 = len(l2) |
225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | 225 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
226 for i in xrange(1,n1+1): | 226 for i in xrange(1,n1+1): |
227 for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)): | 227 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
228 print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1) | |
229 if self.similarityFunc(l1[i-1], l2[j-1]): | 228 if self.similarityFunc(l1[i-1], l2[j-1]): |
230 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 229 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
231 else: | 230 else: |
232 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 231 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) |
233 | 232 |
241 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: | 240 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: |
242 return self.subSequence(i-1, j) | 241 return self.subSequence(i-1, j) |
243 else: | 242 else: |
244 return self.subSequence(i-1, j-1) + [(i-1,j-1)] | 243 return self.subSequence(i-1, j-1) + [(i-1,j-1)] |
245 | 244 |
246 # def computeLCSS(self, l1, l2, computeSubSequence = False): | |
247 # '''returns the longest common subsequence similarity | |
248 # based on the threshold on distance between two elements of lists l1, l2 | |
249 # similarityFunc returns True or False whether the two points are considered similar | |
250 | |
251 # eg distance(p1, p2) < epsilon | |
252 # ''' | |
253 # #from numpy import argmax | |
254 # self.similarities(l1, l2) | |
255 # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] | |
256 # if computeSubSequence: | |
257 # self.subSequenceIndices = self.subSequence(len(l1), len(l2)) | |
258 # return self.similarityTable.max() | |
259 | |
260 def _compute(self, _l1, _l2, computeSubSequence = False): | 245 def _compute(self, _l1, _l2, computeSubSequence = False): |
261 '''returns the longest common subsequence similarity | 246 '''returns the longest common subsequence similarity |
262 based on the threshold on distance between two elements of lists l1, l2 | 247 based on the threshold on distance between two elements of lists l1, l2 |
263 similarityFunc returns True or False whether the two points are considered similar | 248 similarityFunc returns True or False whether the two points are considered similar |
264 | 249 |
278 n2 = len(l2) | 263 n2 = len(l2) |
279 | 264 |
280 if self.aligned: | 265 if self.aligned: |
281 lcssValues = {} | 266 lcssValues = {} |
282 similarityTables = {} | 267 similarityTables = {} |
283 for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval | 268 for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2 |
284 #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) | |
285 #print l2[max(0,i-n1):] | |
286 #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) | |
287 #print i, lcssValues[i] | |
288 self.similarities(l1, l2, i) | 269 self.similarities(l1, l2, i) |
289 lcssValues[i] = self.similarityTable.max() | 270 lcssValues[i] = self.similarityTable.max() |
290 similarityTables[i] = self.similarityTable | 271 similarityTables[i] = self.similarityTable |
291 print self.similarityTable | 272 #print self.similarityTable |
292 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance | 273 self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance |
293 self.similarityTable = similarityTables[self.alignmentShift] | 274 self.similarityTable = similarityTables[self.alignmentShift] |
294 else: | 275 else: |
295 self.alignmentShift = 0 | 276 self.alignmentShift = 0 |
296 self.similarities(l1, l2) | 277 self.similarities(l1, l2) |
297 | 278 |
298 self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1] | 279 # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta |
280 self.similarityTable = self.similarityTable[:min(n1, n2+self.alignmentShift+self.delta)+1, :min(n2, n1-self.alignmentShift+self.delta)+1] | |
299 | 281 |
300 if computeSubSequence: | 282 if computeSubSequence: |
301 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) | 283 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) |
302 if revertIndices: | 284 if revertIndices: |
303 self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] | 285 self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices] |
304 else: | |
305 self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] | |
306 return self.similarityTable[-1,-1] | 286 return self.similarityTable[-1,-1] |
307 | 287 |
308 def compute(self, l1, l2, computeSubSequence = False): | 288 def compute(self, l1, l2, computeSubSequence = False): |
309 '''get methods are to be shadowed in child classes ''' | 289 '''get methods are to be shadowed in child classes ''' |
310 return self._compute(l1, l2, computeSubSequence) | 290 return self._compute(l1, l2, computeSubSequence) |
311 | 291 |
312 def computeAlignement(self): | 292 def computeAlignement(self): |
313 from numpy import mean | 293 from numpy import mean |
314 return mean([j-i for i,j in self.subSequenceIndices])+self.alignmentShift | 294 return mean([j-i for i,j in self.subSequenceIndices]) |
315 | 295 |
316 def _computeNormalized(self, l1, l2): | 296 def _computeNormalized(self, l1, l2): |
317 ''' compute the normalized LCSS | 297 ''' compute the normalized LCSS |
318 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) | 298 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) |
319 lengthFunc = lambda x,y:float(x,y)/2''' | 299 lengthFunc = lambda x,y:float(x,y)/2''' |