Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 368:2db4e76599a1
implemented subsequence extraction and rearranged arguments
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 15 Jul 2013 15:08:53 -0400 |
parents | 90bdabc06e9f |
children | 027e254f0b53 |
comparison
equal
deleted
inserted
replaced
367:d44eba0db517 | 368:2db4e76599a1 |
---|---|
139 | 139 |
140 ######################### | 140 ######################### |
141 # sequence section | 141 # sequence section |
142 ######################### | 142 ######################### |
143 | 143 |
144 def LCSS(l1, l2, similarityFunc, delta = float('inf')): | 144 def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')): |
145 from numpy import zeros, int as npint | |
146 n1 = len(l1) | |
147 n2 = len(l2) | |
148 similarities = zeros((n1+1,n2+1), dtype = npint) | |
149 for i in xrange(1,n1+1): | |
150 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): | |
151 if similarityFunc(l1[i-1], l2[j-1]): | |
152 similarities[i,j] = similarities[i-1,j-1]+1 | |
153 else: | |
154 similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1]) | |
155 return similarities | |
156 | |
157 def subSequence(similarities, i, j): | |
158 '''Returns the subsequence of two sequences | |
159 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | |
160 if i == 0 or j == 0: | |
161 return [] | |
162 elif similarities[i][j] == similarities[i][j-1]: | |
163 return subSequence(similarities, i, j-1) | |
164 elif similarities[i][j] == similarities[i-1][j]: | |
165 return subSequence(similarities, i-1, j) | |
166 else: | |
167 return subSequence(similarities, i-1, j-1) + [(i-1,j-1)] | |
168 | |
169 def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False): | |
145 '''returns the longest common subsequence similarity | 170 '''returns the longest common subsequence similarity |
146 based on the threshold on distance between two elements of lists l1, l2 | 171 based on the threshold on distance between two elements of lists l1, l2 |
147 similarityFunc returns True or False whether the two points are considered similar | 172 similarityFunc returns True or False whether the two points are considered similar |
148 | 173 |
149 eg distance(p1, p2) < epsilon | 174 eg distance(p1, p2) < epsilon |
150 ''' | 175 ''' |
151 from numpy import zeros, int as npint | 176 from numpy import argmax |
152 n1 = len(l1) | 177 similarities = sequenceSimilarities(l1, l2, similarityFunc, delta) |
153 n2 = len(l2) | 178 imax = argmax(similarities[:,-1]) |
154 similarity = zeros((n1+1,n2+1), dtype = npint) | 179 jmax = argmax(similarities[-1,:]) |
155 for i in xrange(1,n1+1): | 180 if returnSubSequence: |
156 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): | 181 if similarities[imax, -1] > similarities[-1, jmax]: |
157 if similarityFunc(l1[i-1], l2[j-1]): | 182 lcss = similarities[imax, -1] |
158 similarity[i][j] = similarity[i-1][j-1]+1 | 183 subseq = subSequence(similarities, imax, len(l2)) |
159 else: | 184 else: |
160 similarity[i][j] = max(similarity[i-1][j], similarity[i][j-1]) | 185 lcss = similarities[-1, jmax] |
161 return max(max(similarity[:,-1]), max(similarity[-1,:])) | 186 subseq = subSequence(similarities, len(l1), jmax) |
162 | 187 return lcss, subseq |
163 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min): | 188 else: |
189 return max(similarities[imax, -1], similarities[-1, jmax]) | |
190 | |
191 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): | |
164 ''' compute the normalized LCSS | 192 ''' compute the normalized LCSS |
165 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthMethod) | 193 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) |
166 lengthMethod = lambda x,y:float(x,y)/2''' | 194 lengthFunc = lambda x,y:float(x,y)/2''' |
167 return float(LCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2)) | 195 return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) |
168 | 196 |
169 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min): | 197 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): |
170 ''' compute the LCSS distance''' | 198 ''' compute the LCSS distance''' |
171 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthMethod) | 199 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc) |
172 | 200 |
173 def alignedLCSS(_l1, _l2, similarityFunc, delta): | 201 def alignedLCSS(_l1, _l2, similarityFunc, delta): |
174 '''returns the best matching if using a finite delta by shiftinig the series alignments''' | 202 '''returns the best matching if using a finite delta by shiftinig the series alignments''' |
175 if len(_l2) < len(_l1): # l1 is the shortest | 203 if len(_l2) < len(_l1): # l1 is the shortest |
176 l1 = _l2 | 204 l1 = _l2 |
185 # print l2[max(0,i-n1):] | 213 # print l2[max(0,i-n1):] |
186 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) | 214 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) |
187 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] | 215 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] |
188 return max(lcss) | 216 return max(lcss) |
189 | 217 |
190 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod = min): | 218 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): |
191 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2)) | 219 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) |
192 | 220 |
193 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthMethod = min): | 221 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): |
194 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod) | 222 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc) |
195 | 223 |
196 ######################### | 224 ######################### |
197 # maths section | 225 # maths section |
198 ######################### | 226 ######################### |
199 | 227 |