annotate python/utils.py @ 676:58b9ac2f262f

fine tuning
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 27 May 2015 04:08:19 +0200
parents ab3fdff42624
children ae07c7b4cf87
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
1 #! /usr/bin/env python
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
2 ''' Generic utilities.'''
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
3
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
4 import matplotlib.pyplot as plt
397
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
5 from datetime import time, datetime
670
f72ed51c6b65 corrected other missing imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 669
diff changeset
6 from math import sqrt, ceil, floor
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
7 from scipy.stats import kruskal, shapiro
672
5473b7460375 moved and rationalized imports in modules
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 671
diff changeset
8 from numpy import zeros, array, exp, sum as npsum, arange, cumsum
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
9
421
4fce27946c60 first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 405
diff changeset
10 datetimeFormat = "%Y-%m-%d %H:%M:%S"
4fce27946c60 first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 405
diff changeset
11
185
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
12 #########################
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
13 # Enumerations
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
14 #########################
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
15
c06379f25ab8 utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 181
diff changeset
def inverseEnumeration(l):
    '''Returns the dictionary that provides for each element in the input list its index in the input list

    If an element appears several times in l, its last index is kept
    The elements of l are used as dictionary keys'''
    return {x: i for i, x in enumerate(l)}
155
f03fe3d6d0c8 added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 152
diff changeset
22
f03fe3d6d0c8 added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 152
diff changeset
23 #########################
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
24 # Simple statistics
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
25 #########################
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
26
423
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
def sampleSize(stdev, tolerance, percentConfidence, printLatex = False):
    '''Returns (k*stdev/tolerance)^2 (not rounded to an integer)

    k is the quantile of the standard normal distribution
    at probability 0.5+percentConfidence/200, rounded to 2 decimals
    (percentConfidence is a percentage, eg 95)
    If printLatex, the expression is also printed as a LaTeX formula'''
    from scipy.stats.distributions import norm
    cumulativeProbability = 0.5+percentConfidence/200. # 1.-(100-percentConfidence)/200.
    quantile = norm.ppf(cumulativeProbability, 0, 1)
    k = round(quantile*100)/100.
    if printLatex:
        print('${0}^2\\frac{{{1}^2}}{{{2}^2}}$'.format(k, stdev, tolerance))
    ratio = k*stdev/tolerance
    return ratio**2
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
33
f738fa1b69f0 added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 421
diff changeset
def confidenceInterval(mean, stdev, nSamples, percentConfidence, trueStd = True, printLatex = False):
    '''Returns the bounds (mean-e, mean+e) of the confidence interval
    with e = k*stdev/sqrt(nSamples)

    if trueStd, use normal distribution, otherwise, Student
    (with nSamples-1 degrees of freedom)
    k is the quantile at probability 0.5+percentConfidence/200, rounded to 2 decimals
    (percentConfidence is a percentage, eg 95)
    If printLatex, the expression is also printed as a LaTeX formula

    Use otherwise t.interval or norm.interval
    ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11))
    t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples))
    loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)'''
    from scipy.stats.distributions import norm, t
    cumulativeProbability = 0.5+percentConfidence/200. # 1.-(100-percentConfidence)/200.
    if trueStd:
        quantile = norm.ppf(cumulativeProbability, 0, 1)
    else: # use Student
        quantile = t.ppf(cumulativeProbability, nSamples-1)
    k = round(quantile*100)/100.
    e = k*stdev/sqrt(nSamples)
    if printLatex:
        # all backslashes are escaped: '\p' and '\s' are not valid escape sequences
        print('${0} \\pm {1}\\frac{{{2}}}{{\\sqrt{{{3}}}}}$'.format(mean, k, stdev, nSamples))
    return mean-e, mean+e
27f06d28036d added simple helper for confidence intervals
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 297
diff changeset
50
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
def computeChi2(expected, observed):
    '''Returns the Chi2 statistics
    Sum of (expected-observed)^2/expected over the paired values

    The values are paired with zip: if the two sequences have different lengths,
    the extra elements of the longer one are ignored
    An expected value of 0 raises ZeroDivisionError'''
    result = 0.
    for e, o in zip(expected, observed):
        # float() forces true division when the values are integers (Python 2)
        result += ((e-o)*(e-o))/float(e)
    return result
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
57
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
class EmpiricalDistribution(object):
    '''Base class for the empirical distributions

    It does not set any attribute itself: nSamples reads self.counts,
    which must be set before it is called'''
    def nSamples(self):
        'Returns the total number of observations, ie the sum of the counts'
        total = sum(self.counts)
        return total
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
61
588
c5406edbcf12 added loading ground truth annotations (ground truth) from polytrack format
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 574
diff changeset
def cumulativeDensityFunction(sample, normalized = False):
    '''Returns the cumulative density function of the sample of a random variable

    Returns the sorted sample and, for each sorted value, its rank 1, 2, ..., n
    (array of integers), or rank/n (array of floats) if normalized'''
    xaxis = sorted(sample)
    nSamples = len(sample)
    counts = arange(1, nSamples+1)
    if normalized:
        # not done in place: the array of ranks has an integer type
        # and cannot store the result of the division
        counts = counts/float(nSamples)
    return xaxis, counts
85
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
69
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
class EmpiricalDiscreteDistribution(EmpiricalDistribution):
    '''Class to represent a sample of a distribution for a discrete random variable

    categories are the values of the random variable
    counts are the numbers of observations of each category, in the same order
    '''
    def __init__(self, categories, counts):
        self.categories = categories
        self.counts = counts

    def mean(self):
        'Returns the mean of the sample (mean of the categories weighted by their counts)'
        result = [float(x*y) for x,y in zip(self.categories, self.counts)]
        return npsum(result)/self.nSamples()

    def var(self, mean = None):
        '''Returns the variance of the sample (sum of squares divided by nSamples-1)

        mean can be provided if it is already known, otherwise it is computed'''
        # explicit comparison to None: a mean equal to 0 is a valid value
        if mean is None:
            m = self.mean()
        else:
            m = mean
        squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
        return npsum(squares)/(self.nSamples()-1)

    def referenceCounts(self, probability):
        '''probability is a function that returns the probability of the random variable for the category values

        Returns the expected counts (probability times nSamples) and the probabilities for each category
        The probability of the last category is replaced by 1 minus the sum of the others,
        so that the probabilities sum to 1'''
        refProba = [probability(c) for c in self.categories]
        refProba[-1] = 1-npsum(refProba[:-1])
        refCounts = [r*self.nSamples() for r in refProba]
        return refCounts, refProba
85
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
96
7f1e54234f96 added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 77
diff changeset
class EmpiricalContinuousDistribution(EmpiricalDistribution):
    '''Class to represent a sample of a distribution for a continuous random variable
    with the number of observations for each interval
    intervals (categories variable) are defined by their left limits, the last one being the right limit
    categories contain therefore one more element than the counts'''
    def __init__(self, categories, counts):
        # todo add samples for initialization and everything to None? (or setSamples?)
        self.categories = categories
        self.counts = counts

    def _midpoints(self):
        'Returns the middle of each interval (one value per count)'
        # division by 2. to avoid the integer division of integer limits (Python 2)
        return [(left+right)/2. for left, right in zip(self.categories[:-1], self.categories[1:])]

    def mean(self):
        '''Returns the mean of the sample,
        all the observations of an interval being put at the middle of the interval'''
        result = 0.
        # zip covers all the intervals, including the last one
        for count, mid in zip(self.counts, self._midpoints()):
            result += count*mid
        return result/self.nSamples()

    def var(self, mean = None):
        '''Returns the variance of the sample (sum of squares divided by nSamples-1),
        all the observations of an interval being put at the middle of the interval

        mean can be provided if it is already known, otherwise it is computed'''
        # explicit comparison to None: a mean equal to 0 is a valid value
        if mean is None:
            m = self.mean()
        else:
            m = mean
        result = 0.
        for count, mid in zip(self.counts, self._midpoints()):
            result += count*(mid - m)*(mid - m)
        return result/(self.nSamples()-1)

    def referenceCounts(self, cdf):
        '''cdf is a cumulative distribution function
        returning the probability of the variable being less that x

        Returns the expected counts (probability times nSamples) and the probabilities for each interval
        The cdf is evaluated only at the limits between intervals:
        the first interval gets all the probability below its right limit
        and the last interval all the probability above its left limit'''
        refCumulativeCounts = [cdf(x) for x in self.categories[1:-1]]

        refProba = []
        previous = 0.
        for cumulative in refCumulativeCounts:
            refProba.append(cumulative-previous)
            previous = cumulative
        # with only one interval, there is no inner limit and its probability is 1
        refProba.append(1-previous)
        refCounts = [p*self.nSamples() for p in refProba]

        return refCounts, refProba

    def printReferenceCounts(self, refCounts=None, cdf=None):
        '''Prints one LaTeX table row per interval:
        limits of the interval, reference probability and reference count

        refCounts is the pair (counts, probabilities) returned by referenceCounts
        If it is not provided, it is computed from the cumulative distribution function cdf
        Raises ValueError if neither refCounts nor cdf is provided'''
        if refCounts is not None:
            ref = refCounts
        elif cdf is not None:
            ref = self.referenceCounts(cdf)
        else:
            raise ValueError('either refCounts or cdf must be provided')
        for i in range(len(ref[0])):
            print('{0}-{1} & {2:0.3} & {3:0.3} \\\\'.format(self.categories[i],self.categories[i+1],ref[1][i], ref[0][i]))
5e6cd36a991c added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 76
diff changeset
147
5e6cd36a991c added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 76
diff changeset
148
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
149 #########################
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
150 # maths section
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
151 #########################
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
152
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
153 # def kernelSmoothing(sampleX, X, Y, weightFunc, halfwidth):
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
154 # '''Returns a smoothed weighted version of Y at the predefined values of sampleX
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
155 # Sum_x weight(sample_x,x) * y(x)'''
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
156 # from numpy import zeros, array
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
157 # smoothed = zeros(len(sampleX))
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
158 # for i,x in enumerate(sampleX):
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
159 # weights = array([weightFunc(x,xx, halfwidth) for xx in X])
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
160 # if sum(weights)>0:
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
161 # smoothed[i] = sum(weights*Y)/sum(weights)
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
162 # else:
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
163 # smoothed[i] = 0
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
164 # return smoothed
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
165
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def kernelSmoothing(x, X, Y, weightFunc, halfwidth):
    '''Returns the smoothed estimate of (X,Y) at x
    Sum_x weight(sample_x,x) * y(x) divided by the sum of the weights

    weightFunc is called as weightFunc(x, observedx, halfwidth) for each element of X
    Returns 0 if the sum of the weights is not positive'''
    weights = array([weightFunc(x,observedx, halfwidth) for observedx in X])
    # the sum of the weights is computed only once
    totalWeight = weights.sum()
    if totalWeight>0:
        return (weights*Y).sum()/totalWeight
    else:
        return 0
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
174
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def uniform(center, x, halfwidth):
    '''Uniform kernel: returns 1. if the distance between center and x
    is strictly less than halfwidth, 0. otherwise'''
    distance = abs(center-x)
    return 1. if distance<halfwidth else 0.
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
180
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def gaussian(center, x, halfwidth):
    '''Gaussian kernel (not normalized): returns exp(-((center-x)/halfwidth)^2/2)'''
    # multiplication by 1. to force true division with integer arguments (Python 2)
    scaledDistance = (center-x)/(1.*halfwidth)
    return exp(-scaledDistance**2/2)
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
183
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
def epanechnikov(center, x, halfwidth):
    '''Epanechnikov kernel (not normalized): returns 1-(distance/halfwidth)^2
    if the distance between center and x is strictly less than halfwidth, 0. otherwise'''
    diff = abs(center-x)
    if diff<halfwidth:
        # multiplication by 1. to force true division with integer arguments (Python 2)
        return 1.-(diff/(1.*halfwidth))**2
    else:
        return 0.
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
190
434
9a714f32fc9f small updates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 433
diff changeset
def triangular(center, x, halfwidth):
    '''Triangular kernel (not normalized): returns 1-distance/halfwidth
    if the distance between center and x is strictly less than halfwidth, 0. otherwise'''
    diff = abs(center-x)
    if diff<halfwidth:
        # diff and halfwidth are not negative here: no absolute value needed
        # multiplication by 1. to force true division with integer arguments (Python 2)
        return 1.-diff/(1.*halfwidth)
    else:
        return 0.
433
d40ad901b272 added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 423
diff changeset
197
518
0c86c73f3c09 median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 511
diff changeset
def medianSmoothing(x, X, Y, halfwidth):
    '''Returns the median of the Y values whose paired X value
    is strictly closer to x than halfwidth

    X and Y are paired sequences (iterated together)'''
    from numpy import median
    neighbours = []
    for observedx, y in zip(X, Y):
        if abs(x-observedx) < halfwidth:
            neighbours.append(y)
    return median(neighbours)
0c86c73f3c09 median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 511
diff changeset
202
521
3707eeb20f25 changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 518
diff changeset
def argmaxDict(d):
    '''Returns the key of the dictionary d that maps to the largest value
    (the first such key in iteration order if several share the maximum)'''
    bestKey, _ = max(d.items(), key = lambda item: item[1])
    return bestKey
279
3af4c267a7bf generic simple LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 276
diff changeset
205
395
6fba1ab040f1 minor modification to framestotime
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 391
diff changeset
def framesToTime(nFrames, frameRate, initialTime = time()):
    '''Converts a number of frames into a time of day (datetime.time)

    The elapsed duration nFrames/frameRate is rounded down to whole seconds
    and added to initialTime (a datetime.time, midnight by default)'''
    offset = initialTime.hour*3600+initialTime.minute*60+initialTime.second
    elapsed = int(floor(float(nFrames)/float(frameRate))+offset)
    # split the total number of seconds into hours, minutes and seconds
    hours, remainder = divmod(elapsed, 3600)
    minutes, secs = divmod(remainder, 60)
    return time(hours, minutes, secs)
248
571ba5ed22e2 added utils for bus processing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 241
diff changeset
215
381
387cc0142211 script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 376
diff changeset
def timeToFrames(t, frameRate):
    '''Returns frameRate times the number of seconds since midnight of t

    t provides hour, minute and second attributes (eg a datetime.time);
    only these three fields are used'''
    nSeconds = t.hour*3600+t.minute*60+t.second
    return frameRate*nSeconds
387cc0142211 script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 376
diff changeset
218
241
ee1caff48b03 added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 235
diff changeset
def sortXY(X,Y):
    '''Returns the paired data (x, y) sorted on X, as two lists:
    the sorted x values and the y values in the matching order

    Every pair is kept, including pairs that share the same x
    (they stay in their input order); x values only need to be comparable'''
    # stable sort on x only, so that y values are never compared
    pairs = sorted(zip(X, Y), key = lambda pair: pair[0])
    return [x for x, _ in pairs], [y for _, y in pairs]
ee1caff48b03 added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 235
diff changeset
226
32
48e56179c39e added ceil function
Nicolas Saunier <nico@confins.net>
parents: 31
diff changeset
def ceilDecimals(v, nDecimals):
    '''Rounds the number up (ceiling) at the nth decimal
    eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3'''
    scale = pow(10, nDecimals)
    scaledUp = ceil(v*scale)
    return scaledUp/scale
48e56179c39e added ceil function
Nicolas Saunier <nico@confins.net>
parents: 31
diff changeset
232
152
74b1fc68d4df re-organized code to avoid cyclic python module dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 116
diff changeset
def inBetween(bound1, bound2, x):
    '''Indicates whether x lies between the two bounds (inclusive),
    whichever of bound1 and bound2 is the larger'''
    return (bound1 <= x and x <= bound2) or (bound2 <= x and x <= bound1)
0057c04f94d5 work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 561
diff changeset
235
0057c04f94d5 work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 561
diff changeset
def pointDistanceL2(x1,y1,x2,y2):
    ''' Returns the Euclidean (L2 norm) distance
    between the points (x1, y1) and (x2, y2)'''
    dx = x2-x1
    dy = y2-y1
    return sqrt(dx**2+dy**2)
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
239
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
def crossProduct(l1, l2):
    '''Returns the 2D cross product (determinant) of l1 and l2,
    using their first two components'''
    x1, y1 = l1[0], l1[1]
    x2, y2 = l2[0], l2[1]
    return x1*y2-y1*x2
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
242
574
e24eeb244698 first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 569
diff changeset
def cat_mvgavg(cat_list, halfWidth):
    ''' Return a list of categories/values smoothed according to a window.
    halfWidth is the search radius on either side

    Each element is replaced by the most frequent value among the elements
    at most halfWidth positions away (the window is truncated at both ends
    of the list). Ties are resolved arbitrarily (set iteration order).
    cat_list is not modified; its elements must be hashable'''
    from copy import deepcopy
    smoothed = deepcopy(cat_list)
    nPoints = len(cat_list)
    for point in range(nPoints):
        lower_bound_check = max(0, point-halfWidth)
        # slice ends are exclusive: clamp to nPoints (not nPoints-1)
        # so that the last element can belong to a window
        upper_bound_check = min(nPoints, point+halfWidth+1)
        window_values = cat_list[lower_bound_check:upper_bound_check]
        smoothed[point] = max(set(window_values), key=window_values.count)
    return smoothed
e24eeb244698 first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 569
diff changeset
254
547
97c5fef5b2d6 corrected bugs
MohamedGomaa
parents: 521
diff changeset
def filterMovingWindow(inputSignal, halfWidth):
    '''Returns an array obtained after the smoothing of the input by a moving average
    The first and last points are copied from the original.

    The window covers halfWidth points on either side of each point
    (2*halfWidth+1 points); the first and last halfWidth points, for which
    the window is incomplete, are copied unchanged. The input is
    a one-dimensional sequence of numbers and the result is an array of floats
    of the same length. If the signal is shorter than the window
    (or halfWidth is not positive), a float copy of the input is returned'''
    from numpy import ones,convolve,array
    signal = array(inputSignal, dtype = float)
    width = 2*halfWidth+1 # kept as an int: array sizes cannot be floats
    if halfWidth <= 0 or len(signal) < width:
        # no point has a complete window (or the window is a single point)
        return signal
    result = convolve(signal, ones(width)/width, 'same')
    result[:halfWidth] = signal[:halfWidth]
    result[-halfWidth:] = signal[-halfWidth:]
    return result
ca8e716cc231 added moving average filter
Nicolas Saunier <nico@confins.net>
parents: 27
diff changeset
265
199
ca9d9104afba added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 197
diff changeset
def linearRegression(x, y, deg = 1, plotData = False):
    '''Returns the least square estimation of the polynomial regression of y on x
    (y = ax+b for the default degree 1), and optionally plots it

    The coefficients are returned highest degree first (numpy polyfit order).
    If plotData is True, the observations and the fitted curve, evaluated
    at 1000 points between min(x) and max(x), are drawn on the current figure'''
    # public numpy namespace: the private module paths are not stable across versions
    from numpy import polyfit, polyval, linspace
    coef = polyfit(x, y, deg)
    if plotData:
        plt.plot(x, y, 'x')
        # linspace also works if all x are equal (a zero step is invalid for arange)
        xx = linspace(min(x), max(x), 1000)
        plt.plot(xx, polyval(coef, xx))
    return coef
ca9d9104afba added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 197
diff changeset
282
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def correlation(data, correlationMethod = 'pearson', plotFigure = False, displayNames = None, figureFilename = None):
    '''Computes (and displays) the correlation matrix for a pandas DataFrame

    correlationMethod is passed to DataFrame.corr
    If plotFigure is True, the matrix is drawn as a colored image
    (color scale from -1 to 1) with the column names as tick labels:
    displayNames is an optional dictionary mapping (stripped) column names
    to the labels to display, the names being used as is if absent
    The figure is saved if figureFilename is not None
    Returns the correlation matrix'''
    c=data.corr(correlationMethod)
    if plotFigure:
        if displayNames is None: # no mapping provided: label with the column names
            displayNames = {}
        fig = plt.figure(figsize=(2+0.4*c.shape[0], 0.4*c.shape[0]))
        fig.add_subplot(1,1,1)
        plt.imshow(c, vmin=-1., vmax = 1., interpolation='none', cmap = 'RdYlBu_r') # coolwarm
        colnames = [displayNames.get(s.strip(), s.strip()) for s in c.columns.tolist()]
        plt.xticks(range(len(colnames)), colnames, rotation=90)
        plt.yticks(range(len(colnames)), colnames)
        plt.tick_params('both', length=0)
        plt.subplots_adjust(bottom = 0.29)
        plt.colorbar()
        plt.title('Correlation ({})'.format(correlationMethod))
        plt.tight_layout()
        if figureFilename is not None:
            plt.savefig(figureFilename, dpi = 150, transparent = True)
    return c
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
303
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def addDummies(data, variables, allVariables = True):
    '''Add binary dummy variables for each value of a nominal variable
    in a pandas DataFrame

    Only the variables that are columns of data with the object dtype
    and more than 2 unique values are processed
    A boolean column named variable_value (without '.', ' ' and '-')
    is added to data (modified in place) for each non-missing value
    If allVariables is False, no dummy is added for the last non-missing value
    Returns the list of the names of the added columns'''
    from numpy import dtype
    newVariables = []
    for var in variables:
        if var not in data.columns or data.dtypes[var] != dtype('O'):
            continue
        if len(data[var].unique()) <= 2:
            continue
        # missing values (NaN, None) are removed first so that they are never
        # turned into a dummy, nor counted as the value dropped below
        values = data[var].dropna().unique()
        if not allVariables:
            values = values[:-1]
        for val in values:
            newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
            data[newVariable] = (data[var] == val)
            newVariables.append(newVariable)
    return newVariables
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
320
676
58b9ac2f262f fine tuning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 675
diff changeset
def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, translate = lambda s: s, kwCaption = u''):
    '''Studies the influence of (nominal) independent variable over the dependent variable

    Tests if the conditional distributions are normal
    using the Shapiro-Wilk test (in which case ANOVA could be used)
    and applies the non-parametric Kruskal Wallis test

    data is a pandas DataFrame: the rows where the independent variable
    is missing are ignored
    If plotFigure is True, a boxplot per value of the independent variable is drawn,
    and saved as filenamePrefix-dependent-independent.figureFileType
    if filenamePrefix is not None
    The descriptive statistics per value (sorted by decreasing median) are printed,
    or, if saveLatex is True, written as a LaTeX table to
    filenamePrefix-dependent-independent.tex: the whole LaTeX text goes through
    the function translate and kwCaption is formatted with the names of
    the dependent and independent variables and the elements of the test result

    Returns the result of scipy.stats.kruskal, or None if the independent variable
    has less than two values'''
    tmp = data[data[independentVariable].notnull()]
    independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
    if len(independentVariableValues) < 2:
        return None
    # observations of the dependent variable for each value of the independent variable
    samples = [tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]
    for x, sample in zip(independentVariableValues, samples):
        print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(sample)))
        if len(sample) >= 3: # the test is only run on at least 3 observations
            print(shapiro(sample))
    if plotFigure:
        plt.figure()
        plt.boxplot(samples)
        plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
        plt.title('{} vs {}'.format(dependentVariable, independentVariable))
        if filenamePrefix is not None:
            plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
    table = tmp.groupby([independentVariable])[dependentVariable].describe()
    if not hasattr(table, 'columns'):
        # older pandas versions return a Series with one row per (value, statistic)
        table = table.unstack()
    if hasattr(table, 'sort_values'):
        table = table.sort_values('50%', ascending = False)
    else: # older pandas versions
        table = table.sort(['50%'], ascending = False)
    table['count'] = table['count'].astype(int)
    testResult = kruskal(*samples)
    if saveLatex:
        from storage import openCheck
        latex = translate('\\begin{minipage}{\\linewidth}\n'
                          +'\\centering\n'
                          +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
                          +table.to_latex(float_format = '{:.2f}'.format)+'\n'
                          +'\\end{minipage}\n'
                          +'\\vspace{0.5cm}\n')
        # the file is opened only once the text is ready, and always closed
        out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
        try:
            out.write(latex)
        finally:
            out.close()
    else:
        print(table)
    return testResult
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
362
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc):
    '''Filters the candidate independent variables for a regression

    Two passes are made over the candidates:
    - for each ordered pair of candidates that are both in correlations.index,
      if the absolute value of correlations.loc[v1, v2] exceeds maxCorrelationThreshold
      and both are still kept, the second one (v2) is removed
    - each remaining candidate whose column in data is not of object dtype
      is tested against the dependent variable with correlationFunc,
      and removed if the returned p-value is above maxCorrelationP

    correlationFunc is spearmanr or pearsonr from scipy.stats

    Returns a copy of independentVariables without the removed variables
    (independentVariables itself is left untouched)

    TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable'''
    from numpy import dtype
    from copy import copy
    kept = copy(independentVariables)
    knownVariables = correlations.index
    # first pass: keep only one variable of each strongly correlated pair
    for first in independentVariables:
        if first not in knownVariables:
            continue
        for second in independentVariables:
            if second == first or second not in knownVariables:
                continue
            pairCorrelation = correlations.loc[first, second]
            if abs(pairCorrelation) > maxCorrelationThreshold and first in kept and second in kept:
                print('Removing {} (correlation {} with {})'.format(second, pairCorrelation, first))
                kept.remove(second)
    # second pass: iterate over a snapshot since kept shrinks along the way
    objectType = dtype('O')
    for candidate in copy(kept):
        if data.dtypes[candidate] != objectType:
            _, pValue = correlationFunc(data[dependentVariable], data[candidate])
            if pValue > maxCorrelationP:
                print('Removing {} (no correlation p={})'.format(candidate, pValue))
                kept.remove(candidate)
    return kept
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
391
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
392
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
393 #########################
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
394 # regression analysis using statsmodels (and pandas)
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
395 #########################
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
396
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
397 # TODO make class for experiments?
668
f8dcf483b296 code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 667
diff changeset
398 # TODO add tests with public dataset downloaded from Internet (IRIS et al)
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def modelString(experiment, dependentVariable, independentVariables):
    '''Builds the formula string 'dependentVariable ~ var1 + var2 + ...'

    Only the independent variables for which experiment[variable] is true
    appear on the right-hand side, in the order of independentVariables'''
    selectedVariables = []
    for variable in independentVariables:
        if experiment[variable]:
            selectedVariables.append(variable)
    rightHandSide = ' + '.join(selectedVariables)
    return dependentVariable+' ~ '+rightHandSide
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
401
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def runModel(experiment, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Fits the model described by experiment on data and returns the result of fit()

    The formula is built by modelString from the variables selected in experiment
    regressionType picks the statsmodels formula constructor: 'ols', 'gls' or 'rlm'
    Any other value prints a message and exits the program'''
    import statsmodels.formula.api as smf
    formula = modelString(experiment, dependentVariable, independentVariables)
    if regressionType not in ('ols', 'gls', 'rlm'):
        print('Unknown regression type {}. Exiting'.format(regressionType))
        import sys
        sys.exit()
    # the accepted type names are exactly the names of the smf constructors
    buildModel = getattr(smf, regressionType)
    return buildModel(formula, data = data).fit()
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
416
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def runModels(experiments, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Runs several models and stores 4 statistics in experiments
    adjusted R2 ('r2adj'), condition number ('condNum', should be small, eg < 1000),
    p-value for Shapiro-Wilk test of residual normality ('shapiroP')
    and number of observations ('nobs')

    Each row of experiments describes one model (one boolean per independent variable)
    Rows in which no independent variable is selected are skipped
    regressionType is passed on to runModel for every row

    Returns experiments, which is modified in place'''
    for i, experiment in experiments.iterrows():
        if experiment[independentVariables].any():
            # pass the requested regression type (it used to be hard-coded to 'ols')
            results = runModel(experiment, data, dependentVariable, independentVariables, regressionType = regressionType)
            experiments.loc[i, 'r2adj'] = results.rsquared_adj
            experiments.loc[i, 'condNum'] = results.condition_number
            experiments.loc[i, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[i, 'nobs'] = int(results.nobs)
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
429
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def generateExperiments(independentVariables):
    '''Generates all possible models for including or not each independent variable

    Returns a DataFrame with 2**len(independentVariables) rows
    and one boolean column per independent variable:
    the i-th variable is included in row r if bit i of r is set,
    so that the row index is the binary code of the selected variables

    The result columns are initialized with placeholders:
    'r2adj' to 0., 'condNum' to nan, 'shapiroP' to -1. and 'nobs' to -1
    (a negative 'shapiroP' marks a model that has not been run yet)

    Prints a message and exits the program if a variable appears twice'''
    from numpy import nan
    from pandas import DataFrame
    experiments = {}
    nIndependentVariables = len(independentVariables)
    if nIndependentVariables != len(set(independentVariables)):
        print("Duplicate variables. Exiting")
        import sys
        sys.exit()
    for i, var in enumerate(independentVariables):
        # blocks of 2**i False then 2**i True, repeated to fill the 2**n rows
        pattern = [False]*(2**i)+[True]*(2**i)
        experiments[var] = pattern*(2**(nIndependentVariables-i-1))
    experiments = DataFrame(experiments)
    experiments['r2adj'] = 0.
    experiments['condNum'] = nan
    # float sentinel so that the column can later hold p-values without changing dtype
    experiments['shapiroP'] = -1.
    experiments['nobs'] = -1
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
450
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
    '''Generates all possible model with the independentVariables
    and runs them, saving the results in experiments
    with multiprocess option

    With nProcesses equal to 1, the models are run in the current process
    Otherwise the experiments are split in nProcesses consecutive chunks,
    each chunk is run by runModels in a pool of nProcesses workers
    and the resulting chunks are concatenated

    Returns the experiments with the statistics filled in by runModels'''
    from pandas import concat
    from multiprocessing import Pool
    experiments = generateExperiments(independentVariables)
    nModels = len(experiments)
    print("Running {} models with {} processes".format(nModels, nProcesses))
    print("IndependentVariables: {}".format(independentVariables))
    if nProcesses == 1:
        return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
    else:
        pool = Pool(processes = nProcesses)
        try:
            # ceiling division done on integers: with Python 2, nModels/nProcesses
            # is floored before ceil is applied and the last models would never be run
            chunkSize = (nModels+nProcesses-1)//nProcesses
            jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)]
            return concat([job.get() for job in jobs])
        finally:
            # release the worker processes whether or not the jobs succeeded
            pool.close()
            pool.join()
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
468
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None):
    '''Forward search for best model (based on adjusted R2)
    The variables are tried one at a time in a random order:
    each one is added to the variables kept so far
    and is kept only if the adjusted R2 gets strictly larger

    modelFunc is called as modelFunc(formula, data = data)
    and the returned model is fitted with fit()

    The results are added to experiments if provided as argument
    (otherwise a new table is made by generateExperiments)
    A model is run only if its 'shapiroP' in experiments is negative
    Storing in experiments relies on the index being the number equal
    to the binary code derived from the independent variables

    Returns experiments'''
    from numpy.random import permutation as nppermutation
    if experiments is None:
        experiments = generateExperiments(independentVariables)
    nVariables = len(independentVariables)
    # searchRanks[i] is the step of the search at which independentVariables[i] is tried
    searchRanks = nppermutation(range(nVariables)).tolist()
    variableAtRank = dict((rank, independentVariables[i]) for i, rank in enumerate(searchRanks))
    print('Tested variables '+', '.join([variableAtRank[rank] for rank in range(nVariables)]))
    selected = [False]*nVariables # indexed by search rank
    bestR2Adj = 0.
    for rank in range(nVariables):
        candidate = list(selected)
        candidate[rank] = True
        # row of the candidate model: bit i is set if independentVariables[i] is in it
        rowIdx = 0
        for i in range(nVariables):
            if candidate[searchRanks[i]]:
                rowIdx += 2**i
        if experiments.loc[rowIdx, 'shapiroP'] < 0:
            formula = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables)
            results = modelFunc(formula, data = data).fit()
            experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj
            experiments.loc[rowIdx, 'condNum'] = results.condition_number
            experiments.loc[rowIdx, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[rowIdx, 'nobs'] = int(results.nobs)
        candidateR2Adj = experiments.loc[rowIdx, 'r2adj']
        if bestR2Adj < candidateR2Adj:
            bestR2Adj = candidateR2Adj
            selected[rank] = True
    return experiments
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
504
676
58b9ac2f262f fine tuning
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 675
diff changeset
def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf'):
    '''Displays some model results

    Prints the summary of results and the Shapiro-Wilk normality test of its residuals
    If plotFigures is true, plots
    - the true values (model.endog) against the predicted values, with the diagonal in red (only if model is provided)
    - the residuals against the predicted values
    - the QQ plot of the residuals
    Each figure is saved as filenamePrefix+'-true-predicted.', '-residuals.' or '-qq.'
    followed by figureFileType if filenamePrefix is provided'''
    print(results.summary())
    print('Shapiro-Wilk normality test for residuals: {}'.format(shapiro(results.resid)))
    if not plotFigures:
        return
    # statsmodels is needed only for the QQ plot, but imported before drawing anything
    import statsmodels.api as sm
    if model is not None:
        plt.figure()
        plt.plot(results.predict(), model.endog, 'x')
        x = plt.xlim()
        y = plt.ylim()
        # diagonal restricted to the range common to both axes
        low = max(x[0], y[0])
        high = min(x[1], y[1])
        plt.plot([low, high], [low, high], 'r')
        plt.title('true vs predicted')
        if filenamePrefix is not None:
            plt.savefig(filenamePrefix+'-true-predicted.'+figureFileType)
    plt.figure()
    plt.plot(results.predict(), results.resid, 'x')
    # title set before saving so that it appears in the saved figure
    plt.title('residuals vs predicted')
    if filenamePrefix is not None:
        plt.savefig(filenamePrefix+'-residuals.'+figureFileType)
    sm.qqplot(results.resid, fit = True, line = '45')
    if filenamePrefix is not None:
        plt.savefig(filenamePrefix+'-qq.'+figureFileType)
667
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
528
179b81faa1f8 added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 665
diff changeset
529
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
530 #########################
455
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
531 # iterable section
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
532 #########################
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
533
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
def mostCommon(L):
    '''Returns the most frequent element in an iterable

    Ties are resolved in favour of the element that appears first in L
    Elements are grouped after sorting, so they must be sortable
    (they do not need to be hashable)
    Raises ValueError if L is empty

    taken from http://stackoverflow.com/questions/1518522/python-most-common-element-in-a-list'''
    from itertools import groupby
    from operator import itemgetter
    # sorted list of (item, position) pairs: equal items become contiguous
    # L is traversed only once, here, so that generators are accepted too
    SL = sorted((x, i) for i, x in enumerate(L))
    nItems = len(SL) # upper bound for any position, len(L) is not available for generators
    # get an iterable of (item, iterable) pairs
    groups = groupby(SL, key=itemgetter(0))
    # auxiliary function to get "quality" for an item
    def _auxfun(g):
        item, iterable = g
        count = 0
        min_index = nItems
        for _, where in iterable:
            count += 1
            min_index = min(min_index, where)
        # the index is negated so that, for equal counts, the earliest item is the max
        return count, -min_index
    # pick the highest-count/earliest item
    return max(groups, key=_auxfun)[0]
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
556
abe0b2347d4c added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 434
diff changeset
557 #########################
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
558 # sequence section
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
559 #########################
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
560
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
class LCSS(object):
    '''Class that keeps the LCSS parameters
    and puts together the various computations

    similarityFunc(e1, e2) returns True if the two elements are considered similar
    delta is the maximum index offset between two elements that can be compared
    aligned indicates whether all alignment shifts are tried to keep the best one
    (the shifts are enumerated using delta, which must then be a finite integer)
    lengthFunc(n1, n2) returns the length used to normalize the LCSS'''
    def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
        self.similarityFunc = similarityFunc
        self.aligned = aligned
        self.delta = delta
        self.lengthFunc = lengthFunc
        self.subSequenceIndices = [(0,0)]

    def similarities(self, l1, l2, jshift=0):
        '''Computes the LCSS dynamic programming table of l1 and l2
        and stores it in self.similarityTable, of shape (len(l1)+1, len(l2)+1)

        Only the cells (i,j) such that i-jshift-delta <= j <= i-jshift+delta are computed,
        the other cells are left at 0'''
        from numpy import zeros
        n1 = len(l1)
        n2 = len(l2)
        # builtin int is the dtype formerly aliased as numpy.int (alias removed from recent numpy versions)
        self.similarityTable = zeros((n1+1,n2+1), dtype = int)
        for i in range(1,n1+1):
            for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
                if self.similarityFunc(l1[i-1], l2[j-1]):
                    self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
                else:
                    self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])

    def subSequence(self, i, j):
        '''Returns the subsequence of two sequences
        as the list of index pairs of the matched elements, in increasing order,
        read back from self.similarityTable starting at cell (i,j)
        http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
        # iterative backtrace: one recursive call per step would reach
        # the recursion limit of the interpreter for long sequences
        indices = []
        while i > 0 and j > 0:
            if self.similarityTable[i,j] == self.similarityTable[i,j-1]:
                j -= 1
            elif self.similarityTable[i,j] == self.similarityTable[i-1,j]:
                i -= 1
            else: # the value increased on the diagonal: elements i-1 and j-1 are matched
                indices.append((i-1,j-1))
                i -= 1
                j -= 1
        indices.reverse() # matches were collected from the end
        return indices

    def _compute(self, _l1, _l2, computeSubSequence = False):
        '''returns the longest common subsequence similarity
        based on the threshold on distance between two elements of lists l1, l2
        similarityFunc returns True or False whether the two points are considered similar

        if aligned, returns the best matching if using a finite delta by shifting the series alignments

        if computeSubSequence, the index pairs of the matched elements are stored
        in self.subSequenceIndices (first index for _l1, second index for _l2)

        eg distance(p1, p2) < epsilon
        '''
        if len(_l2) < len(_l1): # l1 is the shortest
            l1 = _l2
            l2 = _l1
            revertIndices = True
        else:
            l1 = _l1
            l2 = _l2
            revertIndices = False
        n1 = len(l1)
        n2 = len(l2)

        if self.aligned:
            lcssValues = {}
            similarityTables = {}
            for shift in range(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2
                self.similarities(l1, l2, shift)
                lcssValues[shift] = self.similarityTable.max()
                similarityTables[shift] = self.similarityTable
            alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance
            self.similarityTable = similarityTables[alignmentShift]
        else:
            alignmentShift = 0
            self.similarities(l1, l2)

        # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta
        self.similarityTable = self.similarityTable[:min(n1, n2+alignmentShift+self.delta)+1, :min(n2, n1-alignmentShift+self.delta)+1]

        if computeSubSequence:
            self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1)
            if revertIndices: # the sequences were swapped, put the indices back in the order of the arguments
                self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices]
        return self.similarityTable[-1,-1]

    def compute(self, l1, l2, computeSubSequence = False):
        '''get methods are to be shadowed in child classes '''
        return self._compute(l1, l2, computeSubSequence)

    def computeAlignment(self):
        '''Returns the mean index difference (second index minus first index)
        of the matched elements stored in self.subSequenceIndices'''
        from numpy import mean
        return mean([j-i for i,j in self.subSequenceIndices])

    def _computeNormalized(self, l1, l2, computeSubSequence = False):
        ''' compute the normalized LCSS
        ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc)
        lengthFunc = lambda x,y:float(x+y)/2

        raises ZeroDivisionError if lengthFunc returns 0 (eg min with an empty sequence)'''
        return float(self._compute(l1, l2, computeSubSequence))/self.lengthFunc(len(l1), len(l2))

    def computeNormalized(self, l1, l2, computeSubSequence = False):
        '''to be shadowed in child classes, see _computeNormalized'''
        return self._computeNormalized(l1, l2, computeSubSequence)

    def _computeDistance(self, l1, l2, computeSubSequence = False):
        ''' compute the LCSS distance
        ie 1 minus the normalized LCSS'''
        return 1-self._computeNormalized(l1, l2, computeSubSequence)

    def computeDistance(self, l1, l2, computeSubSequence = False):
        '''to be shadowed in child classes, see _computeDistance'''
        return self._computeDistance(l1, l2, computeSubSequence)
370
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
661
97e8fa0ee9a1 work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 369
diff changeset
662 #########################
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
663 # plotting section
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
664 #########################
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
665
332
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
def plotPolygon(poly, options = ''):
    '''Plots shapely polygon poly (only its exterior ring is drawn)

    options is passed to matplotlib plot as the format string'''
    from numpy import array
    from matplotlib.pyplot import plot

    # the coordinates are read through coords: converting the ring itself
    # to an array is not supported anymore by recent shapely versions
    tmp = array(poly.exterior.coords)
    plot(tmp[:,0], tmp[:,1], options)
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
674
324
99ca91a46007 minor change
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 323
diff changeset
def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1):
    '''Builds the abscissas and ordinates of a step plot of the values in X

    The count starts at initialCount at firstX, goes up by increment
    at each value of X (taken in sorted order) and is held until lastX
    Returns the two lists (abscissas, counts) that can be plotted directly
    firstX and lastX should be respectively smaller and larger than all elements in X'''

    abscissas = [firstX]
    levels = [initialCount]
    currentLevel = initialCount
    for value in sorted(X):
        # each value contributes a vertical segment: same abscissa, two levels
        abscissas.extend((value, value))
        levels.append(currentLevel)
        currentLevel = currentLevel+increment
        levels.append(currentLevel)
    # last horizontal segment up to lastX
    abscissas.append(lastX)
    levels.append(currentLevel)
    return abscissas, levels
f6f423e25c7f adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 286
diff changeset
689
665
15e244d2a1b5 corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 659
diff changeset
class PlottingPropertyValues(object):
    '''Sequence of values that is indexed cyclically:
    any integer index is wrapped around the number of values'''
    def __init__(self, values):
        self.values = values

    def __getitem__(self, i):
        # modulo so that indices past the end start again from the first value
        wrappedIndex = i%len(self.values)
        return self.values[wrappedIndex]
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
696
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
# marker symbols to pick from by index (the index wraps around, see PlottingPropertyValues)
markers = PlottingPropertyValues(['+', '*', ',', '.', 'x', 'D', 's', 'o'])
# marker symbols to pick from for scatter plots
scatterMarkers = PlottingPropertyValues(['s','o','^','>','v','<','d','p','h','8','+','x'])

# line style strings to pick from by index
linestyles = PlottingPropertyValues(['-', '--', '-.', ':'])

# single-letter color codes to pick from by index (each character of the string is one value)
colors = PlottingPropertyValues('brgmyck') # 'w'
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
703
115
550556378466 added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 86
diff changeset
def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
    '''Plots the indicator map as a grid of colored cells (matplotlib pcolor)

    indicatorMap is a dictionary of the values indexed by the (x, y) integer cell indices
    squareSize scales the cell indices into plot coordinates
    the cells of the bounding grid that are not in indicatorMap are set to defaultValue
    if masked, all the cells equal to defaultValue are masked in the plot

    Nothing is plotted if indicatorMap is empty'''
    from numpy import array, arange, ones, ma
    from matplotlib.pyplot import pcolor
    if len(indicatorMap) == 0: # no cell, the bounds of the grid cannot be computed
        return
    coords = array(list(indicatorMap.keys())) # list() is needed for the key views of Python 3
    minX = min(coords[:,0])
    minY = min(coords[:,1])
    # cell boundaries: one more than the number of cells in each direction
    X = arange(minX, max(coords[:,0])+1.1)*squareSize
    Y = arange(minY, max(coords[:,1])+1.1)*squareSize
    C = defaultValue*ones((len(Y), len(X)))
    for k,v in indicatorMap.items():
        C[k[1]-minY,k[0]-minX] = v # rows are y indices, columns are x indices
    if masked:
        pcolor(X, Y, ma.masked_where(C==defaultValue,C))
    else:
        pcolor(X, Y, C)
65
75cf537b8d88 moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 48
diff changeset
719
45
74d2de078baf added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 42
diff changeset
720 #########################
637
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
721 # Data download
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
722 #########################
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
723
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
def downloadECWeather(stationID, years, months = [], outputDirectoryname = '.', english = True):
    '''Downloads weather data from Environment Canada and saves it as csv files

    If months is provided (numbers 1 to 12), hourly data is downloaded
    for each of these months, for each year
    Otherwise, daily data is downloaded for each whole year

    One file is written per request in outputDirectoryname, named
    <stationID>-<year>-<month>-hourly.csv or <stationID>-<year>-daily.csv
    english selects the language code ('e' or 'f') put in the requested url

    Example: MONTREAL MCTAVISH 10761
             MONTREALPIERRE ELLIOTT TRUDEAU INTL A 5415

    To get daily data for 2010 and 2011, downloadECWeather(10761, [2010,2011], [], '/tmp')
    To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp')'''
    from contextlib import closing
    try:
        from urllib2 import urlopen # Python 2
    except ImportError:
        from urllib.request import urlopen # Python 3

    language = 'e' if english else 'f'
    if len(months) == 0:
        timeFrame = 2 # daily data for the whole year
        months = [1] # single request per year; rebinds the local name, the default list is not modified
    else:
        timeFrame = 1 # hourly data for each requested month

    for year in years:
        for month in months:
            # closing() releases the connection even if read() fails
            # (the Python 2 response object is not a context manager)
            with closing(urlopen('http://climat.meteo.gc.ca/climateData/bulkdata_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es'.format(language, stationID, year, month, timeFrame))) as response:
                data = response.read()
            outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year)
            if timeFrame == 1:
                outFilename += '-{}-hourly'.format(month)
            else:
                outFilename += '-daily'
            outFilename += '.csv'
            # binary mode: the downloaded payload is written unchanged
            # (bytes in Python 3, no newline translation on any platform)
            with open(outFilename, 'wb') as out:
                out.write(data)
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
758
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
759 #########################
c9a0b72979fd added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 615
diff changeset
760 # File I/O
27
44689029a86f updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents: 24
diff changeset
761 #########################
24
6fb59cfb201e first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents: 19
diff changeset
762
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
def removeExtension(filename, delimiter = '.'):
    '''Returns the filename minus the extension (all characters after the last delimiter)

    The delimiter is searched only in the last path component,
    so that directory names are never truncated
    ('../file' and 'a.b/c' are returned unchanged)
    The filename is returned unchanged if the last component does not
    contain the delimiter or starts with it (eg '.bashrc')'''
    from os.path import basename
    base = basename(filename)
    i = base.rfind(delimiter)
    if i>0:
        # translate the index in the last component to an index in the whole path
        return filename[:len(filename)-len(base)+i]
    else:
        return filename
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
770
46
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
def cleanFilename(s):
    '''Cleans filenames obtained when concatenating figure characteristics:
    spaces and slashes are replaced by dashes, dots are removed'''
    cleaned = s
    for old, new in ((' ', '-'), ('.', ''), ('/', '-')):
        cleaned = cleaned.replace(old, new)
    return cleaned
46
b5d007612e16 added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 45
diff changeset
774
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
def listfiles(dirname, extension, remove = False):
    '''Returns the sorted list of the files in the directory dirname
    whose name ends with extension
    If remove is True, the extension is stripped from each filename'''
    from os import listdir
    matching = sorted(name for name in listdir(dirname) if name.endswith(extension))
    if not remove:
        return matching
    return [removeExtension(name, extension) for name in matching]
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
785
266
aba9711b3149 small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 262
diff changeset
def mkdir(dirname):
    '''Creates a directory if it does not exist
    (a message is printed if the path already exists)

    Any other failure of os.mkdir (eg missing parent directory) is raised as OSError'''
    import os
    # try first instead of testing for existence beforehand:
    # the path could be created by another process between the test and the creation
    try:
        os.mkdir(dirname)
    except OSError:
        if not os.path.exists(dirname):
            raise
        print(dirname+' already exists')
aba9711b3149 small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 262
diff changeset
793
14
e7bbe8465591 homography and other utils
Nicolas Saunier <nico@confins.net>
parents: 7
diff changeset
def removeFile(filename):
    '''Deletes the file while avoiding raising an error
    if the file does not exist (a message is printed instead)

    Any other failure of os.remove (the path still exists afterwards) is raised as OSError'''
    import os
    # try first instead of testing for existence beforehand:
    # the file could be deleted by another process between the test and the removal
    try:
        os.remove(filename)
    except OSError:
        if os.path.exists(filename):
            raise
        print(filename+' does not exist')
14
e7bbe8465591 homography and other utils
Nicolas Saunier <nico@confins.net>
parents: 7
diff changeset
802
42
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
def line2Floats(l, separator=' '):
    '''Returns the list of floats corresponding to the string

    Leading and trailing whitespace (eg the end of line) is ignored
    and an empty or blank string yields an empty list
    An empty or non-numeric field inside the line raises ValueError'''
    stripped = l.strip()
    if len(stripped) == 0:
        # ''.split(separator) would yield [''], which float() rejects
        return []
    return [float(x) for x in stripped.split(separator)]
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
806
1a2ac2d4f53a added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 35
diff changeset
def line2Ints(l, separator=' '):
    '''Returns the list of ints corresponding to the string

    Leading and trailing whitespace (eg the end of line) is ignored
    and an empty or blank string yields an empty list
    An empty or non-integer field inside the line raises ValueError'''
    stripped = l.strip()
    if len(stripped) == 0:
        # ''.split(separator) would yield [''], which int() rejects
        return []
    return [int(x) for x in stripped.split(separator)]
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
810
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
811 #########################
332
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
812 # CLI utils
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
813 #########################
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
814
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
def parseCLIOptions(helpMessage, options, cliArgs, optionalOptions=[]):
    ''' Simple function to handle similar argument parsing
    Returns the dictionary of options and their values
    (keys are the options as typed, eg '--frame')

    * cliArgs are most likely directly sys.argv
    (only the elements after the first one are considered)

    * options should be a list of strings for getopt options,
    eg ['frame=','correspondences=','video=']
    A value must be provided for each option, or the program quits
    (with exit status 1 after printing the options that were parsed)

    * optionalOptions has the same format, but these options may be omitted

    * -h or --help prints helpMessage with the list of options and quits

    Options that are not declared raise getopt.GetoptError (not caught here)'''
    import sys, getopt
    optionValues, _ = getopt.getopt(cliArgs[1:], 'h', ['help']+options+optionalOptions)
    optionValues = dict(optionValues)

    if '--help' in optionValues or '-h' in optionValues:
        print(helpMessage+
              '\n - Compulsory options: '+' '.join([opt.replace('=','') for opt in options])+
              '\n - Non-compulsory options: '+' '.join([opt.replace('=','') for opt in optionalOptions]))
        sys.exit()

    # getopt keys are prefixed by '--' and do not have the trailing '=' of the declaration
    missingOptions = [opt for opt in options if '--'+opt.replace('=','') not in optionValues]
    if len(missingOptions) > 0:
        print('Missing argument')
        print(optionValues)
        sys.exit(1) # non-zero status: this is an error, unlike the help request

    return optionValues
a6ca86107f27 reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 324
diff changeset
843
397
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
844
b36b00dd27c3 added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 395
diff changeset
845 #########################
553
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
846 # Profiling
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
847 #########################
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
848
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
def analyzeProfile(profileFilename, stripDirs = True):
    '''Loads the file produced by cProfile, prints the statistics
    sorted by time (restricted by the .2 argument passed to print_stats)
    and returns the pstats.Stats object

    profileFilename is joined to the parent directory (os.pardir)
    If stripDirs is True, the directories are removed from the file names

    The profile file can be obtained for example:
    - by a call in a script (for the main() function in the script)
    import cProfile, os
    cProfile.run('main()', os.path.join(os.getcwd(),'main.profile'))

    - or on the command line:
    python -m cProfile [-o profile.bin] [-s sort] scriptfile [arg]'''
    import pstats
    from os import pardir
    from os.path import join
    stats = pstats.Stats(join(pardir, profileFilename))
    if stripDirs:
        stats.strip_dirs()
    # the Stats methods return the object itself and can be chained
    stats.sort_stats('time').print_stats(.2)
    # callees can be inspected with eg stats.print_callees(.1, 'int_prediction.py:')
    return stats
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
868
3622a5653ee9 added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents: 547
diff changeset
869 #########################
31
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
870 # running tests
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
871 #########################
c000f37c316d moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents: 29
diff changeset
872
0
aed8eb63cdde initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff changeset
if __name__ == "__main__":
    # runs the doctests stored in tests/utils.txt when the module is executed as a script
    import doctest
    import unittest
    # alternatives: doctest.DocTestSuite() for the docstrings of this module,
    # doctest.testmod() or doctest.testfile("example.txt")
    docFileTests = doctest.DocFileSuite('tests/utils.txt')
    runner = unittest.TextTestRunner()
    runner.run(docFileTests)