Mercurial Hosting > traffic-intelligence
annotate trafficintelligence/utils.py @ 1217:5038c357b57f
updating code for direct computation (very slow solver)
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Tue, 16 May 2023 22:12:39 -0400 |
parents | 4356065ed3ca |
children | 69b531c7a061 |
rev | line source |
---|---|
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
1 #! /usr/bin/env python |
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
2 ''' Generic utilities.''' |
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
3 |
397
b36b00dd27c3
added function to read scene metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
395
diff
changeset
|
4 from datetime import time, datetime |
971
9897a13772fb
added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
969
diff
changeset
|
5 from argparse import ArgumentTypeError |
1021
16932cefabc1
work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
997
diff
changeset
|
6 from pathlib import Path |
670
f72ed51c6b65
corrected other missing imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
669
diff
changeset
|
7 from math import sqrt, ceil, floor |
1029
c6cf75a2ed08
reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1028
diff
changeset
|
8 from copy import deepcopy, copy |
1034
4069d8545922
updated mostCommong function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1031
diff
changeset
|
9 from collections import Counter |
1029
c6cf75a2ed08
reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1028
diff
changeset
|
10 |
1156
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
11 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t, chi2_contingency |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
12 from scipy.spatial import distance |
840
15a82ebc62c4
utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
837
diff
changeset
|
13 from scipy.sparse import dok_matrix |
1168
d71a4d174b1a
corrected potential bug with dtype in image to world projection
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1156
diff
changeset
|
14 from numpy import zeros, array, exp, sum as npsum, int64 as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit |
1124 | 15 from numpy.random import random_sample, permutation as nppermutation |
1156
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
16 from pandas import DataFrame, concat, crosstab |
1029
c6cf75a2ed08
reorganization of imports
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1028
diff
changeset
|
17 import matplotlib.pyplot as plt |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
18 |
421
4fce27946c60
first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
405
diff
changeset
|
19 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
4fce27946c60
first example of video metadata using sqlalchemy
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
405
diff
changeset
|
20 |
969
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
21 sjcamDatetimeFormat = "%Y_%m%d_%H%M%S"#2017_0626_143720 |
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
22 |
185
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
23 ######################### |
1030
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
24 # txt files |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
25 ######################### |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
26 |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
# default first character of the comment lines, skipped when reading text files
commentChar = '#'

# default first character of the lines separating the entries in text files
delimiterChar = '%'
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
30 |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
def openCheck(filename, option = 'r', quitting = False):
    '''Tries to open the file filename with the mode option (read by default)

    Returns the open file object
    If the file cannot be opened (IOError), a message is printed,
    then the interpreter exits if quitting is True, otherwise None is returned'''
    try:
        openedFile = open(filename, option)
    except IOError:
        print('File {} could not be opened.'.format(filename))
        if not quitting:
            return None
        import sys
        sys.exit()
    else:
        return openedFile
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
42 |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
def readline(f, commentCharacters = commentChar):
    '''Returns the next line of f that is not a comment, stripped of surrounding whitespace

    A line is a comment if its first character is in commentCharacters,
    which can be a list of characters or a string (in works for both)
    The result is an empty string at the end of the file, and also for a blank line'''
    line = f.readline()
    while line and line[0] in commentCharacters:
        line = f.readline()
    return line.strip()
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
50 |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
def getLines(f, delimiterChar = delimiterChar, commentCharacters = commentChar):
    '''Gets a complete entry (all the lines) in between delimiterChar.

    The lines are read from f with readline (comment lines are skipped)
    and returned as a list of stripped strings
    Reading stops at the first line starting with delimiterChar, which is consumed;
    it also stops at a blank line or at the end of the file,
    since readline returns an empty string in both cases'''
    dataStrings = []
    s = readline(f, commentCharacters)
    while len(s) > 0 and s[0] != delimiterChar:
        dataStrings.append(s) # s is already stripped by readline
        s = readline(f, commentCharacters)
    return dataStrings
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
59 |
aafbc0bab925
moved method around to avoid cross-dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1029
diff
changeset
|
60 ######################### |
742
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
61 # Strings |
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
62 ######################### |
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
63 |
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
def upperCaseFirstLetter(s):
    '''Returns s where each word (separated by a space) has
    its first letter in upper case and its other letters in lower case

    Empty words (empty string, leading, trailing or repeated spaces) are kept as is,
    so that the spaces of s are preserved'''
    words = s.split(' ')
    # slicing w[:1] instead of indexing w[0] avoids an IndexError for empty words
    capitalizedWords = [w[:1].upper()+w[1:].lower() for w in words]
    return ' '.join(capitalizedWords)
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
68 |
971
9897a13772fb
added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
969
diff
changeset
|
class TimeConverter:
    '''Reads dates and times from strings written in a given format
    (the module datetimeFormat by default)'''
    def __init__(self, datetimeFormat = datetimeFormat):
        self.datetimeFormat = datetimeFormat

    def convert(self, s):
        '''Returns the datetime object read from s with the format of the converter

        Raises ArgumentTypeError if s does not match the format'''
        try:
            parsed = datetime.strptime(s, self.datetimeFormat)
        except ValueError:
            raise ArgumentTypeError("Not a valid date: '{0}'.".format(s))
        else:
            return parsed
9897a13772fb
added utils to load video sequence in metadata
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
969
diff
changeset
|
79 |
742
fe71639f1ee7
merge and added function to up-/lower-case strings
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
741
diff
changeset
|
80 ######################### |
185
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
81 # Enumerations |
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
82 ######################### |
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
83 |
c06379f25ab8
utilities for user types
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
181
diff
changeset
|
def inverseEnumeration(l):
    '''Returns the dictionary that provides for each element in the input list its index in the input list

    If an element appears several times in the list, its last index is kept'''
    return {x: i for i, x in enumerate(l)}
155
f03fe3d6d0c8
added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
152
diff
changeset
|
90 |
1135
342701cdac30
bug fix for concatenate
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1124
diff
changeset
|
def findElement(l, num):
    '''Returns the first element of the list l for which getNum() is equal to num

    Returns None if there is no such element (including if l is empty)'''
    # iterating over the elements cannot run past the end of the list,
    # whereas incrementing an index without checking it raised an IndexError when num was absent
    for element in l:
        if element.getNum() == num:
            return element
    return None
342701cdac30
bug fix for concatenate
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1124
diff
changeset
|
99 |
155
f03fe3d6d0c8
added functions to parse options
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
152
diff
changeset
|
100 ######################### |
637
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
101 # Simple statistics |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
102 ######################### |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
103 |
680
da1352b89d02
classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
677
diff
changeset
|
def logNormalMeanVar(loc, scale):
    '''Returns the mean and the variance of the log-normal distribution
    for which loc and scale are respectively the mean and standard deviation of the normal distribution (of the logarithm)
    https://en.wikipedia.org/wiki/Log-normal_distribution

    same as lognorm.stats(scale, 0, exp(loc))'''
    sigma2 = scale**2
    m = exp(loc+sigma2/2)
    v = (exp(sigma2)-1)*exp(2*loc+sigma2)
    return m, v
da1352b89d02
classification is working
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
677
diff
changeset
|
112 |
855
2277ab1a8141
added utility for lognorm estimation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
854
diff
changeset
|
def fitLogNormal(x):
    '''Fits a log-normal distribution to the sample x (location of scipy lognorm fixed to 0)

    returns the fitted location and scale of the lognormal (general definition),
    ie the logarithm of the scale and the shape fitted by scipy lognorm'''
    fittedParameters = lognorm.fit(x, floc=0.)
    # scipy order: shape, loc, scale
    return log(fittedParameters[2]), fittedParameters[0]
2277ab1a8141
added utility for lognorm estimation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
854
diff
changeset
|
117 |
859
a8de3c93f6b7
minor modifications to helper stat functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
856
diff
changeset
|
def sampleSize(stdev, tolerance, percentConfidence, nRoundingDigits = None, printLatex = False):
    '''Returns the sample size (k*stdev/tolerance)^2 (not rounded to an integer)
    where k is the quantile of the standard normal distribution for the probability 0.5+percentConfidence/200

    if nRoundingDigits is None, only k is rounded (to 2 decimals),
    otherwise k, stdev and tolerance are rounded to nRoundingDigits decimals
    if printLatex, the formula is printed in LaTeX'''
    probability = 0.5+percentConfidence/200.
    quantile = norm.ppf(probability, 0, 1)
    if nRoundingDigits is not None:
        k = round(quantile, nRoundingDigits)
        stdev = round(stdev, nRoundingDigits)
        tolerance = round(tolerance, nRoundingDigits)
    else:
        k = round(quantile, 2) # 1.-(100-percentConfidence)/200.
    if printLatex:
        print('$z_{{{}}}^2\\frac{{s^2}}{{e^2}}={}^2\\frac{{{}^2}}{{{}^2}}$'.format(0.5+percentConfidence/200.,k, stdev, tolerance))
    return (k*stdev/tolerance)**2
f738fa1b69f0
added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
421
diff
changeset
|
128 |
f738fa1b69f0
added sample size and Student distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
421
diff
changeset
|
def confidenceInterval(mean, stdev, nSamples, percentConfidence, trueStd = True, printLatex = False):
    '''Returns the lower and upper bounds of the confidence interval
    mean -/+ k*stdev/sqrt(nSamples) at percentConfidence (in percent)

    if trueStd, use normal distribution, otherwise, Student (with nSamples-1 degrees of freedom)
    The quantile k is rounded to 2 decimals
    if printLatex, the formula of the interval is printed in LaTeX

    Use otherwise t.interval or norm.interval for the boundaries
    ex: norm.interval(0.95)
    t.interval(0.95, nSamples-1)'''
    probability = 0.5+percentConfidence/200.
    if trueStd:
        k = round(norm.ppf(probability, 0, 1), 2)
    else: # use Student
        k = round(t.ppf(probability, nSamples-1), 2)
    e = k*stdev/sqrt(nSamples)
    if printLatex:
        # all backslashes are doubled: \p and \s are invalid escape sequences in a regular string literal
        # (the printed text is unchanged)
        print('${0} \\pm {1}\\frac{{{2}}}{{\\sqrt{{{3}}}}}$'.format(mean, k, stdev, nSamples))
    return mean-e, mean+e
27f06d28036d
added simple helper for confidence intervals
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
297
diff
changeset
|
143 |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
def computeChi2(expected, observed):
    '''Returns the Chi2 statistics,
    ie the sum of the squared differences between the expected and observed values,
    each divided by the expected value'''
    chi2 = 0
    for e, o in zip(expected, observed):
        difference = e-o
        chi2 += (difference*difference)/float(e)
    return chi2
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
147 |
1103
7594802f281a
added constant distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1089
diff
changeset
|
class ConstantDistribution(object):
    '''Distribution for which the random variable always takes the same value'''
    def __init__(self, value):
        self.value = value

    def rvs(self, size = 1):
        '''Returns the value itself if size is 1,
        otherwise an array containing size times the value'''
        if size != 1:
            return array([self.value]*size)
        return self.value
7594802f281a
added constant distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1089
diff
changeset
|
158 |
1031
045cb04ad7b8
corrected bug in distribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1030
diff
changeset
|
class EmpiricalContinuousDistribution(rv_continuous):
    '''Continuous distribution defined by points of its cumulative distribution function,
    with linear interpolation between the points'''
    def __init__(self, values, probabilities, **kwargs):
        '''The values (and corresponding probabilities) are supposed to be sorted by value
        for v, p in zip(values, probabilities): P(X<=v)=p

        The first probability must be 0
        kwargs are passed to the constructor of rv_continuous'''
        assert probabilities[0]==0
        super(EmpiricalContinuousDistribution, self).__init__(**kwargs)
        self.values = values
        self.probabilities = probabilities

    def save(self, filename):
        '''Saves the list of values and the list of probabilities in the YAML file filename'''
        import yaml
        with open(filename, 'w') as out:
            yaml.dump([self.values, self.probabilities], out)

    @staticmethod
    def load(filename):
        '''Returns the distribution built from the values and probabilities
        read in the YAML file filename (format written by save)'''
        import yaml
        with open(filename) as f:
            # the Loader argument is required by recent versions of PyYAML (TypeError without it)
            # NOTE(review): yaml.Loader can build arbitrary Python objects, so only trusted files should be loaded;
            # yaml.safe_load could be used instead if the saved values are always plain lists of numbers - to confirm
            values, probabilities = yaml.load(f, Loader=yaml.Loader)
        return EmpiricalContinuousDistribution(values, probabilities)

    def _cdf(self, x):
        '''Returns P(X<=x) for a number or an array of numbers x

        The probability is interpolated linearly between the known values;
        it is the first probability below the first value and the last probability above the last value'''
        from numpy import interp
        # interp accepts arrays, which are passed by the cdf method of rv_continuous,
        # and does not divide by zero if consecutive values are equal
        return interp(x, self.values, self.probabilities, left = self.probabilities[0], right = self.probabilities[-1])
1028
cc5cb04b04b0
major update using the trafficintelligence package name and install through pip
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1023
diff
changeset
|
191 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
class DistributionSample(object):
    '''Base class of the samples described by counts of observations

    nSamples reads self.counts, which this class does not set itself'''
    def nSamples(self):
        'Returns the total number of observations, ie the sum of the counts'
        total = 0
        for c in self.counts:
            total += c
        return total
7f1e54234f96
added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
77
diff
changeset
|
195 |
588
c5406edbcf12
added loading ground truth annotations (ground truth) from polytrack format
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
574
diff
changeset
|
def cumulativeDensityFunction(sample, normalized = False):
    '''Returns the empirical cumulative distribution of the sample of a random variable

    Returns the sorted values of the sample and the cumulative counts (1 to n),
    divided by the sample size n if normalized is True'''
    n = len(sample)
    cumulated = arange(1, n+1)
    if normalized:
        cumulated = cumulated.astype(float)/float(n)
    return sorted(sample), cumulated
85
7f1e54234f96
added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
77
diff
changeset
|
203 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
class DiscreteDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a discrete random variable

    categories are the values taken by the variable,
    counts the number of observations of each category (in the same order)'''
    def __init__(self, categories, counts):
        self.categories = categories
        self.counts = counts

    def mean(self):
        'Returns the sample mean: the categories weighted by their counts'
        result = [float(x*y) for x,y in zip(self.categories, self.counts)]
        return npsum(result)/self.nSamples()

    def var(self, mean = None):
        '''Returns the sample variance (sum of squared deviations divided by nSamples-1)

        mean is the value around which the deviations are computed,
        the sample mean is used if mean is None'''
        # compare to None: 0 is a legitimate value for the mean argument
        if mean is None:
            m = self.mean()
        else:
            m = mean
        squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)]
        return npsum(squares)/(self.nSamples()-1)

    def referenceCounts(self, probability):
        '''probability is a function that returns the probability of the random variable for the category values

        Returns the expected counts and the probabilities of the categories
        the probability of the last category is replaced by the complement to 1
        of the sum of the probabilities of the other categories'''
        refProba = [probability(c) for c in self.categories]
        refProba[-1] = 1-npsum(refProba[:-1])
        nSamples = self.nSamples()
        refCounts = [r*nSamples for r in refProba]
        return refCounts, refProba
85
7f1e54234f96
added empirical discrete distribution, modified class organization and names
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
77
diff
changeset
|
229 |
749
10dbab1e871d
modifications in samples and distributions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
742
diff
changeset
|
class ContinuousDistributionSample(DistributionSample):
    '''Class to represent a sample of a distribution for a continuous random variable
    with the number of observations for each interval
    intervals (categories variable) are defined by their left limits, the last one being the right limit
    categories contain therefore one more element than the counts'''
    def __init__(self, categories, counts):
        # todo add samples for initialization and everything to None? (or setSamples?)
        self.categories = categories
        self.counts = counts

    @staticmethod
    def generate(sample, categories):
        '''Returns the ContinuousDistributionSample counting the elements of sample
        in the intervals defined by the (sorted) categories

        Interval i is [categories[i], categories[i+1][: an element equal to the last limit
        or outside of the categories is reported and not counted'''
        from bisect import bisect_right
        if min(sample) < min(categories):
            print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories)))
        if max(sample) > max(categories):
            print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories)))
        dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1))
        for s in sample:
            # number of limits <= s, ie s is in the interval of index i-1
            i = bisect_right(dist.categories, s)
            # i == 0 means s is below the first limit: it must not be counted
            # (index i-1 = -1 would wrongly increment the last interval)
            if 1 <= i <= len(dist.counts):
                dist.counts[i-1] += 1
            else:
                print('Element {} is not in the categories'.format(s))
        return dist

    def _midPoints(self):
        'Returns the middle of each interval (as many elements as counts)'
        return [(self.categories[i]+self.categories[i+1])/2 for i in range(len(self.counts))]

    def mean(self):
        'Returns the sample mean, each observation being placed at the middle of its interval'
        result = 0.
        # all the intervals are used, including the last one
        for count, mid in zip(self.counts, self._midPoints()):
            result += count*mid
        return result/self.nSamples()

    def var(self, mean = None):
        '''Returns the sample variance (sum of squared deviations divided by nSamples-1),
        each observation being placed at the middle of its interval

        mean is the value around which the deviations are computed,
        the sample mean is used if mean is None'''
        # compare to None: 0 is a legitimate value for the mean argument
        if mean is None:
            m = self.mean()
        else:
            m = mean
        result = 0.
        for count, mid in zip(self.counts, self._midPoints()):
            result += count*(mid - m)*(mid - m)
        return result/(self.nSamples()-1)

    def referenceCounts(self, cdf):
        '''cdf is a cumulative distribution function
        returning the probability of the variable being less that x

        Returns the expected counts and the probabilities of the intervals
        the first interval gets all the probability below its right limit
        and the last one all the probability above its left limit'''
        cumulative = [cdf(x) for x in self.categories[1:-1]]
        if len(cumulative) == 0:
            # single interval: it receives all the probability
            refProba = [1.]
        else:
            refProba = [cumulative[0]]
            refProba.extend(c2-c1 for c1, c2 in zip(cumulative[:-1], cumulative[1:]))
            refProba.append(1-cumulative[-1])
        nSamples = self.nSamples()
        refCounts = [p*nSamples for p in refProba]

        return refCounts, refProba

    def printReferenceCounts(self, refCounts=None, cdf=None):
        '''Prints one line per interval (LaTeX table row format)
        with the limits of the interval, its reference probability and reference count

        refCounts is the result of referenceCounts (counts, probabilities)
        if it is None, it is computed from cdf
        Raises ValueError if neither refCounts nor cdf is provided'''
        if refCounts is not None:
            ref = refCounts
        elif cdf is not None:
            ref = self.referenceCounts(cdf)
        else:
            raise ValueError('printReferenceCounts requires either refCounts or cdf')
        for i in range(len(ref[0])):
            print('{0}-{1} & {2:0.3} & {3:0.3} \\\\'.format(self.categories[i],self.categories[i+1],ref[1][i], ref[0][i]))
5e6cd36a991c
added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
76
diff
changeset
|
298 |
5e6cd36a991c
added pretty print in empiricalDistribution
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
76
diff
changeset
|
299 |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
300 ######################### |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
301 # maths section |
27
44689029a86f
updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents:
24
diff
changeset
|
302 ######################### |
24
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
303 |
433
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
304 # def kernelSmoothing(sampleX, X, Y, weightFunc, halfwidth): |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
305 # '''Returns a smoothed weighted version of Y at the predefined values of sampleX |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
306 # Sum_x weight(sample_x,x) * y(x)''' |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
307 # from numpy import zeros, array |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
308 # smoothed = zeros(len(sampleX)) |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
309 # for i,x in enumerate(sampleX): |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
310 # weights = array([weightFunc(x,xx, halfwidth) for xx in X]) |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
311 # if sum(weights)>0: |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
312 # smoothed[i] = sum(weights*Y)/sum(weights) |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
313 # else: |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
314 # smoothed[i] = 0 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
315 # return smoothed |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
316 |
def generateData(nrows, nvariables, scale):
    '''Returns a DataFrame of nrows rows and nvariables columns named x1, x2, etc.
    filled with the output of random_sample multiplied by scale'''
    names = ['x{}'.format(j) for j in range(1, nvariables+1)]
    values = random_sample(nrows*nvariables).reshape(nrows,nvariables)*scale
    return DataFrame(values, columns=names)
320 | |
433
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def kernelSmoothing(x, X, Y, weightFunc, halfwidth):
    '''Returns the smoothed estimate of (X,Y) at x: the average of the Y's
    weighted by weightFunc(x, observedx, halfwidth) for each observedx in X

    Returns 0 if the sum of the weights is not positive'''
    w = array([weightFunc(x, xi, halfwidth) for xi in X])
    if not sum(w) > 0:
        return 0
    return sum(w*Y)/sum(w)
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
329 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def uniform(center, x, halfwidth):
    'Uniform kernel: returns 1. if x is strictly within halfwidth of center, 0. otherwise'
    return 1. if abs(center-x)<halfwidth else 0.
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
335 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def gaussian(center, x, halfwidth):
    '''Gaussian kernel (not normalized): returns exp(-z^2/2)
    where z is the difference between center and x divided by halfwidth'''
    z = (center-x)/halfwidth
    return exp(-z**2/2)
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
338 |
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
def epanechnikov(center, x, halfwidth):
    '''Epanechnikov kernel (not normalized): returns 1-(d/halfwidth)^2
    where d is the distance between center and x, if d is strictly less than halfwidth,
    0. otherwise'''
    distance = abs(center-x)
    if not distance<halfwidth:
        return 0.
    return 1.-(distance/halfwidth)**2
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
345 |
def triangular(center, x, halfwidth):
    '''Triangular kernel: returns 1-d/halfwidth
    where d is the distance between center and x, if d is strictly less than halfwidth,
    0. otherwise'''
    distance = abs(center-x)
    return 1.-abs(distance/halfwidth) if distance<halfwidth else 0.
433
d40ad901b272
added kernel smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
423
diff
changeset
|
352 |
518
0c86c73f3c09
median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
511
diff
changeset
|
def medianSmoothing(x, X, Y, halfwidth):
    '''Returns the median of the Y's corresponding to the X's
    strictly within halfwidth of x, ie in the open interval ]x-halfwidth, x+halfwidth['''
    neighbours = []
    for observedx, y in zip(X, Y):
        if abs(x-observedx)<halfwidth:
            neighbours.append(y)
    return median(neighbours)
0c86c73f3c09
median smoothing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
511
diff
changeset
|
356 |
521
3707eeb20f25
changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
518
diff
changeset
|
def argmaxDict(d):
    '''Returns the key of d with the largest value
    (the first one in iteration order if several keys have the largest value)'''
    bestKey, _ = max(d.items(), key=lambda item: item[1])
    return bestKey
279
3af4c267a7bf
generic simple LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
276
diff
changeset
|
359 |
837
e01cabca4c55
minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
749
diff
changeset
|
def deltaFrames(t1, t2, frameRate):
    '''Returns the number of frames between t1 and t2
    positive if t1<=t2, negative otherwise

    The difference of t1 and t2 must be a datetime.timedelta (eg t1 and t2 are datetime.datetime)
    Whole days are taken into account, microseconds are ignored'''
    if t1 > t2:
        delta = t1-t2
        sign = -1
    else:
        delta = t2-t1
        sign = 1
    # timedelta.seconds is only the remainder within a day (0 to 86399):
    # the days have to be added explicitly
    nSeconds = delta.days*86400+delta.seconds
    return sign*nSeconds*frameRate
e01cabca4c55
minor modifications to merge-features
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
749
diff
changeset
|
367 |
395
6fba1ab040f1
minor modification to framestotime
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
391
diff
changeset
|
def framesToTime(nFrames, frameRate, initialTime = time()):
    '''returns the datetime.time (hour, minutes and seconds)
    obtained by adding the duration of nFrames at frameRate to initialTime

    initialTime is a datetime.time
    the duration of the frames is rounded down to whole seconds'''
    elapsed = int(floor(float(nFrames)/float(frameRate)))
    offset = initialTime.hour*3600+initialTime.minute*60+initialTime.second
    h, remainder = divmod(elapsed+offset, 3600)
    m, s = divmod(remainder, 60)
    return time(h, m, s)
248
571ba5ed22e2
added utils for bus processing
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
241
diff
changeset
|
377 |
381
387cc0142211
script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
376
diff
changeset
|
def timeToFrames(t, frameRate):
    '''Returns frameRate times the number of seconds
    represented by the hour, minute and second of t'''
    nSeconds = t.second+60*(t.minute+60*t.hour)
    return frameRate*nSeconds
387cc0142211
script to replay event annotations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
376
diff
changeset
|
380 |
1059
a87b3072bd26
working version
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1058
diff
changeset
|
def timeModulo(t, duration):
    '''returns the time t where the minutes are replaced
    by the integer quotient of the minutes by the duration (in min)
    the hour and second are kept'''
    # NOTE(review): despite the name, this is a floor division and not a modulo - confirm the intent with the callers
    quotient = t.minute//duration
    return time(hour=t.hour, minute=quotient, second=t.second)
a87b3072bd26
working version
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1058
diff
changeset
|
384 |
241
ee1caff48b03
added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
235
diff
changeset
|
def sortXY(X,Y):
    '''returns the sorted x and the corresponding Y(x), sorted on X
    if a value appears several times in X, it is returned once, with the last corresponding y'''
    lookup = dict(zip(X, Y))
    xsorted = sorted(lookup)
    return xsorted, [lookup[x] for x in xsorted]
ee1caff48b03
added function to sort to list of paired data X,Y
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
235
diff
changeset
|
392 |
def compareLengthForSort(i, j):
    '''Comparison function on lengths:
    returns -1, 0 or 1 if i is respectively shorter than, as long as, or longer than j'''
    lengthI, lengthJ = len(i), len(j)
    if lengthI == lengthJ:
        return 0
    return -1 if lengthI < lengthJ else 1
400 | |
def sortByLength(instances, reverse = False):
    '''Returns a new list with the instances sorted by length (method __len__)
    The input is not modified and the sort is stable;
    reverse = True sorts from longest to shortest'''
    result = list(instances)
    result.sort(key = len, reverse = reverse)
    return result
733 | 405 |
def ceilDecimals(v, nDecimals):
    '''Rounds the number up (ceiling) at the nth decimal
    eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3

    The computation is done in floating point: the result is subject to the usual representation errors'''
    scale = 10**nDecimals
    scaledValue = v*scale
    return ceil(scaledValue)/scale
411 | |
152
74b1fc68d4df
re-organized code to avoid cyclic python module dependencies
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
116
diff
changeset
|
def inBetween(bound1, bound2, x):
    '''Indicates whether x lies in the closed interval delimited by bound1 and bound2
    useful if one does not know the order of bound1/bound2'''
    if bound1 <= x <= bound2:
        return True
    return bound2 <= x <= bound1
0057c04f94d5
work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
561
diff
changeset
|
415 |
0057c04f94d5
work in progress on intersections (for PET)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
561
diff
changeset
|
def pointDistanceL2(x1,y1,x2,y2):
    ''' Compute the distance between the points (x1, y1) and (x2, y2)
    (L2 norm, ie Euclidean distance)'''
    deltaX = x2-x1
    deltaY = y2-y1
    return sqrt(deltaX**2+deltaY**2)
24
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
419 |
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
def crossProduct(l1, l2):
    '''Returns the cross product (scalar) of the first two coordinates of l1 and l2,
    ie l1[0]*l2[1]-l1[1]*l2[0]'''
    x1, y1 = l1[0], l1[1]
    x2, y2 = l2[0], l2[1]
    return x1*y2-y1*x2
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
422 |
1086
8734742c08c0
major refactoring of curvilinear trajectory projections
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1060
diff
changeset
|
def filterCategoricalMovingWindow(cat_list, halfWidth):
    ''' Return a list of categories/values smoothed according to a window.
    halfWidth is the search radius on either side

    Each element is replaced by the most frequent value among the elements
    at most halfWidth positions away (the window is truncated at both ends of the list)
    Ties are broken in favor of the value that appears first in the window
    The values must be hashable; the input is not modified'''
    nValues = len(cat_list)
    smoothed = []
    for point in range(nValues):
        lower_bound_check = max(0,point-halfWidth)
        # the end of a slice is exclusive: it is bounded by nValues (and not nValues-1)
        # so that the last element can belong to a window (the window was otherwise empty for halfWidth = 0)
        upper_bound_check = min(nValues,point+halfWidth+1)
        window_values = cat_list[lower_bound_check:upper_bound_check]
        # Counter counts in one pass and orders equal counts by first occurrence
        smoothed.append(Counter(window_values).most_common(1)[0][0])
    return smoothed
e24eeb244698
first implementation of projection to curvilinear coordinates
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
569
diff
changeset
|
433 |
1200
4356065ed3ca
updated simple moving average filter and cleaned tests
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1168
diff
changeset
|
def filterMovingWindow(inputSignal, halfWidth, mode = 'valid'):
    '''Returns the array obtained by smoothing the input with a moving average

    The window has 2*halfWidth+1 elements, limited to the length of the input
    The size of the output depends on the mode: 'full', 'same', 'valid'
    See https://numpy.org/doc/stable/reference/generated/numpy.convolve.html.'''
    width = min(len(inputSignal), int(halfWidth*2+1))
    # uniform weights summing to 1
    kernel = ones(width,'d')/width
    signal = array(inputSignal)
    return convolve(kernel, signal, mode)
29
ca8e716cc231
added moving average filter
Nicolas Saunier <nico@confins.net>
parents:
27
diff
changeset
|
441 |
199
ca9d9104afba
added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
197
diff
changeset
|
def linearRegression(x, y, deg = 1, plotData = False):
    '''Returns the least square estimation of the polynomial regression of y over x
    (y = ax+b for the default degree deg = 1)

    The coefficients are returned as computed by polyfit (highest degree first)
    If plotData is True, the data points and the fitted polynomial are also plotted'''
    coef = polyfit(x, y, deg)
    if not plotData:
        return coef

    maxPower = len(coef)-1
    def poly(x):
        'evaluates the fitted polynomial at x'
        return sum(c*x**(maxPower-i) for i, c in enumerate(coef))

    plt.plot(x, y, 'x')
    xMin, xMax = min(x), max(x)
    # 1000 regularly spaced abscissas over the range of the data
    xx = arange(xMin, xMax, (xMax-xMin)/1000)
    plt.plot(xx, [poly(z) for z in xx])
    return coef
ca9d9104afba
added utility to calibrate polynoms and plot
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
197
diff
changeset
|
456 |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
def correlation(data, correlationMethod = 'pearson', plotFigure = False, displayNames = None, figureFilename = None):
    '''Computes (and displays) the correlation matrix for a pandas DataFrame

    The following columns are left out of the matrix: constant columns, columns of object dtype
    and columns with only one other value than nan
    correlationMethod is passed to DataFrame.corr
    If plotFigure is True, the matrix is drawn as an image,
    the labels being renamed through the displayNames dictionary if it is provided,
    and the figure is saved in figureFilename if it is not None

    Returns the correlation matrix'''
    def isExcluded(var):
        'indicates whether the column cannot be used in the correlation matrix'
        uniqueValues = data[var].unique()
        if len(uniqueValues) == 1:
            return True
        if data.dtypes[var] == dtype('O'):
            return True
        # only one other value than nan
        return len(uniqueValues) == 2 and len(data.loc[~isnan(data[var]), var].unique()) == 1

    columns = [var for var in data.columns.tolist() if not isExcluded(var)]
    c=data[columns].corr(correlationMethod)
    if not plotFigure:
        return c

    nVariables = c.shape[0]
    fig = plt.figure(figsize=(4+0.4*nVariables, 0.4*nVariables))
    fig.add_subplot(1,1,1)
    plt.imshow(c, vmin=-1., vmax = 1., interpolation='none', cmap = 'RdYlBu_r') # alternative colormap: coolwarm
    if displayNames is None:
        colnames = columns
    else:
        colnames = [displayNames.get(s.strip(), s.strip()) for s in columns]
    tickPositions = range(len(colnames))
    plt.xticks(tickPositions, colnames, rotation=90)
    plt.yticks(tickPositions, colnames)
    plt.tick_params('both', length=0)
    plt.subplots_adjust(bottom = 0.29)
    plt.colorbar()
    plt.title('Correlation ({})'.format(correlationMethod))
    plt.tight_layout()
    if len(colnames) > 50:
        plt.subplots_adjust(left=.06)
    if figureFilename is not None:
        plt.savefig(figureFilename, dpi = 150, transparent = True)
    return c
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
487 |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
def addDummies(data, variables, allVariables = True):
    '''Add binary dummy variables for each value of a nominal variable
    in a pandas DataFrame (the new columns are added to data in place)

    Only the variables that are columns of data, of object dtype
    and with more than 2 unique values (missing values included in the count) are processed
    Missing values (NaN, None) never get a dummy variable
    If allVariables is False, no dummy variable is created for the last non-missing value
    (reference category)
    The name of a dummy variable is variable_value, without the characters '.', ' ' and '-'

    Returns the list of the names of the new columns'''
    newVariables = []
    for var in variables:
        if var in data.columns and data.dtypes[var] == dtype('O') and len(data[var].unique()) > 2:
            # missing values are dropped before choosing the reference category:
            # an identity test against NaN misses other NaN objects and None,
            # and the category left out when allVariables is False must be an actual value
            values = data[var].dropna().unique()
            if not allVariables:
                values = values[:-1]
            for val in values:
                newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
                data[newVariable] = (data[var] == val)
                newVariables.append(newVariable)
    return newVariables
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
503 |
997
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = ''):
    '''Studies the influence of (nominal) independent variable over the dependent variable

    Tests if the conditional distributions are normal
    using the Shapiro-Wilk test (in which case ANOVA could be used),
    then uses the non-parametric Kruskal Wallis test

    The rows of data (pandas DataFrame) with a missing independent variable are ignored
    If plotFigure is True, the boxplots of the conditional distributions are drawn
    (and saved with the extension figureFileType if filenamePrefix is not None)
    The table of descriptive statistics per value of the independent variable
    (sorted by decreasing median) is printed,
    or written in filenamePrefix-dependentVariable-independentVariable.tex if saveLatex is True
    (filenamePrefix must then be provided), with kwCaption as caption template
    formatted with the names of the variables and the test result
    renameVariables is not used

    Returns the result of the Kruskal Wallis test,
    or None if the independent variable has less than 2 values'''
    tmp = data[data[independentVariable].notnull()]
    independentVariableValues = sorted(tmp[independentVariable].unique().tolist())
    if len(independentVariableValues) < 2:
        return None

    # conditional samples, computed once and reused by the tests and the boxplots
    samples = [tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]
    for x, sample in zip(independentVariableValues, samples):
        print('Shapiro-Wilk normality test for {} when {}={}: {} obs'.format(dependentVariable,independentVariable, x, len(sample)))
        if len(sample) >= 3:
            print(shapiro(sample))
    if plotFigure:
        plt.figure()
        plt.boxplot(samples)
        plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues)
        plt.title('{} vs {}'.format(dependentVariable, independentVariable))
        if filenamePrefix is not None:
            plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType))
    table = tmp.groupby(independentVariable)[dependentVariable].describe()
    if table.ndim == 1: # older versions of pandas return a stacked Series
        table = table.unstack()
    # DataFrame.sort does not exist anymore in pandas
    table = table.sort_values('50%', ascending = False)
    table['count'] = table['count'].astype(int)
    testResult = kruskal(*samples)
    if saveLatex:
        # openCheck is defined elsewhere in this module
        # NOTE(review): presumably it returns None if the file cannot be opened - to be confirmed
        out = openCheck(filenamePrefix+'-{}-{}.tex'.format(dependentVariable, independentVariable), 'w')
        if out is not None:
            try:
                # the table is kept as str: bytes cannot be concatenated with str in Python 3
                out.write('\\begin{minipage}{\\linewidth}\n'
                          +'\\centering\n'
                          +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n'
                          +table.to_latex(float_format = lambda x: '{:.3f}'.format(x))+'\n'
                          +'\\end{minipage}\n'
                          +'\\ \\vspace{0.5cm}\n')
            finally:
                out.close()
    else:
        print(table)
    return testResult
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
541 |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
542 def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc, stdoutText = ['Removing {} (constant: {})', 'Removing {} (correlation {} with {})', 'Removing {} (no correlation: {}, p={})'], saveFiles = False, filenamePrefix = None, latexHeader = '', latexTable = None, latexFooter=''): |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
543 '''Removes variables from candidate independent variables if |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
544 - if two independent variables are correlated (> maxCorrelationThreshold), one is removed |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
545 - if an independent variable is not correlated with the dependent variable (p>maxCorrelationP) |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
546 Returns the remaining non-correlated variables, correlated with the dependent variable |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
547 |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
548 correlationFunc is spearmanr or pearsonr from scipy.stats |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
549 text is the template to display for the two types of printout (see default): 3 elements if no saving to latex file, 8 otherwise |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
550 |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
551 TODO: pass the dummies for nominal variables and remove if all dummies are correlated, or none is correlated with the dependentvariable''' |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
552 result = copy(independentVariables) |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
553 table1 = '' |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
554 table2 = {} |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
555 # constant variables |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
556 for var in independentVariables: |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
557 uniqueValues = data[var].unique() |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
558 if (len(uniqueValues) == 1) or (len(uniqueValues) == 2 and uniqueValues.dtype != dtype('O') and len(data.loc[~isnan(data[var]), var].unique()) == 1): |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
559 print(stdoutText[0].format(var, uniqueValues)) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
560 if saveFiles: |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
561 table1 += latexTable[0].format(var, *uniqueValues) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
562 result.remove(var) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
563 # correlated variables |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
564 for v1 in copy(result): |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
565 if v1 in correlations.index: |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
566 for v2 in copy(result): |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
567 if v2 != v1 and v2 in correlations.index: |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
568 if abs(correlations.loc[v1, v2]) > maxCorrelationThreshold: |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
569 if v1 in result and v2 in result: |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
570 if saveFiles: |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
571 table1 += latexTable[1].format(v2, v1, correlations.loc[v1, v2]) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
572 print(stdoutText[1].format(v2, v1, correlations.loc[v1, v2])) |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
573 result.remove(v2) |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
574 # not correlated with dependent variable |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
575 table2['Correlations'] = [] |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
576 table2['Valeurs p'] = [] |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
577 for var in copy(result): |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
578 if data.dtypes[var] != dtype('O'): |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
579 cor, p = correlationFunc(data[dependentVariable], data[var]) |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
580 if p > maxCorrelationP: |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
581 if saveFiles: |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
582 table1 += latexTable[2].format(var, cor, p) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
583 print(stdoutText[2].format(var, cor, p)) |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
584 result.remove(var) |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
585 else: |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
586 table2['Correlations'].append(cor) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
587 table2['Valeurs p'].append(p) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
588 |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
589 if saveFiles: |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
590 out = openCheck(filenamePrefix+'-removed-variables.tex', 'w') |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
591 out.write(latexHeader) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
592 out.write(table1) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
593 out.write(latexFooter) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
594 out.close() |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
595 out = openCheck(filenamePrefix+'-correlations.html', 'w') |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
596 table2['Variables'] = [var for var in result if data.dtypes[var] != dtype('O')] |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
597 out.write(DataFrame(table2)[['Variables', 'Correlations', 'Valeurs p']].to_html(formatters = {'Correlations': lambda x: '{:.2f}'.format(x), 'Valeurs p': lambda x: '{:.3f}'.format(x)}, index = False)) |
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
598 out.close() |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
599 return result |
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
600 |
def saveDokMatrix(filename, m, lowerTriangle = False):
    '''Saves a dok_matrix to filename using savez

    The archive contains three arrays:
    - shape: the shape of the matrix
    - keys: the (row, column) index pairs of the stored elements
    - values: the stored elements, in the same order as keys

    If lowerTriangle is True, only the elements strictly below the diagonal
    (row index > column index) are saved'''
    if lowerTriangle:
        # explicit keys(): iterating directly over a sparse matrix is not guaranteed
        # to yield the (row, column) keys (sparse matrices define their own iteration)
        keys = [k for k in m.keys() if k[0] > k[1]]
        values = [m[k[0], k[1]] for k in keys]
    else:
        keys = list(m.keys())
        values = list(m.values())
    savez(filename, shape = m.shape, keys = keys, values = values)
840
15a82ebc62c4
utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
837
diff
changeset
|
608 |
15a82ebc62c4
utils for sparse matrix
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
837
diff
changeset
|
def loadDokMatrix(filename):
    '''Loads a dok_matrix saved using saveDokMatrix

    filename is an archive containing the arrays shape, keys and values
    Returns a new dok_matrix of the saved shape
    in which each saved key (index pair) is set to the corresponding value'''
    # the archive keeps the file open: read all the arrays inside the with block
    # so that the file is closed as soon as the data is in memory
    with npload(filename) as data:
        shape = tuple(data['shape'])
        keys = data['keys']
        values = data['values']
    m = dok_matrix(shape)
    for k, v in zip(keys, values):
        m[tuple(k)] = v
    return m
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
616 |
1023
a13f47c8931d
work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1022
diff
changeset
|
def aggregationFunction(funcStr, centile = 50):
    '''Returns the aggregation function designated by funcStr

    funcStr is one of 'median', 'mean', 'centile' or '85centile'
    centile is only used for 'centile': it is passed as is to percentile,
    and can be a list of centiles to compute at once, eg [25, 50, 75] for the 3 quartiles

    Prints a message and returns None if funcStr is not recognized'''
    knownFunctions = (('median', median),
                      ('mean', mean),
                      ('centile', lambda x: percentile(x, centile)),
                      ('85centile', lambda x: percentile(x, 85)))
    for name, func in knownFunctions:
        if funcStr == name:
            return func
    print('Unknown aggregation method: {}'.format(funcStr))
    return None
a13f47c8931d
work on processing large datasets (generate speed data)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1022
diff
changeset
|
631 |
1058
16575ca4537d
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1034
diff
changeset
|
def aggregationMethods(methods, centiles = None):
    '''Builds the aggregation functions and the result headers for the methods

    methods is an iterable of method names passed to aggregationFunction
    centiles is the list of centiles used for the 'centile' method
    (it is iterated over, so it must be provided if 'centile' is in methods)

    Returns a dictionary of the aggregation function for each method name
    and the list of headers: the method name,
    except for 'centile' that yields one header per centile, eg centile25'''
    functions = {}
    columnNames = []
    for name in methods:
        if name != 'centile':
            functions[name] = aggregationFunction(name)
            columnNames.append(name)
            continue
        # a single function computes all the centiles, but each centile has its own header
        functions[name] = aggregationFunction(name, centiles)
        columnNames.extend('{}{}'.format(name, c) for c in centiles)
    return functions, columnNames
1105
e62c2f5e25e6
added sampling function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1103
diff
changeset
|
644 |
e62c2f5e25e6
added sampling function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1103
diff
changeset
|
def maxSumSample(d, maxSum):
    '''Draws values from distribution d, one at a time using its rvs method
    (eg a scipy.stats distribution), as long as the sum of the drawn values is less than maxSum

    Returns the list of drawn values (empty if maxSum is not positive):
    their sum is the first cumulated sum to reach or exceed maxSum'''
    draws = []
    total = 0
    while total < maxSum:
        draws.append(d.rvs())
        total += draws[-1]
    return draws
1156
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
655 |
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
def cramers_v(x, y):
    '''Calculates the Cramer's V statistic for categorical-categorical association
    with the bias correction from Bergsma, Wicher,
    Journal of the Korean Statistical Society 42 (2013): 323-328
    https://towardsdatascience.com/the-search-for-categorical-correlation-a1cf7f1888c9
    https://stackoverflow.com/questions/46498455/categorical-features-correlation/46498792#46498792

    x and y are the two sequences of categories (same length), cross-tabulated with crosstab

    Returns a float, by convention 0. in the degenerate cases
    where the corrected statistic is undefined (0/0):
    fewer than 2 observations, or a variable with only one category
    (or as many categories as observations)'''
    confusionMatrix = crosstab(x,y)
    chi2 = chi2_contingency(confusionMatrix)[0]
    n = confusionMatrix.sum().sum()
    if n < 2:
        # the corrections below divide by n-1
        return 0.
    phi2 = chi2/n
    r,k = confusionMatrix.shape
    # bias-corrected phi2, number of rows and number of columns
    phi2corr = max(0, phi2-((k-1)*(r-1))/(n-1))
    rcorr = r-((r-1)**2)/(n-1)
    kcorr = k-((k-1)**2)/(n-1)
    denominator = min((kcorr-1),(rcorr-1))
    if denominator <= 0:
        # no variability to measure an association: avoid returning NaN
        return 0.
    return sqrt(phi2corr/denominator)
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
672 |
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
def categoricalCorrelationMatrix(data, categoricalVariables):
    '''Returns the correlation matrix for the categorical variables

    data is indexed by variable name (eg a pandas DataFrame)
    categoricalVariables is the list of the names of the variables to compare

    The result is a symmetric square array, in the order of categoricalVariables,
    with ones on the diagonal and the Cramer's V statistic (see cramers_v)
    for each pair of variables elsewhere'''
    from numpy import ones # local import, so as not to depend on a module-level numpy alias
    nVariables = len(categoricalVariables)
    corr = ones((nVariables, nVariables))
    for i in range(nVariables):
        for j in range(i):
            # the statistic is symmetric: compute the lower triangle and mirror it
            corr[i,j] = cramers_v(data[categoricalVariables[i]], data[categoricalVariables[j]])
            corr[j,i] = corr[i,j]
    return corr
f7fbe624fff7
added helper functions for categorical variables
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1135
diff
changeset
|
681 |
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
682 ######################### |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
683 # regression analysis using statsmodels (and pandas) |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
684 ######################### |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
685 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
686 # TODO make class for experiments? |
668
f8dcf483b296
code to prepare regression variables (remove correlated variables) and record dataset size in experimnets
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
667
diff
changeset
|
687 # TODO add tests with public dataset downloaded from Internet (IRIS et al) |
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def modelString(experiment, dependentVariable, independentVariables):
    '''Returns the model formula string 'dependentVariable ~ var1 + var2...'
    made of the independent variables for which experiment[variable] is true
    (in the order of independentVariables)'''
    leftHandSide = dependentVariable+' ~ '
    selectedVariables = filter(lambda name: experiment[name], independentVariables)
    return leftHandSide+' + '.join(selectedVariables)
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
690 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def runModel(experiment, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Fits the regression model described by experiment on data and returns the fit results

    The formula is built by modelString from the variables selected in experiment
    regressionType is 'ols', 'gls' or 'rlm' (functions of statsmodels.formula.api):
    for any other value, a message is printed and the program exits (sys.exit)'''
    import statsmodels.formula.api as smf
    formula = modelString(experiment, dependentVariable, independentVariables)
    if regressionType not in ('ols', 'gls', 'rlm'):
        print('Unknown regression type {}. Exiting'.format(regressionType))
        import sys
        sys.exit()
    # the supported regression types are named after the statsmodels formula functions
    buildModel = getattr(smf, regressionType)
    return buildModel(formula, data = data).fit()
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
705 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def runModels(experiments, data, dependentVariable, independentVariables, regressionType = 'ols'):
    '''Runs several models and stores 4 results for each in experiments:
    adjusted R2 (r2adj), condition number (condNum, should be small, eg < 1000),
    p-value for Shapiro-Wilk test of residual normality (shapiroP)
    and number of observations (nobs)

    experiments is a DataFrame with one row per model and a column per independent variable
    indicating whether it is included in the model: it is modified in place and returned
    Rows in which no independent variable is selected are skipped
    regressionType is passed to runModel

    NOTE(review): the attributes read from the results (rsquared_adj, condition_number)
    are those of linear regression results: confirm they exist for every regressionType used'''
    for i,experiment in experiments.iterrows():
        if experiment[independentVariables].any():
            # use the requested regression type (it used to be hard-coded to 'ols')
            results = runModel(experiment, data, dependentVariable, independentVariables, regressionType = regressionType)
            experiments.loc[i,'r2adj'] = results.rsquared_adj
            experiments.loc[i,'condNum'] = results.condition_number
            experiments.loc[i, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[i,'nobs'] = int(results.nobs)
    return experiments
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
718 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def generateExperiments(independentVariables):
    '''Generates all possible models for including or not each independent variable

    Returns a DataFrame with one row per model (2**n rows for n variables),
    a boolean column per independent variable (True if included in the model)
    and the result columns initialized to placeholder values:
    r2adj (0.), condNum (NaN), shapiroP (-1.) and nobs (-1)

    Prints a message and exits (sys.exit) if independentVariables contains duplicates'''
    nIndependentVariables = len(independentVariables)
    if nIndependentVariables != len(set(independentVariables)):
        print("Duplicate variables. Exiting")
        import sys
        sys.exit()
    experiments = {}
    for i,var in enumerate(independentVariables):
        # variable i alternates blocks of 2**i False and 2**i True values,
        # like bit i of the row number: all combinations are enumerated
        blockSize = 2**i
        pattern = [False]*blockSize+[True]*blockSize
        experiments[var] = pattern*(2**(nIndependentVariables-i-1))
    experiments = DataFrame(experiments)
    experiments['r2adj'] = 0.
    experiments['condNum'] = float('nan')
    # float placeholder: p-values (floats) are stored later in this column
    experiments['shapiroP'] = -1.
    experiments['nobs'] = -1
    return experiments
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
737 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):
    '''Generates all possible models with the independentVariables
    and runs them, saving the results in experiments
    with multiprocess option

    With nProcesses = 1, the models are run in the current process
    Otherwise, the experiments are split in nProcesses consecutive chunks of rows,
    each run by runModels in a pool of nProcesses processes, and the results are concatenated

    Returns the experiments DataFrame (see generateExperiments and runModels)'''
    experiments = generateExperiments(independentVariables)
    nModels = len(experiments)
    print("Running {} models with {} processes".format(nModels, nProcesses))
    print("IndependentVariables: {}".format(independentVariables))
    if nProcesses == 1:
        return runModels(experiments, data, dependentVariable, independentVariables, regressionType)
    else:
        chunkSize = int(ceil(nModels/nProcesses))
        # the context manager releases the worker processes, even if a job fails:
        # all the results must therefore be collected before leaving the with block
        with Pool(processes = nProcesses) as pool:
            jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)]
            results = [job.get() for job in jobs]
        return concat(results)
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
753 |
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None):
    '''Greedy forward search for the best model (based on adjusted R2)

    The independent variables are visited in a random order:
    each one is tentatively added to the best model found so far
    and is kept only if the adjusted R2 increases

    Models are fitted with modelFunc(modelStr, data = data).fit()
    and their statistics are stored in experiments (generated if None),
    in the row whose index is the binary code of the included variables
    (bit i is set if independentVariables[i] is in the model)
    A row with a non-negative shapiroP is considered already fitted and is reused

    Returns experiments'''
    if experiments is None:
        experiments = generateExperiments(independentVariables)
    nVariables = len(independentVariables)
    order = nppermutation(list(range(nVariables)))
    # search slot order[position] holds the variable independentVariables[position]
    variableMapping = {}
    for position, slot in enumerate(order):
        variableMapping[slot] = independentVariables[position]
    print('Tested variables '+', '.join(variableMapping[slot] for slot in range(nVariables)))
    selected = [False]*nVariables
    bestR2Adj = 0.
    for slot in range(nVariables):
        # candidate model: best model so far plus the variable of the current slot
        candidate = list(selected)
        candidate[slot] = True
        rowIdx = sum(2**position for position in range(nVariables) if candidate[order[position]])
        notYetFitted = experiments.loc[rowIdx, 'shapiroP'] < 0
        if notYetFitted:
            modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables)
            results = modelFunc(modelStr, data = data).fit()
            experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj
            experiments.loc[rowIdx, 'condNum'] = results.condition_number
            experiments.loc[rowIdx, 'shapiroP'] = shapiro(results.resid)[1]
            experiments.loc[rowIdx, 'nobs'] = int(results.nobs)
        candidateR2Adj = experiments.loc[rowIdx, 'r2adj']
        if bestR2Adj < candidateR2Adj:
            bestR2Adj = candidateR2Adj
            selected[slot] = True
    return experiments
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
788 |
677
ae07c7b4cf87
update to utils for pavement results
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
676
diff
changeset
|
def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf', text = {'title-shapiro': 'Shapiro-Wilk normality test for residuals: {:.2f} (p={:.3f})', 'true-predicted.xlabel': 'Predicted values', 'true-predicted.ylabel': 'True values', 'residuals-predicted.xlabel': 'Predicted values', 'residuals-predicted.ylabel': 'Residuals'}):
    '''Displays some model results

    Prints the summary of results and the Shapiro-Wilk normality test of its residuals

    If plotFigures, draws in a single figure
    - the true values (model.endog) as a function of the predicted values, only if model is provided
    - the residuals as a function of the predicted values
    - the normal Q-Q plot of the residuals
    and, if filenamePrefix is provided, saves the summary in filenamePrefix-coefficients.html
    and the figure in filenamePrefix-model-results.figureFileType

    text contains the title and axis labels (None for no text):
    the optional keys 'qqplot.xlabel' and 'qqplot.ylabel' label the Q-Q plot
    (text is only read, never modified)'''
    print(results.summary())
    shapiroResult = shapiro(results.resid)
    print(shapiroResult)
    if plotFigures:
        # statsmodels is only needed for the Q-Q plot
        import statsmodels.api as sm
        fig = plt.figure(figsize=(7,6.3*(2+int(model is not None))))
        if model is not None:
            ax = fig.add_subplot(3,1,1)
            plt.plot(results.predict(), model.endog, 'x')
            x = plt.xlim()
            y = plt.ylim()
            # identity line restricted to the range common to both axes
            lower = max(x[0], y[0])
            upper = min(x[1], y[1])
            plt.plot([lower, upper], [lower, upper], 'r')
            if text is not None:
                plt.title(text['title-shapiro'].format(*shapiroResult))
                plt.xlabel(text['true-predicted.xlabel'])
                plt.ylabel(text['true-predicted.ylabel'])
            fig.add_subplot(3,1,2, sharex = ax)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 3
        else:
            fig.add_subplot(2,1,1)
            plt.plot(results.predict(), results.resid, 'x')
            nextSubplotNum = 2
        if text is not None:
            if model is None: # the title is on the first graphic
                plt.title(text['title-shapiro'].format(*shapiroResult))
            plt.xlabel(text['residuals-predicted.xlabel'])
            plt.ylabel(text['residuals-predicted.ylabel'])
        qqAx = fig.add_subplot(nextSubplotNum,1,nextSubplotNum)
        sm.qqplot(results.resid, fit = True, line = '45', ax = qqAx)
        plt.axis('equal')
        if text is not None and 'qqplot.xlabel' in text:
            plt.xlabel(text['qqplot.xlabel'])
            plt.ylabel(text['qqplot.ylabel'])
        plt.tight_layout()
        if filenamePrefix is not None:
            out = openCheck(filenamePrefix+'-coefficients.html', 'w')
            try:
                out.write(results.summary().as_html())
            finally:
                # make sure the summary is flushed to disk and the file released
                out.close()
            plt.savefig(filenamePrefix+'-model-results.'+figureFileType)
667
179b81faa1f8
added regression analysis functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
665
diff
changeset
|
834 |
27
44689029a86f
updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents:
24
diff
changeset
|
835 ######################### |
455
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
836 # iterable section |
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
837 ######################### |
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
838 |
1034
4069d8545922
updated mostCommong function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1031
diff
changeset
|
def mostCommon(l):
    '''Returns the most frequent element in an iterable
    The elements must be hashable

    Relies on collections.Counter, see
    https://stackoverflow.com/questions/41612368/find-most-common-element
    (previous version from http://stackoverflow.com/questions/1518522/python-most-common-element-in-a-list)'''
    tally = Counter(l)
    # most_common(1) is a list with a single (element, count) pair
    topEntry = tally.most_common(1)[0]
    return topEntry[0]
4069d8545922
updated mostCommong function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
1031
diff
changeset
|
846 |
455
abe0b2347d4c
added most common utility function
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
434
diff
changeset
|
847 ######################### |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
848 # sequence section |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
849 ######################### |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
850 |
665
15e244d2a1b5
corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
659
diff
changeset
|
851 class LCSS(object): |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
852 '''Class that keeps the LCSS parameters |
686
cdee6a3a47b4
allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
680
diff
changeset
|
853 and puts together the various computations |
cdee6a3a47b4
allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
680
diff
changeset
|
854 |
cdee6a3a47b4
allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
680
diff
changeset
|
855 the methods with names starting with _ are not to be shadowed |
cdee6a3a47b4
allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
680
diff
changeset
|
856 in child classes, who will shadow the other methods, |
cdee6a3a47b4
allowing alternate database and filename for classify-objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
680
diff
changeset
|
857 ie compute and computeXX methods''' |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
858 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min): |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
859 '''One should provide either a similarity function |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
860 that indicates (return bool) whether elements in the compares lists are similar |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
861 |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
862 eg distance(p1, p2) < epsilon |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
863 |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
864 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon''' |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
865 if similarityFunc is None and metric is None: |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
866 print("No way to compute LCSS, similarityFunc and metric are None. Exiting") |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
867 import sys |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
868 sys.exit() |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
869 elif metric is not None and epsilon is None: |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
870 print("Please provide a value for epsilon if using a cdist metric. Exiting") |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
871 import sys |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
872 sys.exit() |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
873 else: |
741 | 874 if similarityFunc is None and metric is not None and not isinf(delta): |
737
fb60b54e1041
added warning for finite delta
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
733
diff
changeset
|
875 print('Warning: you are using a cdist metric and a finite delta, which will make probably computation slower than using the equivalent similarityFunc (since all pairwise distances will be computed by cdist).') |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
876 self.similarityFunc = similarityFunc |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
877 self.metric = metric |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
878 self.epsilon = epsilon |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
879 self.aligned = aligned |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
880 self.delta = delta |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
881 self.lengthFunc = lengthFunc |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
882 self.subSequenceIndices = [(0,0)] |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
883 |
373
d0b86ed50f32
work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
372
diff
changeset
|
884 def similarities(self, l1, l2, jshift=0): |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
885 n1 = len(l1) |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
886 n2 = len(l2) |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
887 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
888 if self.similarityFunc is not None: |
997
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
889 for i in range(1,n1+1): |
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
890 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
891 if self.similarityFunc(l1[i-1], l2[j-1]): |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
892 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
893 else: |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
894 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
895 elif self.metric is not None: |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
896 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon |
997
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
897 for i in range(1,n1+1): |
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
898 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
899 if similarElements[i-1, j-1]: |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
900 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
901 else: |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
902 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
903 |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
904 |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
905 def subSequence(self, i, j): |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
906 '''Returns the subsequence of two sequences |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
907 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
908 if i == 0 or j == 0: |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
909 return [] |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
910 elif self.similarityTable[i][j] == self.similarityTable[i][j-1]: |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
911 return self.subSequence(i, j-1) |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
912 elif self.similarityTable[i][j] == self.similarityTable[i-1][j]: |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
913 return self.subSequence(i-1, j) |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
914 else: |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
915 return self.subSequence(i-1, j-1) + [(i-1,j-1)] |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
916 |
373
d0b86ed50f32
work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
372
diff
changeset
|
917 def _compute(self, _l1, _l2, computeSubSequence = False): |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
918 '''returns the longest common subsequence similarity |
689
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
919 l1 and l2 should be the right format |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
920 eg list of tuple points for cdist |
9990ef119bce
added version of LCSS with cdist computations
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
687
diff
changeset
|
921 or elements that can be compare using similarityFunc |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
922 |
607 | 923 if aligned, returns the best matching if using a finite delta by shifting the series alignments |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
924 ''' |
372
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
925 if len(_l2) < len(_l1): # l1 is the shortest |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
926 l1 = _l2 |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
927 l2 = _l1 |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
928 revertIndices = True |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
929 else: |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
930 l1 = _l1 |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
931 l2 = _l2 |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
932 revertIndices = False |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
933 n1 = len(l1) |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
934 n2 = len(l2) |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
935 |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
936 if self.aligned: |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
937 lcssValues = {} |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
938 similarityTables = {} |
997
4f3387a242a1
updated utils to python 3
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
990
diff
changeset
|
939 for i in range(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2 |
373
d0b86ed50f32
work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
372
diff
changeset
|
940 self.similarities(l1, l2, i) |
d0b86ed50f32
work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
372
diff
changeset
|
941 lcssValues[i] = self.similarityTable.max() |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
942 similarityTables[i] = self.similarityTable |
374
a7af3519687e
finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
373
diff
changeset
|
943 #print self.similarityTable |
521
3707eeb20f25
changed argMaxDict name to argmaxDict
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
518
diff
changeset
|
944 alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance |
389
6d26dcc7bba0
modifications to compute alignment for None indicators
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
381
diff
changeset
|
945 self.similarityTable = similarityTables[alignmentShift] |
372
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
946 else: |
389
6d26dcc7bba0
modifications to compute alignment for None indicators
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
381
diff
changeset
|
947 alignmentShift = 0 |
372
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
948 self.similarities(l1, l2) |
373
d0b86ed50f32
work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
372
diff
changeset
|
949 |
374
a7af3519687e
finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
373
diff
changeset
|
950 # threshold values for the useful part of the similarity table are n2-n1-delta and n1-n2-delta |
389
6d26dcc7bba0
modifications to compute alignment for None indicators
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
381
diff
changeset
|
951 self.similarityTable = self.similarityTable[:min(n1, n2+alignmentShift+self.delta)+1, :min(n2, n1-alignmentShift+self.delta)+1] |
373
d0b86ed50f32
work in progress on LCSS
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
372
diff
changeset
|
952 |
372
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
953 if computeSubSequence: |
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
954 self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) |
371
924e38c9f70e
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
370
diff
changeset
|
955 if revertIndices: |
374
a7af3519687e
finished implementation of aligned LCSS with matching sequence decoded
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
373
diff
changeset
|
956 self.subSequenceIndices = [(j,i) for i,j in self.subSequenceIndices] |
372
349eb1e09f45
Cleaned the methods/functions indicating if a point is in a polygon
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
371
diff
changeset
|
957 return self.similarityTable[-1,-1] |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
958 |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
def compute(self, l1, l2, computeSubSequence = False):
    '''Public entry point computing the LCSS of the sequences l1 and l2

    Simply forwards its arguments to self._compute and returns its result:
    the public methods are thin wrappers so that child classes can shadow them
    while still reusing the underscore-prefixed implementations

    computeSubSequence is passed through unchanged to self._compute'''
    lcss = self._compute(l1, l2, computeSubSequence)
    return lcss
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
962 |
375
2ea8584aa80a
making indicator LCSS work
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
374
diff
changeset
|
def computeAlignment(self):
    '''Returns the mean offset j-i over the index pairs (i,j) stored in self.subSequenceIndices

    self.subSequenceIndices must have been filled beforehand
    (presumably by a computation run with computeSubSequence = True - confirm against _compute)'''
    offsets = [second-first for first, second in self.subSequenceIndices]
    return mean(offsets)
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
965 |
376
2e6b8610bcaa
work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
375
diff
changeset
|
966 def _computeNormalized(self, l1, l2, computeSubSequence = False): |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
967 ''' compute the normalized LCSS |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
968 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
969 lengthFunc = lambda x,y:float(x,y)/2''' |
376
2e6b8610bcaa
work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
375
diff
changeset
|
970 return float(self._compute(l1, l2, computeSubSequence))/self.lengthFunc(len(l1), len(l2)) |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
971 |
376
2e6b8610bcaa
work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
375
diff
changeset
|
def computeNormalized(self, l1, l2, computeSubSequence = False):
    '''Public entry point for the normalized LCSS of l1 and l2

    Forwards its arguments unchanged to self._computeNormalized and returns its result
    (thin wrapper that child classes can shadow)'''
    normalizedLCSS = self._computeNormalized(l1, l2, computeSubSequence)
    return normalizedLCSS
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
974 |
376
2e6b8610bcaa
work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
375
diff
changeset
|
975 def _computeDistance(self, l1, l2, computeSubSequence = False): |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
976 ''' compute the LCSS distance''' |
376
2e6b8610bcaa
work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
375
diff
changeset
|
977 return 1-self._computeNormalized(l1, l2, computeSubSequence) |
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
978 |
376
2e6b8610bcaa
work on indicator similarity
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
375
diff
changeset
|
def computeDistance(self, l1, l2, computeSubSequence = False):
    '''Public entry point for the LCSS distance between l1 and l2

    Forwards its arguments unchanged to self._computeDistance and returns its result
    (thin wrapper that child classes can shadow)'''
    distance = self._computeDistance(l1, l2, computeSubSequence)
    return distance
370
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
981 |
97e8fa0ee9a1
work in progress for complete alignment
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
369
diff
changeset
|
982 ######################### |
45
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
983 # plotting section |
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
984 ######################### |
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
985 |
940
d8ab183a7351
verified motion prediction with prototypes at constant speed (test needed)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
876
diff
changeset
|
def plotPolygon(poly, options = '', **kwargs):
    '''Plots the exterior of the shapely polygon poly

    options is the format string and kwargs the keyword arguments,
    both passed as is to plt.plot'''
    exteriorX, exteriorY = poly.exterior.xy
    plt.plot(exteriorX, exteriorY, options, **kwargs)
332
a6ca86107f27
reorganized utils module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
324
diff
changeset
|
990 |
def stepPlot(X, firstX, lastX, initialCount = 0, increment = 1):
    '''Builds the coordinates of a step plot (eg cumulative number of vehicles)

    The count starts at initialCount and is increased by increment at each value in X
    (taken in sorted order)
    Returns two lists of the same length, the abscissas and the counts, that can be plotted directly:
    the abscissas start at firstX, contain each value of X twice (bottom and top of the step)
    and end at lastX
    firstX and lastX should be respectively smaller and larger than all elements in X'''
    abscissas = [firstX]
    counts = [initialCount]
    current = initialCount
    for value in sorted(X):
        # vertical segment of the step: same abscissa, count before and after the increment
        abscissas.extend((value, value))
        counts.append(current)
        current = current+increment
        counts.append(current)
    # final horizontal segment up to lastX
    abscissas.append(lastX)
    counts.append(current)
    return abscissas, counts
f6f423e25c7f
adding function to generate step plots (for cumulative number of vehicles)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
286
diff
changeset
|
1005 |
665
15e244d2a1b5
corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
659
diff
changeset
|
class PlottingPropertyValues(object):
    '''Sequence of plotting property values (markers, colors...) accessed cyclically:
    indexing with any integer wraps around the stored values

    Since indexing never raises IndexError, do not iterate directly over an instance
    (the implicit iteration protocol would not terminate), iterate over .values instead'''
    def __init__(self, values):
        self.values = values

    def __getitem__(self, i):
        'Returns the value at index i modulo the number of values'
        nValues = len(self.values)
        return self.values[i % nValues]

# markers for line plots
markers = PlottingPropertyValues(['+', '*', ',', '.', 'x', 'D', 's', 'o'])
# markers for scatter plots
scatterMarkers = PlottingPropertyValues(['s','o','^','>','v','<','d','p','h','8','+','x'])

linestyles = PlottingPropertyValues(['-', '--', '-.', ':'])

# single-letter color codes, white is left out
colors = PlottingPropertyValues('brgmyck') # 'w'
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
1019 |
def monochromeCycler(withMarker = False):
    '''Sets the axes property cycle (plt.rc) to a monochrome cycle:
    black lines cycling over the linestyles,
    combined with a cycle over a few markers if withMarker is True'''
    from cycler import cycler
    monochrome = cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.'])
    if withMarker:
        monochrome = monochrome * cycler('marker', ['^',',', '.'])
    plt.rc('axes', prop_cycle=monochrome)
1027 | |
115
550556378466
added functionalities to indicator maps
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
86
diff
changeset
|
def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1):
    '''Plots the indicator map as a pseudocolor plot (plt.pcolor)

    indicatorMap is a dictionary whose keys are indexed as (x index, y index) cells
    and whose values are the indicator values
    (presumably integer cell indices, since they are used to index the value array - confirm with callers)
    squareSize scales the cell indices to plot coordinates
    Cells absent from indicatorMap are set to defaultValue,
    and cells equal to defaultValue are masked (not drawn) if masked is True'''
    cellCoordinates = array(list(indicatorMap.keys()))
    xIndices = cellCoordinates[:,0]
    yIndices = cellCoordinates[:,1]
    minX = min(xIndices)
    minY = min(yIndices)
    # +1.1 so that the range includes the upper edge of the last cell
    X = arange(minX, max(xIndices)+1.1)*squareSize
    Y = arange(minY, max(yIndices)+1.1)*squareSize
    C = defaultValue*ones((len(Y), len(X)))
    for cell, value in indicatorMap.items():
        # rows are y, columns are x
        C[cell[1]-minY, cell[0]-minX] = value
    plottedValues = ma.masked_where(C==defaultValue,C) if masked else C
    plt.pcolor(X, Y, plottedValues)
65
75cf537b8d88
moved and generalized map making functions to the library
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
48
diff
changeset
|
1041 |
45
74d2de078baf
added colors, linestyles and markers to pick from
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
42
diff
changeset
|
1042 ######################### |
637
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
1043 # Data download |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
1044 ######################### |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
1045 |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
def downloadECWeather(stationID, years, months = [], outputDirectoryname = '.', english = True):
    '''Downloads weather data from Environment Canada as csv files

    If months are provided (numbers 1 to 12), hourly data is downloaded for each whole month of each year
    Otherwise, daily data is downloaded for each whole year
    The files are saved in outputDirectoryname as
    stationID-year-month-hourly.csv or stationID-year-daily.csv
    The data is requested in English if english is True, in French otherwise

    Example: MONTREAL MCTAVISH 10761
             MONTREALPIERRE ELLIOTT TRUDEAU INTL A 5415
    see ftp://client_climate@ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees/Station%20Inventory%20EN.csv

    To get daily data for 2010 and 2011, downloadECWeather(10761, [2010,2011], [], '/tmp')
    To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp')

    Equivalent shell commands:
    for annee in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done
    for annee in `seq 2016 2017`;do for mois in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&Month=${mois}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done
    '''
    import urllib.request
    urlTemplate = 'http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=Download+Data'
    language = 'e' if english else 'f'
    hourly = len(months) != 0
    if hourly:
        timeFrame = 1
    else:
        # daily data covers the whole year: a single request per year, with a dummy month
        # (months is rebound, the default list argument is never modified)
        timeFrame = 2
        months = [1]

    for year in years:
        for month in months:
            if hourly:
                suffix = '-{}-hourly'.format(month)
            else:
                suffix = '-daily'
            outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year)+suffix+'.csv'
            requestURL = urlTemplate.format(language, stationID, year, month, timeFrame)
            urllib.request.urlretrieve(requestURL, outFilename)
637
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
1081 |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
1082 ######################### |
c9a0b72979fd
added function to get canadian public weather data
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
615
diff
changeset
|
1083 # File I/O |
27
44689029a86f
updated segmentIntersection and other
Nicolas Saunier <nico@confins.net>
parents:
24
diff
changeset
|
1084 ######################### |
24
6fb59cfb201e
first version of segmentIntersection
Nicolas Saunier <nico@confins.net>
parents:
19
diff
changeset
|
1085 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
def removeExtension(filename, delimiter = '.'):
    '''Returns filename without its extension,
    ie without the last delimiter and all the characters after it

    filename is returned unchanged if it does not contain delimiter
    or only as its very first characters (eg hidden files such as .bashrc)'''
    cut = filename.rfind(delimiter)
    return filename[:cut] if cut > 0 else filename
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
1093 |
969
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
def getExtension(filename, delimiter = '.'):
    '''Returns the extension of filename, ie all the characters after the last delimiter

    Returns the empty string if filename does not contain delimiter
    or only as its very first characters (eg hidden files such as .bashrc),
    consistently with removeExtension'''
    i = filename.rfind(delimiter)
    if i > 0:
        # skip the whole delimiter, which may be longer than one character
        return filename[i+len(delimiter):]
    return ''
5d788d2e8ffc
work in progress
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
940
diff
changeset
|
1101 |
46
b5d007612e16
added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
45
diff
changeset
|
def cleanFilename(s):
    '''Cleans filenames obtained when concatenating figure characteristics:
    spaces and slashes are replaced by dashes, dots and commas are removed'''
    substitutions = str.maketrans({' ': '-', '/': '-', '.': None, ',': None})
    return s.translate(substitutions)
46
b5d007612e16
added filename util
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
45
diff
changeset
|
1105 |
1021
16932cefabc1
work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
997
diff
changeset
|
def getRelativeFilename(parentPath, filename):
    '''Returns filename unchanged if it is an absolute path,
    otherwise the path parentPath/filename as a string

    parentPath is combined with the / operator, so it is presumably a pathlib.Path'''
    candidate = Path(filename)
    if not candidate.is_absolute():
        return str(parentPath/candidate)
    return filename
16932cefabc1
work on paths in line with new configurations from tracker
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
997
diff
changeset
|
1113 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
def listfiles(dirname, extension, remove = False):
    '''Returns the list of files with the extension in the directory dirname

    extension is given without its leading dot (eg 'csv')
    The filenames include the directory and are returned in the order provided by glob (not sorted)
    If remove is True, the filenames are stripped from the extension (dot included)
    If dirname is not a directory, a message is printed and the empty list is returned'''
    d = Path(dirname)
    if not d.is_dir():
        # format instead of string concatenation so that dirname can also be a Path
        print('{} is not a directory'.format(dirname))
        return []
    filenames = [str(f) for f in d.glob('*.'+extension)]
    if remove:
        # strip the dot together with the extension
        # (using the bare extension as delimiter would leave a trailing dot)
        suffix = '.'+extension
        return [f[:-len(suffix)] if f.endswith(suffix) else f for f in filenames]
    return filenames
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
1127 |
266
aba9711b3149
small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
262
diff
changeset
|
def mkdir(dirname):
    '''Creates the directory dirname if it does not exist

    dirname can be a string or a pathlib.Path
    If the path already exists, a message is printed and nothing is created
    Parent directories are not created: a missing parent still raises FileNotFoundError'''
    try:
        # attempt the creation directly instead of testing first:
        # avoids a race between the existence test and the creation
        Path(dirname).mkdir()
    except FileExistsError:
        # str() so that a Path argument does not raise TypeError in the concatenation
        print(str(dirname)+' already exists')
aba9711b3149
small modificatons and reorganization
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
262
diff
changeset
|
1135 |
14
e7bbe8465591
homography and other utils
Nicolas Saunier <nico@confins.net>
parents:
7
diff
changeset
|
def removeFile(filename):
    '''Deletes the file while avoiding raising an error
    if the file does not exist

    filename can be a string or a pathlib.Path
    If the file does not exist, a message is printed instead'''
    f = Path(filename)
    if not f.exists():
        # str() so that a Path argument does not raise TypeError in the concatenation
        print(str(filename)+' does not exist')
        return
    f.unlink()
14
e7bbe8465591
homography and other utils
Nicolas Saunier <nico@confins.net>
parents:
7
diff
changeset
|
1144 |
42
1a2ac2d4f53a
added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
35
diff
changeset
|
def line2Floats(l, separator=' '):
    '''Splits the string l on separator and converts each piece to a float

    Returns the list of floats (a piece that is not a valid float raises ValueError)'''
    pieces = l.split(separator)
    return list(map(float, pieces))
1a2ac2d4f53a
added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
35
diff
changeset
|
1148 |
1a2ac2d4f53a
added loading of the rest of the data for objects
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
35
diff
changeset
|
def line2Ints(l, separator=' '):
    '''Splits the string l on separator and converts each piece to an int

    Returns the list of ints (a piece that is not a valid int raises ValueError)'''
    pieces = l.split(separator)
    return list(map(int, pieces))
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1152 |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1153 ######################### |
553
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1154 # Profiling |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1155 ######################### |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1156 |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
def analyzeProfile(profileFilename, stripDirs = True):
    '''Loads and prints a summary of a profile file produced by cProfile

    The file is looked up relative to the parent directory (os.pardir);
    an absolute profileFilename is used as is by os.path.join

    The statistics are sorted by 'time' and print_stats is called
    with the restriction .2; directory names are removed from
    the file paths first if stripDirs is True

    The profile file can be generated for example:
    - from a script (for the main() function in the script)
    import cProfile, os
    cProfile.run('main()', os.path.join(os.getcwd(),'main.profile'))

    - or on the command line:
    python -m cProfile [-o profile.bin] [-s sort] scriptfile [arg]

    Returns the pstats.Stats object for further exploration,
    eg stats.print_callees(.1, 'int_prediction.py:')'''
    import os
    import pstats
    profilePath = os.path.join(os.pardir, profileFilename)
    stats = pstats.Stats(profilePath)
    if stripDirs:
        stats.strip_dirs()
    # sort_stats returns the Stats object itself, so the calls can be chained
    stats.sort_stats('time').print_stats(.2)
    return stats
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1176 |
3622a5653ee9
added basic info and function to profile code
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
547
diff
changeset
|
1177 ######################### |
31
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1178 # running tests |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1179 ######################### |
c000f37c316d
moved tests to independent file, added chi2 computation
Nicolas Saunier <nico@confins.net>
parents:
29
diff
changeset
|
1180 |
0
aed8eb63cdde
initial commit with non-functional python code for NGSIM
Nicolas Saunier <nico@confins.net>
parents:
diff
changeset
|
if __name__ == "__main__":
    # runs the doctests stored in the separate file tests/utils.txt
    # (alternatives: doctest.DocTestSuite() for the docstrings of this module,
    # doctest.testmod() or doctest.testfile("example.txt"))
    import doctest
    import unittest
    docFileSuite = doctest.DocFileSuite('tests/utils.txt')
    runner = unittest.TextTestRunner()
    runner.run(docFileSuite)