Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 85:7f1e54234f96
added empirical discrete distribution, modified class organization and names
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Mon, 18 Apr 2011 19:31:53 -0400 |
parents | 5e6cd36a991c |
children | f03ec4697a09 |
comparison
equal
deleted
inserted
replaced
84:731df2fa0010 | 85:7f1e54234f96 |
---|---|
21 result = 0. | 21 result = 0. |
22 for e, o in zip(expected, observed): | 22 for e, o in zip(expected, observed): |
23 result += ((e-o)*(e-o))/e | 23 result += ((e-o)*(e-o))/e |
24 return result | 24 return result |
25 | 25 |
26 class empiricalDistribution: | 26 class EmpiricalDistribution: |
27 def nSamples(self): | |
28 return sum(self.counts) | |
29 | |
30 | |
31 class EmpiricalDiscreteDistribution(EmpiricalDistribution): | |
32 '''Class to represent a sample of a distribution for a discrete random variable | |
33 ''' | |
34 def __init__(self, categories, counts): | |
35 self.categories = categories | |
36 self.counts = counts | |
37 | |
38 def mean(self): | |
39 from numpy.core.fromnumeric import sum | |
40 result = [float(x*y) for x,y in zip(self.categories, self.counts)] | |
41 return sum(result)/self.nSamples() | |
42 | |
43 def var(self, mean = None): | |
44 from numpy.core.fromnumeric import sum | |
45 if not mean: | |
46 m = self.mean() | |
47 else: | |
48 m = mean | |
49 result = 0. | |
50 squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)] | |
51 return sum(squares)/(self.nSamples()-1) | |
52 | |
53 | |
54 class EmpiricalContinuousDistribution(EmpiricalDistribution): | |
27 '''Class to represent a sample of a distribution for a continuous random variable | 55 '''Class to represent a sample of a distribution for a continuous random variable |
28 with the number of observations for each interval | 56 with the number of observations for each interval |
29 intervals (categories variable) are defined by their left limits, the last one being the right limit | 57 intervals (categories variable) are defined by their left limits, the last one being the right limit |
30 categories contain therefore one more element than the counts''' | 58 categories contain therefore one more element than the counts''' |
31 def __init__(self, categories, counts): | 59 def __init__(self, categories, counts): |
46 result = 0. | 74 result = 0. |
47 for i in range(len(self.counts)-1): | 75 for i in range(len(self.counts)-1): |
48 mid = (self.categories[i]+self.categories[i+1])/2 | 76 mid = (self.categories[i]+self.categories[i+1])/2 |
49 result += self.counts[i]*(mid - m)*(mid - m) | 77 result += self.counts[i]*(mid - m)*(mid - m) |
50 return result/(self.nSamples()-1) | 78 return result/(self.nSamples()-1) |
51 | |
52 def nSamples(self): | |
53 return sum(self.counts) | |
54 | 79 |
55 def referenceCounts(self, cdf): | 80 def referenceCounts(self, cdf): |
56 '''cdf is a cumulative distribution function | 81 '''cdf is a cumulative distribution function |
57 returning the probability of the variable being less than x''' | 82 returning the probability of the variable being less than x''' |
58 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] | 83 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] |