Mercurial Hosting > traffic-intelligence
comparison python/utils.py @ 76:64fde2b1f96d
simplified intervales in empiricalDistribution
author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
---|---|
date | Thu, 10 Feb 2011 22:15:54 -0500 |
parents | 46ec876ce90e |
children | 5e6cd36a991c |
comparison
equal
deleted
inserted
replaced
75:46ec876ce90e | 76:64fde2b1f96d |
---|---|
23 result += ((e-o)*(e-o))/e | 23 result += ((e-o)*(e-o))/e |
24 return result | 24 return result |
25 | 25 |
26 class empiricalDistribution: | 26 class empiricalDistribution: |
27 '''Class to represent a sample of a distribution for a continuous random variable | 27 '''Class to represent a sample of a distribution for a continuous random variable |
28 with the number of observations for each interval''' | 28 with the number of observations for each interval |
29 intervals (categories variable) are defined by their left limits, the last one being the right limit | |
30 categories contain therefore one more element than the counts''' | |
29 def __init__(self, categories, counts): | 31 def __init__(self, categories, counts): |
30 self.categories = categories | 32 self.categories = categories |
31 self.counts = counts | 33 self.counts = counts |
32 | 34 |
33 def mean(self): | 35 def mean(self): |
34 result = 0. | 36 result = 0. |
35 for i,c in zip(self.categories, self.counts): | 37 for i in range(len(self.counts)-1): |
36 result += c*(i[1]+i[0])/2 | 38 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 |
37 return result/sum(self.counts) | 39 return result/self.nSamples() |
38 | 40 |
39 def var(self, mean = None): | 41 def var(self, mean = None): |
40 if not mean: | 42 if not mean: |
41 m = self.mean() | 43 m = self.mean() |
42 else: | 44 else: |
43 m = mean | 45 m = mean |
44 result = 0. | 46 result = 0. |
45 for i,c in zip(self.categories, self.counts): | 47 for i in range(len(self.counts)-1): |
46 mid = (i[1]+i[0])/2 | 48 mid = (self.categories[i]+self.categories[i+1])/2 |
47 result += c*(mid - m)*(mid - m) | 49 result += self.counts[i]*(mid - m)*(mid - m) |
48 return result/(self.nSamples()-1) | 50 return result/(self.nSamples()-1) |
49 | 51 |
50 def nSamples(self): | 52 def nSamples(self): |
51 return sum(self.counts) | 53 return sum(self.counts) |
52 | 54 |
54 '''cdf is a cumulative distribution function | 56 '''cdf is a cumulative distribution function |
55 returning the probability of the variable being less than x''' | 57 returning the probability of the variable being less than x''' |
56 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] | 58 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] |
57 # for inter in self.categories: | 59 # for inter in self.categories: |
58 # refCumulativeCounts.append(cdf(inter[1])) | 60 # refCumulativeCounts.append(cdf(inter[1])) |
59 refCumulativeCounts = [cdf(inter[1]) for inter in self.categories[:-1]] | 61 refCumulativeCounts = [cdf(x) for x in self.categories[1:-1]] |
60 | 62 |
61 refProba = [refCumulativeCounts[0]] | 63 refProba = [refCumulativeCounts[0]] |
62 for i in xrange(1,len(refCumulativeCounts)): | 64 for i in xrange(1,len(refCumulativeCounts)): |
63 refProba.append(refCumulativeCounts[i]-refCumulativeCounts[i-1]) | 65 refProba.append(refCumulativeCounts[i]-refCumulativeCounts[i-1]) |
64 refProba.append(1-refCumulativeCounts[-1]) | 66 refProba.append(1-refCumulativeCounts[-1]) |