summaryrefslogtreecommitdiff
path: root/lib/stats.dx
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stats.dx')
-rw-r--r--  lib/stats.dx  54
1 file changed, 28 insertions(+), 26 deletions(-)
diff --git a/lib/stats.dx b/lib/stats.dx
index 3ee8cc17..fabf8d89 100644
--- a/lib/stats.dx
+++ b/lib/stats.dx
@@ -20,10 +20,10 @@ actual value being represented (the raw probability) can be computed with
use `ls_sum`, which applies the standard max-sweep log-sum-exp trick directly,
rather than relying on monoid reduction using `add`.
-struct LogSpace(a) =
+struct LogSpace(a:Type) =
log : a
-def Exp(x:a) -> LogSpace a given (a) = LogSpace x
+def Exp(x:a) -> LogSpace a given (a:Type) = LogSpace x
instance Mul(LogSpace f) given (f|Add)
def (*)(x, y) = Exp $ x.log + y.log
@@ -38,14 +38,14 @@ instance Arbitrary(LogSpace Float)
def is_infinite(x:f) -> Bool given (f|Fractional|Sub|Mul|Ord) =
# Note: According to this function, nan is not infinite.
# Todo: Make polymorphic versions of these and put them in the prelude.
- infinity = divide one zero
- neg_infinity = zero - infinity
+ infinity : f = divide one zero
+ neg_infinity : f = zero - infinity
x == infinity || x == neg_infinity
def log_add_exp(la:f, lb:f) -> f
given (f|Floating|Add|Sub|Mul|Fractional|Ord) =
- infinity = (divide one zero)
- neg_infinity = zero - infinity
+ infinity : f = divide(one, zero)
+ neg_infinity : f = zero - infinity
if la == infinity && lb == infinity
then infinity
else if la == neg_infinity && lb == neg_infinity
@@ -66,27 +66,29 @@ def ln(x:LogSpace f) -> f given (f|Floating) = x.log
def log_sum_exp(xs:n=>f) -> f
given(n|Ix, f|Fractional|Sub|Floating|Mul|Ord) =
- m_raw = maximum xs
- m = case is_infinite m_raw of
+ m_raw = maximum(xs)
+ m = case is_infinite(m_raw) of
False -> m_raw
True -> zero
- m + (log $ sum for i. exp (xs[i] - m))
+ m + (log $ sum $ each xs \x. exp(x) - m)
def ls_sum(x:n=>LogSpace f) -> LogSpace f
given (n|Ix, f|Fractional|Sub|Floating|Mul|Ord) =
- lx = map ln x
+ lx = each(x, ln)
Exp $ log_sum_exp lx
'## Probability distributions
Simulation and evaluation of [probability distributions](https://en.wikipedia.org/wiki/Probability_distribution). Probability distributions which can be sampled should implement `Random`, and those which can be evaluated should implement the `Dist` interface. Distributions over an ordered space (such as typical *univariate* distributions) should also implement `OrderedDist`.
-interface Random(d, a)
+# TODO: use an associated type for the `a` parameter
+interface Random(d:Type, a:Type)
draw : (d, Key) -> a # function for random draws
-interface Dist(d, a, f)
+# TODO: use an associated type for the `a` parameter
+interface Dist(d:Type, a:Type, f:Type)
density : (d, a) -> LogSpace f # either the density function or mass function
-interface OrderedDist(d, a, f, given () (Dist d a f))
+interface OrderedDist(d:Type, a:Type, f:Type, given () (Dist d a f))
cumulative : (d, a) -> LogSpace f # the cumulative distribution function (CDF)
survivor : (d, a) -> LogSpace f # the survivor function (complement of CDF)
quantile : (d, f) -> a # the quantile function (inverse CDF)
@@ -131,7 +133,7 @@ struct Binomial =
instance Random(Binomial, Nat)
def draw(d, k) =
- sum $ map b_to_n (rand_vec d.trials (\k_. draw(Bernoulli(d.prob), k_)) k)
+ sum $ each (rand_vec d.trials (\k_. draw(a=Bool, Bernoulli(d.prob), k_)) k) b_to_n
instance Dist(Binomial, Nat, Float)
def density(d, x) =
@@ -157,13 +159,13 @@ instance Dist(Binomial, Nat, Float)
instance OrderedDist(Binomial, Nat, Float)
def cumulative(d, x) =
xp1:Nat = x + 1
- ls_sum $ for i:(Fin xp1). density d (ordinal i)
+ ls_sum $ for i:(Fin xp1). density(f=Float, d, ordinal i)
def survivor(d, x) =
tmx = d.trials -| x
- ls_sum $ for i:(Fin tmx). density d (x + 1 + ordinal i)
+ ls_sum $ for i:(Fin tmx). density(f=Float, d, x + 1 + ordinal i)
def quantile(d, q) =
tp1:Nat = d.trials + 1
- lpdf = for i:(Fin tp1). ln $ density d (ordinal i)
+ lpdf = for i:(Fin tp1). ln $ density(f=Float, d, ordinal i)
cdf = cdf_for_categorical lpdf
mi = search_sorted cdf q
ordinal $ from_just $ left_fence mi
@@ -257,13 +259,13 @@ struct Poisson =
instance Random(Poisson, Nat)
def draw(d, k) =
exp_neg_rate = exp (-d.rate)
- [current_k, next_k] = split_key k
+ [current_k, next_k] = split_key(n=2, k)
yield_state 0 \ans.
with_state (rand current_k) \p. with_state next_k \k'.
while \.
if get p > exp_neg_rate
then
- [ck, nk] = split_key (get k')
+ [ck, nk] = split_key(n=2, get k')
p := (get p) * rand ck
ans := (get ans) + 1
k' := nk
@@ -283,16 +285,16 @@ instance Dist(Poisson, Nat, Float)
instance OrderedDist(Poisson, Nat, Float)
def cumulative(d, x) =
xp1:Nat = x + 1
- ls_sum $ for i:(Fin xp1). density d (ordinal i)
+ ls_sum $ for i:(Fin xp1). density(f=Float, d, ordinal i)
def survivor(d, x) =
xp1:Nat = x + 1
- cdf = ls_sum $ for i:(Fin xp1). density d (ordinal i)
+ cdf = ls_sum $ for i:(Fin xp1). density(f=Float, d, ordinal i)
Exp $ log1p (-ls_to_f cdf)
def quantile(d, q) =
yield_state (0::Nat) \x.
with_state 0.0 \cdf.
while \.
- cdf := (get cdf) + ls_to_f (density d (get x))
+ cdf := (get cdf) + ls_to_f (density(f=Float, d ,get x))
if (get cdf) > q
then
False
@@ -340,7 +342,7 @@ Some data summary functions. Note that `mean` is provided by the prelude.
def mean_and_variance(xs:n=>a) -> (a, a) given (n|Ix, a|VSpace|Mul) =
m = mean xs
- ss = sum for i. sq (xs[i] - m)
+ ss = sum $ each xs \x. sq(x - m)
v = ss / (n_to_f (size n) - 1)
(m, v)
@@ -353,7 +355,7 @@ def std_dev(xs:n=>a) -> a given (n|Ix, a|VSpace|Mul|Floating) =
def covariance(xs:n=>a, ys:n=>a) -> a given (n|Ix, a|VSpace|Mul) =
xm = mean xs
ym = mean ys
- ss = sum for i. (xs[i] - xm) * (ys[i] - ym)
+ ss = sum for i:n. (xs[i] - xm) * (ys[i] - ym)
ss / (n_to_f (size n) - 1)
def correlation(xs:n=>a, ys:n=>a) -> a
@@ -364,8 +366,8 @@ def correlation(xs:n=>a, ys:n=>a) -> a
def mean_and_variance_matrix(xs:n=>d=>a) -> (d=>a, d=>d=>a)
given (n|Ix, d|Ix, a|Mul|VSpace) =
- xsMean:d=>a = (for i. sum for j. xs[j,i]) / n_to_f (size n)
- xsCov:d=>d=>a = (for i i'. sum for j.
+ xsMean:d=>a = (for i:d. sum for j:n. xs[j,i]) / n_to_f (size n)
+ xsCov:d=>d=>a = (for i:d i':d. sum for j:n.
(xs[j,i'] - xsMean[i']) *
(xs[j,i] - xsMean[i] ) ) / (n_to_f (size n) - 1)
(xsMean, xsCov)