doudengshen5591 2014-05-13 15:59
浏览 753
已采纳

如何在Go中计算超几何分布的p值?

In R, I can calculate a p-value for a hypergeometric distribution by using the phyper() function, of which the first value in the returned array is the p-value.

I was wondering whether there is any package in Go / Golang, that lets me do this calculation completely within Go?

  • 写回答

2条回答 默认 最新

  • doukongyong44772 2014-06-02 23:37
    关注

    When I find problems dealing with stats, my second line of attack after having found that a library does not exist is to port from the R code. This is mixed in ease since code may be R, C/C++ or fortran.

    In this case it was pure C, so the port was trivial. Note that the Qhyper() implementation is not an exact port since I have used stirlerr() in place of lgammacor() for the lbeta() implementation. This doesn't seem to make a great deal of difference, but I advise caution if using this lbeta() (and so Qhyper()).

    // Direct port of R code from nmath/{phyper,dbinom,stirlerr}.c and {dpq,nmath}.h.
    // Code licensed under GPL for that reason (c) Dan Kortschak.
    package main
    
    import (
        "errors"
        "fmt"
        "math"
    )
    
    func main() {
        // Example values come from:
        // http://stackoverflow.com/questions/8382806/r-hypergeometric-test-phyper
        fmt.Println(Phyper(62, 1998, 5260-1998, 131, true, false))
    
        for x := 0.; x < 10; x++ {
            fmt.Println(Phyper(x, 10, 7, 8, true, false))
        }
        fmt.Println()
        for x := 0.; x < 10; x++ {
            fmt.Println(Dhyper(x, 10, 7, 8, false))
        }
        fmt.Println()
        for x := 0.; x < 10; x++ {
            fmt.Println(Qhyper(x, 10, 7, 8, true, false))
        }
    }
    
    var ErrDomain = errors.New("hyper: argument out of domain")
    
    const (
        epsilon = 2.2204460492503131e-16
        min     = 2.2250738585072014e-308
    )
    
    // Sample of n balls from r red and b black ones; x are red
    func Phyper(x, r, b, n float64, lowerTail, logP bool) (float64, error) {
        x = math.Floor(x + 1e-7)
        r = round(r)
        b = round(b)
        n = round(n)
    
        if r < 0 || b < 0 || notFinite(r+b) || n < 0 || n > r+b {
            return math.NaN(), ErrDomain
        }
    
        if x*(r+b) > n*r {
            b, r = r, b
            x = n - x - 1
            lowerTail = !lowerTail
        }
    
        if x < 0 {
            return dt0(lowerTail, logP), nil
        }
        if x >= r || x >= n {
            return dt1(lowerTail, logP), nil
        }
    
        d, err := Dhyper(x, r, b, n, logP)
        if err != nil {
            return d, err
        }
        pd := pdhyper(x, r, b, n, logP)
    
        if logP {
            return log(d+pd, lowerTail), nil
        }
        res := d * pd
        if lowerTail {
            return res, nil
        }
        // Use 0.5 - p + 0.5 to perhaps gain 1 bit of accuracy
        res = 0.5 - res
        return res + 0.5, nil
    }
    
    func Dhyper(x, r, b, n float64, giveLog bool) (float64, error) {
        if negativeOrNotInteger(r) || negativeOrNotInteger(b) || negativeOrNotInteger(n) || n > r+b {
            return math.NaN(), ErrDomain
        }
        if x < 0 {
            return 0, nil
        }
        if x != math.Floor(x) {
            return 0, fmt.Errorf("non-integer x = %f", x)
        }
    
        x = round(x)
        r = round(r)
        b = round(b)
        n = round(n)
    
        if n < x || r < x || n-x > b {
            return 0, nil
        }
        if n == 0 {
            if x == 0 {
                return 1, nil
            }
            return 0, nil
        }
    
        p := n / (r + b)
        q := (r + b - n) / (r + b)
    
        p1, err := dbinom(x, r, p, q, giveLog)
        if err != nil {
            return math.NaN(), err
        }
        p2, err := dbinom(n-x, b, p, q, giveLog)
        if err != nil {
            return math.NaN(), err
        }
        p3, err := dbinom(n, r+b, p, q, giveLog)
        if err != nil {
            return math.NaN(), err
        }
    
        if giveLog {
            return p1 + p2 - p3, nil
        }
        return p1 * p2 / p3, nil
    }
    
    func Qhyper(p, NR, NB, n float64, lowerTail, logP bool) (float64, error) {
        if notFinite(p) || notFinite(NR) || notFinite(NB) || notFinite(n) {
            return math.NaN(), ErrDomain
        }
    
        NR = round(NR)
        NB = round(NB)
        N := NR + NB
        n = round(n)
        if NR < 0 || NB < 0 || n < 0 || n > N {
            return math.NaN(), ErrDomain
        }
    
        /* Goal: Find xr (= #{red balls in sample}) such that
        * phyper(xr, NR,NB, n) >= p > phyper(xr - 1, NR,NB, n)
         */
    
        xstart := math.Max(0, n-NB)
        xend := math.Min(n, NR)
    
        if logP {
            if p > 0 {
                return math.NaN(), ErrDomain
            }
            if p == 0 { /* upper bound*/
                if lowerTail {
                    return xend, nil
                }
                return xstart, nil
            }
            if math.IsInf(p, -1) {
                if lowerTail {
                    return xstart, nil
                }
                return xend, nil
            }
        } else { /* !logP */
            if p < 0 || p > 1 {
                return math.NaN(), ErrDomain
            }
            if p == 0 {
                if lowerTail {
                    return xstart, nil
                }
                return xend, nil
            }
            if p == 1 {
                if lowerTail {
                    return xend, nil
                }
                return xstart, nil
            }
        }
    
        xr := xstart
        xb := n - xr /* always ( = #{black balls in sample} ) */
    
        smallN := N < 1000 /* won't have underflow in product below */
        /* if N is small, term := product.ratio( bin.coef );
        otherwise work with its logarithm to protect against underflow */
        t1, err := lfastchoose(NR, xr)
        if err != nil {
            return 0, err
        }
        t2, err := lfastchoose(NB, xb)
        if err != nil {
            return 0, err
        }
        t3, err := lfastchoose(N, n)
        if err != nil {
            return 0, err
        }
        term := t1 + t2 - t3
        if smallN {
            term = math.Exp(term)
        }
        NR -= xr
        NB -= xb
    
        if !lowerTail || logP {
            p = qIv(p, lowerTail, logP)
        }
        p *= 1 - 1000*epsilon /* was 64, but failed on FreeBSD sometimes */
        var sum float64
        if smallN {
            sum = term
        } else {
            sum = math.Exp(term)
        }
    
        for sum < p && xr < xend {
            xr++
            NB++
            if smallN {
                term *= (NR / xr) * (xb / NB)
            } else {
                term += math.Log((NR / xr) * (xb / NB))
            }
            if smallN {
                sum += term
            } else {
                sum += math.Exp(term)
            }
            xb--
            NR--
        }
        return xr, nil
    }
    
    func lfastchoose(n, k float64) (float64, error) {
        lb, err := lbeta(n-k+1, k+1)
        if err != nil {
            return math.NaN(), err
        }
        return -math.Log(n+1) - lb, nil
    }
    
    func lbeta(a, b float64) (float64, error) {
        p := a
        q := a
        if b < p {
            p = b
        } /* := min(a,b) */
        if b > q {
            q = b
        } /* := max(a,b) */
    
        /* both arguments must be >= 0 */
        if p < 0 {
            return math.NaN(), ErrDomain
        } else if p == 0 {
            return math.Inf(1), nil
        } else if notFinite(q) { /* q == +Inf */
            return math.Inf(1), nil
        }
    
        if p >= 10 {
            /* p and q are big. */
            corr := stirlerr(p) + stirlerr(q) - stirlerr(p+q)
            return math.Log(q)*-0.5 + logSqrt2Pi + corr + (p-0.5)*math.Log(p/(p+q)) + q*math.Log1p(-p/(p+q)), nil
        } else if q >= 10 {
            /* p is small, but q is big. */
            corr := stirlerr(q) - stirlerr(p+q)
            return math.Gamma(p) + corr + p - p*math.Log(p+q) + (q-0.5)*math.Log1p(-p/(p+q)), nil
        } else {
            /* p and q are small: p <= q < 10. */
            /* R change for very small args */
            if p < min {
                return lgamma(p) + (lgamma(q) - lgamma(p+q)), nil
            }
        }
        return math.Log(math.Gamma(p) * (math.Gamma(q) / math.Gamma(p+q))), nil
    }
    
    func lgamma(p float64) float64 {
        r, _ := math.Lgamma(p)
        return r
    }
    
    func qIv(p float64, lowerTail, logP bool) float64 {
        if logP {
            if lowerTail {
                return math.Exp(p)
            }
            return -math.Expm1(p)
        }
        if lowerTail {
            return p
        }
        p = 0.5 - p
        return p + 0.5
    }
    
    // Calculate
    //
    // phyper (x, r, b, n, TRUE, FALSE)
    // [log] ----------------------------------
    // dhyper (x, r, b, n, FALSE)
    //
    // without actually calling phyper. This assumes that
    //
    // x * (r + b) <= n * r
    func pdhyper(x, r, b, n float64, logP bool) float64 {
    
        sum := 0.
        term := 1.
    
        for x > 0 && term >= epsilon*sum {
            term *= x * (b - n + x) / (n + 1 - x) / (r + 1 - x)
            sum += term
            x--
        }
    
        if logP {
            return math.Log1p(sum)
        }
        return 1 + sum
    }
    
    var (
        ln2   = math.Log(2)
        ln2Pi = math.Log(2 * math.Pi)
    )
    
    func log(x float64, lowerTail bool) float64 {
        if lowerTail {
            return math.Log(x)
        }
        if x > -ln2 {
            return math.Log(-math.Expm1(x))
        }
        return math.Log1p(-math.Exp(x))
    }
    
    func dbinom(x, n, p, q float64, giveLog bool) (float64, error) {
        if p == 0 {
            if x == 0 {
                return 1, nil
            }
            return 0, nil
        }
        if q == 0 {
            if x == n {
                return 1, nil
            }
            return 0, nil
        }
    
        if x == 0 {
            if n == 0 {
                return 1, nil
            }
            if p < 0.1 {
                t, err := bd0(n, n*q)
                if err != nil {
                    return math.NaN(), err
                }
                return exp(-t-n*p, giveLog), nil
            }
            return exp(n*math.Log(q), giveLog), nil
        }
        if x == n {
            if q < 0.1 {
                t, err := bd0(n, n*p)
                if err != nil {
                    return math.NaN(), err
                }
                return exp(-t-n*q, giveLog), nil
            }
            return exp(n*math.Log(p), giveLog), nil
        }
        if x < 0 || x > n {
            return 0, nil
        }
    
        // n*p or n*q can underflow to zero if n and p or q are small. This
        // used to occur in dbeta, and gives NaN as from R 2.3.0.
        t1, err := bd0(x, n*p)
        if err != nil {
            return math.NaN(), err
        }
        t2, err := bd0(n-x, n*q)
        if err != nil {
            return math.NaN(), err
        }
        lc := stirlerr(n) - stirlerr(x) - stirlerr(n-x) - t1 - t2
    
        // f = (M_2PI*x*(n-x))/n; could overflow or underflow
        // Upto R 2.7.1:
        // lf = log(M_2PI) + log(x) + log(n-x) - log(n);
        // -- following is much better for x << n :
        lf := ln2Pi + math.Log(x) + math.Log1p(-x/n)
    
        return exp(lc-0.5*lf, giveLog), nil
    }
    
    func negativeOrNotInteger(x float64) bool {
        return x < 0 || x != math.Floor(x)
    }
    
    func notFinite(x float64) bool {
        return math.IsNaN(x) || math.IsInf(x, 0)
    }
    
    func round(x float64) float64 {
        if _, frac := math.Modf(x); frac >= 0.5 {
            return math.Ceil(x)
        }
        return math.Floor(x)
    }
    
    func exp(x float64, giveLog bool) float64 {
        if giveLog {
            return x
        }
        return math.Exp(x)
    }
    
    func dt0(lowerTail, logP bool) float64 {
        if lowerTail {
            return d0(logP)
        }
        return d1(logP)
    }
    
    func dt1(lowerTail, logP bool) float64 {
        if lowerTail {
            return d1(logP)
        }
        return d0(logP)
    }
    
    func d0(logP bool) float64 {
        if logP {
            return math.Inf(-1)
        }
        return 0
    }
    
    func d1(logP bool) float64 {
        if logP {
            return 0
        }
        return 1
    }
    
    // bd0(x,M) :=  M * D0(x/M) = M*[ x/M * log(x/M) + 1 - (x/M) ] =
    //       =  x * log(x/M) + M - x
    // where M = E[X] = n*p (or = lambda), for   x, M > 0
    //
    // in a manner that should be stable (with small relative error)
    // for all x and M=np. In particular for x/np close to 1, direct
    // evaluation fails, and evaluation is based on the Taylor series
    // of log((1+v)/(1-v)) with v = (x-M)/(x+M) = (x-np)/(x+np).
    //
    func bd0(x, np float64) (float64, error) {
        if notFinite(x) || notFinite(np) || np == 0 {
            return math.NaN(), ErrDomain
        }
    
        if math.Abs(x-np) < 0.1*(x+np) {
            v := (x - np) / (x + np) // might underflow to 0
            s := (x - np) * v        // s using v -- change by MM
            if math.Abs(s) < min {
                return s, nil
            }
            ej := 2 * x * v
            v = v * v
            for j := 1; j < 1000; j++ {
                // Taylor series; 1000: no infinite loop
                // as |v| < .1,  v^2000 is "zero"
                ej *= v // = v^(2j+1)
                s1 := s + ej/float64((j<<1)+1)
                if s1 == s { // last term was effectively 0
                    return s1, nil
                }
                s = s1
            }
        }
        /* else:  | x - np |  is not too small */
        return x*math.Log(x/np) + np - x, nil
    }
    
    var (
        // error for 0, 0.5, 1.0, 1.5, ..., 14.5, 15.0.
        sfErrHalves = [31]float64{
            0.0, // n=0 - wrong, place holder only
            0.1534264097200273452913848,   // 0.5
            0.0810614667953272582196702,   // 1.0
            0.0548141210519176538961390,   // 1.5
            0.0413406959554092940938221,   // 2.0
            0.03316287351993628748511048,  // 2.5
            0.02767792568499833914878929,  // 3.0
            0.02374616365629749597132920,  // 3.5
            0.02079067210376509311152277,  // 4.0
            0.01848845053267318523077934,  // 4.5
            0.01664469118982119216319487,  // 5.0
            0.01513497322191737887351255,  // 5.5
            0.01387612882307074799874573,  // 6.0
            0.01281046524292022692424986,  // 6.5
            0.01189670994589177009505572,  // 7.0
            0.01110455975820691732662991,  // 7.5
            0.010411265261972096497478567, // 8.0
            0.009799416126158803298389475, // 8.5
            0.009255462182712732917728637, // 9.0
            0.008768700134139385462952823, // 9.5
            0.008330563433362871256469318, // 10.0
            0.007934114564314020547248100, // 10.5
            0.007573675487951840794972024, // 11.0
            0.007244554301320383179543912, // 11.5
            0.006942840107209529865664152, // 12.0
            0.006665247032707682442354394, // 12.5
            0.006408994188004207068439631, // 13.0
            0.006171712263039457647532867, // 13.5
            0.005951370112758847735624416, // 14.0
            0.005746216513010115682023589, // 14.5
            0.005554733551962801371038690, // 15.0
        }
    
        logSqrt2Pi = math.Log(math.Sqrt(2 * math.Pi))
    )
    
    // stirlerr(n) = log(n!) - log( sqrt(2*pi*n)*(n/e)^n )
    //             = log Gamma(n+1) - 1/2 * [log(2*pi) + log(n)] - n*[log(n) - 1]
    //             = log Gamma(n+1) - (n + 1/2) * log(n) + n - log(2*pi)/2
    func stirlerr(n float64) float64 {
        const (
            S0 = 1. / 12.
            S1 = 1. / 360.
            S2 = 1. / 1260.
            S3 = 1. / 1680.
            S4 = 1. / 1188.
        )
    
        var nn float64
    
        if n <= 15.0 {
            nn = n + n
            if nn == math.Floor(nn) {
                return sfErrHalves[int(nn)]
            }
            lg, _ := math.Lgamma(n + 1)
            return lg - (n+0.5)*math.Log(n) + n - logSqrt2Pi
        }
    
        nn = n * n
        switch {
        case n > 500:
            return ((S0 - S1/nn) / n)
        case n > 80:
            return ((S0 - (S1-S2/nn)/nn) / n)
        case n > 35:
            return ((S0 - (S1-(S2-S3/nn)/nn)/nn) / n)
        default: // 15 < n <= 35
            return (S0 - (S1-(S2-(S3-S4/nn)/nn)/nn)/nn) / n
        }
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥15 安卓adb backup备份应用数据失败
  • ¥15 eclipse运行项目时遇到的问题
  • ¥15 关于#c##的问题:最近需要用CAT工具Trados进行一些开发
  • ¥15 南大pa1 小游戏没有界面,并且报了如下错误,尝试过换显卡驱动,但是好像不行
  • ¥15 没有证书,nginx怎么反向代理到只能接受https的公网网站
  • ¥50 成都蓉城足球俱乐部小程序抢票
  • ¥15 yolov7训练自己的数据集
  • ¥15 esp8266与51单片机连接问题(标签-单片机|关键词-串口)(相关搜索:51单片机|单片机|测试代码)
  • ¥15 电力市场出清matlab yalmip kkt 双层优化问题
  • ¥30 ros小车路径规划实现不了,如何解决?(操作系统-ubuntu)