dongshenchi5364
2016-08-21 04:41 阅读 86
已采纳

Golang中的并发积分计算

I tried to compute the integral concurrently, but my program ended up being slower than computing the integral with a normal for loop. What am I doing wrong?

package main

import (
    "fmt"
    "math"
    "sync"
    "time"
)

type Result struct {
    result float64
    lock sync.RWMutex
}

var wg sync.WaitGroup
var result Result

func main() {
    now := time.Now()
    a := 0.0
    b := 1.0
    n := 100000.0
    deltax := (b - a) / n
    wg.Add(int(n))
    for i := 0.0; i < n; i++ {
        go f(a, deltax, i)
    }
    wg.Wait()
    fmt.Println(deltax * result.result)
    fmt.Println(time.Now().Sub(now))
}

func f(a float64, deltax float64, i float64) {
    fx := math.Sqrt(a + deltax * (i + 0.5))
    result.lock.Lock()
    result.result += fx
    result.lock.Unlock()
    wg.Done()
}
  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 复制链接分享

2条回答 默认 最新

  • 已采纳
    dongqing8765 dongqing8765 2016-08-21 06:01

    Unless the time taken by the activity in the goroutine takes a lot more time than needed to switch contexts, carry out the task and use a mutex to update a value, it would be faster to do it serially.

    Take a look at a slightly modified version. All I've done is add a delay of 1 microsecond in the f() function.

    package main
    
    import (
        "fmt"
        "math"
        "sync"
        "time"
    )
    
    type Result struct {
        result float64
        lock   sync.RWMutex
    }
    
    var wg sync.WaitGroup
    var result Result
    
    func main() {
        fmt.Println("concurrent")
        concurrent()
        result.result = 0
        fmt.Println("serial")
        serial()
    }
    
    func concurrent() {
        now := time.Now()
        a := 0.0
        b := 1.0
        n := 100000.0
        deltax := (b - a) / n
        wg.Add(int(n))
        for i := 0.0; i < n; i++ {
            go f(a, deltax, i, true)
        }
        wg.Wait()
        fmt.Println(deltax * result.result)
        fmt.Println(time.Now().Sub(now))
    }
    
    func serial() {
        now := time.Now()
        a := 0.0
        b := 1.0
        n := 100000.0
        deltax := (b - a) / n
        for i := 0.0; i < n; i++ {
            f(a, deltax, i, false)
        }
        fmt.Println(deltax * result.result)
        fmt.Println(time.Now().Sub(now))
    }
    
    func f(a, deltax, i float64, concurrent bool) {
        time.Sleep(1 * time.Microsecond)
        fx := math.Sqrt(a + deltax*(i+0.5))
        if concurrent {
            result.lock.Lock()
            result.result += fx
            result.lock.Unlock()
            wg.Done()
        } else {
            result.result += fx
        }
    }
    

    With the delay, the result was as follows (the concurrent version is much faster):

    concurrent
    0.6666666685900424
    624.914165ms
    
    serial
    0.6666666685900422
    5.609195767s
    

    Without the delay:

    concurrent
    0.6666666685900428
    50.771275ms
    
    serial
    0.6666666685900422
    749.166µs
    

    As you can see, the longer it takes to complete a task, the more sense it makes to do it concurrently, if possible.

    点赞 评论 复制链接分享
  • dqe9657 dqe9657 2016-08-21 07:14

    3- For performance gain, you may divide tasks per CPU cores without using lock sync.RWMutex:

    +30x Optimizations using channels and runtime.NumCPU(), this takes 2ms on 2 Cores and 993µs on 8 Cores, while Your sample code takes 61ms on 2 Cores and 40ms on 8 Cores:

    See this working sample code and outputs:

    package main
    
    import (
        "fmt"
        "math"
        "runtime"
        "time"
    )
    
    func main() {
        nCPU := runtime.NumCPU()
        fmt.Println("nCPU =", nCPU)
        ch := make(chan float64, nCPU)
        startTime := time.Now()
        a := 0.0
        b := 1.0
        n := 100000.0 
        deltax := (b - a) / n
    
        stepPerCPU := n / float64(nCPU)
        for start := 0.0; start < n; {
            stop := start + stepPerCPU
            go f(start, stop, a, deltax, ch)
            start = stop
        }
    
        integral := 0.0
        for i := 0; i < nCPU; i++ {
            integral += <-ch
        }
    
        fmt.Println(time.Now().Sub(startTime))
        fmt.Println(deltax * integral)
    }
    
    func f(start, stop, a, deltax float64, ch chan float64) {
        result := 0.0
        for i := start; i < stop; i++ {
            result += math.Sqrt(a + deltax*(i+0.5))
        }
        ch <- result
    }
    

    Output on 2 Cores:

    nCPU = 2
    2.0001ms
    0.6666666685900485
    

    Output on 8 Cores:

    nCPU = 8
    993µs
    0.6666666685900456
    

    Your sample code, Output on 2 Cores:

    0.6666666685900424
    61.0035ms
    

    Your sample code, Output on 8 Cores:

    0.6666666685900415
    40.9964ms
    

    2- For good benchmark statistics, use large number of samples (big n):

    As you See here using 2 Cores this takes 110ms on 2 Cores, but on this same CPU using 1 Core this takes 215ms with n := 10000000.0:

    With n := 10000000.0 and single goroutine, see this working sample code:

    package main
    
    import (
        "fmt"
        "math"
        "time"
    )
    
    func main() {
        now := time.Now()
        a := 0.0
        b := 1.0
        n := 10000000.0
        deltax := (b - a) / n
        result := 0.0
        for i := 0.0; i < n; i++ {
            result += math.Sqrt(a + deltax*(i+0.5))
        }
        fmt.Println(time.Now().Sub(now))
        fmt.Println(deltax * result)
    }
    

    Output:

    215.0123ms
    0.6666666666685884
    

    With n := 10000000.0 and 2 goroutines, see this working sample code:

    package main
    
    import (
        "fmt"
        "math"
        "runtime"
        "time"
    )
    
    func main() {
        nCPU := runtime.NumCPU()
        fmt.Println("nCPU =", nCPU)
        ch := make(chan float64, nCPU)
        startTime := time.Now()
        a := 0.0
        b := 1.0
        n := 10000000.0
        deltax := (b - a) / n
    
        stepPerCPU := n / float64(nCPU)
        for start := 0.0; start < n; {
            stop := start + stepPerCPU
            go f(start, stop, a, deltax, ch)
            start = stop
        }
    
        integral := 0.0
        for i := 0; i < nCPU; i++ {
            integral += <-ch
        }
    
        fmt.Println(time.Now().Sub(startTime))
        fmt.Println(deltax * integral)
    }
    
    func f(start, stop, a, deltax float64, ch chan float64) {
        result := 0.0
        for i := start; i < stop; i++ {
            result += math.Sqrt(a + deltax*(i+0.5))
        }
        ch <- result
    }
    

    Output:

    nCPU = 2
    110.0063ms
    0.6666666666686073
    

    1- There is an optimum point for number of Goroutines, And from this point forward increasing number of Goroutines doesn't reduce program execution time:

    On 2 Cores CPU, with the following code, The result is:

    nCPU: 1,          2,          4,         8,           16
    Time: 2.1601236s, 1.1220642s, 1.1060633s, 1.1140637s, 1.1380651s
    

    As you see from nCPU=1 to nCPU=2 time decrease is big enough but after this point it is not much, so nCPU=2 on 2 Cores CPU is Optimum point for this Sample code, so using nCPU := runtime.NumCPU() is enough here.

    package main
    
    import (
        "fmt"
        "math"
        "time"
    )
    
    func main() {
        nCPU := 2 //2.1601236s@1 1.1220642s@2 1.1060633s@4 1.1140637s@8 1.1380651s@16
        fmt.Println("nCPU =", nCPU)
        ch := make(chan float64, nCPU)
        startTime := time.Now()
        a := 0.0
        b := 1.0
        n := 100000000.0
        deltax := (b - a) / n
    
        stepPerCPU := n / float64(nCPU)
        for start := 0.0; start < n; {
            stop := start + stepPerCPU
            go f(start, stop, a, deltax, ch)
            start = stop
        }
    
        integral := 0.0
        for i := 0; i < nCPU; i++ {
            integral += <-ch
        }
    
        fmt.Println(time.Now().Sub(startTime))
        fmt.Println(deltax * integral)
    }
    
    func f(start, stop, a, deltax float64, ch chan float64) {
        result := 0.0
        for i := start; i < stop; i++ {
            result += math.Sqrt(a + deltax*(i+0.5))
        }
        ch <- result
    }
    
    点赞 评论 复制链接分享

相关推荐