I was doing some experiments in Go and I found something really odd. When I run the following code on my computer it executes in ~0.5 seconds.
package main
import (
"fmt"
"runtime"
"time"
)
// waitAround parks the calling goroutine until the done channel
// yields a value or is closed, then returns.
func waitAround(done chan bool) {
	<-done
}
func main() {
var startMemory runtime.MemStats
runtime.ReadMemStats(&startMemory)
start := time.Now()
cpus := runtime.NumCPU()
runtime.GOMAXPROCS(cpus)
die := make(chan bool)
count := 100000
for i := 0; i < count; i++ {
go waitAround(die)
}
elapsed := time.Since(start)
var endMemory runtime.MemStats
runtime.ReadMemStats(&endMemory)
fmt.Printf("Started %d goroutines
%d CPUs
%f seconds
",
count, cpus, elapsed.Seconds())
fmt.Printf("Memory before %d
memory after %d
", startMemory.Alloc,
endMemory.Alloc)
fmt.Printf("%d goroutines running
", runtime.NumGoroutine())
fmt.Printf("%d bytes per goroutine
", (endMemory.Alloc - startMemory.Alloc)/uint64(runtime.NumGoroutine()))
close(die)
}
However, when I execute it using runtime.GOMAXPROCS(1)
it executes much faster (~0.15 seconds). Can anybody explain to me why running many goroutines would be slower using more cores? Is there any significant overhead to multiplexing the goroutines onto multiple cores? I realize the goroutines aren't doing anything and it would probably be a different story if I had to wait for the routines to actually do something.