What's the fastest way to read a sqlite3 table in golang?
package main
import (
"fmt"
"database/sql"
_ "github.com/mattn/go-sqlite3"
"log"
"time"
)
func main() {
start := time.Now()
db, err := sql.Open("sqlite3", "/Users/robertking/go/src/bitbucket.org/thematicanalysis/optimization_test/robs.db")
if err != nil {
log.Fatal(err)
}
defer db.Close()
rows, err := db.Query("select * from data")
if err != nil {
log.Fatal(err)
}
defer rows.Close()
for rows.Next() {
}
err = rows.Err()
if err != nil {
log.Fatal(err)
}
fmt.Println(time.Since(start))
}
This takes 8 seconds in Go because .Next
is slow. In python a fetchall
takes only 4 seconds! I'm rewriting in GO to gain performance not lose performance.
Here is the python code, I couldn't find an equivalent of fetchall
in go:
import time
start = time.time()
import sqlite3
conn = sqlite3.connect('/Users/robertking/go/src/bitbucket.org/thematicanalysis/optimization_test/robs.db')
c = conn.cursor()
c.execute("SELECT * FROM data")
x = c.fetchall()
print time.time() - start
Edit: adding bounty. I'm reading the data in go, python and C, here are results. Dont want to use C, but will stick with python if GO isnt faster.:
py: 2.45s
go: 2.13s (using github.com/mxk/go-sqlite/sqlite3 instead of github.com/mattn/go-sqlite3)
c: 0.32s
I feel like go should be closer to the c side of thing? anyone know how to make it faster? is it possible to avoid mutex with readonly mode?
edit:
It seems like all the sqlite3 implementations are slow (too much reflection and too many cgo calls for conversions). So i'll have to just write my own interface.
Here's the schema:
CREATE TABLE mytable
(
c0 REAL,
c1 INTEGER,
c15 TEXT,
c16 TEXT,
c17 TEXT,
c18 TEXT,
c19 TEXT,
c47 TEXT,
c74 REAL DEFAULT 0,
c77 TEXT,
c101 TEXT,
c103 TEXT,
c108 TEXT,
c110 TEXT,
c125 TEXT,
c126 TEXT,
c127 REAL DEFAULT 0,
x INTEGER
PRIMARY KEY
);
and the query is dynamic but usually something like this:
SELECT c77,c77,c125,c126,c127,c74 from mytable
edit:
looks like i'll fork the sqlite3 implementation and make some methods that focus on performance,
this is an example of some code which is much faster:.
package main
/*
#cgo LDFLAGS: -l sqlite3
#include "sqlite3.h"
*/
import "C"
import (
//"database/sql"
"log"
"reflect"
"unsafe"
)
type Row struct {
v77 string
v125 string
v126 string
v127 float64
v74 float64
}
// cStr returns a pointer to the first byte in s.
func cStr(s string) *C.char {
h := (*reflect.StringHeader)(unsafe.Pointer(&s))
return (*C.char)(unsafe.Pointer(h.Data))
}
func main() {
getDataFromSqlite()
}
func getDataFromSqlite() {
var db *C.sqlite3
name := "../data_dbs/all_columns.db"
rc := C.sqlite3_open_v2(cStr(name+"\x00"), &db, C.SQLITE_OPEN_READONLY, nil)
var stmt *C.sqlite3_stmt;
rc = C.sqlite3_prepare_v2(db, cStr("SELECT c77,c125,c126,c127,c74 from data\x00"), C.int(-1), &stmt, nil);
rc = C.sqlite3_reset(stmt);
var result C.double
result = 0.0
rc = C.sqlite3_step(stmt)
for rc == C.SQLITE_ROW {
C.GoString((*C.char)(unsafe.Pointer(C.sqlite3_column_text(stmt, 0))))
C.GoString((*C.char)(unsafe.Pointer(C.sqlite3_column_text(stmt, 1))))
C.GoString((*C.char)(unsafe.Pointer(C.sqlite3_column_text(stmt, 2))))
C.sqlite3_column_double(stmt, 3)
result += C.sqlite3_column_double(stmt, 4)
rc = C.sqlite3_step(stmt)
}
log.Println(result)
}