I'm trying to read the contents of a Google Cloud Storage bucket using Go. I'm able to do that, but it is very slow.
The content of the bucket is like this:
bucket name
-> folders with alphanumeric characters
----> 5 files in each folder
--------> each file contains a JSON array
What I want to do is inspect the content of the JSON files in all the folders in the bucket and look for a specific value. The following code works, but it is very slow:
package backend
import (
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"

	"golang.org/x/net/context"
	"golang.org/x/oauth2"
	"golang.org/x/oauth2/google"
	"google.golang.org/appengine"
	"google.golang.org/appengine/file"
	"google.golang.org/appengine/urlfetch"
	"google.golang.org/cloud"
	"google.golang.org/cloud/storage"
)
// bucket is the name of the Cloud Storage bucket whose objects are listed and
// read. listBuckets replaces it with the application's default bucket name
// when it is empty.
var bucket = "bucket_Name"
// jsonStruct is the decoding target for the JSON document stored in each
// object; readFile unmarshals an object's bytes into it.
type jsonStruct struct {
	Gender string `json:"gender"`
	Age    string `json:"age"`
	ID     string `json:"id"`
	Done   int    `json:"done"` // the value readFile prints for each object
}
// saveData bundles the per-request state shared by the bucket listing and
// object reading helpers.
type saveData struct {
	c       context.Context     // App Engine context created from the incoming request
	r       *http.Request       // incoming HTTP request
	w       http.ResponseWriter // writer the results are printed to
	ctx     context.Context     // Cloud Storage context built on the authenticated HTTP client
	cleanUp []string            // filenames that need cleaning up at the end of the saving
	failed  bool                // set once one or more steps have failed
}
// init registers the package's HTTP handlers on the default mux: static files
// at the root pattern and the bucket scan at /listBuckets.
func init() {
	http.Handle("/", http.HandlerFunc(handleStatic))
	http.Handle("/listBuckets", http.HandlerFunc(listBuckets))
}
// handleStatic serves the file under the static/ directory that matches the
// request path, and marks the response as non-cacheable.
func handleStatic(w http.ResponseWriter, r *http.Request) {
	// The request path is appended as-is; http.ServeFile itself rejects
	// requests whose URL path contains a ".." element.
	target := "static/" + r.URL.Path

	hdr := w.Header()
	hdr.Set("Cache-Control", "no-cache")

	http.ServeFile(w, r, target)
}
// listBuckets handles /listBuckets. It resolves the bucket to scan, builds an
// OAuth2-authenticated HTTP client on top of urlfetch, wraps it in a Cloud
// Storage context, and prints the result of scanning every object in the
// bucket to the response.
//
// If the package-level bucket name is empty and the application's default
// bucket cannot be determined, the handler replies with 500 Internal Server
// Error rather than an empty successful response.
func listBuckets(w http.ResponseWriter, r *http.Request) {
	c := appengine.NewContext(r)
	if bucket == "" {
		// NOTE(review): this assigns the package-level variable from inside a
		// request handler; concurrent requests would race on it. It only
		// runs when bucket is empty.
		var err error
		if bucket, err = file.DefaultBucketName(c); err != nil {
			// Surface the failure to the caller; previously the handler
			// returned silently, which the client saw as an empty 200.
			http.Error(w, "failed to get default GCS bucket name", http.StatusInternalServerError)
			return
		}
	}

	// Every Cloud Storage request goes through urlfetch and is signed with
	// the application's own service-account token.
	hc := &http.Client{
		Transport: &oauth2.Transport{
			Source: google.AppEngineTokenSource(c, storage.ScopeFullControl),
			Base:   &urlfetch.Transport{Context: c},
		},
	}
	ctx := cloud.NewContext(appengine.AppID(c), hc)

	// d holds the state needed by the listing and reading helpers.
	d := &saveData{
		c:   c,
		r:   r,
		w:   w,
		ctx: ctx,
	}
	d.listBucket(bucket)
}
// errorf marks the current operation as failed and logs the formatted
// message. format and args follow fmt.Printf conventions.
func (d *saveData) errorf(format string, args ...interface{}) {
	d.failed = true
	// The message used to be dropped entirely (the logging call was commented
	// out), which made every failure invisible. The standard logger is used
	// because it needs nothing beyond the standard library.
	log.Printf(format, args...)
}
func (d *saveData) listBucket(bucket string) {
io.WriteString(d.w, "
Listbucket result:
")
query := &storage.Query{}
for query != nil {
objs, err := storage.ListObjects(d.ctx, bucket, query)
if err != nil {
d.errorf("listBucket: unable to list bucket %q: %v", bucket, err)
return
}
query = objs.Next
for _, obj := range objs.Results {
d.readFile(obj.Name)
}
}
}
func (d *saveData) readFile(fileName string) {
rc, err := storage.NewReader(d.ctx, bucket, fileName)
if err != nil {
d.errorf("readFile: unable to open file from bucket %q, file %q: %v", bucket, fileName, err)
return
}
defer rc.Close()
slurp, err := ioutil.ReadAll(rc)
if err != nil {
d.errorf("readFile: unable to read data from bucket %q, file %q: %v", bucket, fileName, err)
return
}
var userDetails jsonStruct
err1 := json.Unmarshal(slurp, &userDetails)
if err1 != nil {
d.errorf("readFile: %v", err1)
return
}
fmt.Fprintf(d.w, "done is: %v
", userDetails.Done)
}
Basically, right now I read each folder name from the bucket and then read the content using that name. Would it be possible to cache all the bucket content in a Go variable and then work on that variable, instead of reading from the bucket for each folder?
I really need this to be faster because I need to present the result back in real time.
Thanks a lot