douwei1930 2015-06-03 20:06
浏览 58

按名称GCS列出存储桶内容

I'm trying to read the content of a bucket on Google Cloud Storage using Go. I'm able to do that, but it is very slow.

The content of the bucket is like this:

bucket name 
-> folders with alphanumeric characters
----> 5 files in each folder
--------> each file has a json array inside

What I want to do is inspect the content of the JSON files in all the folders in the bucket and look for a specific value. The following code works, but it is very slow:

package backend

import (
    "encoding/json"
    "fmt"
    "golang.org/x/net/context"
    "golang.org/x/oauth2"
    "golang.org/x/oauth2/google"
    "google.golang.org/appengine"
    "google.golang.org/appengine/file"
    "google.golang.org/appengine/urlfetch"
    "google.golang.org/cloud"
    "google.golang.org/cloud/storage"
    "io"
    "io/ioutil"
    "net/http"
)

// bucket is the name of the GCS bucket to scan. When it is empty, listBuckets
// replaces it with the application's default bucket name.
var bucket = "bucket_Name"

// jsonStruct is the shape that readFile decodes each object's JSON content
// into. readFile reports only the Done field back to the client.
type jsonStruct struct {
    Gender string `json:"gender"`
    Age    string `json:"age"`
    ID     string `json:"id"`
    Done   int    `json:"done"`
}

// saveData bundles the per-request state shared by the listing and reading
// helpers (listBucket, readFile, errorf).
type saveData struct {
    c       context.Context     // App Engine context created from the request
    r       *http.Request       // incoming HTTP request
    w       http.ResponseWriter // response writer that results are written to
    ctx     context.Context     // cloud context passed to the storage calls
    cleanUp []string            // cleanUp is a list of filenames that need cleaning up at the end of the saving.
    failed  bool                // failed indicates that one or more of the saving steps failed.
}

// init wires the package's HTTP handlers into the default serve mux:
// "/" serves static files and "/listBuckets" runs the bucket scan.
func init() {
    routes := []struct {
        pattern string
        handler func(http.ResponseWriter, *http.Request)
    }{
        {pattern: "/", handler: handleStatic},
        {pattern: "/listBuckets", handler: listBuckets},
    }
    for _, rt := range routes {
        http.HandleFunc(rt.pattern, rt.handler)
    }
}

// handleStatic serves the file under the "static/" directory that matches the
// request path, and marks the response as non-cacheable.
func handleStatic(w http.ResponseWriter, r *http.Request) {
    const staticRoot = "static/"

    hdr := w.Header()
    hdr.Set("Cache-Control", "no-cache")

    target := staticRoot + r.URL.Path
    http.ServeFile(w, r, target)
}

// listBuckets handles /listBuckets. It resolves the bucket to scan, builds an
// OAuth2-authenticated HTTP client on top of urlfetch, and then lists the
// bucket and reads every object in it, writing the results to w.
//
// If the default bucket name cannot be determined, the handler replies with
// 500 Internal Server Error instead of returning an empty 200 response.
func listBuckets(w http.ResponseWriter, r *http.Request) {
    c := appengine.NewContext(r)
    if bucket == "" {
        // readFile reads the package-level bucket, so the resolved default
        // name is stored there rather than in a local variable.
        var err error
        if bucket, err = file.DefaultBucketName(c); err != nil {
            // log.Errorf(c, "failed to get default GCS bucket name: %v", err)
            http.Error(w, "failed to get default GCS bucket name", http.StatusInternalServerError)
            return
        }
    }

    // Storage requests are authenticated with the app's own credentials and
    // sent through the App Engine urlfetch transport.
    hc := &http.Client{
        Transport: &oauth2.Transport{
            Source: google.AppEngineTokenSource(c, storage.ScopeFullControl),
            Base:   &urlfetch.Transport{Context: c},
        },
    }
    ctx := cloud.NewContext(appengine.AppID(c), hc)

    // structure to holds information needed to run the various saving functions
    d := &saveData{
        c:   c,
        r:   r,
        w:   w,
        ctx: ctx,
    }

    d.listBucket(bucket)
}

// errorf records that a step failed by setting d.failed. The format and args
// are currently discarded because the logging call below is commented out.
func (d *saveData) errorf(format string, args ...interface{}) {
    d.failed = true
    // log.Errorf(d.c, format, args...)
}

func (d *saveData) listBucket(bucket string) {
    io.WriteString(d.w, "
Listbucket result:
")

    query := &storage.Query{}
    for query != nil {
        objs, err := storage.ListObjects(d.ctx, bucket, query)
        if err != nil {
            d.errorf("listBucket: unable to list bucket %q: %v", bucket, err)
            return
        }
        query = objs.Next
        for _, obj := range objs.Results {
            d.readFile(obj.Name)
        }

    }
}

func (d *saveData) readFile(fileName string) {

    rc, err := storage.NewReader(d.ctx, bucket, fileName)
    if err != nil {
        d.errorf("readFile: unable to open file from bucket %q, file %q: %v", bucket, fileName, err)
        return
    }
    defer rc.Close()
    slurp, err := ioutil.ReadAll(rc)
    if err != nil {
        d.errorf("readFile: unable to read data from bucket %q, file %q: %v", bucket, fileName, err)
        return
    }
    var userDetails jsonStruct
    err1 := json.Unmarshal(slurp, &userDetails)
    if err1 != nil {
        d.errorf("readFile: %v", err1)
        return
    }
    fmt.Fprintf(d.w, "done is: %v
", userDetails.Done)
}

Basically, right now I read each folder name from the bucket and then read its content using that name. Would it be possible to cache all the bucket content in a Go variable and then work on that variable, instead of reading the bucket for each folder?

I really need this to be faster because I need to present the result back in real time.

Thanks a lot

  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥100 角动量包络面如何用MATLAB绘制
    • ¥15 merge函数占用内存过大
    • ¥15 Revit2020下载问题
    • ¥15 使用EMD去噪处理RML2016数据集时候的原理
    • ¥15 神经网络预测均方误差很小 但是图像上看着差别太大
    • ¥15 单片机无法进入HAL_TIM_PWM_PulseFinishedCallback回调函数
    • ¥15 Oracle中如何从clob类型截取特定字符串后面的字符
    • ¥15 想通过pywinauto自动电机应用程序按钮,但是找不到应用程序按钮信息
    • ¥15 如何在炒股软件中,爬到我想看的日k线
    • ¥15 seatunnel 怎么配置Elasticsearch