douying0108 2015-12-18 18:32
浏览 69

提高AWS日志下载速度

I'm going to start with showing the code and then what I'm trying to do, code:

package main

import (
    "fmt"
    "os"
    "path/filepath"
    "sync"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var (
    // Deliberately left blank — fill these in before running (kept empty
    // here so no real bucket/prefix is published).
    Bucket = "" // source S3 bucket to download from
    Prefix = "" // only objects under this key prefix are fetched

    // LocalDirectory is the local destination root; the S3 key's path
    // structure is mirrored beneath it.
    LocalDirectory = "s3logs"
)

func main() {
    sess := session.New()
    client := s3.New(sess, &aws.Config{Region: aws.String("us-west-1")})
    params := &s3.ListObjectsInput{Bucket: &Bucket, Prefix: &Prefix}

    manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
        d.PartSize = 64 * 1024 * 1024 // 64MB per part
        d.Concurrency = 8
    }) // works

    //manager := s3manager.NewDownloaderWithClient(client) //works

    d := downloader{bucket: Bucket, dir: LocalDirectory, Downloader: manager}

    client.ListObjectsPages(params, d.eachPage)

}

// downloader bundles an s3manager.Downloader (embedded, so its Download
// method is promoted onto downloader) with the source bucket name and the
// local directory that downloaded objects are written under.
type downloader struct {
    *s3manager.Downloader
    bucket, dir string
}

// eachPage is the ListObjectsPages callback: it downloads every object on
// the page, then returns true to request the next page.
//
// PERFORMANCE FIX: the original downloaded objects one at a time, which is
// why raising Downloader.Concurrency changed nothing — that setting only
// parallelizes parts within a single (large) object. Many small log files
// need object-level parallelism, added here with a bounded worker pool.
// downloadToFile still panics on error, which crashes the process exactly
// as the sequential version did.
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
    const maxInFlight = 8 // cap on simultaneous object downloads
    sem := make(chan struct{}, maxInFlight)
    var wg sync.WaitGroup

    for _, obj := range page.Contents {
        key := *obj.Key // capture per iteration (pre-Go 1.22 loop semantics)
        wg.Add(1)
        sem <- struct{}{} // acquire a slot; blocks when maxInFlight are busy
        go func() {
            defer wg.Done()
            defer func() { <-sem }() // release the slot
            d.downloadToFile(key)
        }()
    }

    // Finish this page before the SDK fetches the next one.
    wg.Wait()
    return true
}

func (d *downloader) downloadToFile(key string) {
    // Create the directories in the path
    file := filepath.Join(d.dir, key)

    if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
        panic(err)
    }
    fmt.Printf("Downloading " + key)
    // Setup the local file
    fd, err := os.Create(file)
    if err != nil {
        panic(err)
    }

    defer fd.Close()

    // Download the file using the AWS SDK
    fmt.Printf("Downloading s3://%s/%s to %s...
", d.bucket, key, file)
    params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
    _, e := d.Download(fd, params)
    if e != nil {
        panic(e)
    }

}

I'm trying to download the log files from a particular bucket, and eventually from many buckets. I need the download to be as fast as possible because there is a lot of data. My question is: what is the most effective way to download huge amounts of data quickly? The whole process is pointless if those logs can't be downloaded at a reasonable speed. Is there a faster way? It's already concurrent according to Amazon's documentation. Any ideas? Also, I've noticed a curious thing: it doesn't matter whether I set the Concurrency to 1, 4, or 20 — everything still downloads at roughly 0.70–0.80 GB/min.

  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥15 装 pytorch 的时候出了好多问题,遇到这种情况怎么处理?
    • ¥20 IOS游览器某宝手机网页版自动立即购买JavaScript脚本
    • ¥15 手机接入宽带网线,如何释放宽带全部速度
    • ¥30 关于#r语言#的问题:如何对R语言中mfgarch包中构建的garch-midas模型进行样本内长期波动率预测和样本外长期波动率预测
    • ¥15 ETLCloud 处理json多层级问题
    • ¥15 matlab中使用gurobi时报错
    • ¥15 这个主板怎么能扩出一两个sata口
    • ¥15 不是,这到底错哪儿了😭
    • ¥15 2020长安杯与连接网探
    • ¥15 关于#matlab#的问题:在模糊控制器中选出线路信息,在simulink中根据线路信息生成速度时间目标曲线(初速度为20m/s,15秒后减为0的速度时间图像)我想问线路信息是什么