I'll start by showing the code, then explain what I'm trying to do. Code:
package main
import (
	"fmt"
	"os"
	"path/filepath"
	"sync"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/aws/aws-sdk-go/service/s3/s3manager"
)
var (
// Bucket and Prefix are intentionally left empty here (security); fill them
// in before running.
Bucket = "" // Download from this bucket
Prefix = "" // Only objects whose keys start with this prefix
LocalDirectory = "s3logs" // Write downloads under this local directory
)
// main lists every object under Bucket/Prefix in us-west-1 and downloads
// each one into LocalDirectory, mirroring the key paths on disk.
func main() {
	// session.NewSession (unlike the deprecated session.New) returns
	// configuration errors instead of deferring them to the first request.
	sess, err := session.NewSession()
	if err != nil {
		fmt.Fprintln(os.Stderr, "creating AWS session:", err)
		os.Exit(1)
	}
	client := s3.New(sess, &aws.Config{Region: aws.String("us-west-1")})
	params := &s3.ListObjectsInput{Bucket: &Bucket, Prefix: &Prefix}

	// PartSize/Concurrency only parallelize parts of a SINGLE object; for
	// many small files, cross-object parallelism (see eachPage) matters more.
	manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
		d.PartSize = 64 * 1024 * 1024 // 64MB per part
		d.Concurrency = 8
	})

	d := downloader{bucket: Bucket, dir: LocalDirectory, Downloader: manager}
	// The page callback drives the downloads; surface listing failures.
	if err := client.ListObjectsPages(params, d.eachPage); err != nil {
		fmt.Fprintln(os.Stderr, "listing objects:", err)
		os.Exit(1)
	}
}
// downloader pairs an s3manager.Downloader with the bucket to read from and
// the local directory to write into. Embedding *s3manager.Downloader exposes
// its Download method directly on downloader.
type downloader struct {
*s3manager.Downloader
bucket, dir string
}
// eachPage is the ListObjectsPages callback: it downloads every object on the
// page and returns true to request the next page.
//
// Objects are fetched concurrently through a bounded worker pool. The
// Downloader's own Concurrency setting only splits ONE object into parallel
// part-downloads, which is why tuning it has little effect on throughput when
// the bucket holds many small log files — the per-object loop was serial.
// s3manager.Downloader is safe for concurrent use.
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
	const maxInFlight = 16 // cap on simultaneous object downloads

	var wg sync.WaitGroup
	sem := make(chan struct{}, maxInFlight) // counting semaphore

	for _, obj := range page.Contents {
		key := *obj.Key // capture per-iteration (pre-Go 1.22 loop semantics)
		wg.Add(1)
		sem <- struct{}{} // acquire a slot; blocks when maxInFlight in flight
		go func() {
			defer wg.Done()
			defer func() { <-sem }() // release the slot
			d.downloadToFile(key)
		}()
	}

	// Drain this page's downloads before asking for the next page so the
	// number of in-flight requests stays bounded.
	wg.Wait()
	return true
}
func (d *downloader) downloadToFile(key string) {
// Create the directories in the path
file := filepath.Join(d.dir, key)
if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
panic(err)
}
fmt.Printf("Downloading " + key)
// Setup the local file
fd, err := os.Create(file)
if err != nil {
panic(err)
}
defer fd.Close()
// Download the file using the AWS SDK
fmt.Printf("Downloading s3://%s/%s to %s...
", d.bucket, key, file)
params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
_, e := d.Download(fd, params)
if e != nil {
panic(e)
}
}
I'm trying to download the log files from a particular bucket, and eventually from many buckets. I need the download to be as fast as possible because there is a lot of data. My question: what is the most effective way to download huge amounts of data quickly? The whole process is pointless if those logs can't be downloaded at a reasonable speed. Is there a faster way? It's already concurrent according to Amazon's docs. Any ideas? Also, I've noticed a curious thing: it doesn't matter whether I set Concurrency to 1, 4, or 20 — everything still downloads at roughly 0.70–0.80 GB/min.