My Google App Engine Go project creates a zip of multiple files in a "folder" that's in Google Cloud Storage. It used to be pretty quick when it was implemented in the BlobStore using the now deprecated and removed Files API. I recently converted the code to use Google Cloud Storage, and now performance is really bad and it sometimes times out. The files being zipped are between 1K and 2M in size.
I'm looking for any advice on improving how the file contents are zipped. The code below is what I wrote to compress multiple files in the cloud into a new zip file in the cloud. It can take a long time to execute and requires loading the entire contents of each file into memory (see PERFORMANCE ISSUE below) before writing it to the zip. There has to be a better way.
// Pack zips the objects listed under srcFolder in cloud.bucket into a new
// Cloud Storage object named fileName in the same bucket.
//
// srcFolder is the folder prefix without a trailing slash; the listing uses
// "/" as delimiter. Each object is stored in the archive under its lower-cased
// base name, so no folder paths end up inside the zip. contentType and
// metaData are optional: an empty contentType or a nil metaData leaves the
// corresponding field of the new object untouched.
//
// Pack returns nothing; every failure is logged and aborts the operation.
//
// NOTE(review): on the error paths storageWriter is deliberately not closed,
// so a truncated archive is not committed. Presumably the pending upload is
// abandoned when cloud.ctx ends - confirm, and abort it explicitly if the
// storage client in use offers a way to do so.
// NOTE(review): only objs.Results of a single ListObjects call is packed; if
// the listing can be paginated, later pages are ignored - confirm against the
// storage client in use.
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) {
	log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName)

	srcFolder = fmt.Sprintf("%v/", srcFolder)
	query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}
	objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
		return
	}
	totalFiles := len(objs.Results)
	if totalFiles == 0 {
		log.Errorf(cloud.c, "Packing found no objects in bucket %q folder %q", cloud.bucket, srcFolder)
		return
	}

	// Create the destination object and apply the optional attributes.
	log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
	storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
	if len(contentType) > 0 {
		storageWriter.ContentType = contentType
	}
	if metaData != nil {
		storageWriter.Metadata = *metaData
	}

	// The archive is built in a memory buffer that is drained to the
	// destination object after every entry, so the buffer does not have to
	// hold the whole archive at once.
	buf := new(bytes.Buffer)
	zipWriter := zip.NewWriter(buf)

	for _, obj := range objs.Results {
		log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
		if !cloud.packObjectIntoZip(zipWriter, obj.Name) {
			return
		}

		log.Infof(cloud.c, "Writing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)
		if _, err := buf.WriteTo(storageWriter); err != nil {
			log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)
			return
		}
	}

	// Closing the zip writer finishes the archive in buf. If that fails the
	// archive is unusable, so stop instead of committing it.
	log.Infof(cloud.c, "Closing zip writer")
	if err := zipWriter.Close(); err != nil {
		log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
		return
	}

	// Drain whatever the zip writer left in the buffer.
	if buf.Len() > 0 {
		if _, err := buf.WriteTo(storageWriter); err != nil {
			log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)
			return
		}
	}

	// Check the error on Close of the destination object.
	if err := storageWriter.Close(); err != nil {
		log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
		return
	}

	log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)
}

// packObjectIntoZip copies the object objName from cloud.bucket into a new
// entry of zipWriter. It logs any failure and reports whether the caller may
// continue. Keeping this in its own function makes the deferred Close run
// once per object rather than piling up until Pack returns.
func (cloud *Cloud) packObjectIntoZip(zipWriter *zip.Writer, objName string) bool {
	// Keep only the file name (no paths inside the zip), lower-cased.
	_, zipFileName := filepath.Split(objName)
	if zipFileName == "" {
		// A name ending in "/" has no file part; adding it would create an
		// entry with an empty name.
		log.Infof(cloud.c, "Packing skipped object %q: it has no file name", objName)
		return true
	}
	newFileName := strings.ToLower(zipFileName)

	storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, objName)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, objName, err)
		return false
	}
	// The Close error is ignored: this is the read side only and nothing
	// could be done about a failure here.
	defer storageReader.Close()

	// PERFORMANCE ISSUE: the whole object is loaded into memory first.
	// NOTE(review): io.Copy(zipFile, storageReader) would stream the object
	// instead, but it needs the "io" package to be imported by this file.
	slurp, err := ioutil.ReadAll(storageReader)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to read data from bucket %q file %q: %v", cloud.bucket, objName, err)
		return false
	}

	zipFile, err := zipWriter.Create(newFileName)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
		return false
	}
	if _, err := zipFile.Write(slurp); err != nil {
		log.Errorf(cloud.c, "Packing failed to write zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
		return false
	}
	return true
}