dongluanguang4774 2015-07-24 13:19
浏览 70
已采纳

使用Go在Google Cloud Storage中压缩文件夹的最佳方法?

My Google App Engine Go project creates a zip of multiple files in a "folder" that's in Google Cloud Storage. It use to be pretty quick when it was implemented in the BlobStore using the now deprecated and removed Files API. I recently converted the code to use Google Cloud Storage and performance is really bad and sometimes will timeout. The files that are being zipped are between 1K and 2M in size.

Looking for any advice to improve zipping up file contents. The code below is what I wrote for compressing multiple files in the cloud to a new zip file in the cloud. It can take a long time to execute and requires loading the entire contents (See PERFORMANCE ISSUE below) of each file into memory before writing it to the zip. There has to be a better way.

// Pack a folder into zip file
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) {

    log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName) 

    srcFolder = fmt.Sprintf("%v/", srcFolder)
    query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}

    objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
        return
    }

    totalFiles := len(objs.Results)
    if totalFiles == 0 {
        log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
        return
    }

    // create storage file for writing
    log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
    storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)

    // add optional content type and meta data  
    if len(contentType) > 0 { storageWriter.ContentType = contentType }
    if metaData != nil { storageWriter.Metadata = *metaData }

    // Create a buffer to write our archive to.
    buf := new(bytes.Buffer)

    // Create a new zip archive to memory buffer
    zipWriter := zip.NewWriter(buf)

    // go through each file in the folder
    for _, obj := range objs.Results {

        log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
        //d.dumpStats(obj)

        // read file in our source folder from storage - io.ReadCloser returned from storage
        storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return  
        }
        defer storageReader.Close()

        // PERFORMANCE ISSUE: have to load the entire file into memory to get random access from the cloud
        slurp, err := ioutil.ReadAll(storageReader)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read data from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return
        }

        // grab just the filename from directory listing (don't want to store paths in zip)
        _, zipFileName := filepath.Split(obj.Name)

        newFileName := strings.ToLower(zipFileName)

        // add filename to zip
        zipFile, err := zipWriter.Create(newFileName)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return
        }

        // write entire file into zip archive
        _, err = zipFile.Write(slurp)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return
        }

        // flush that to buffer so we can write it off now
        //err = zipFile.Flush()
        //if err != nil {
        //  d.errorf("pack: unable to flush write of zip file from bucket %q, file %q: %v", cloud.bucket, zipFileName, err)
        //  //return
        //}

        // now drain all that buffered zip data to the cloud storage file   
        log.Infof(cloud.c, "Writing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)   
        _, err = buf.WriteTo(storageWriter)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)   
            return          
        }
    }

    // Make sure to check the error on Close.
    log.Infof(cloud.c, "Closing zip writer")    
    err = zipWriter.Close()
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
    }

    // write any leftover data
    if buf.Len() > 0 {
        // now drain all that buffered zip data to the cloud storage file   
        // log.Infof(cloud.c, "Packing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)    
        _, err := buf.WriteTo(storageWriter)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)               
        }
    }

    // close up final write file
    //log.Infof(cloud.c, "Closing cloud storage file %v", fileName) 
    if err := storageWriter.Close(); err != nil {
        log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
        return
    }

    // success!
    log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)   
}
  • 写回答

1条回答 默认 最新

  • doufu7835 2015-07-29 18:00
    关注

    Thanks to Stephen who suggested not loading the file into memory buffers when writing to a zip. Here is the fixed code for reference:

    // Pack a folder into zip file
    func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) bool {
    
        log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName) 
    
        srcFolder = fmt.Sprintf("%v/", srcFolder)
        query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}
    
        objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
            return false
        }
    
        totalFiles := len(objs.Results)
        if totalFiles == 0 {
            log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
            return false
        }
    
        // create storage file for writing
        log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
        storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
        defer storageWriter.Close()
    
        // add optional content type and meta data  
        if len(contentType) > 0 { storageWriter.ContentType = contentType }
        if metaData != nil { storageWriter.Metadata = *metaData }
    
        // Create a new zip archive to memory buffer
        zipWriter := zip.NewWriter(storageWriter)
    
        // go through each file in the folder
        for _, obj := range objs.Results {
    
            log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
            //d.dumpStats(obj)
    
            // read file in our source folder from storage - io.ReadCloser returned from storage
            storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
            if err != nil {
                log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
                return false
            }
            defer storageReader.Close()
    
            // grab just the filename from directory listing (don't want to store paths in zip)
            _, zipFileName := filepath.Split(obj.Name)
            newFileName := strings.ToLower(zipFileName)
    
            // add filename to zip
            zipFile, err := zipWriter.Create(newFileName)
            if err != nil {
                log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
                return false
            }
    
            // copy from storage reader to zip writer   
            _, err = io.Copy(zipFile, storageReader)
            if err != nil {
                log.Errorf(cloud.c, "Failed to copy from storage reader to zip file: %v", err)
                return false
            }   
        }
    
        // Make sure to check the error on Close.
        log.Infof(cloud.c, "Closing zip writer")    
        err = zipWriter.Close()
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
            return false
        }
    
        // success!
        log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)   
        return true
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 前置放大电路与功率放大电路相连放大倍数出现问题
  • ¥30 关于<main>标签页面跳转的问题
  • ¥80 部署运行web自动化项目
  • ¥15 腾讯云如何建立同一个项目中物模型之间的联系
  • ¥30 VMware 云桌面水印如何添加
  • ¥15 用ns3仿真出5G核心网网元
  • ¥15 matlab答疑 关于海上风电的爬坡事件检测
  • ¥88 python部署量化回测异常问题
  • ¥30 酬劳2w元求合作写文章
  • ¥15 在现有系统基础上增加功能