dongluanguang4774 2015-07-24 13:19
浏览 70
已采纳

使用Go在Google Cloud Storage中压缩文件夹的最佳方法?

My Google App Engine Go project creates a zip of multiple files in a "folder" that's in Google Cloud Storage. It use to be pretty quick when it was implemented in the BlobStore using the now deprecated and removed Files API. I recently converted the code to use Google Cloud Storage and performance is really bad and sometimes will timeout. The files that are being zipped are between 1K and 2M in size.

Looking for any advice to improve zipping up file contents. The code below is what I wrote for compressing multiple files in the cloud to a new zip file in the cloud. It can take a long time to execute and requires loading the entire contents (See PERFORMANCE ISSUE below) of each file into memory before writing it to the zip. There has to be a better way.

// Pack a folder into zip file
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) {

    log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName) 

    srcFolder = fmt.Sprintf("%v/", srcFolder)
    query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}

    objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
        return
    }

    totalFiles := len(objs.Results)
    if totalFiles == 0 {
        log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
        return
    }

    // create storage file for writing
    log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
    storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)

    // add optional content type and meta data  
    if len(contentType) > 0 { storageWriter.ContentType = contentType }
    if metaData != nil { storageWriter.Metadata = *metaData }

    // Create a buffer to write our archive to.
    buf := new(bytes.Buffer)

    // Create a new zip archive to memory buffer
    zipWriter := zip.NewWriter(buf)

    // go through each file in the folder
    for _, obj := range objs.Results {

        log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
        //d.dumpStats(obj)

        // read file in our source folder from storage - io.ReadCloser returned from storage
        storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return  
        }
        defer storageReader.Close()

        // PERFORMANCE ISSUE: have to load the entire file into memory to get random access from the cloud
        slurp, err := ioutil.ReadAll(storageReader)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read data from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return
        }

        // grab just the filename from directory listing (don't want to store paths in zip)
        _, zipFileName := filepath.Split(obj.Name)

        newFileName := strings.ToLower(zipFileName)

        // add filename to zip
        zipFile, err := zipWriter.Create(newFileName)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return
        }

        // write entire file into zip archive
        _, err = zipFile.Write(slurp)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return
        }

        // flush that to buffer so we can write it off now
        //err = zipFile.Flush()
        //if err != nil {
        //  d.errorf("pack: unable to flush write of zip file from bucket %q, file %q: %v", cloud.bucket, zipFileName, err)
        //  //return
        //}

        // now drain all that buffered zip data to the cloud storage file   
        log.Infof(cloud.c, "Writing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)   
        _, err = buf.WriteTo(storageWriter)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)   
            return          
        }
    }

    // Make sure to check the error on Close.
    log.Infof(cloud.c, "Closing zip writer")    
    err = zipWriter.Close()
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
    }

    // write any leftover data
    if buf.Len() > 0 {
        // now drain all that buffered zip data to the cloud storage file   
        // log.Infof(cloud.c, "Packing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)    
        _, err := buf.WriteTo(storageWriter)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)               
        }
    }

    // close up final write file
    //log.Infof(cloud.c, "Closing cloud storage file %v", fileName) 
    if err := storageWriter.Close(); err != nil {
        log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
        return
    }

    // success!
    log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)   
}
  • 写回答

1条回答 默认 最新

  • doufu7835 2015-07-29 18:00
    关注

    Thanks to Stephen who suggested not loading the file into memory buffers when writing to a zip. Here is the fixed code for reference:

    // Pack a folder into zip file
    func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) bool {
    
        log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName) 
    
        srcFolder = fmt.Sprintf("%v/", srcFolder)
        query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}
    
        objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
            return false
        }
    
        totalFiles := len(objs.Results)
        if totalFiles == 0 {
            log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
            return false
        }
    
        // create storage file for writing
        log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
        storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
        defer storageWriter.Close()
    
        // add optional content type and meta data  
        if len(contentType) > 0 { storageWriter.ContentType = contentType }
        if metaData != nil { storageWriter.Metadata = *metaData }
    
        // Create a new zip archive to memory buffer
        zipWriter := zip.NewWriter(storageWriter)
    
        // go through each file in the folder
        for _, obj := range objs.Results {
    
            log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
            //d.dumpStats(obj)
    
            // read file in our source folder from storage - io.ReadCloser returned from storage
            storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
            if err != nil {
                log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
                return false
            }
            defer storageReader.Close()
    
            // grab just the filename from directory listing (don't want to store paths in zip)
            _, zipFileName := filepath.Split(obj.Name)
            newFileName := strings.ToLower(zipFileName)
    
            // add filename to zip
            zipFile, err := zipWriter.Create(newFileName)
            if err != nil {
                log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
                return false
            }
    
            // copy from storage reader to zip writer   
            _, err = io.Copy(zipFile, storageReader)
            if err != nil {
                log.Errorf(cloud.c, "Failed to copy from storage reader to zip file: %v", err)
                return false
            }   
        }
    
        // Make sure to check the error on Close.
        log.Infof(cloud.c, "Closing zip writer")    
        err = zipWriter.Close()
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
            return false
        }
    
        // success!
        log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)   
        return true
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 echarts动画效果失效的问题。官网下载的例子。
  • ¥60 许可证msc licensing软件报错显示已有相同版本软件,但是下一步显示无法读取日志目录。
  • ¥15 Attention is all you need 的代码运行
  • ¥15 一个服务器已经有一个系统了如果用usb再装一个系统,原来的系统会被覆盖掉吗
  • ¥15 使用esm_msa1_t12_100M_UR50S蛋白质语言模型进行零样本预测时,终端显示出了sequence handled的进度条,但是并不出结果就自动终止回到命令提示行了是怎么回事:
  • ¥15 前置放大电路与功率放大电路相连放大倍数出现问题
  • ¥30 关于<main>标签页面跳转的问题
  • ¥80 部署运行web自动化项目
  • ¥15 腾讯云如何建立同一个项目中物模型之间的联系
  • ¥30 VMware 云桌面水印如何添加