I am downloading a large .zip file in parallel with Accept-Ranges and Goroutines. The application sends multiple requests to download 10MB chunks of a zip file from a URL using its Range header.
The requests get split up into different ranges as separate Goroutines and the data obtained is written into temp files. The files are named 1, 2, 3....
package main
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"strconv"
"sync"
)
var wg sync.WaitGroup
func main() {
url := "https://path/to/large/zip/file/zipfile.zip"
res, _ := http.Head(url)
maps := res.Header
length, _ := strconv.Atoi(maps["Content-Length"][0]) // Get the content length from the header request
chunks := (length / (1024 * 1024 * 10)) + 1
// startByte and endByte determines the positions of the chunk that should be downloaded
var startByte = 0
var endByte = (1024 * 1024 * 10) - 1
//body := make([][]byte, chunks)
body := make([]io.ReadCloser, chunks)
for i := 0; i < chunks; i++ {
wg.Add(1)
go func(min int, max int, i int) {
client := &http.Client {}
req, _ := http.NewRequest("GET", url, nil)
rangeHeader := "bytes=" + strconv.Itoa(min) +"-" + strconv.Itoa(max)
fmt.Println(rangeHeader)
req.Header.Add("Range", rangeHeader)
resp,_ := client.Do(req)
defer resp.Body.Close()
reader, _ := ioutil.ReadAll(resp.Body)
body[i] = resp.Body
ioutil.WriteFile(strconv.Itoa(i), reader, 777) // Write to the file i as a byte array
wg.Done()
}(startByte, endByte, i)
startByte = endByte + 1
endByte += 1024 * 1024 * 10
}
wg.Wait()
filepath := "zipfile.zip"
// Create the file
_, err := os.Create(filepath)
if err != nil {
return
}
file, _ := os.OpenFile(filepath, os.O_APPEND|os.O_WRONLY, os.ModeAppend)
if err != nil {
log.Fatal(err)
}
for j := 0; j < chunks; j++ {
newFileChunk, err := os.Open(strconv.Itoa(j))
if err != nil {
log.Fatal(err)
}
defer newFileChunk.Close()
chunkInfo, err := newFileChunk.Stat()
if err != nil {
log.Fatal(err)
}
var chunkSize int64 = chunkInfo.Size()
chunkBufferBytes := make([]byte, chunkSize)
// read into chunkBufferBytes
reader := bufio.NewReader(newFileChunk)
_, err = reader.Read(chunkBufferBytes)
file.Write(chunkBufferBytes)
file.Sync() //flush to disk
chunkBufferBytes = nil // reset or empty our buffer
}
//Verify file size
filestats, err := file.Stat()
if err != nil {
log.Fatal(err)
return
}
actualFilesize := filestats.Size()
if actualFilesize != int64(length) {
log.Fatal("Actual Size: ", actualFilesize, " Expected: ", length)
return
}
file.Close()
}
After all the files are downloaded, I try to recombine them into one .zip file. However, when the files are put together, I can't unzip the final file, as it appears to be corrupted.
I would like to know what I am doing wrong, or if there's a better approach to this. Thanks in advance.
EDIT: Below is what gets logged to the console
bytes=0-10485759
bytes=10485760-20971519
2018/12/04 11:21:28 Actual Size: 16877828 Expected: 16877827