I've started working on a cache to put into our data center for S3 objects that are frequently pulled from various S3 servers. Some of them are gigabytes in size, and many servers request the same objects, so this cache is needed to improve performance.
However, unlike other S3 caches I've seen, I don't need the s3 authentication part. It's already included in the headers of the requests from my client.
So the plan is to parse the request, check whether the object already exists completely locally or is currently being fetched, and return the result once it is complete. Objects are conveniently hashed, so if an object's contents change, it gets a new hash and is a new object.
The requests already contain everything needed to authenticate with S3. Where I'm slightly stuck is that I'm not familiar with HTTP's CONNECT method for proxies. I modified some example starter code I found to not use Hijack, so that I could interpret what is going on and decide whether I in fact need to fetch the upstream object. But it's not working. The client spits out:
Get https://example.com: tls: first record does not look like a TLS handshake
hmmm.
Here is the proxy listening on HTTP (it'll be behind a firewall so I have made it deliberately insecure for simplicity).
package main
import (
"crypto/tls"
"io"
"log"
"net"
"net/http"
"time"
)
// handleTunneling serves an HTTP CONNECT request. It dials r.Host over TCP,
// answers the client with 200 OK, takes over the client's raw connection and
// then relays bytes in both directions until either side closes.
//
// Failure responses:
//   - 500 if w does not support hijacking (checked first, before anything is
//     written, so the status actually reaches the client);
//   - 503 if the upstream host cannot be reached within 10 seconds.
//
// If the hijack itself fails the 200 status has already been sent, so the
// error is only logged and the upstream connection is closed.
func handleTunneling(w http.ResponseWriter, r *http.Request) {
	// Check for hijack support up front: once WriteHeader(200) has been
	// called an error status can no longer be reported to the client.
	hijacker, ok := w.(http.Hijacker)
	if !ok {
		http.Error(w, "Hijacking not supported", http.StatusInternalServerError)
		return
	}

	destConn, err := net.DialTimeout("tcp", r.Host, 10*time.Second)
	if err != nil {
		http.Error(w, err.Error(), http.StatusServiceUnavailable)
		return
	}

	// Tell the client the tunnel is established before taking over the socket.
	w.WriteHeader(http.StatusOK)

	clientConn, _, err := hijacker.Hijack()
	if err != nil {
		// Do not leak the upstream connection, and do not start the relay
		// goroutines with a nil client connection.
		destConn.Close()
		log.Printf("hijacking connection from %s: %v", r.RemoteAddr, err)
		return
	}

	// Each goroutine copies one direction and closes both ends when done.
	go transfer(destConn, clientConn)
	go transfer(clientConn, destConn)
}
// transfer streams everything readable from source into destination and,
// once the copy stops for any reason, closes source and then destination.
// The copy result is intentionally discarded: the tunnel is best-effort.
func transfer(destination io.WriteCloser, source io.ReadCloser) {
	defer func() {
		source.Close()
		destination.Close()
	}()
	_, _ = io.Copy(destination, source)
}
// handleHTTP forwards a plain (non-CONNECT) proxy request upstream through
// http.DefaultTransport and relays the upstream status, headers and body back
// to the client. If the round trip fails the client receives a 503 carrying
// the error text.
func handleHTTP(w http.ResponseWriter, req *http.Request) {
	upstream, err := http.DefaultTransport.RoundTrip(req)
	if err != nil {
		http.Error(w, err.Error(), http.StatusServiceUnavailable)
		return
	}
	defer upstream.Body.Close()

	log.Println(req.RemoteAddr, " ", upstream.Status)

	// Headers must be in place before WriteHeader sends the status line.
	out := w.Header()
	for name, values := range upstream.Header {
		for i := range values {
			out.Add(name, values[i])
		}
	}
	w.WriteHeader(upstream.StatusCode)

	// Stream the body rather than buffering it.
	io.Copy(w, upstream.Body)
}
// copyHeader appends every value of every field in src to dst via Header.Add.
// Values already present in dst are kept, and fields whose value list is
// empty are not created in dst.
func copyHeader(dst, src http.Header) {
	for name, values := range src {
		for i := range values {
			dst.Add(name, values[i])
		}
	}
}
// main runs the proxy on port 8080 over plain HTTP and exits with a fatal log
// entry when the listener stops.
func main() {
	// dispatch sends CONNECT requests to the tunnel handler and every other
	// method to the plain HTTP forwarder.
	dispatch := func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case http.MethodConnect:
			handleTunneling(w, r)
		default:
			handleHTTP(w, r)
		}
	}

	server := &http.Server{
		Addr:    ":8080",
		Handler: http.HandlerFunc(dispatch),
		// Disable HTTP/2.
		TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
	}
	log.Fatal(server.ListenAndServe())
}
The client looks like this:
package main
import (
"net/http"
"fmt"
"io/ioutil"
"os"
"net/url"
)
// main fetches the URL given as the first command-line argument through the
// proxy at http://localhost:8080 and prints the response body followed by a
// newline. Any failure is printed and the process exits with status 1.
func main() {
	args := os.Args[1:]
	if len(args) == 0 {
		fmt.Println("Usage: htclient [url]")
		os.Exit(1)
	}
	whereTo := args[0]

	proxyURL, err := url.Parse("http://localhost:8080")
	if err != nil {
		fmt.Printf("%s", err)
		os.Exit(1)
	}
	client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyURL)}}

	// A malformed target URL yields a nil request; check before calling Do.
	req, err := http.NewRequest("GET", whereTo, nil)
	if err != nil {
		fmt.Printf("%s", err)
		os.Exit(1)
	}

	response, err := client.Do(req)
	if err != nil {
		fmt.Printf("%s", err)
		os.Exit(1)
	}

	contents, err := ioutil.ReadAll(response.Body)
	// Close explicitly instead of deferring: os.Exit skips deferred calls.
	response.Body.Close()
	if err != nil {
		fmt.Printf("%s", err)
		os.Exit(1)
	}
	fmt.Printf("%s\n", string(contents))
}
Suggestions?