Skip to content

Instantly share code, notes, and snippets.

@howeyc
Created November 14, 2013 22:53
Show Gist options
  • Save howeyc/7475785 to your computer and use it in GitHub Desktop.
Save howeyc/7475785 to your computer and use it in GitHub Desktop.
Download an s3 bucket.
// Command s3dl downloads every object in a public S3 bucket.
//
// It relies on the bucket's directory listing being publicly readable.
//
// Example usage:
//
//	s3dl -bucket <bucketName>
//
// This downloads all files in the "<bucketName>" S3 bucket into a local
// folder named "<bucketName>".
package main
import (
"crypto/md5"
"encoding/xml"
"flag"
"fmt"
"io"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
)
// Content is a single object entry of a bucket listing, decoded from XML.
type Content struct {
	// Key is the object's key, taken from the <Key> element.
	Key string `xml:"Key"`
	// Md5Sum holds the raw text of the <ETag> element; the downloader
	// unquotes it before comparing it with a computed MD5 digest.
	Md5Sum string `xml:"ETag"`
	// Size is the value of the <Size> element; the downloader compares it
	// with the length of the downloaded object.
	Size int64 `xml:"Size"`
}
// Result is the decoded form of one bucket-listing response.
type Result struct {
	// Contents collects every <Contents> element of the response.
	Contents []Content `xml:"Contents"`
	// IsTruncated mirrors the <IsTruncated> element; the lister keeps
	// requesting further pages while it is true.
	IsTruncated bool `xml:"IsTruncated"`
}
func main() {
var bucketName string
flag.StringVar(&bucketName, "bucket", "", "bucket to download")
flag.Parse()
bucketUrl := url.URL{Scheme: "https", Host: bucketName + ".s3.amazonaws.com"}
// Request for list of files
// (Possibly requiring multiple requests if bucket has lots of items)
var result Result
for {
reqUrl := bucketUrl
// If we already recieved a response,
// continue from the end of list already received.
if len(result.Contents) > 0 {
reqValues := reqUrl.Query()
reqValues.Set("marker", result.Contents[len(result.Contents)-1].Key)
reqUrl.RawQuery = reqValues.Encode()
}
res, err := http.Get(reqUrl.String())
if err != nil {
log.Fatal(err)
}
xmlDecode := xml.NewDecoder(res.Body)
xmlDecode.Decode(&result)
res.Body.Close()
// Received last of the list
if !result.IsTruncated {
break
}
}
// Download all the files to an output directory with bucket name
downloadContents(bucketUrl, bucketName, result.Contents)
}
// downloadContents downloads every listed object from the bucket at bucketUrl
// into the local directory bucketName, recreating the directory structure
// implied by the keys.
//
// One progress line is printed per entry. It ends with "Complete.",
// "Invalid Size!", "Invalid Md5Sum!", a skip notice, or the error that
// prevented the download. A failing entry never aborts the remaining ones.
func downloadContents(bucketUrl url.URL, bucketName string, contents []Content) {
	for _, content := range contents {
		fmt.Print("Downloading ", content.Key, "... ")

		dir, file := filepath.Split(content.Key)
		outDir := filepath.Join(bucketName, dir)
		dest := filepath.Join(outDir, file)

		// Keys come from the remote listing; refuse any that would resolve
		// to a location outside the output directory (e.g. "../x").
		if !withinDir(bucketName, outDir) || !withinDir(bucketName, dest) {
			fmt.Println("Skipped: key resolves outside the output directory")
			continue
		}

		if err := os.MkdirAll(outDir, 0777); err != nil {
			fmt.Println(err)
			continue
		}
		if file == "" {
			// The key ends in a separator: only the directory is created.
			fmt.Println()
			continue
		}

		fileUrl := bucketUrl
		fileUrl.Path = "/" + content.Key

		written, digest, err := fetchToFile(fileUrl.String(), dest)
		switch {
		case err != nil:
			fmt.Println(err)
		case content.Size != written:
			// Compare against the bytes actually stored; the Content-Length
			// header may be absent (-1) even for a complete transfer.
			fmt.Println("Invalid Size!")
		case !etagMatches(content.Md5Sum, digest):
			fmt.Println("Invalid Md5Sum!")
		default:
			fmt.Println("Complete.")
		}
	}
}

// fetchToFile streams the resource at rawURL into the file dest. It returns
// the number of bytes written and the MD5 digest of those bytes. A non-200
// response is reported as an error before dest is created, so error bodies
// are never stored as object data.
func fetchToFile(rawURL, dest string) (int64, []byte, error) {
	res, err := http.Get(rawURL)
	if err != nil {
		return 0, nil, err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return 0, nil, fmt.Errorf("unexpected status %s", res.Status)
	}

	out, err := os.Create(dest)
	if err != nil {
		return 0, nil, err
	}
	sum := md5.New()
	written, err := io.Copy(io.MultiWriter(out, sum), res.Body)
	// A failed Close can mean lost data, so report it unless the copy
	// itself already failed.
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return 0, nil, err
	}
	return written, sum.Sum(nil), nil
}

// etagMatches reports whether digest, the MD5 of the downloaded bytes,
// corresponds to the ETag value etag taken from the listing.
func etagMatches(etag string, digest []byte) bool {
	// The listing wraps the ETag in double quotes; tolerate values that are
	// not a valid quoted string by trimming the quotes instead.
	expected, err := strconv.Unquote(etag)
	if err != nil {
		expected = strings.Trim(etag, `"`)
	}

	// Look for (and convert) the block-size type of ETag, "<hex>-<suffix>".
	// Assumes only one block: the expected value is then the MD5 of the
	// object's own binary MD5 digest. Objects stored in several blocks
	// cannot be verified this way and are reported as a mismatch.
	if dashIdx := strings.LastIndex(expected, "-"); dashIdx > 0 {
		blockSum := md5.Sum(digest)
		digest = blockSum[:]
		expected = expected[:dashIdx]
	}
	return expected == fmt.Sprintf("%x", digest)
}

// withinDir reports whether target, after lexical cleaning, is root itself or
// lies below it.
func withinDir(root, target string) bool {
	rel, err := filepath.Rel(root, target)
	if err != nil {
		return false
	}
	return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment