- Shell exports
- Directory explanations
- Private repo access
- Guard (automatic go run)
- Spurious
- AWS SDK with Go
- Compilation
- Dependencies with godeps
export GOPATH=~/Code/golang
export PATH=~/Code/golang/bin:$PATH
Within this workspace we have three root directories:
- src: holds source code
- pkg: holds compiled bits
- bin: holds executables
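For example, after a go get github.com/foo/bar (a made-up package path; the pkg subdirectory name depends on your OS/architecture) the workspace would look roughly like this:
~/Code/golang/
    src/github.com/foo/bar/                  <- source code
    pkg/linux_amd64/github.com/foo/bar.a     <- compiled package object
    bin/bar                                  <- executable (if the package is a command)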
go get uses https, so instead force it to use ssh:
git config --global url."git@github.com:".insteadOf "https://github.com/"
Note you can restrict it to a specific organisation as well:
git config --global url."git@github.com:foo/".insteadOf "https://github.com/foo/"
So when you want a private repository such as git@github.com:foo/private.git, you can run:
go get github.com/foo/private
Follow this guide (https://gist.github.com/Integralist/b675a263897680e02fbd) for using Guard to get real-time notifications when changes occur in your Go programming files, and automatically trigger go run.
If you need a Spurious set-up then update the aws.Config accordingly:
_dyn := dynamodb.New(&aws.Config{
    Region:     "eu-west-1",
    DisableSSL: true,
    Endpoint:   "dynamodb.spurious.localhost:32770", // change port number to appropriate value
})

_s3 := s3.New(&aws.Config{
    Region:           "eu-west-1",
    Endpoint:         "s3.spurious.localhost:32769", // change port number to appropriate value
    DisableSSL:       true,
    S3ForcePathStyle: true,
})
Note: remember to set the AWS environment variables in your shell so DynamoDB can pick them up (all other Spurious services are fine without them):
export AWS_ACCESS_KEY_ID=development_access; export AWS_SECRET_ACCESS_KEY=development_secret; go run application.go
To populate your Spurious set-up you can use Ruby like so: https://gist.github.com/Integralist/58b25f860773d8d2dd3f
In the below code we use goroutines for parallelising "copy" requests to S3, which is thread-safe because we're not mutating any values. But we can't quite get away with that inside the getS3Locations function, as we need to mutate a slice (and that's not thread-safe), so we use an interesting pattern whereby channels are used to synchronise the data after the parallelisation.
Note: the DynamoDB API specifically is confusing.
Also, for printing structs use fmt.Printf("%+v", myStruct) (the %+v verb ensures the field names are included).
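For example (the point struct here is just a throwaway illustration):

package main

import "fmt"

type point struct {
    X, Y int
}

func main() {
    p := point{X: 1, Y: 2}

    fmt.Printf("%v\n", p)  // {1 2}
    fmt.Printf("%+v\n", p) // {X:1 Y:2} <- field names included
}

And now the full example discussed above: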
package main

import (
    "fmt"
    "github.com/awslabs/aws-sdk-go/aws"
    "github.com/awslabs/aws-sdk-go/service/dynamodb"
    "github.com/awslabs/aws-sdk-go/service/s3"
    "os"
    "strings"
    "sync"
)

// sequencerTableRecords scans the given DynamoDB table and returns all of its records.
func sequencerTableRecords(sequencer string) *dynamodb.ScanOutput {
    svc := dynamodb.New(&aws.Config{
        Region:     "eu-west-1",
        DisableSSL: true,
        Endpoint:   "dynamodb.spurious.localhost:32791",
    })

    params := &dynamodb.ScanInput{
        TableName: aws.String(sequencer),
    }

    resp, err := svc.Scan(params)
    if awserr := aws.Error(err); awserr != nil {
        // A service error occurred.
        fmt.Println("Error:", awserr.Code, awserr.Message)
    } else if err != nil {
        // A non-service error occurred.
        panic(err)
    }

    return resp
}

// getComponentVersions converts the scanned records into a map of component key -> version.
func getComponentVersions(records *dynamodb.ScanOutput) map[string]string {
    components := make(map[string]string)

    for _, items := range records.Items {
        item := *items
        components[*item["key"].S] = *item["value"].N
    }

    return components
}

// getS3Locations queries DynamoDB (in parallel) for the S3 location of each component/version pair.
func getS3Locations(components map[string]string, s3Path string, lookup string) map[string]string {
    svc := dynamodb.New(&aws.Config{
        Region:     "eu-west-1",
        DisableSSL: true,
        Endpoint:   "dynamodb.spurious.localhost:32791",
    })

    collectedLocations := []*dynamodb.QueryOutput{}
    c := make(chan *dynamodb.QueryOutput, len(components))
    done := make(chan int, len(components))
    locations := make(map[string]string)

    // Parallelise retrieval of data from DynamoDB
    for componentKey, componentVersion := range components {
        go func(componentKey, componentVersion string) {
            params := &dynamodb.QueryInput{
                TableName:      aws.String(lookup),
                ConsistentRead: aws.Boolean(true),
                Select:         aws.String("SPECIFIC_ATTRIBUTES"),
                AttributesToGet: []*string{
                    aws.String("component_key"),
                    aws.String("location"),
                },
                KeyConditions: &map[string]*dynamodb.Condition{
                    "component_key": &dynamodb.Condition{
                        ComparisonOperator: aws.String("EQ"),
                        AttributeValueList: []*dynamodb.AttributeValue{
                            &dynamodb.AttributeValue{
                                S: aws.String(componentKey),
                            },
                        },
                    },
                    "batch_version": &dynamodb.Condition{
                        ComparisonOperator: aws.String("EQ"),
                        AttributeValueList: []*dynamodb.AttributeValue{
                            &dynamodb.AttributeValue{
                                N: aws.String(componentVersion),
                            },
                        },
                    },
                },
            }

            resp, err := svc.Query(params)
            if awserr := aws.Error(err); awserr != nil {
                // A service error occurred; still signal "done" so the
                // collation loop below doesn't block forever.
                fmt.Println("Error:", awserr.Code, awserr.Message)
                done <- 1
            } else if err != nil {
                // A non-service error occurred.
                panic(err)
            } else {
                c <- resp
                done <- 1
            }
        }(componentKey, componentVersion)
    }

    // Wait until all data is successfully collated from DynamoDB
    for i := len(components); i > 0; {
        select {
        case item := <-c:
            collectedLocations = append(collectedLocations, item)
        case <-done:
            i--
        }
    }

    for _, items := range collectedLocations {
        item := *items
        ref := *item.Items[0]
        componentLocation := s3Path + *ref["location"].S
        componentKey := extractComponentFromKey(*ref["component_key"].S)
        locations[componentKey] = componentLocation
    }

    return locations
}

// extractComponentFromKey returns the component name from a "component/version" style key.
func extractComponentFromKey(componentKey string) string {
    return strings.Split(componentKey, "/")[0]
}

// copyS3DataToNewLocation copies each component's S3 object into an event-specific archive path.
func copyS3DataToNewLocation(event string, s3Bucket string, s3Locations map[string]string) {
    svc := s3.New(&aws.Config{
        Region:           "eu-west-1",
        Endpoint:         "s3.spurious.localhost:32790",
        DisableSSL:       true,
        S3ForcePathStyle: true,
    })

    var wg sync.WaitGroup

    for component, location := range s3Locations {
        destination := "archive/" + event + "/" + component

        wg.Add(1)
        go func(location, destination string) {
            defer wg.Done()

            // fmt.Println(s3Bucket)
            // fmt.Println(s3Bucket + "/" + location)
            // fmt.Println(destination)

            params := &s3.CopyObjectInput{
                Bucket:     aws.String(s3Bucket),
                CopySource: aws.String(s3Bucket + "/" + location),
                Key:        aws.String(destination),
            }

            _, err := svc.CopyObject(params)
            if awserr := aws.Error(err); awserr != nil {
                // A service error occurred.
                fmt.Println("Error:", awserr.Code, awserr.Message)
            } else if err != nil {
                // A non-service error occurred.
                panic(err)
            }
        }(location, destination)
    }

    wg.Wait()
}

func main() {
    event := os.Args[1]
    s3Bucket := os.Args[2]
    s3Path := os.Args[3]
    sequencer := os.Args[4]
    lookup := os.Args[5]

    sequenceRecords := sequencerTableRecords(sequencer)
    components := getComponentVersions(sequenceRecords)
    s3Locations := getS3Locations(components, s3Path, lookup)

    copyS3DataToNewLocation(event, s3Bucket, s3Locations)
}
In the above example there are API issues with DynamoDB: after about six requests a second the API starts erroring. If you flatten out the requests so they are no longer running highly concurrently, everything slows down so badly that AWS Lambda (which is running the binary) times out. This means we need to do things differently, i.e. request all the S3 objects up front and partition/filter the unique values from that listing:
Note: S3 objects are listed alphabetically.
package main

import (
    "fmt"
    "github.com/awslabs/aws-sdk-go/aws"
    "github.com/awslabs/aws-sdk-go/service/s3"
    "os"
)

// getS3ObjectSubset returns a single page of objects starting after the given marker.
func getS3ObjectSubset(bucket, source, marker string) *s3.ListObjectsOutput {
    svc := s3.New(&aws.Config{
        Region: "eu-west-1",
    })

    params := &s3.ListObjectsInput{
        Bucket: aws.String(bucket),
        Prefix: aws.String(source),
        Marker: aws.String(marker),
    }

    resp, err := svc.ListObjects(params)
    if awserr := aws.Error(err); awserr != nil {
        fmt.Println("Error:", awserr.Code, awserr.Message)
    } else if err != nil {
        panic(err)
    }

    return resp
}

func main() {
    bucket := os.Args[1] // some-bucket
    source := os.Args[2] // some/object/path/to/prefix
    marker := ""         // means to start off from the very first object (overwritten on each iteration)

    var resp *s3.ListObjectsOutput

    processing := true
    collectedObjects := []*s3.ListObjectsOutput{}

    for processing {
        resp = getS3ObjectSubset(bucket, source, marker)
        collectedObjects = append(collectedObjects, resp)

        marker = *resp.Contents[len(resp.Contents)-1].Key

        if *resp.IsTruncated == false {
            processing = false
        }
    }

    for _, s3SubSet := range collectedObjects {
        for _, items := range s3SubSet.Contents {
            fmt.Println(*items.Key)
        }
    }
}
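The above only prints each key; for the "partition/filter the unique values" step, one rough approach (a sketch, assuming the component name is the first path segment after the source prefix, and that "strings" has been added to the imports) would be:

// uniqueComponents collects the distinct first path segments found beneath
// the given prefix, e.g. "some/path/componentA/file.txt" with prefix
// "some/path/" yields "componentA".
func uniqueComponents(keys []string, prefix string) map[string]bool {
    unique := make(map[string]bool)

    for _, key := range keys {
        rest := strings.TrimPrefix(key, prefix)
        segment := strings.Split(rest, "/")[0]

        if segment != "" {
            unique[segment] = true
        }
    }

    return unique
}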
One-time-only commands:
go get github.com/mitchellh/gox
gox -build-toolchain
Compilation (example is for AWS Lambda usage where only a single binary is needed):
gox -os="linux"
rm name-of-go-file_linux_386 name-of-go-file_linux_arm
mv name-of-go-file_linux_amd64 name-of-go-file
When running go get <dependency> locally, Go will stick the dependency in the folder defined by your $GOPATH variable. So when you build your code into a binary using go build <script>, it'll bake the dependencies into the binary (i.e. the binary is statically linked).

But if someone pulls down your repo and tries to do a build, they'll need a network connection to pull down the dependencies, as their $GOPATH might not have those dependencies yet (unless they manually execute go get for each required dependency). Also, the dependencies they subsequently pull down could be a more recent (and untested) version of each dependency.

To make this situation better we can use http://godoc.org/github.com/tools/godep (https://github.com/tools/godep), which sticks all your dependencies within a Godeps folder inside your project directory. You can then use godep save -r to automatically update all your references to point to that local folder.

This way, users who clone your repo don't need an internet connection to pull the dependencies, as they already have them, and they'll also have the correct versions of those dependencies. This acts like a Gemfile.lock, as you would typically find in the Ruby world.
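As a rough sketch of that workflow (the project path here is made up, purely for illustration):

cd $GOPATH/src/github.com/foo/myproject
go get ./...   # make sure the dependencies exist in your local $GOPATH first
godep save -r  # copy them into ./Godeps and rewrite import paths to reference it
git add Godeps # commit the vendored dependencies alongside your code
go build       # anyone cloning the repo can now build without fetching anything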