Golang Essentials

Shell exports

  • export GOPATH=~/Code/golang
  • export PATH=~/Code/golang/bin:$PATH

Directory explanations

Within this workspace we have three root directories (an example layout is sketched after this list):

  • src: holds source code
  • pkg: holds compiled bits
  • bin: holds executables
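
For example, a hypothetical workspace under ~/Code/golang might look like this:

~/Code/golang/
    bin/    # e.g. bin/myapp (created by go install)
    pkg/    # e.g. pkg/linux_amd64/github.com/foo/bar.a
    src/    # e.g. src/github.com/foo/bar/bar.go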

Private repo access

go get uses HTTPS by default, so force it to use SSH instead:

git config --global url."git@github.com:".insteadOf "https://github.com/"

Note you can restrict it to a specific organisation as well:
git config --global url."git@github.com:foo/".insteadOf "https://github.com/foo/"

So when you want to fetch a private repository such as git@github.com:foo/private.git, you can run:

go get github.com/foo/private

Guard (automatic go run)

Follow this guide (https://gist.github.com/Integralist/b675a263897680e02fbd) for using Guard to get real-time notifications when your Go source files change and to automatically trigger go run.

Spurious

If you need a Spurious set-up then update your aws.Config values accordingly (pointing each endpoint at the relevant local Spurious service):

_dyn := dynamodb.New(&aws.Config{
    Region:     "eu-west-1",
    DisableSSL: true,
    Endpoint:   "dynamodb.spurious.localhost:32770", // change port number to appropriate value
})

_s3 := s3.New(&aws.Config{
    Region:           "eu-west-1",
    Endpoint:         "s3.spurious.localhost:32769", // change port number to appropriate value
    DisableSSL:       true,
    S3ForcePathStyle: true,
})

Note: remember to set the AWS environment variables in your shell so DynamoDB can pick them up (all the other Spurious services are fine without them):

export AWS_ACCESS_KEY_ID=development_access; export AWS_SECRET_ACCESS_KEY=development_secret; go run application.go

To populate your Spurious set-up you can use Ruby like so: https://gist.github.com/Integralist/58b25f860773d8d2dd3f

AWS SDK with Go

In the code below we use goroutines to parallelise the "copy" requests to S3, which is safe because we're not mutating any shared values. We can't quite get away with that inside the getS3Locations function, as we need to mutate a slice (which is not safe to do concurrently), so we use an interesting pattern whereby channels are used to synchronise the data once the parallelised requests complete.
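
Distilled down, the pattern looks roughly like this (items and process are hypothetical stand-ins for the components map and the DynamoDB query; the real program follows below):

c := make(chan string, len(items))
done := make(chan int, len(items))
collected := []string{}

for _, item := range items {
	go func(item string) {
		c <- process(item) // safe: each goroutine only sends on the channel
		done <- 1          // signal that this goroutine has finished
	}(item)
}

// Only this loop mutates the slice, so no locking is needed
for i := len(items); i > 0; {
	select {
	case result := <-c:
		collected = append(collected, result)
	case <-done:
		i--
	}
}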

Note: the DynamoDB API specifically is confusing.
Also, for printing structs use fmt.Printf("%+v", myStruct) (the + verb ensures the field names are included).
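
For example, with a hypothetical struct:

type point struct {
	X int
	Y int
}

p := point{X: 1, Y: 2}

fmt.Printf("%v\n", p)  // prints: {1 2}
fmt.Printf("%+v\n", p) // prints: {X:1 Y:2}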

package main

import (
	"fmt"
	"github.com/awslabs/aws-sdk-go/aws"
	"github.com/awslabs/aws-sdk-go/service/dynamodb"
	"github.com/awslabs/aws-sdk-go/service/s3"
	"os"
	"strings"
	"sync"
)

func sequencerTableRecords(sequencer string) *dynamodb.ScanOutput {
	svc := dynamodb.New(&aws.Config{
		Region: "eu-west-1",
		DisableSSL: true,
		Endpoint:   "dynamodb.spurious.localhost:32791",
	})

	params := &dynamodb.ScanInput{
		TableName: aws.String(sequencer),
	}

	resp, err := svc.Scan(params)

	if awserr := aws.Error(err); awserr != nil {
		// A service error occurred.
		fmt.Println("Error:", awserr.Code, awserr.Message)
	} else if err != nil {
		// A non-service error occurred.
		panic(err)
	}

	return resp
}

func getComponentVersions(records *dynamodb.ScanOutput) map[string]string {
	components := make(map[string]string)

	for _, items := range records.Items {
		item := *items
		components[*item["key"].S] = *item["value"].N
	}

	return components
}

func getS3Locations(components map[string]string, s3Path string, lookup string) map[string]string {
	svc := dynamodb.New(&aws.Config{
		Region: "eu-west-1",
		DisableSSL: true,
		Endpoint:   "dynamodb.spurious.localhost:32791",
	})

	collectedLocations := []*dynamodb.QueryOutput{}

	c := make(chan *dynamodb.QueryOutput, len(components))
	done := make(chan int, len(components))
	locations := make(map[string]string)

	// Parallelise retrieval of data from DynamoDB
	for componentKey, componentVersion := range components {
		go func(componentKey, componentVersion string) {
			params := &dynamodb.QueryInput{
				TableName:      aws.String(lookup),
				ConsistentRead: aws.Boolean(true),
				Select:         aws.String("SPECIFIC_ATTRIBUTES"),
				AttributesToGet: []*string{
					aws.String("component_key"),
					aws.String("location"),
				},
				KeyConditions: &map[string]*dynamodb.Condition{
					"component_key": &dynamodb.Condition{
						ComparisonOperator: aws.String("EQ"),
						AttributeValueList: []*dynamodb.AttributeValue{
							&dynamodb.AttributeValue{
								S: aws.String(componentKey),
							},
						},
					},
					"batch_version": &dynamodb.Condition{
						ComparisonOperator: aws.String("EQ"),
						AttributeValueList: []*dynamodb.AttributeValue{
							&dynamodb.AttributeValue{
								N: aws.String(componentVersion),
							},
						},
					},
				},
			}

			resp, err := svc.Query(params)

			if awserr := aws.Error(err); awserr != nil {
				// A service error occurred.
				fmt.Println("Error:", awserr.Code, awserr.Message)
			} else if err != nil {
				// A non-service error occurred.
				panic(err)
			} else {
				c <- resp
			}

			// Always signal completion, otherwise an error above would leave
			// the collation loop below waiting forever.
			done <- 1
		}(componentKey, componentVersion)
	}

	// Wait until all data is successfully collated from DynamoDB
	for i := len(components); i > 0; {
		select {
		case item := <-c:
			collectedLocations = append(collectedLocations, item)
		case <-done:
			i--
		}
	}

	for _, items := range collectedLocations {
		item := *items
		ref := *item.Items[0]
		componentLocation := s3Path + *ref["location"].S
		componentKey := extractComponentFromKey(*ref["component_key"].S)

		locations[componentKey] = componentLocation
	}

	return locations
}

func extractComponentFromKey(componentKey string) string {
	return strings.Split(componentKey, "/")[0]
}

func copyS3DataToNewLocation(event string, s3Bucket string, s3Locations map[string]string) {
	svc := s3.New(&aws.Config{
		Region: "eu-west-1",
		Endpoint:         "s3.spurious.localhost:32790",
		DisableSSL:       true,
		S3ForcePathStyle: true,
	})

	var wg sync.WaitGroup

	for component, location := range s3Locations {
		destination := "archive/" + event + "/" + component

		wg.Add(1)

		go func(location, destination string) {
			defer wg.Done()

			// fmt.Println(s3Bucket)
			// fmt.Println(s3Bucket + "/" + location)
			// fmt.Println(destination)

			params := &s3.CopyObjectInput{
				Bucket:     aws.String(s3Bucket),
				CopySource: aws.String(s3Bucket + "/" + location),
				Key:        aws.String(destination),
			}

			_, err := svc.CopyObject(params)

			if awserr := aws.Error(err); awserr != nil {
				// A service error occurred.
				fmt.Println("Error:", awserr.Code, awserr.Message)
			} else if err != nil {
				// A non-service error occurred.
				panic(err)
			}
		}(location, destination)
	}

	wg.Wait()
}

func main() {
	event := os.Args[1]
	s3Bucket := os.Args[2]
	s3Path := os.Args[3]
	sequencer := os.Args[4]
	lookup := os.Args[5]

	sequence_records := sequencerTableRecords(sequencer)
	components := getComponentVersions(sequence_records)
	s3Locations := getS3Locations(components, s3Path, lookup)

	copyS3DataToNewLocation(event, s3Bucket, s3Locations)
}

In the above example there are API issues with DynamoDB: after about six requests a second the API starts returning errors. If you flatten out the requests so they're no longer running highly concurrently, then things slow down so badly that AWS Lambda (which is running the binary) times out. Meaning we need to do things differently, i.e. we need to request all the S3 objects instead and partition/filter the unique values from that list:

Note: S3 objects are listed alphabetically

package main

import (
	"fmt"
	"os"

	"github.com/awslabs/aws-sdk-go/aws"
	"github.com/awslabs/aws-sdk-go/service/s3"
)

func getS3ObjectSubset(bucket, source, marker string) *s3.ListObjectsOutput {
	svc := s3.New(&aws.Config{
		Region: "eu-west-1",
	})

	params := &s3.ListObjectsInput{
		Bucket: aws.String(bucket),
		Prefix: aws.String(source),
		Marker: aws.String(marker),
	}

	resp, err := svc.ListObjects(params)

	if awserr := aws.Error(err); awserr != nil {
		fmt.Println("Error:", awserr.Code, awserr.Message)
	} else if err != nil {
		panic(err)
	}

	return resp
}

func main() {
	bucket := os.Args[1] // some-bucket
	source := os.Args[2] // some/object/path/to/prefix
	marker := ""         // means to start off from the very first object (overwritten)

	var resp *s3.ListObjectsOutput

	processing := true

	collectedObjects := []*s3.ListObjectsOutput{}

	for processing {
		resp = getS3ObjectSubset(bucket, source, marker)
		collectedObjects = append(collectedObjects, resp)

		if *resp.IsTruncated && len(resp.Contents) > 0 {
			// Continue the next request from the last key in this page of results
			marker = *resp.Contents[len(resp.Contents)-1].Key
		} else {
			processing = false
		}
	}

	for _, s3SubSet := range collectedObjects {
		for _, items := range s3SubSet.Contents {
			fmt.Println(*items.Key)
		}
	}
}
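
The partition/filter step isn't shown above; assuming the object keys are prefixed with a component name (e.g. component/version/file), a minimal sketch might be:

// Hypothetical: collect the unique component prefixes from the listed keys
// (requires the "strings" import)
uniqueComponents := make(map[string]bool)

for _, s3SubSet := range collectedObjects {
	for _, object := range s3SubSet.Contents {
		uniqueComponents[strings.Split(*object.Key, "/")[0]] = true
	}
}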

Compilation

One time only commands:

  • go get github.com/mitchellh/gox
  • gox -build-toolchain

Compilation (this example is for AWS Lambda usage, where only a single Linux binary is needed; an alternative single-target invocation is shown after these steps):

  • gox -os="linux"
  • rm name-of-go-file_linux_386 name-of-go-file_linux_arm
  • mv name-of-go-file_linux_amd64 name-of-go-file
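
Alternatively, assuming your version of gox supports the -arch flag, you can restrict the build to a single target and skip the rm/mv step:

  • gox -os="linux" -arch="amd64"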

Dependencies with godeps

When running go get <dependency> locally, Go will stick the dependency in the folder defined by your $GOPATH variable. So when you build your code into a binary using go build <package>, it'll bake the dependencies into the binary (i.e. the binary is statically linked).

But if someone pulls down your repo and tries to build it, they'll need a network connection to pull down the dependencies, as their $GOPATH might not contain them yet (unless they manually execute go get for each required dependency). Also, the dependencies they subsequently pull down could be a more recent (and untested) version of each dependency.

So to make this situation better we can use godep (http://godoc.org/github.com/tools/godep, https://github.com/tools/godep), which sticks all your dependencies within a Godeps folder inside your project directory. You can then use godep save -r to automatically rewrite all your import references to point to that local folder.

This way users who clone your repo don't need an internet connection to pull the dependencies, as they already have them, and they'll also have the correct versions. This acts like the Gemfile.lock you would typically find in the Ruby world.
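
For example, a typical workflow (hypothetical; it assumes godep is already installed and on your $PATH) might be:

  • go get github.com/tools/godep
  • godep save -r
  • commit the generated Godeps directory alongside your code

Anyone cloning the repo afterwards can then run go build straight away, against the pinned dependency versions.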
