Created
October 24, 2016 20:06
-
-
Save PaulCapestany/3b559a86e88bd4ae6dd904038c66953e to your computer and use it in GitHub Desktop.
s3 log parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"flag" | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"regexp" | |
"time" | |
) | |
// Log holds the fields extracted from a single access log record.
//
// Every field is kept as the text captured from the log line; only Time is
// rewritten by newLogLine into a "2006-01-02 15:04:05.000000" layout. The
// declaration order follows the order of the capture groups in the line
// pattern used by parseLogLine.
type Log struct {
	// Owner and name of the bucket the request was made against.
	BucketOwner, Bucket string

	// Timestamp of the request (the bracketed part of the log line).
	Time string

	// Who issued the request and the ID assigned to it.
	RemoteIP, Requester, RequestID string

	// What was requested.
	Operation, Key, RequestURI string

	// Outcome of the request.
	HTTPstatus, ErrorCode string

	// Size counters, kept as the raw text from the log line.
	BytesSent, ObjectSize string

	// Timing counters, kept as the raw text from the log line.
	TotalTime, TurnAroundTime string

	// Client-supplied headers (the two trailing quoted fields).
	Referrer, UserAgent string

	// Last captured token of the record.
	VersionId string
}
func newLogLine(regexString []string) *Log { | |
logLine := new(Log) | |
logLine.BucketOwner = regexString[1] | |
logLine.Bucket = regexString[2] | |
tmp, _ := time.Parse("02/Jan/2006:15:04:05 -0700", regexString[3]) | |
logLine.Time = tmp.Format("2006-01-02 15:04:05.000000") | |
logLine.RemoteIP = regexString[4] | |
logLine.Requester = regexString[5] | |
logLine.RequestID = regexString[6] | |
logLine.Operation = regexString[7] | |
logLine.Key = regexString[8] | |
logLine.RequestURI = regexString[9] | |
logLine.HTTPstatus = regexString[10] | |
logLine.ErrorCode = regexString[11] | |
logLine.BytesSent = regexString[12] | |
logLine.ObjectSize = regexString[13] | |
logLine.TotalTime = regexString[14] | |
logLine.TurnAroundTime = regexString[15] | |
logLine.Referrer = regexString[16] | |
logLine.UserAgent = regexString[17] | |
logLine.VersionId = regexString[18] | |
return logLine | |
} | |
func parseLogLine(line string) { | |
re := regexp.MustCompile(`(\S+) (\S+) \[(.*?)\] (\S+) (\S+) (\S+) (\S+) (\S+) "([^"]+)" (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) "([^"]+)" "([^"]+)" (\S)`) | |
matches := re.FindAllStringSubmatch(line, -1) | |
for i := 0; i < len(matches); i++ { | |
tmp := newLogLine(matches[i]) | |
fmt.Printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", tmp.BucketOwner, tmp.Bucket, tmp.Time, tmp.RemoteIP, tmp.Requester, tmp.RequestID, tmp.Operation, tmp.Key, tmp.RequestURI, tmp.HTTPstatus, tmp.ErrorCode, tmp.BytesSent, tmp.ObjectSize, tmp.TotalTime, tmp.Referrer, tmp.UserAgent, tmp.VersionId) | |
} | |
} | |
// getSliceFromTextFile reads the file at fileName and returns its lines in
// order, each without its trailing "\n" or "\r\n".
//
// A final line that lacks a newline terminator is still returned. An empty
// file yields an empty, non-nil slice. Lines may be of any length.
//
// Any failure to open or read the file terminates the program through
// log.Fatal.
func getSliceFromTextFile(fileName string) []string {
	f, err := os.Open(fileName)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	itemsSlice := []string{}
	for {
		// ReadString grows its result as needed, so a long record cannot
		// overflow the reader's internal buffer the way ReadLine's isPrefix
		// case did.
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatal(err)
		}
		atEOF := err == io.EOF
		if atEOF && line == "" {
			break
		}

		// Strip the terminator; a "\r" is only dropped when it is part of a
		// "\r\n" pair.
		if n := len(line); n > 0 && line[n-1] == '\n' {
			line = line[:n-1]
			if n := len(line); n > 0 && line[n-1] == '\r' {
				line = line[:n-1]
			}
		}
		itemsSlice = append(itemsSlice, line)

		if atEOF {
			break
		}
	}
	return itemsSlice
}
func main() { | |
flag.Parse() | |
args := flag.Args() | |
filename := args[0] | |
fmt.Printf("BucketOwner\tBucket\tTime\tRemoteIP\tRequester\tRequestID\tOperation\tKey\tRequestURI\tHTTPstatus\tErrorCode\tBytesSent\tObjectSize\tTotalTime\tReferrer\tUserAgent\tVersionId\n") | |
lines := getSliceFromTextFile(filename) | |
for _, line := range lines { | |
parseLogLine(line) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment