Skip to content

Instantly share code, notes, and snippets.

@ernestom
Last active December 10, 2015 16:18
Show Gist options
  • Save ernestom/4460051 to your computer and use it in GitHub Desktop.
Save ernestom/4460051 to your computer and use it in GitHub Desktop.
Script para llenar una tabla de mysql con los logs de Akamai para TE.
#!/bin/bash
FINDPATH="/Users/ernesto/Desktop/te/logs/";
FINDSEARCH="www_171150.esclf_waf*";
DB="test"
TABLE="te_logs_www";
TMPFILE="/tmp/uwslog_www.txt"
LOGFILE="/tmp/parsed_www.log"
for file in $(find $FINDPATH -name $FINDSEARCH ! -size 0); do
grep $file $LOGFILE
if [ $? == 1 ]
then
zcat -fq $file > $TMPFILE && ./thisscript $TMPFILE $TABLE | mysql -uroot $DB
echo "OK $file"
echo $file >> $LOGFILE
else
echo "File exists $file"
fi
done
/*
1) Create the necessary databases
CREATE TABLE `te_logs_$SITE` (
`id` int(11) NOT NULL AUTOINCREMENT,
`hash` char(32) NOT NULL,
`ip` char(15) NOT NULL,
`date` datetime NOT NULL,
`url` varchar(500) NOT NULL,
`method` char(1) NOT NULL,
`status` char(3) NOT NULL,
`waf` varchar(100) NOT NULL,
`warned` tinyint(4) NOT NULL,
`denied` tinyint(4) NOT NULL,
PRIMARY KEY (`id`),
KEY `a` (`id`),
KEY `b` (`hash`),
KEY `c` (`date`),
KEY `d` (`warned`),
KEY `e` (`denied`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
2) Compile the script with `go build`
3) Run the script with something like:
```
#!/bin/bash
# $ time run.sh
FINDPATH="/Users/ernesto/Desktop/te/logs/";
FINDSEARCH="www_171150.esclf_waf*";
DB="test"
TABLE="te_logs_www";
TMPFILE="/tmp/uwslog_www.txt"
LOGFILE="/tmp/parsed_www.log"
for file in $(find $FINDPATH -name $FINDSEARCH ! -size 0); do
grep $file $LOGFILE
if [ $? == 1 ]
then
zcat -fq $file > $TMPFILE && ./thisscript $TMPFILE $TABLE | mysql -uroot $DB
echo "OK $file"
echo $file >> $LOGFILE
else
echo "File exists $file"
fi
done
```
*/
package main
import (
"bufio"
"crypto/md5"
"fmt"
"io"
"log"
"os"
"reflect"
"regexp"
"strings"
)
const MAX_ROWS_PER_INSERT = 10000
type LogLine struct {
Hash string
Ip string
Date string
Request string
Url string
Method string
Status string
Waf string
Warned string
Denied string
}
// Defines the `key` property with `value` in the current `LogLine`
// This only works with strings
func (l *LogLine) Set(key string, value string) {
field := reflect.ValueOf(l).Elem().FieldByName(key)
type_ := field.Type().String()
if type_ == "string" {
field.SetString(value)
} else {
log.Panicln(key + " has an unsupported type: " + type_)
}
}
// Turns this shitty format 11/Aug/2012:12:29:39 +0000
// into an sql friendly one, assuming UTC 0
func FormatDate(date string) string {
pattern := regexp.MustCompile(`(\d+)/(\w+)/(\d{4}):(\d{2}):(\d{2}):(\d{2})`)
matches := pattern.FindStringSubmatch(date)
if len(matches) == 0 {
log.Fatalln("Cannot fix date: " + date)
}
months := []string{
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}
month := 0
for i, m := range months {
if m == matches[2] {
month = i + 1
}
}
return fmt.Sprintf("%s-%d-%s %s:%s:%s",
matches[3], month, matches[1],
matches[4], matches[5], matches[6])
}
// Reads a line from the current `Reader`
// This supports long lines that don't fit the buffer in a single iteration
func Readln(r *bufio.Reader) (string, error) {
var (
isPrefix bool = true
err error = nil
line, ln []byte
)
for isPrefix && err == nil {
line, isPrefix, err = r.ReadLine()
ln = append(ln, line...)
}
return string(ln), err
}
// Parses the given `line` into a `LogLine`
func LogLineFromLine(line string) *LogLine {
var logLine = new(LogLine)
patterns := []string{
`(?P<Ip>\S+) - -`,
`\[(?P<Date>[^\]]+)\]`,
`"(?P<Request>[^"]+)"`,
`(?P<Status>\d+) .+`,
`"(?P<Waf>[^"]*)"`}
pattern := regexp.MustCompile(strings.Join(patterns, " "))
matches := pattern.FindStringSubmatch(line)
if len(matches) == 0 {
log.Fatalln("No Matches for: \n\t" + line)
}
for i := range matches {
if i == 0 {
continue
}
logLine.Set(pattern.SubexpNames()[i], matches[i])
}
logLine.Date = FormatDate(logLine.Date)
if logLine.Waf == "-" {
logLine.Waf = ""
} else {
splitWaf := strings.Split(logLine.Waf, "|")
if splitWaf[1] != "" {
totalWarned := len(strings.Split(splitWaf[1], ":"))
logLine.Warned = fmt.Sprintf("%d", totalWarned)
}
if splitWaf[2] != "" {
totalDenied := len(strings.Split(splitWaf[2], ":"))
logLine.Denied = fmt.Sprintf("%d", totalDenied)
}
}
logLine.Request = strings.Replace(logLine.Request, "'", "\"", -1)
request := strings.Split(logLine.Request, " ")
logLine.Method, logLine.Url = request[0], request[1]
hash := md5.New()
io.WriteString(hash, line)
logLine.Hash = fmt.Sprintf("%x", hash.Sum(nil))
return logLine
}
// Returns a safe SQL colum representation of the given `value`
func SQLColumn(value string) string {
value = strings.TrimSpace(value)
if value == "" {
return "NULL"
}
return "'" + value + "'"
}
// Returns SQL VALUE clauses from the given `LogFile`
func SQLValuesFromLogLine(logLine *LogLine) string {
if logLine.Hash == "" {
return ""
}
fields := []string{"NULL"}
fields = append(fields, SQLColumn(logLine.Hash))
fields = append(fields, SQLColumn(logLine.Ip))
fields = append(fields, SQLColumn(logLine.Date))
fields = append(fields, SQLColumn(logLine.Url))
fields = append(fields, SQLColumn(logLine.Method))
fields = append(fields, SQLColumn(logLine.Status))
fields = append(fields, SQLColumn(logLine.Waf))
fields = append(fields, SQLColumn(logLine.Warned))
fields = append(fields, SQLColumn(logLine.Denied))
return fmt.Sprintf("(%s)", strings.Join(fields, ","))
}
// Prints the necessary SQL "INSERT into `tableName`" clauses from the given `fileName`
func SQLInsertsFromFile(fileName string, tableName string) {
file, err := os.Open(fileName)
if err != nil {
log.Fatalln("Couldn't read", fileName)
}
r := bufio.NewReader(file)
line, e := Readln(r)
sqlValues := []string{}
flush := func() {
fmt.Printf("INSERT INTO %s VALUES %s;\n\n", tableName, strings.Join(sqlValues, ","))
sqlValues = []string{}
}
for e == nil {
//fmt.Println(line)
line, e = Readln(r)
line = strings.TrimSpace(line)
if line == "" {
continue
}
logLine := LogLineFromLine(line)
sqlValues = append(sqlValues, SQLValuesFromLogLine(logLine))
if len(sqlValues) >= MAX_ROWS_PER_INSERT {
flush()
}
}
flush()
}
func main() {
if len(os.Args) != 3 {
log.Fatalln("Usage: ./thisscript /path/to/uncompressed.log tableName")
}
fileName := os.Args[1]
tableName := os.Args[2]
SQLInsertsFromFile(fileName, tableName)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment