Script to populate a MySQL table with the Akamai logs for TE.
#!/bin/bash
FINDPATH="/Users/ernesto/Desktop/te/logs/"
FINDSEARCH="www_171150.esclf_waf*"
DB="test"
TABLE="te_logs_www"
TMPFILE="/tmp/uwslog_www.txt"
LOGFILE="/tmp/parsed_www.log"

# Make sure the "already parsed" list exists so grep doesn't exit with an error
touch "$LOGFILE"

for file in $(find "$FINDPATH" -name "$FINDSEARCH" ! -size 0); do
    # Only process files that weren't parsed on a previous run
    if ! grep -q "$file" "$LOGFILE"
    then
        zcat -fq "$file" > "$TMPFILE" && ./thisscript "$TMPFILE" "$TABLE" | mysql -uroot "$DB"
        echo "OK $file"
        echo "$file" >> "$LOGFILE"
    else
        echo "File exists $file"
    fi
done
/*
1) Create the necessary database table:

CREATE TABLE `te_logs_$SITE` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `hash` char(32) NOT NULL,
  `ip` char(15) NOT NULL,
  `date` datetime NOT NULL,
  `url` varchar(500) NOT NULL,
  `method` char(1) NOT NULL,
  `status` char(3) NOT NULL,
  `waf` varchar(100) NOT NULL,
  `warned` tinyint(4) NOT NULL,
  `denied` tinyint(4) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `a` (`id`),
  KEY `b` (`hash`),
  KEY `c` (`date`),
  KEY `d` (`warned`),
  KEY `e` (`denied`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
2) Compile the script with `go build`

3) Run the script with something like:

```
#!/bin/bash
# $ time run.sh
FINDPATH="/Users/ernesto/Desktop/te/logs/"
FINDSEARCH="www_171150.esclf_waf*"
DB="test"
TABLE="te_logs_www"
TMPFILE="/tmp/uwslog_www.txt"
LOGFILE="/tmp/parsed_www.log"

touch "$LOGFILE"
for file in $(find "$FINDPATH" -name "$FINDSEARCH" ! -size 0); do
    # Only process files that weren't parsed on a previous run
    if ! grep -q "$file" "$LOGFILE"
    then
        zcat -fq "$file" > "$TMPFILE" && ./thisscript "$TMPFILE" "$TABLE" | mysql -uroot "$DB"
        echo "OK $file"
        echo "$file" >> "$LOGFILE"
    else
        echo "File exists $file"
    fi
done
```
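
4) Example of what the output looks like. The log line below is hypothetical;
   its field values are illustrative only, not taken from a real Akamai log.
   Given an input line such as

   1.2.3.4 - - [11/Aug/2012:12:29:39 +0000] "GET /index.html HTTP/1.1" 200 1234 "-"

   the program prints batched statements of the form

   INSERT INTO te_logs_www VALUES
   (NULL,'<md5 of the raw line>','1.2.3.4','2012-8-11 12:29:39','/index.html','GET','200',NULL,NULL,NULL);

   A WAF field other than "-" is expected to be pipe-separated, with
   colon-separated warned rule IDs in the second field and denied rule IDs in
   the third; only their counts end up in the `warned` and `denied` columns.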
*/
package main

import (
	"bufio"
	"crypto/md5"
	"fmt"
	"io"
	"log"
	"os"
	"reflect"
	"regexp"
	"strings"
)

// Maximum number of VALUES tuples batched into a single INSERT statement
const MAX_ROWS_PER_INSERT = 10000
type LogLine struct {
	Hash    string
	Ip      string
	Date    string
	Request string
	Url     string
	Method  string
	Status  string
	Waf     string
	Warned  string
	Denied  string
}

// Set assigns `value` to the field named `key` on the LogLine via reflection.
// Only string fields are supported.
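// For illustration (hypothetical call, equivalent to assigning the field directly):
//   l.Set("Ip", "1.2.3.4")  // same effect as l.Ip = "1.2.3.4"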
func (l *LogLine) Set(key string, value string) {
	field := reflect.ValueOf(l).Elem().FieldByName(key)
	type_ := field.Type().String()
	if type_ == "string" {
		field.SetString(value)
	} else {
		log.Panicln(key + " has an unsupported type: " + type_)
	}
}

// FormatDate turns an access-log date such as 11/Aug/2012:12:29:39 +0000
// into an SQL-friendly one, assuming UTC.
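// For example (hypothetical input):
//   FormatDate("11/Aug/2012:12:29:39 +0000")  // => "2012-8-11 12:29:39"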
func FormatDate(date string) string {
	pattern := regexp.MustCompile(`(\d+)/(\w+)/(\d{4}):(\d{2}):(\d{2}):(\d{2})`)
	matches := pattern.FindStringSubmatch(date)
	if len(matches) == 0 {
		log.Fatalln("Cannot fix date: " + date)
	}
	months := []string{
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}
	month := 0
	for i, m := range months {
		if m == matches[2] {
			month = i + 1
		}
	}
	return fmt.Sprintf("%s-%d-%s %s:%s:%s",
		matches[3], month, matches[1],
		matches[4], matches[5], matches[6])
}

// Readln reads a line from the given `Reader`.
// This supports long lines that don't fit the buffer in a single iteration.
func Readln(r *bufio.Reader) (string, error) {
	var (
		isPrefix bool  = true
		err      error = nil
		line, ln []byte
	)
	for isPrefix && err == nil {
		line, isPrefix, err = r.ReadLine()
		ln = append(ln, line...)
	}
	return string(ln), err
}

// LogLineFromLine parses the given `line` into a `LogLine`.
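// Expected input looks roughly like this (hypothetical example line):
//   1.2.3.4 - - [11/Aug/2012:12:29:39 +0000] "GET /index.html HTTP/1.1" 200 1234 "-"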
func LogLineFromLine(line string) *LogLine {
	var logLine = new(LogLine)
	patterns := []string{
		`(?P<Ip>\S+) - -`,
		`\[(?P<Date>[^\]]+)\]`,
		`"(?P<Request>[^"]+)"`,
		`(?P<Status>\d+) .+`,
		`"(?P<Waf>[^"]*)"`}
	pattern := regexp.MustCompile(strings.Join(patterns, " "))
	matches := pattern.FindStringSubmatch(line)
	if len(matches) == 0 {
		log.Fatalln("No matches for: \n\t" + line)
	}
	for i := range matches {
		if i == 0 {
			continue
		}
		logLine.Set(pattern.SubexpNames()[i], matches[i])
	}
	logLine.Date = FormatDate(logLine.Date)
	if logLine.Waf == "-" {
		logLine.Waf = ""
	} else {
		// A non-"-" WAF field is pipe-separated: fields two and three carry
		// colon-separated warned and denied rule IDs, which are only counted here.
		splitWaf := strings.Split(logLine.Waf, "|")
		if splitWaf[1] != "" {
			totalWarned := len(strings.Split(splitWaf[1], ":"))
			logLine.Warned = fmt.Sprintf("%d", totalWarned)
		}
		if splitWaf[2] != "" {
			totalDenied := len(strings.Split(splitWaf[2], ":"))
			logLine.Denied = fmt.Sprintf("%d", totalDenied)
		}
	}
	// Crude escaping: swap single quotes for double quotes so the generated
	// SQL string literals don't break.
	logLine.Request = strings.Replace(logLine.Request, "'", "\"", -1)
	request := strings.Split(logLine.Request, " ")
	logLine.Method, logLine.Url = request[0], request[1]
	hash := md5.New()
	io.WriteString(hash, line)
	logLine.Hash = fmt.Sprintf("%x", hash.Sum(nil))
	return logLine
}

// SQLColumn returns a safe SQL column representation of the given `value`.
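// For illustration (hypothetical calls):
//   SQLColumn(" GET ")  // => "'GET'"
//   SQLColumn("")       // => "NULL"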
func SQLColumn(value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return "NULL"
	}
	return "'" + value + "'"
}

// SQLValuesFromLogLine returns an SQL VALUES tuple for the given `LogLine`.
func SQLValuesFromLogLine(logLine *LogLine) string {
	if logLine.Hash == "" {
		return ""
	}
	fields := []string{"NULL"}
	fields = append(fields, SQLColumn(logLine.Hash))
	fields = append(fields, SQLColumn(logLine.Ip))
	fields = append(fields, SQLColumn(logLine.Date))
	fields = append(fields, SQLColumn(logLine.Url))
	fields = append(fields, SQLColumn(logLine.Method))
	fields = append(fields, SQLColumn(logLine.Status))
	fields = append(fields, SQLColumn(logLine.Waf))
	fields = append(fields, SQLColumn(logLine.Warned))
	fields = append(fields, SQLColumn(logLine.Denied))
	return fmt.Sprintf("(%s)", strings.Join(fields, ","))
}
// SQLInsertsFromFile prints the "INSERT INTO `tableName`" statements for the given `fileName`
func SQLInsertsFromFile(fileName string, tableName string) {
	file, err := os.Open(fileName)
	if err != nil {
		log.Fatalln("Couldn't read", fileName)
	}
	defer file.Close()
	r := bufio.NewReader(file)
	sqlValues := []string{}
	flush := func() {
		// Don't emit an empty (and invalid) INSERT statement
		if len(sqlValues) == 0 {
			return
		}
		fmt.Printf("INSERT INTO %s VALUES %s;\n\n", tableName, strings.Join(sqlValues, ","))
		sqlValues = []string{}
	}
	// Process every non-empty line, batching rows into large INSERT statements
	for line, e := Readln(r); e == nil; line, e = Readln(r) {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		logLine := LogLineFromLine(line)
		sqlValues = append(sqlValues, SQLValuesFromLogLine(logLine))
		if len(sqlValues) >= MAX_ROWS_PER_INSERT {
			flush()
		}
	}
	flush()
}
func main() {
	if len(os.Args) != 3 {
		log.Fatalln("Usage: ./thisscript /path/to/uncompressed.log tableName")
	}
	fileName := os.Args[1]
	tableName := os.Args[2]
	SQLInsertsFromFile(fileName, tableName)
}