-
-
Save erighetto/6314176efcf96b9fabd6db94ef4035ac to your computer and use it in GitHub Desktop.
OpenRefine recipe to parse Apache log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": "-", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": "]", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 1 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2 1", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": "[", | |
"regex": false, | |
"maxColumns": 2 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 2 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2 2", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": "\"", | |
"regex": false, | |
"maxColumns": 0 | |
}, | |
{ | |
"op": "core/column-split", | |
"description": "Split column Column 1 2 2 3 by separator", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "Column 1 2 2 3", | |
"guessCellType": true, | |
"removeOriginalColumn": true, | |
"mode": "separator", | |
"separator": " ", | |
"regex": false, | |
"maxColumns": 0 | |
}, | |
{ | |
"op": "core/column-reorder", | |
"description": "Reorder columns", | |
"columnNames": [ | |
"Column 1 1", | |
"Column 1 2 1 1", | |
"Column 1 2 1 2", | |
"Column 1 2 2 2", | |
"Column 1 2 2 3 2", | |
"Column 1 2 2 3 3", | |
"Column 1 2 2 4", | |
"Column 1 2 2 6" | |
] | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 1 to client IP address", | |
"oldColumnName": "Column 1 1", | |
"newColumnName": "client IP address" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 1 1 to htaccess userid", | |
"oldColumnName": "Column 1 2 1 1", | |
"newColumnName": "htaccess userid" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 1 2 to timestamp", | |
"oldColumnName": "Column 1 2 1 2", | |
"newColumnName": "timestamp" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 2 to request line", | |
"oldColumnName": "Column 1 2 2 2", | |
"newColumnName": "request line" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 3 3 to object size", | |
"oldColumnName": "Column 1 2 2 3 3", | |
"newColumnName": "object size" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 4 to HTTP request header", | |
"oldColumnName": "Column 1 2 2 4", | |
"newColumnName": "HTTP request header" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 3 2 to Status Code", | |
"oldColumnName": "Column 1 2 2 3 2", | |
"newColumnName": "Status Code" | |
}, | |
{ | |
"op": "core/column-rename", | |
"description": "Rename column Column 1 2 2 6 to User-Agent HTTP request header", | |
"oldColumnName": "Column 1 2 2 6", | |
"newColumnName": "User-Agent HTTP request header" | |
}, | |
{ | |
"op": "core/text-transform", | |
"description": "Text transform on cells in column timestamp using expression grel:value.replace('Jan','01').replace('Feb','02').replace('Mar','03').replace('Apr','04').replace('May','05').replace('Jun','06').replace('Jul','07').replace('Aug','08').replace('Sep','09').replace('Oct','10').replace('Nov','11').replace('Dec','12').toDate('dd/MM/yyyy:HH:mm:ss')", | |
"engineConfig": { | |
"facets": [], | |
"mode": "row-based" | |
}, | |
"columnName": "timestamp", | |
"expression": "grel:value.replace('Jan','01').replace('Feb','02').replace('Mar','03').replace('Apr','04').replace('May','05').replace('Jun','06').replace('Jul','07').replace('Aug','08').replace('Sep','09').replace('Oct','10').replace('Nov','11').replace('Dec','12').toDate('dd/MM/yyyy:HH:mm:ss')", | |
"onError": "keep-original", | |
"repeat": false, | |
"repeatCount": 10 | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment