Skip to content

Instantly share code, notes, and snippets.

@josefslerka
Created May 30, 2012 16:20
Show Gist options
  • Save josefslerka/2837367 to your computer and use it in GitHub Desktop.
Save josefslerka/2837367 to your computer and use it in GitHub Desktop.
Google Refine Template for Scraping Facebook Fan Pages
[
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column Facebook at index 2 by fetching URLs based on column url using expression grel:value",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "Facebook",
"columnInsertIndex": 2,
"baseColumnName": "url",
"urlExpression": "grel:value",
"onError": "set-to-blank",
"delay": 5000
},
{
"op": "core/column-split",
"description": "Split column Facebook by separator",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"columnName": "Facebook",
"guessCellType": true,
"removeOriginalColumn": false,
"mode": "separator",
"separator": "pagesListData",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-split",
"description": "Split column Facebook 2 by separator",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"columnName": "Facebook 2",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "uiCollapsedListVisible",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-addition",
"description": "Create column cistestranky at index 5 based on column Facebook 2 1 using expression grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"newColumnName": "cistestranky",
"columnInsertIndex": 5,
"baseColumnName": "Facebook 2 1",
"expression": "grel:replace(value,/<\\/?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)\\/?>/,'')",
"onError": "set-to-blank"
},
{
"op": "core/column-split",
"description": "Split column cistestranky by separator",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"columnName": "cistestranky",
"guessCellType": true,
"removeOriginalColumn": false,
"mode": "separator",
"separator": "<a class=\"infoSeeMore",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-split",
"description": "Split column cistestranky 1 by separator",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"columnName": "cistestranky 1",
"guessCellType": true,
"removeOriginalColumn": false,
"mode": "separator",
"separator": "\">",
"regex": false,
"maxColumns": 0
},
{
"op": "core/column-removal",
"description": "Remove column cistestranky 2",
"columnName": "cistestranky 2"
},
{
"op": "core/column-removal",
"description": "Remove column Facebook 2 2",
"columnName": "Facebook 2 2"
},
{
"op": "core/column-removal",
"description": "Remove column Facebook 2 3",
"columnName": "Facebook 2 3"
},
{
"op": "core/column-removal",
"description": "Remove column cistestranky 1 1",
"columnName": "cistestranky 1 1"
},
{
"op": "core/column-removal",
"description": "Remove column cistestranky 1",
"columnName": "cistestranky 1"
},
{
"op": "core/column-removal",
"description": "Remove column cistestranky",
"columnName": "cistestranky"
},
{
"op": "core/column-removal",
"description": "Remove column Facebook 2 1",
"columnName": "Facebook 2 1"
},
{
"op": "core/column-removal",
"description": "Remove column Facebook 1",
"columnName": "Facebook 1"
},
{
"op": "core/column-rename",
"description": "Rename column cistestranky 1 2 to fanpages",
"oldColumnName": "cistestranky 1 2",
"newColumnName": "fanpages"
},
{
"op": "core/column-move",
"description": "Move column Facebook to position 3",
"columnName": "Facebook",
"index": 3
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment