Created
December 3, 2012 15:23
-
-
Save thom4parisot/4195695 to your computer and use it in GitHub Desktop.
DataLocale Restaurants — OpenRefine + ElasticSearch Export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "op": "core/column-removal", | |
| "description": "Remove column Adresse suite", | |
| "columnName": "Adresse suite" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "description": "Text transform on cells in column Site web using expression grel:if(isNonBlank(value), 'http://'+replace(value, /^https?:\\/\\//, ''), null)", | |
| "engineConfig": { | |
| "facets": [ | |
| { | |
| "invert": false, | |
| "expression": "value", | |
| "selectError": false, | |
| "omitError": false, | |
| "selectBlank": false, | |
| "name": "Statut", | |
| "omitBlank": false, | |
| "columnName": "Statut", | |
| "type": "list", | |
| "selection": [ | |
| { | |
| "v": { | |
| "v": "Producteur", | |
| "l": "Producteur" | |
| } | |
| } | |
| ] | |
| } | |
| ], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "Site web", | |
| "expression": "grel:if(isNonBlank(value), 'http://'+replace(value, /^https?:\\/\\//, ''), null)", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10 | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "description": "Text transform on cells in column Classement Logis using expression grel:if(isNonBlank(value), value.match(/.*(\\d+)\\D+$/)[0], 0)", | |
| "engineConfig": { | |
| "facets": [ | |
| { | |
| "invert": true, | |
| "expression": "value", | |
| "selectError": false, | |
| "omitError": false, | |
| "selectBlank": true, | |
| "name": "Classement Logis", | |
| "omitBlank": false, | |
| "columnName": "Classement Logis", | |
| "type": "list", | |
| "selection": [] | |
| } | |
| ], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "Classement Logis", | |
| "expression": "grel:if(isNonBlank(value), value.match(/.*(\\d+)\\D+$/)[0], 0)", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10 | |
| }, | |
| { | |
| "op": "core/column-addition", | |
| "description": "Create column Style de vie at index 10 based on column Chaînes using expression jython:import re\nfrom sets import Set\n\ntags = Set([])\ncell_labels = [\"Chaînes\", \"Spécialités culinaires\", \"Marques et labels\"]\n\nfor l in cell_labels:\n    val = cells[l][\"value\"]\n\n    if re.search(\"vélo\", val, flags=re.IGNORECASE):\n        tags.add('Bouger')\n\n    if re.search(\"ferme\", val, flags=re.IGNORECASE):\n        tags.add('Activité')\n\n    if re.search(\"bien-être\", val, flags=re.IGNORECASE):\n        tags.add('Se détendre')\n\n    if re.search(\"bio|traditionnel\", val, flags=re.IGNORECASE):\n        tags.add('Se détendre')\n\nreturn \";\".join(tags)", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "newColumnName": "Style de vie", | |
| "columnInsertIndex": 10, | |
| "baseColumnName": "Chaînes", | |
| "expression": "jython:import re\nfrom sets import Set\n\ntags = Set([])\ncell_labels = [\"Chaînes\", \"Spécialités culinaires\", \"Marques et labels\"]\n\nfor l in cell_labels:\n    val = cells[l][\"value\"]\n\n    if re.search(\"vélo\", val, flags=re.IGNORECASE):\n        tags.add('Bouger')\n\n    if re.search(\"ferme\", val, flags=re.IGNORECASE):\n        tags.add('Activité')\n\n    if re.search(\"bien-être\", val, flags=re.IGNORECASE):\n        tags.add('Se détendre')\n\n    if re.search(\"bio|traditionnel\", val, flags=re.IGNORECASE):\n        tags.add('Se détendre')\n\nreturn \";\".join(tags)", | |
| "onError": "set-to-blank" | |
| } | |
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {"index": {"_type": "restaurants"}} | |
| {"name" : {{jsonize(cells["Raison sociale"].value)}}, "categories" : {{jsonize(cells["Catégorie"].value.split(";"))}}, "specialties" : {{jsonize(cells["Spécialités culinaires"].value.split(";"))}}, "address" : {{jsonize(cells["Adresse 1"].value)}}, "postal_code" : {{jsonize(cells["Code postal"].value)}}, "city" : {{jsonize(cells["Commune"].value)}}, "phone" : {{jsonize(cells["Téléphone"].value.split(";"))}}, "website" : {{jsonize(cells["Site web"].value)}}, "lifestyle": {{jsonize(cells["Style de vie"].value.split(";"))}}, "ranking" : {{jsonize(cells["Classement Logis"].value)}}, "labels" : {{jsonize(cells["Marques et labels"].value.split(";"))}}, "handicap" : {{jsonize(cells["Tourisme et Handicap"].value.split(";"))}}, "location" : [{{jsonize(cells["Longitude"].value)}}, {{jsonize(cells["Latitude"].value)}}]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment