Created
December 3, 2012 15:27
-
-
Save thom4parisot/4195709 to your computer and use it in GitHub Desktop.
DataLocale Wine Domains — OpenRefine + ElasticSearch Export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "op": "core/column-removal", | |
| "description": "Remove column Types de produits", | |
| "columnName": "Types de produits" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "description": "Text transform on cells in column Adresse using expression grel:if (isNonBlank(cells[\"Adresse suite\"].value), join([value, cells[\"Adresse suite\"].value], \"\\n\"), value)", | |
| "engineConfig": { | |
| "facets": [ | |
| { | |
| "invert": false, | |
| "expression": "value", | |
| "selectError": false, | |
| "omitError": false, | |
| "selectBlank": false, | |
| "name": "Statut", | |
| "omitBlank": false, | |
| "columnName": "Statut", | |
| "type": "list", | |
| "selection": [ | |
| { | |
| "v": { | |
| "v": "Producteur", | |
| "l": "Producteur" | |
| } | |
| } | |
| ] | |
| } | |
| ], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "Adresse", | |
| "expression": "grel:if (isNonBlank(cells[\"Adresse suite\"].value), join([value, cells[\"Adresse suite\"].value], \"\\n\"), value)", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10 | |
| }, | |
| { | |
| "op": "core/column-removal", | |
| "description": "Remove column Adresse suite", | |
| "columnName": "Adresse suite" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "description": "Text transform on cells in column Site web using expression grel:if(isNonBlank(value), 'http://'+replace(value, /^https?:\\/\\//, ''), null)", | |
| "engineConfig": { | |
| "facets": [ | |
| { | |
| "invert": false, | |
| "expression": "value", | |
| "selectError": false, | |
| "omitError": false, | |
| "selectBlank": false, | |
| "name": "Statut", | |
| "omitBlank": false, | |
| "columnName": "Statut", | |
| "type": "list", | |
| "selection": [ | |
| { | |
| "v": { | |
| "v": "Producteur", | |
| "l": "Producteur" | |
| } | |
| } | |
| ] | |
| } | |
| ], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "Site web", | |
| "expression": "grel:if(isNonBlank(value), 'http://'+replace(value, /^https?:\\/\\//, ''), null)", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10 | |
| }, | |
| { | |
| "op": "core/column-addition-by-fetching-urls", | |
| "description": "Create column TmpPrix at index 8 by fetching URLs based on column Site web using expression grel:'https://www.google.fr/search?hl=fr&q=\"visite\"+\"€\"+' + escape(\"site:\"+value, \"url\") + \"&btnI=J'ai de la chance\"", | |
| "engineConfig": { | |
| "facets": [ | |
| { | |
| "expression": "value", | |
| "invert": false, | |
| "selectError": false, | |
| "omitError": false, | |
| "name": "Statut", | |
| "selectBlank": false, | |
| "columnName": "Statut", | |
| "omitBlank": false, | |
| "type": "list", | |
| "selection": [ | |
| { | |
| "v": { | |
| "v": "Producteur", | |
| "l": "Producteur" | |
| } | |
| } | |
| ] | |
| } | |
| ], | |
| "mode": "row-based" | |
| }, | |
| "newColumnName": "TmpPrix", | |
| "columnInsertIndex": 8, | |
| "baseColumnName": "Site web", | |
| "urlExpression": "grel:'https://www.google.fr/search?hl=fr&q=\"visite\"+\"€\"+' + escape(\"site:\"+value, \"url\") + \"&btnI=J'ai de la chance\"", | |
| "onError": "set-to-blank", | |
| "delay": 200 | |
| }, | |
| { | |
| "op": "core/column-removal", | |
| "description": "Remove column TmpPrix", | |
| "columnName": "TmpPrix" | |
| }, | |
| { | |
| "op": "core/column-addition", | |
| "description": "Create column Style de vie at index 10 based on column Marques et labels using expression jython:import re\nfrom sets import Set\n\ntags = Set([])\ncell_labels = [\"Marques et labels\"]\n\nfor l in cell_labels:\n  val = cells[l][\"value\"]\n\n  if re.search(\"vélo\", val, flags=re.IGNORECASE):\n    tags.add('Bouger')\n\n  if re.search(\"ferme\", val, flags=re.IGNORECASE):\n    tags.add('Activité')\n\n  if re.search(\"bien-être\", val, flags=re.IGNORECASE):\n    tags.add('Se détendre')\n\nreturn \";\".join(tags)", | |
| "engineConfig": { | |
| "facets": [ | |
| { | |
| "invert": true, | |
| "expression": "value", | |
| "selectError": false, | |
| "omitError": false, | |
| "selectBlank": true, | |
| "name": "Marques et labels", | |
| "omitBlank": false, | |
| "columnName": "Marques et labels", | |
| "type": "list", | |
| "selection": [] | |
| } | |
| ], | |
| "mode": "row-based" | |
| }, | |
| "newColumnName": "Style de vie", | |
| "columnInsertIndex": 10, | |
| "baseColumnName": "Marques et labels", | |
| "expression": "jython:import re\nfrom sets import Set\n\ntags = Set([])\ncell_labels = [\"Marques et labels\"]\n\nfor l in cell_labels:\n  val = cells[l][\"value\"]\n\n  if re.search(\"vélo\", val, flags=re.IGNORECASE):\n    tags.add('Bouger')\n\n  if re.search(\"ferme\", val, flags=re.IGNORECASE):\n    tags.add('Activité')\n\n  if re.search(\"bien-être\", val, flags=re.IGNORECASE):\n    tags.add('Se détendre')\n\nreturn \";\".join(tags)", | |
| "onError": "set-to-blank" | |
| }, | |
| { | |
| "op": "core/column-addition-by-fetching-urls", | |
| "description": "Create column HTML at index 8 by fetching URLs based on column Site web using expression grel:value", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "newColumnName": "HTML", | |
| "columnInsertIndex": 8, | |
| "baseColumnName": "Site web", | |
| "urlExpression": "grel:value", | |
| "onError": "set-to-blank", | |
| "delay": 5000 | |
| } | |
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {"index": {"_type": "domains"}} | |
| {"name" : {{jsonize(cells["Raison sociale"].value)}}, "wine_labels" : {{jsonize(cells["AOC"].value.split(";"))}}, "address" : {{jsonize(cells["Adresse"].value)}}, "postal_code" : {{jsonize(cells["Code postal"].value)}}, "lifestyle": {{jsonize(cells["Style de vie"].value.split(";"))}}, "city" : {{jsonize(cells["Commune"].value)}}, "phone" : {{jsonize(cells["Téléphone"].value.split(";"))}}, "website" : {{jsonize(cells["Site web"].value)}}, "labels" : {{jsonize(cells["Marques et labels"].value.split(";"))}}, "handicap" : {{jsonize(cells["Tourisme et Handicap"].value.split(";"))}}, "location" : [{{jsonize(cells["Longitude"].value)}}, {{jsonize(cells["Latitude"].value)}}]}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment