Skip to content

Instantly share code, notes, and snippets.

@thom4parisot
Created December 3, 2012 15:27
Show Gist options
  • Select an option

  • Save thom4parisot/4195709 to your computer and use it in GitHub Desktop.

Select an option

Save thom4parisot/4195709 to your computer and use it in GitHub Desktop.
DataLocale Wine Domains — OpenRefine + ElasticSearch Export
[
{
"op": "core/column-removal",
"description": "Remove column Types de produits",
"columnName": "Types de produits"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Adresse using expression grel:if (isNonBlank(cells[\"Adresse suite\"].value), join([value, cells[\"Adresse suite\"].value], \"\\n\"), value)",
"engineConfig": {
"facets": [
{
"invert": false,
"expression": "value",
"selectError": false,
"omitError": false,
"selectBlank": false,
"name": "Statut",
"omitBlank": false,
"columnName": "Statut",
"type": "list",
"selection": [
{
"v": {
"v": "Producteur",
"l": "Producteur"
}
}
]
}
],
"mode": "row-based"
},
"columnName": "Adresse",
"expression": "grel:if (isNonBlank(cells[\"Adresse suite\"].value), join([value, cells[\"Adresse suite\"].value], \"\\n\"), value)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"description": "Remove column Adresse suite",
"columnName": "Adresse suite"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Site web using expression grel:if(isNonBlank(value), 'http://'+replace(value, /^https?:\\/\\//, ''), null)",
"engineConfig": {
"facets": [
{
"invert": false,
"expression": "value",
"selectError": false,
"omitError": false,
"selectBlank": false,
"name": "Statut",
"omitBlank": false,
"columnName": "Statut",
"type": "list",
"selection": [
{
"v": {
"v": "Producteur",
"l": "Producteur"
}
}
]
}
],
"mode": "row-based"
},
"columnName": "Site web",
"expression": "grel:if(isNonBlank(value), 'http://'+replace(value, /^https?:\\/\\//, ''), null)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column TmpPrix at index 8 by fetching URLs based on column Site web using expression grel:'https://www.google.fr/search?hl=fr&q=\"visite\"+\"€\"+' + escape(\"site:\"+value, \"url\") + \"&btnI=J'ai de la chance\"",
"engineConfig": {
"facets": [
{
"expression": "value",
"invert": false,
"selectError": false,
"omitError": false,
"name": "Statut",
"selectBlank": false,
"columnName": "Statut",
"omitBlank": false,
"type": "list",
"selection": [
{
"v": {
"v": "Producteur",
"l": "Producteur"
}
}
]
}
],
"mode": "row-based"
},
"newColumnName": "TmpPrix",
"columnInsertIndex": 8,
"baseColumnName": "Site web",
"urlExpression": "grel:'https://www.google.fr/search?hl=fr&q=\"visite\"+\"€\"+' + escape(\"site:\"+value, \"url\") + \"&btnI=J'ai de la chance\"",
"onError": "set-to-blank",
"delay": 200
},
{
"op": "core/column-removal",
"description": "Remove column TmpPrix",
"columnName": "TmpPrix"
},
{
"op": "core/column-addition",
"description": "Create column Style de vie at index 10 based on column Marques et labels using expression jython:import re\nfrom sets import Set\n\ntags = Set([])\ncell_labels = [\"Marques et labels\"]\n\nfor l in cell_labels:\n    val = cells[l][\"value\"]\n\n    if re.search(\"vélo\", val, flags=re.IGNORECASE):\n        tags.add('Bouger')\n\n    if re.search(\"ferme\", val, flags=re.IGNORECASE):\n        tags.add('Activité')\n\n    if re.search(\"bien-être\", val, flags=re.IGNORECASE):\n        tags.add('Se détendre')\n\nreturn \";\".join(tags)",
"engineConfig": {
"facets": [
{
"invert": true,
"expression": "value",
"selectError": false,
"omitError": false,
"selectBlank": true,
"name": "Marques et labels",
"omitBlank": false,
"columnName": "Marques et labels",
"type": "list",
"selection": []
}
],
"mode": "row-based"
},
"newColumnName": "Style de vie",
"columnInsertIndex": 10,
"baseColumnName": "Marques et labels",
"expression": "jython:import re\nfrom sets import Set\n\ntags = Set([])\ncell_labels = [\"Marques et labels\"]\n\nfor l in cell_labels:\n    val = cells[l][\"value\"]\n\n    if re.search(\"vélo\", val, flags=re.IGNORECASE):\n        tags.add('Bouger')\n\n    if re.search(\"ferme\", val, flags=re.IGNORECASE):\n        tags.add('Activité')\n\n    if re.search(\"bien-être\", val, flags=re.IGNORECASE):\n        tags.add('Se détendre')\n\nreturn \";\".join(tags)",
"onError": "set-to-blank"
},
{
"op": "core/column-addition-by-fetching-urls",
"description": "Create column HTML at index 8 by fetching URLs based on column Site web using expression grel:value",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"newColumnName": "HTML",
"columnInsertIndex": 8,
"baseColumnName": "Site web",
"urlExpression": "grel:value",
"onError": "set-to-blank",
"delay": 5000
}
]
{"index": {"_type": "domains"}}
{"name" : {{jsonize(cells["Raison sociale"].value)}}, "wine_labels" : {{jsonize(cells["AOC"].value.split(";"))}}, "address" : {{jsonize(cells["Adresse"].value)}}, "postal_code" : {{jsonize(cells["Code postal"].value)}}, "lifestyle": {{jsonize(cells["Style de vie"].value.split(";"))}}, "city" : {{jsonize(cells["Commune"].value)}}, "phone" : {{jsonize(cells["Téléphone"].value.split(";"))}}, "website" : {{jsonize(cells["Site web"].value)}}, "labels" : {{jsonize(cells["Marques et labels"].value.split(";"))}}, "handicap" : {{jsonize(cells["Tourisme et Handicap"].value.split(";"))}}, "location" : [{{jsonize(cells["Longitude"].value)}}, {{jsonize(cells["Latitude"].value)}}]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment