Skip to content

Instantly share code, notes, and snippets.

@ettorerizza
Last active March 7, 2017 11:21
Show Gist options
  • Save ettorerizza/686b44a4e4c3927e70968ca1af09f0f2 to your computer and use it in GitHub Desktop.
Save ettorerizza/686b44a4e4c3927e70968ca1af09f0f2 to your computer and use it in GitHub Desktop.
This script takes an OpenRefine operations JSON file as input and writes a copy of it in which each "text transform" and each "mass edit" operation is followed by a column-addition operation that documents it in a new column.
#!/usr/bin/python3
import json
# Load the list of operations to document. The encoding is given explicitly
# so that non-ASCII characters in the JSON are read the same way on every
# platform instead of depending on the locale's default encoding.
with open("test.json", "r", encoding="utf-8") as infile:
    data = json.load(infile)
def transform_to_addcolumn(data, index=None):
    """Build a "core/column-addition" operation documenting a text transform.

    The returned operation adds a column whose Jython expression returns a
    constant string of the form ``<expression> on cell <columnName>``, where
    every ``grel:`` occurrence has been removed from the expression.

    Args:
        data: The text-transform operation, as a dict. It must contain the
            keys ``"expression"`` and ``"columnName"``. It is not modified;
            a shallow copy is edited and returned.
        index: Position used to number the generated column. When omitted,
            the module-level name ``count`` is used, which is how the
            script's main loop supplies it.

    Returns:
        A new dict describing the column-addition operation.

    Raises:
        KeyError: If ``"expression"`` or ``"columnName"`` is missing.
        NameError: If ``index`` is omitted and no global ``count`` exists.
    """
    # Only touch the global when the caller did not supply a position.
    position = count if index is None else index

    data_trans = dict(data)
    data_trans["op"] = "core/column-addition"
    data_trans["expression"] = (
        """jython:return \"\"\"%s on cell %s\"\"\" """) % (
            data["expression"].replace("grel:", ""), data["columnName"])
    data_trans["onError"] = "store-error"
    data_trans["description"] = "store operations in a column" + str(position + 1)
    data_trans["columnInsertIndex"] = position - 1
    data_trans["newColumnName"] = "transform" + str(position)
    data_trans["baseColumnName"] = data["columnName"]

    # Drop the keys that belong to the text-transform operation only.
    del data_trans["columnName"]
    # These two may be absent from the input; pop() avoids a KeyError then.
    data_trans.pop("repeat", None)
    data_trans.pop("repeatCount", None)
    return data_trans
def massedit_to_addcolumn(data, index=None):
    """Build a "core/column-addition" operation documenting a mass edit.

    The returned operation adds a column whose Jython expression returns a
    constant string of the form
    ``MASS EDIT <from> TO <to> ON COLUMN <columnName>``. Only the first
    entry of ``data["edits"]`` is described, and only the first value of
    that entry's ``"from"`` list.

    Args:
        data: The mass-edit operation, as a dict. It must contain
            ``"columnName"`` and a non-empty ``"edits"`` list whose first
            item has a non-empty ``"from"`` list and a ``"to"`` value. It is
            not modified; a shallow copy is edited and returned.
        index: Position used to number the generated column. When omitted,
            the module-level name ``count`` is used, which is how the
            script's main loop supplies it.

    Returns:
        A new dict describing the column-addition operation.

    Raises:
        KeyError: If ``"edits"``, ``"columnName"``, ``"from"`` or ``"to"``
            is missing.
        IndexError: If ``"edits"`` or the first ``"from"`` list is empty.
        NameError: If ``index`` is omitted and no global ``count`` exists.
    """
    # Only touch the global when the caller did not supply a position.
    position = count if index is None else index

    first_edit = data["edits"][0]
    data_trans = dict(data)
    data_trans["op"] = "core/column-addition"
    data_trans["expression"] = (
        """jython:return \"\"\"MASS EDIT %s TO %s ON COLUMN %s\"\"\" """) % (
            first_edit["from"][0], first_edit["to"], data["columnName"])
    data_trans["onError"] = "store-error"
    data_trans["description"] = "store operations in a column" + str(position + 1)
    data_trans["columnInsertIndex"] = position - 1
    data_trans["newColumnName"] = "transform" + str(position)
    data_trans["baseColumnName"] = data["columnName"]

    # Drop the keys that belong to the mass-edit operation only.
    del data_trans["edits"]
    del data_trans["columnName"]
    return data_trans
# Rebuild the operation list so that every text-transform or mass-edit is
# immediately followed by a column-addition operation that documents it.
# A new list is built rather than inserting into `data` while iterating it.
documented = []
for el in data:
    # `count` must stay a module-level name: the two helper functions read
    # it to number the generated columns. Its value is the position of `el`
    # in the output list, i.e. the same index the element would have had
    # if the new operations were inserted into `data` in place.
    count = len(documented)
    documented.append(el)
    op_name = el.get("op")  # entries without an "op" key are kept as-is
    if op_name == "core/text-transform":
        documented.append(transform_to_addcolumn(el))
    elif op_name == "core/mass-edit":
        documented.append(massedit_to_addcolumn(el))
data = documented

print(data)
with open("new_openrefine_operations.json", "w", encoding="utf-8") as outfile:
    json.dump(data, outfile, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment