Created
September 25, 2016 20:27
-
-
Save happyrainb/d7e95fd8a730a9033d5ca658925b6c56 to your computer and use it in GitHub Desktop.
This script updates CKAN records using a .csv file as the input data. Written in 05/2016; tested on a test site, never used in production. It uses the provided connection script to make the API calls.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ckanapi | |
import csv | |
from copy import deepcopy | |
def update_dataset_api_call(connection, dataset_data):
    """Send ``dataset_data`` to the ``package_update`` action.

    The whole dictionary is forwarded as the action payload, so every
    field present in it (not only the title) is submitted.

    :param connection: object exposing ``call_action(action, data)``;
        the callers in this file pass a ``ckanapi.RemoteCKAN`` instance.
    :param dataset_data: dictionary describing the dataset to update.
    :return: None (the action's response is discarded).
    """
    action = 'package_update'
    connection.call_action(action, dataset_data)
def get_dataset_api_call(connection, dataset_data):
    """Fetch a dataset through the ``package_show`` action.

    Only the ``'id'`` entry of ``dataset_data`` is forwarded; any other
    keys are ignored. A missing ``'id'`` is sent as ``None``.

    :param connection: object exposing ``call_action(action, data)``.
    :param dataset_data: mapping that carries the dataset ``'id'``.
    :return: whatever ``package_show`` returns for that id.
    """
    lookup = dict(id=dataset_data.get('id'))
    return connection.call_action('package_show', lookup)
def get_package_list(connection):
    """Return the result of the ``package_list`` action.

    :param connection: object exposing ``call_action(action)``.
    :return: the ``package_list`` response; callers in this file treat
        it as a sized iterable of dataset identifiers.
    """
    listing = connection.call_action('package_list')
    return listing
def rename_extra_field_for_all_datasets(connection, field_name, new_field_name):
    """Rename one "extras" key on every dataset returned by ``package_list``.

    Each dataset is fetched, its ``extras`` list is scanned for the first
    entry whose key equals ``field_name`` (compared case-insensitively and
    ignoring surrounding whitespace), and that entry's key is replaced by
    ``new_field_name``. Only datasets that actually changed are sent back
    through ``package_update``. Progress is printed after every dataset
    and a summary line is printed at the end.

    :param connection: object exposing ``call_action``.
    :param field_name: current name of the extra field.
    :param new_field_name: name the extra field should carry afterwards.
    :return: None
    """
    dataset_names = get_package_list(connection)
    total = len(dataset_names)
    renamed = 0

    for position, name in enumerate(dataset_names, start=1):
        record = get_dataset_api_call(connection, {'id': name})

        # Pick the first extra whose normalised key matches, if any.
        hit = next(
            (
                entry
                for entry in record.get('extras', [])
                if entry.get('key').lower().strip() == field_name.lower().strip()
            ),
            None,
        )

        if hit is not None:
            hit['key'] = new_field_name
            update_dataset_api_call(connection, record)
            print('Renamed "' + field_name + '" in package with id: ' + str(record['id']) + ' to "' + new_field_name + '" sucessfully')
            renamed += 1

        print("Completed: {0} of {1}".format(position, total))

    print("Renamed a total of {0} from {1} datasets.".format(renamed, total))
def _merge_row_into_dataset(current_data, row):
    """Overlay one CSV row onto ``current_data`` in place.

    Columns that are already top-level keys of the dataset overwrite that
    key. Every other column is written into the ``extras`` list, updating
    all entries with a matching key or appending a new entry when none
    matches.
    """
    for column, value in row.items():
        if column in current_data:
            current_data[column] = value
            continue

        # Not every dataset is guaranteed to carry an 'extras' list.
        extras = current_data.setdefault('extras', [])
        matched = False
        for extra in extras:
            if extra.get('key') == column:
                extra['value'] = value
                matched = True
        if not matched:
            extras.append({'key': column, 'value': value})


def do_csv_update(connection, csv_filename, nested_columns):
    """Update datasets from the rows of a CSV file.

    Each row must carry an ``id`` column identifying the dataset. The
    current dataset is fetched, the row is merged into it (top-level
    fields are overwritten, unknown columns become "extras"), and the
    result is sent back through ``package_update``. A row whose dataset
    cannot be found, either on fetch or on update, is reported and
    skipped so the remaining rows are still processed.

    :param connection: object exposing ``call_action``.
    :param csv_filename: path of the CSV file; the first row is the header.
    :param nested_columns: currently unused; kept so existing callers
        keep working.
    :return: None
    """
    # csv.DictReader needs a text-mode file opened with newline='' on
    # Python 3; 'utf-8-sig' also tolerates a BOM in front of the header.
    with open(csv_filename, newline='', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            # The fetch is the first call that can fail for an unknown id,
            # so it needs the same NotFound handling as the update below.
            try:
                current_data = get_dataset_api_call(connection, row)
            except ckanapi.errors.NotFound:
                print('Package with id: ' + str(row.get('id')) + ' not found for update.')
                continue

            _merge_row_into_dataset(current_data, row)

            try:
                update_dataset_api_call(connection, current_data)
            except ckanapi.errors.NotFound:
                print('Package with id: ' + str(current_data['id']) + ' not found for update.')
            else:
                print('Updated package with id: ' + str(current_data['id']) + ' sucessfully')
# This entry point is no longer needed, but it is left here as an example.
def main_manual_update():
    """Push one hand-written dataset dictionary through ``package_update``.

    Demonstrates that ``update_dataset_api_call`` works with any dataset
    id and data it is given: the payload below is built by hand rather
    than read from a file.
    """
    connection = ckanapi.RemoteCKAN(
        'http://xxx',
        apikey='xxx',
        user_agent='ckanapiexample/1.0 (+http://example.com/my/website)',
    )

    # Manually written data for a single dataset.
    extras = [
        {'key': 'Restricitons', 'value': 'Does not matter.'},
        {'key': 'Creator', 'value': 'This is joe'},
    ]
    payload = {
        "id": "7285f5c2-412a-4264-95fe-39c658d8be3z2",
        "title": 'Updated Title UPDATED AGAIN',
        'author': "Jhon Doe",
        'extras': extras,
    }

    update_dataset_api_call(connection, payload)
    print("done manual.")
# Entry point that drives the updates from CSV data.
def update_from_csv(connection):
    """Run ``do_csv_update`` against ``data/update_datasets.csv``.

    :param connection: object exposing ``call_action``; handed straight
        to ``do_csv_update``.
    :return: None
    """
    filename = 'data/update_datasets.csv'
    do_csv_update(connection, filename, ['restrictions', 'another_column'])
    print("done processing: " + filename)
def main():
    """Connect to the CKAN site and rename the 'frequency' extra field."""
    connection = ckanapi.RemoteCKAN('http://xxx', apikey='xxxx')

    # The CSV-driven update (data/update_datasets.csv) is switched off;
    # to run it, call update_from_csv(connection) here.

    # Rename an extra field on every dataset.
    rename_extra_field_for_all_datasets(
        connection,
        'frequency',
        'Update Frequency',
    )
if __name__ == "__main__":
    # Run the rename workflow when executed as a script. The older
    # main_manual_update() entry point is deliberately not invoked.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment