Last active
February 16, 2023 04:03
-
-
Save dshorthouse/0eb95f9e6ed0b20b8c35ac691db9fdbf to your computer and use it in GitHub Desktop.
Recursive Ruby script to gather ORCIDs from candidate researchers/taxonomists in OpenAlex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# encoding: utf-8 | |
require 'csv' | |
require 'rest_client' | |
require 'json' | |
# OpenAlex "works" query used by the crawl. The filter restricts results to
# concept C58642233, works with at least one ORCID, and publication year 2023,
# at 50 works per page. The URL deliberately ends with "page=" so the caller
# can append the page number.
# NOTE(review): C58642233 is presumably the taxonomy concept - confirm.
# Frozen so the shared constant cannot be mutated in place.
BASE_URL = "https://api.openalex.org/works?filter=concepts.id:C58642233,has_orcid:true,publication_year:2023&per_page=50&page=".freeze
# Downloads OpenAlex works pages, starting at +page+, and appends the ORCID
# iDs of their authors to open_alex.csv (one iD per row).
#
# Only works whose title contains "species" or "revision" (compared in lower
# case) are considered; works without a title are skipped. Each ORCID URL is
# reduced to its last "/"-separated segment before being written.
#
# Pages are requested one after another until per_page * page reaches the
# total count reported in the response metadata, or a page has no results.
#
# NOTE(review): OpenAlex basic (page-number) paging is reportedly capped at
# 10,000 results; larger result sets may need cursor paging - confirm.
#
# @param page [Integer] page number to start from
# @return [nil]
def get_data(page:)
  keywords = %w[species revision]

  # A single append handle is kept for the whole crawl, and pages are walked
  # with a loop rather than recursion, so rows are written in fetch order and
  # neither open file handles nor stack frames pile up per page.
  CSV.open("open_alex.csv", 'ab') do |csv|
    loop do
      response = RestClient.get(BASE_URL + page.to_s)
      data = JSON.parse(response, symbolize_names: true)

      results = data[:results] || []
      # An empty page means there is nothing further to read; stopping here
      # also prevents an endless crawl if the metadata is missing or zero.
      break if results.empty?

      results.each do |result|
        title = result[:title]
        next if title.nil?

        lowered = title.downcase
        next unless keywords.any? { |keyword| lowered.include?(keyword) }

        Array(result[:authorships]).each do |authorship|
          author = authorship[:author]
          orcid = author && author[:orcid]
          # Authors without an ORCID are skipped; keep only the bare iD.
          csv << [orcid.split("/").last] if orcid
        end
      end

      meta = data[:meta] || {}
      # Strictly "<": when the pages fetched so far already cover the total
      # count there is no further page to request.
      break unless meta[:per_page].to_i * page.to_i < meta[:count].to_i

      page += 1
    end
  end
end
# Start the crawl at the first page, but only when this file is executed
# directly, so that loading it from another script does not trigger network
# requests or file writes.
get_data(page: 1) if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment