"My data files are too large."
"I have many different files and I need to join them together."
- a fast and general-purpose cluster computing system
"My data files are too large."
"I have many different files and I need to join them together."
#!/usr/bin/env ruby | |
# encoding: utf-8 | |
require 'rest_client' | |
require 'csv' | |
require 'nokogiri' | |
require 'colorize' | |
page_range = 1..10 |
#!/usr/bin/env ruby | |
# encoding: utf-8 | |
require 'rest_client' | |
require 'csv' | |
require 'nokogiri' | |
require 'colorize' | |
page_range = 0..50 |
# Install via command-line as 'gem install sparql-client' | |
require 'sparql/client' | |
# Send an explicit User-Agent header with every request made by the client.
headers = { 'User-Agent' => 'Ruby-Sparql-Client/1.0' } | |
# Client for the Wikidata SPARQL endpoint, kept in @sparql for the code below.
# read_timeout is set to 120 (presumably seconds; confirm against the
# sparql-client documentation).
@sparql = SPARQL::Client.new("https://query.wikidata.org/sparql", headers: headers, read_timeout: 120) | |
# A SPARQL query to find an item and an optional Twitter handle | |
def wikidata_by_orcid_query(orcid) | |
%Q( | |
SELECT ?item ?itemLabel ?twitter |
# encoding: utf-8 | |
class String | |
def is_orcid? | |
/(\d{4}-){3}\d{3}[0-9X]{1}$/.match?(self) | |
end | |
def valid_orcid? | |
parts = self.scan(/[0-9X]/) | |
mod = parts[0..14].map(&:to_i) | |
.inject { |sum, n| (sum + n)*2 } |
#!/usr/bin/env ruby | |
# encoding: utf-8 | |
require 'csv' | |
require 'rest_client' | |
require 'json' | |
BASE_URL = "https://api.openalex.org/works?filter=concepts.id:C58642233,has_orcid:true,publication_year:2023&per_page=50&page=" | |
def get_data(page:) |
# Q1312945 Expedition
# Q-identifiers (presumably Wikidata QIDs) of the people whose user records
# are wanted. "Q96384" used to be listed twice, which looked the same user up
# twice and put its id into user_ids twice; each identifier now appears once,
# and uniq guards against a duplicate being pasted in again.
qids = %w[
  Q108669 Q63760 Q62747 Q104839 Q96384 Q85444
  Q101823 Q347529 Q43881351 Q95248572
].uniq

# Resolve each identifier to a user id. The lookup is deliberately
# best-effort: an identifier whose lookup raises, or that matches no user,
# is dropped instead of aborting the whole run.
user_ids = qids.map do |qid|
  user = User.find_by_identifier(qid) rescue nil
  user&.id
end.compact
#!/usr/bin/env ruby | |
# encoding: utf-8 | |
require 'csv' | |
require 'dina' | |
Dina.config = { | |
authorization_url: 'https://dina.biodiversity.agr.gc.ca/auth', | |
endpoint_url: 'https://dina.biodiversity.agr.gc.ca/api', | |
server_name: 'dina-prod', |