Skip to content

Instantly share code, notes, and snippets.

@r00k
Created June 16, 2010 18:21
Show Gist options
  • Save r00k/441071 to your computer and use it in GitHub Desktop.
Save r00k/441071 to your computer and use it in GitHub Desktop.
# Top-level driver script for getting trials from clinicaltrials.gov into our database.
class ImportTrials
  # URL template for a clinicaltrials.gov search. The %s placeholder is filled
  # with an already URL-encoded search term (see INSTITUTIONS).
  SEARCH_URL = "http://clinicaltrials.gov/ct2/results?term=%s&recr=Open&studyxml=true"

  # Institution name => { :search_url => full search URL, :abbrev => abbreviation }.
  # Each search term is a phrase wrapped in %22 (a URL-encoded double quote).
  INSTITUTIONS = {
    "Dana-Farber Cancer Institute" => { :search_url => SEARCH_URL % '%22dana+farber%22', :abbrev => 'DFCI' },
    "Harvard Medical School" => { :search_url => SEARCH_URL % '%22harvard+medical+school%22', :abbrev => 'HMS' },
    "Harvard School of Public Health" => { :search_url => SEARCH_URL % '%22harvard+school+of+public+health%22', :abbrev => 'HSPH' },
    # NOTE(review): the apostrophe is sent raw here but encoded as %27 in the
    # next entry -- confirm both forms return the expected results.
    "Brigham and Women's Hospital" => { :search_url => SEARCH_URL % '%22brigham+and+women\'s+hospital%22', :abbrev => 'BWH' },
    "Children's Hospital Boston" => { :search_url => SEARCH_URL % '%22children%27s+hospital+boston%22', :abbrev => 'CHB' },
    "Beth-Israel Deaconess Medical Center" => { :search_url => SEARCH_URL % '%22beth+israel+deaconess%22', :abbrev => 'BIDMC' },
    "Massachusetts General Hospital" => { :search_url => SEARCH_URL % '%22massachusetts+general+hospital%22', :abbrev => 'MGH' }
  }

  # Log file path, relative to the directory containing this file.
  LOG_FILE = File.dirname(__FILE__) + '/../log/import_trials.log'

  # Run the import for every entry in INSTITUTIONS and log the total duration.
  #
  # The logger is closed in an ensure clause so the log file handle is
  # released even when one of the imports raises; the error still propagates.
  def self.run
    logger # open the log before any download is attempted
    time_script do
      INSTITUTIONS.each do |institution_name, details|
        import(institution_name, details[:search_url])
      end
    end
    nil # no meaningful return value
  ensure
    close_logger
  end

  # Download the search results, unzip them, and create/update the database
  # records for one institution.
  #
  # institution_name - name passed through to the trials that get created/updated.
  # search_string    - value handed to DownloadTrials.download (run passes the
  #                    institution's :search_url).
  #
  # May be called without going through run; the logger is then created on
  # first use and stays open.
  def self.import(institution_name, search_string)
    log("\n\n\nImport started on #{Time.now}")
    log("Started download from CT.gov for #{institution_name} with search string #{search_string}")
    DownloadTrials.download(search_string)

    log("Unzipping results")
    DownloadTrials.unzip_trials

    log("Creating trials from xml files")
    ImportTrials.create_or_update_trials_from_xml(DownloadTrials::OUTPUT_DIRECTORY, institution_name)
  end

  # Copy the :abbrev value of every INSTITUTIONS entry onto the Institution
  # record with the same name and save it with save!.
  #
  # NOTE(review): if find_by_name returns nil for a name, the assignment below
  # raises NoMethodError -- confirm whether missing records should be skipped.
  def self.assign_abbreviations
    INSTITUTIONS.each do |institution_name, details|
      institution = Institution.find_by_name(institution_name)
      institution.abbreviation = details[:abbrev]
      institution.save!
    end
  end

  # Find or create trials for each xml representation in a directory.
  #
  # directory   - directory searched (non-recursively) for *.xml files.
  # institution - institution name attached to every trial.
  def self.create_or_update_trials_from_xml(directory, institution)
    Dir.glob(directory + '/*.xml').each do |file|
      create_or_update_trial(file, institution)
    end
  end

  # Create or update the trial described by one xml file and attach the
  # institution to it.
  #
  # An existing trial (looked up by :nct_id) is updated through
  # update_existing; otherwise a new Trial is built from the parsed attributes.
  #
  # Returns the trial.
  def self.create_or_update_trial(file, institution_name)
    attributes = Trial.parse_trial_attributes(file)
    trial = Trial.find_by_nct_id(attributes[:nct_id])

    if trial
      trial.update_existing(attributes)
    else
      trial = Trial.new(attributes)
    end

    trial.add_institution(institution_name)

    if trial.new_record?
      # false is passed straight to save; presumably this skips validations
      # (Rails 2 style) -- confirm against the Trial model.
      trial.save(false)
    else
      # The trial already exists, so only its institution links are saved here.
      trial.trial_institutions.each(&:save)
    end

    trial
  end

  # Write an info-level message to the import log. Nothing is logged (and no
  # logger is created) in the test environment.
  def self.log(message)
    logger.info(message) unless Rails.env.test?
  end

  # Time how long the given block takes and log the duration in (fractional)
  # minutes. The message is not logged if the block raises.
  def self.time_script
    start = Time.now
    yield
    log "Completed import in #{(Time.now - start) / 60} minutes"
  end

  # The shared logger, created on first use so that import also works when it
  # is called without run.
  def self.logger
    @logger ||= Logger.new(LOG_FILE)
  end

  # Close the shared logger, if one is open, and forget it so that the next
  # use opens a fresh one.
  def self.close_logger
    @logger.close if @logger
    @logger = nil
  end

  # A bare `private` has no effect on methods defined with `def self.`, so the
  # helpers are hidden explicitly.
  private_class_method :log, :time_script, :logger, :close_logger
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment