Created
June 16, 2010 18:21
-
-
Save r00k/441071 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Top-level driver script for getting trials from clinicaltrials.gov into our database.
# Runs one clinicaltrials.gov search per entry in INSTITUTIONS and feeds the
# downloaded XML files into Trial records. Entry point is ImportTrials.run.
class ImportTrials
  # Format string for a search; %s is replaced by the URL-encoded search term.
  SEARCH_URL = "http://clinicaltrials.gov/ct2/results?term=%s&recr=Open&studyxml=true"

  # Institution name => { :search_url => full search URL, :abbrev => short code }.
  # Search terms are pre-encoded: %22 is a double quote, %27 an apostrophe.
  INSTITUTIONS = {
    "Dana-Farber Cancer Institute" => { :search_url => SEARCH_URL % '%22dana+farber%22', :abbrev => 'DFCI' },
    "Harvard Medical School" => { :search_url => SEARCH_URL % '%22harvard+medical+school%22', :abbrev => 'HMS' },
    "Harvard School of Public Health" => { :search_url => SEARCH_URL % '%22harvard+school+of+public+health%22', :abbrev => 'HSPH' },
    # Apostrophe is percent-encoded (%27) to match the Children's Hospital entry.
    "Brigham and Women's Hospital" => { :search_url => SEARCH_URL % '%22brigham+and+women%27s+hospital%22', :abbrev => 'BWH' },
    "Children's Hospital Boston" => { :search_url => SEARCH_URL % '%22children%27s+hospital+boston%22', :abbrev => 'CHB' },
    "Beth-Israel Deaconess Medical Center" => { :search_url => SEARCH_URL % '%22beth+israel+deaconess%22', :abbrev => 'BIDMC' },
    "Massachusetts General Hospital" => { :search_url => SEARCH_URL % '%22massachusetts+general+hospital%22', :abbrev => 'MGH' }
  }

  # Log file path, relative to the directory containing this file.
  LOG_FILE = File.dirname(__FILE__) + '/../log/import_trials.log'

  # Run the import for every institution in INSTITUTIONS.
  #
  # Opens a logger on LOG_FILE for the duration of the run. The logger is
  # closed (and forgotten) in an ensure clause so that an exception raised by
  # any single import does not leak the open log file.
  def self.run
    @logger = Logger.new(LOG_FILE)
    time_script do
      INSTITUTIONS.each do |institution_name, details|
        import(institution_name, details[:search_url])
      end
    end
  ensure
    if @logger
      @logger.close
      @logger = nil
    end
  end

  # Download the results for one search URL and create/update the database records.
  #
  # institution_name - key of INSTITUTIONS; passed through to the trial records.
  # search_string    - the full search URL handed to DownloadTrials.download.
  #
  # Returns whatever create_or_update_trials_from_xml returns.
  def self.import(institution_name, search_string)
    log("\n\n\nImport started on #{Time.now}")
    log("Started download from CT.gov for #{institution_name} with search string #{search_string}")
    DownloadTrials.download(search_string)
    log("Unzipping results")
    DownloadTrials.unzip_trials
    log("Creating trials from xml files")
    ImportTrials.create_or_update_trials_from_xml(DownloadTrials::OUTPUT_DIRECTORY, institution_name)
  end

  # Copy the :abbrev of each INSTITUTIONS entry onto the Institution record
  # with the same name and save it (save! raises if the save fails).
  #
  # NOTE(review): the lookup result is used without a nil check, so a name with
  # no matching Institution row will presumably raise NoMethodError - confirm
  # that every institution is guaranteed to exist before this is called.
  def self.assign_abbreviations
    INSTITUTIONS.each do |institution_name, details|
      institution = Institution.find_by_name(institution_name)
      institution.abbreviation = details[:abbrev]
      institution.save!
    end
  end

  # Find or create a trial for each *.xml file directly inside +directory+.
  #
  # directory   - path searched with Dir.glob (non-recursive).
  # institution - institution name passed to create_or_update_trial.
  def self.create_or_update_trials_from_xml(directory, institution)
    Dir.glob(directory + '/*.xml').each do |file|
      create_or_update_trial(file, institution)
    end
  end

  # Create or update the single trial described by +file+ and associate it
  # with +institution_name+.
  #
  # The trial is looked up by the :nct_id found in the parsed attributes. An
  # existing trial is updated via Trial#update_existing; otherwise a new Trial
  # is built from the attributes.
  #
  # Returns the Trial.
  def self.create_or_update_trial(file, institution_name)
    attributes = Trial.parse_trial_attributes(file)
    trial = Trial.find_by_nct_id(attributes[:nct_id])
    if trial
      trial.update_existing(attributes)
    else
      trial = Trial.new(attributes)
    end

    trial.add_institution(institution_name)

    if trial.new_record?
      # NOTE(review): `save false` presumably skips validations (Rails 2 style) - confirm.
      trial.save false
    else
      # Existing trial: only the institution links are saved here.
      trial.trial_institutions.each(&:save)
    end
    trial
  end

  # Write +message+ to the run's logger at info level.
  # Does nothing in the Rails test environment, or when no logger is open
  # (e.g. import called directly rather than through run).
  def self.log(message)
    return if Rails.env.test? || @logger.nil?
    @logger.info(message)
  end

  # Yield to the given block, then log how many minutes it took.
  # The completion line is only logged when the block returns normally.
  def self.time_script
    start = Time.now
    yield
    log "Completed import in #{(Time.now - start) / 60} minutes"
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers are
  # hidden explicitly.
  private_class_method :log, :time_script
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment