Skip to content

Instantly share code, notes, and snippets.

@cknoxrun
Created May 4, 2012 16:55
Show Gist options
  • Save cknoxrun/2596201 to your computer and use it in GitHub Desktop.
Save cknoxrun/2596201 to your computer and use it in GitHub Desktop.
require 'open3'
require 'zip/zip'
# Interface to the mutfinder library
class ReadAnalysis
  # Main entry point for analysis. Just pass an analysis ID
  # (an object is not passed because this doesn't work with delayed_job).
  #
  # Dispatches on the uploaded file's content type: 'application/zip' is
  # handled as a ZIP of fasta files, anything else as an SFF file. Any
  # exception raised along the way is recorded as the analysis' only error
  # message, and the analysis is then marked finished at 100%.
  #
  # If the record itself cannot be loaded, the lookup error is re-raised:
  # there is no record on which the message could be stored.
  def analyze(analysis_id)
    analysis = nil
    begin
      analysis = Analysis.find(analysis_id)
      analysis.update_attributes :status => Analysis::PROCESSING_STATUS
      errors =
        if analysis.query_file.content_type == 'application/zip'
          # This is a ZIP of fasta files
          analyze_zip(analysis)
        else
          # This is an SFF file
          analyze_sff(analysis)
        end
    rescue => e
      # Just in case! (but only when there is a record to report on)
      raise if analysis.nil?
      errors = [e.message]
    end
    analysis.update_attributes :percent_complete => 100, :analysis_errors => errors, :finished_at => Time.now, :status => Analysis::FINISHED_STATUS
  end

  # Run the script to generate the MID config files for a MultiplexDecoder.
  #
  # Every line the script prints on stdout is taken as the path of a generated
  # file; each one is attached to a new mid_config of the decoder and then
  # removed from disk. Raises a RuntimeError carrying the script's stderr
  # output when the script writes anything to stderr.
  def generate_mid_configs(decoder_id)
    md = MultiplexDecoder.find(decoder_id)
    decoding_path = md.decoding.path
    outdir = File.dirname(decoding_path)

    # Run the MID Extractor code
    res, err = run_script('generate_mid_config.pl', "-dec=#{decoding_path}", "-out=#{outdir}")
    raise(err) unless err.empty?

    res.split("\n").each do |file|
      with_extracted_file(file) do |handle|
        mid_config = md.mid_configs.new(:name => File.basename(file))
        mid_config.mcf = handle
        mid_config.save!
      end
    end
  end

  private

  # Directory holding the mutfinder scripts. Looked up on demand so that
  # Rails.root is not needed while this class is being loaded.
  def nml_dir
    File.join(Rails.root, 'lib', 'mutfinder')
  end

  # Directory in which run_query.pl is expected to leave its result files
  # (keeps a trailing slash; result paths are built by plain concatenation).
  def result_dir
    File.join(nml_dir, '/var/results/')
  end

  # Runs script/<name> from the mutfinder directory with the given arguments
  # and returns [stdout, stderr].
  #
  # The arguments are passed as a vector, so no shell is involved and paths
  # containing spaces or shell metacharacters are safe. The working directory
  # is changed for the child process only, not for this process.
  def run_script(name, *args)
    out, err, _status = Open3.capture3(File.join(nml_dir, 'script', name), *args, :chdir => nml_dir)
    [out, err]
  end

  # Opens +path+ for reading, yields the open handle, and returns the block's
  # value. The handle is closed and the file deleted afterwards (so we don't
  # keep two copies), even when the block raises. A relative path is resolved
  # against the mutfinder directory, which is where the scripts are run.
  def with_extracted_file(path)
    full_path = File.expand_path(path, nml_dir)
    begin
      File.open(full_path) { |handle| yield handle }
    ensure
      File.delete(full_path) if File.exist?(full_path)
    end
  end

  # Analysis of SFF files. The file is extracted by script/extract_sff.pl,
  # and then each fasta file it lists on stdout is analyzed.
  #
  # Returns an array of error messages (empty when nothing went wrong).
  def analyze_sff(analysis)
    errors = []
    sff_task = 10 # Percent of task bar taken up by running sff extraction
    begin
      analysis.update_attributes :status => 'Extracting SFF'

      # Run the SFF Extractor code
      res, err = run_script('extract_sff.pl', analysis.query_file.path)

      if err.empty?
        patients = res.split("\n").map do |file|
          with_extracted_file(file) do |handle|
            patient = analysis.patients.new(:code => File.basename(file))
            patient.fasta = handle
            patient.save!
            patient
          end
        end
        # sff_task% complete after extraction (this is pretty arbitrary)
        analysis.update_attributes :percent_complete => sff_task, :status => 'Finished Extracting SFF'
        run_patients(patients, sff_task)
      else
        errors << err
      end
    rescue => e
      errors << e.message
    end
    errors
  end

  # Given an array of patient objects, run each one through analysis.
  #
  # +complete+ is the percentage already reached before the first patient
  # runs; the remaining share of the bar is split evenly between patients.
  def run_patients(patients, complete)
    num_tasks = patients.length
    # Fixed for the whole loop so that progress advances linearly.
    remaining = 100 - complete
    patients.each_with_index do |patient, index|
      finished = index + 1
      patient.analysis.update_attributes :status => "Running fasta file #{finished} out of #{num_tasks}."
      analyze_fasta(patient)
      # Update progress
      percent = (finished.to_f / num_tasks.to_f) * remaining + complete
      patient.analysis.update_attributes :percent_complete => percent, :status => "Finished #{finished} out of #{num_tasks} fasta files."
    end
  end

  # Analysis of ZIP files. Each fasta entry is extracted from the archive,
  # attached to a new patient, and then every patient is analyzed.
  #
  # Returns an array of error messages (empty when nothing went wrong).
  def analyze_zip(analysis)
    errors = []
    zip_task = 10 # Percent of task bar taken up by unzipping
    begin
      analysis.update_attributes :status => 'Extracting ZIP'
      archive_path = analysis.query_file.path
      extract_dir = File.dirname(archive_path)
      patients = []
      Zip::ZipFile.open(archive_path) do |zip_file|
        zip_file.each do |entry|
          next if entry.name =~ /\.DS_Store|__MACOSX|(^|\/)\._/ # Ignore junk files
          next unless entry.name =~ /.*\.fas/

          # Only the entry's base name is used for the target, so an entry
          # such as "../x.fas" cannot be written outside the upload directory
          # and entries inside a folder land directly next to the archive.
          target = File.join(extract_dir, File.basename(entry.name))
          zip_file.extract(entry, target) unless File.exist?(target)
          with_extracted_file(target) do |handle|
            patient = analysis.patients.new(:code => entry.name)
            patient.fasta = handle
            patient.save!
            patients << patient
          end
        end
      end
      # zip_task% complete after unzipping (this is pretty arbitrary)
      analysis.update_attributes :percent_complete => zip_task, :status => 'Finished Extracting ZIP'
      run_patients(patients, zip_task)
    rescue => e
      errors << e.message
    end
    errors
  end

  # Runs an analysis given a Patient that has a fasta file.
  #
  # Runs script/run_query.pl on the patient's fasta file against the
  # analysis' reference file, then reads the mutation and alignment files
  # from the result directory. Anything the script prints (stdout or stderr)
  # is recorded as an error message. The outcome is stored in the patient's
  # :results attribute as a hash with :mutations, :alignment and :errors, or
  # with :errors alone when an exception occurred. Returns the patient.
  def analyze_fasta(patient)
    errors = []
    begin
      querypath = patient.fasta.path
      referencepath = patient.analysis.reference_sequence.reference_file.path
      queryname = File.basename(querypath, '.fas')

      # Run the Read Analysis code (stdout and stderr captured together)
      output, _status = Open3.capture2e(File.join(nml_dir, 'script', 'run_query.pl'), querypath, "-ref=#{referencepath}", :chdir => nml_dir)

      # Save any error messages from the system call
      errors << output unless output.empty?

      # Process files
      mutation_path = "#{result_dir}#{queryname}_mutations.txt"
      align_path = "#{result_dir}#{queryname}_align.fas"

      # Error checking: a missing or empty file counts as not generated
      errors << 'Mutation file not generated.' unless FileTest.size?(mutation_path)
      errors << 'Align file not generated.' unless FileTest.size?(align_path)

      # Read the result files; a missing file raises and is handled below
      results = { :mutations => File.read(mutation_path).split(/\n/),
                  :alignment => File.read(align_path),
                  :errors => errors }
    rescue => e
      # Save the exception message
      errors << e.message
      results = { :errors => errors }
    end

    # Save results to the patient object
    patient.update_attribute(:results, results)
    patient.process_results!
    patient
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment