Skip to content

Instantly share code, notes, and snippets.

@epaule
Last active January 24, 2022 13:46
Show Gist options
  • Save epaule/48abd7a4083dacbc95af5fbb012dae2d to your computer and use it in GitHub Desktop.
Save epaule/48abd7a4083dacbc95af5fbb012dae2d to your computer and use it in GitHub Desktop.
create the new files for a rc release
#!/usr/bin/env ruby
require "fileutils"
# Prepares the next curated release directory for a single assembly.
#
# Paths read and written, all relative to the +dir+ given to the constructor:
#   assembly/curated/<id>.<N>               existing curated releases
#   assembly/curated/<id>.<N+1>             new release directory (created)
#   assembly/draft/<id>.*/*.contamination   decontamination reports
#
# The methods share state through instance variables and are meant to be
# called in this order: get_dir, make_new_dir, parse_decon, strip_fasta
# (move_files additionally needs get_dir and make_new_dir to have run).
class Assembly
  # External program used by strip_fasta to drop sequences from a FASTA file.
  FASTAREMOVE = "/software/grit/bin/fastaremove"

  # id::  assembly identifier used as the prefix of directory and file names
  # dir:: base directory containing the "assembly" tree
  def initialize(id, dir)
    @id = id
    @dir = dir
  end

  # Locates the most recently modified curated directory for this assembly,
  # derives the old and new version numbers from its name and finds the
  # primary (and, when present, haplotigs) FASTA inside it.
  #
  # Raises RuntimeError when no curated directory or no primary FASTA exists.
  def get_dir
    candidates = Dir[@dir + "/assembly/curated/#{@id}.*"].select { |d| File.directory?(d) }
    raise "no curated directory found for #{@id} below #{@dir}" if candidates.empty?

    @latest_dir = candidates.max_by { |d| File.mtime(d) }
    # The current version is the text after the last dot of the directory name.
    @version = File.extname(@latest_dir).to_s[1..-1].to_i + 1
    @oldversion = @version - 1

    # Input files are accepted with either "_" or "." between id and version.
    @in_primary = Dir.glob([
      @latest_dir + "/#{@id}_#{@oldversion}*primary.fa",
      @latest_dir + "/#{@id}_#{@oldversion}*primary*.fa",
      @latest_dir + "/#{@id}.#{@oldversion}*primary.fa"
    ])[0].to_s
    @in_alt = Dir.glob([
      @latest_dir + "/#{@id}_#{@oldversion}*haplotigs.fa",
      @latest_dir + "/#{@id}.#{@oldversion}*haplotigs.fa"
    ])[0].to_s

    # A failed glob yields "", which never exists on disk.
    raise "primary input file is missing" unless File.exist?(@in_primary)
  end

  # Creates the directory for the new version and works out the output FASTA
  # paths inside it. The haplotigs output path is only set when a haplotigs
  # input file was found by get_dir.
  def make_new_dir
    @new_dir = @dir + "/assembly/curated/" + @id + "." + @version.to_s
    FileUtils.mkpath(@new_dir)
    puts "created #{@new_dir}"

    @out_alt = @new_dir + "/" + bump_version(File.basename(@in_alt)) if File.exist?(@in_alt)
    @out_primary = @new_dir + "/" + bump_version(File.basename(@in_primary))
  end

  # Collects the sequence ids flagged with "REMOVE <id>" in every
  # *.contamination report of the draft directories.
  #
  # Returns the de-duplicated list of ids (also kept for strip_fasta).
  def parse_decon
    reports = Dir[@dir + "/assembly/draft/#{@id}.*/*.contamination"]
    @blacklist = reports.flat_map do |report|
      # File.foreach closes the file once the enumeration is finished.
      File.foreach(report).map { |line| line[/REMOVE\s+(\S+)/, 1] }.compact
    end.uniq
  end

  # Writes the ids to remove (the parse_decon list plus "scaffold_MT") to
  # <new dir>/<id>_<version>.decon_ids and runs fastaremove on the primary
  # and, when present, the haplotigs FASTA, writing into the new directory.
  #
  # Raises RuntimeError when an output path equals its input path or when
  # fastaremove cannot be run successfully.
  def strip_fasta
    ids_path = @new_dir + "/" + @id + "_" + @version.to_s + ".decon_ids"
    File.open(ids_path, "w") do |f|
      @blacklist.each { |e| f.puts(e) }
      f.puts "scaffold_MT"
    end

    if @in_primary == @out_primary || @in_alt == @out_alt
      raise "the target directory is the same"
    end

    jobs = [[@in_primary, @out_primary]]
    jobs << [@in_alt, @out_alt] if File.exist?(@in_alt)

    jobs.each do |input, output|
      # Arguments are passed as a list and stdout is redirected by Ruby, so
      # no shell is involved and paths need no quoting.
      argv = [FASTAREMOVE, "-f", input, "-r", ids_path]
      next if system(*argv, out: output)

      raise "cannot execute: #{argv.join(' ')} > #{output}"
    end
  end

  # Copies (despite the name, nothing is moved) every regular file of the
  # latest curated directory into the new directory, replacing the first
  # "<id>_<old version>" in the file name with "<id>_<new version>". Files
  # without that tag keep their name.
  def move_files
    old_tag = @id + "_" + (@version - 1).to_s
    new_tag = @id + "_" + @version.to_s

    Dir[@latest_dir + "/*"].each do |src|
      next unless File.file?(src)

      dest = @new_dir + "/" + File.basename(src).sub(old_tag, new_tag)
      FileUtils.cp(src, dest)
    end
  end

  private

  # Rewrites "<id>.<old version>" to "<id>.<new version>" in a file name.
  # Only the dotted form is rewritten; names using "<id>_<old version>" are
  # returned unchanged.
  def bump_version(name)
    name.sub("#{@id}.#{@oldversion}", "#{@id}.#{@version}")
  end
end
# Driver: reads whitespace-separated "<id> <path>" pairs, one per line, from
# the file named by the first command-line argument and prepares the next
# curated release for each of them. Lines containing a "#" are skipped.
# Example line:
# ilApoCrat1 /lustre/scratch116/tol/projects/darwin/data/insects/Aporia_crataegi
abort "usage: #{$PROGRAM_NAME} <file with id/path pairs>" if ARGV.empty?

# File.foreach closes the input file when the iteration ends.
File.foreach(ARGV[0]) do |l|
  next if l.include? "#"

  # Non-destructive split: String#chomp! returns nil when the line has no
  # trailing newline (typically the last line), which used to crash here.
  id, path = l.split
  next if id.nil? # blank line

  if path.nil?
    warn "skipping line without a path: #{l.chomp}"
    next
  end

  puts "processing #{l.chomp}"
  a = Assembly.new(id, path)
  a.get_dir
  a.make_new_dir
  a.parse_decon
  a.strip_fasta
end
ilApoCrat1 /lustre/scratch116/tol/projects/darwin/data/insects/Aporia_crataegi
iySelTumu1 /lustre/scratch116/tol/projects/darwin/data/insects/Seladonia_tumulorum
iySphMoni1 /lustre/scratch116/tol/projects/darwin/data/insects/Sphecodes_monilicornis
iyTenNoth1 /lustre/scratch116/tol/projects/darwin/data/insects/Tenthredo_notha
drMalSylv7 /lustre/scratch116/tol/projects/darwin/data/dicots/Malus_sylvestris
idBelPand1 /lustre/scratch116/tol/projects/darwin/data/insects/Bellardia_pandia
idGymRotn1 /lustre/scratch116/tol/projects/darwin/data/insects/Gymnosoma_rotundatum
drMalDome5 /lustre/scratch116/tol/projects/darwin/data/dicots/Malus_domestica
drMalDome58 /lustre/scratch116/tol/projects/darwin/data/dicots/Malus_domestica
ilEmmMono1 /lustre/scratch116/tol/projects/darwin/data/insects/Emmelina_monodactyla
iiLimLuna2 /lustre/scratch116/tol/projects/darwin/data/insects/Limnephilus_lunatus
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment