Skip to content

Instantly share code, notes, and snippets.

@jokergoo
Created January 22, 2018 11:34
Show Gist options
  • Save jokergoo/bd4531eca10d58a88376127b74787dc0 to your computer and use it in GitHub Desktop.
Save jokergoo/bd4531eca10d58a88376127b74787dc0 to your computer and use it in GitHub Desktop.
giggle_dir = "/icgc/dkfzlsdf/analysis/B080/guz/giggle_test/giggle"
giggle_data_dir = "/icgc/dkfzlsdf/analysis/hipo/hipo_016/analysis/WGBS_final_cohort/giggle_data"
temp_dir = "/icgc/dkfzlsdf/analysis/hipo/hipo_016/analysis/WGBS_final_cohort/temp"
library(GetoptLong)
build_giggle_index = function(gr_list, name = "anno") {
system(qq("mkdir @{giggle_data_dir}/@{name}"))
# write as bed files
for(nm in names(gr_list)) {
qqcat("writing @{nm}.bed\n")
write.table(as.data.frame(gr_list[[nm]]), file = qq("@{giggle_data_dir}/@{name}/@{nm}.bed"), sep = "\t", quote = FALSE,
row.names = FALSE, col.names = FALSE)
}
qqcat("sort bed files\n")
system(qq("mkdir @{giggle_data_dir}/@{name}_sort"))
system(qq("@{giggle_dir}/scripts/sort_bed \"@{giggle_data_dir}/@{name}/*.bed\" @{giggle_data_dir}/@{name}_sort 4"))
qqcat("build index\n")
system(qq("@{giggle_dir}/bin/giggle index -i \"@{giggle_data_dir}/@{name}_sort/*.gz\" -o @{giggle_data_dir}/@{name}_sort_b -f -s"))
return(qq("@{giggle_data_dir}/@{name}_sort_b"))
}
library(digest)
run_giggle_overlap = function(gr, index) {
hash = digest(gr)
gr = as.data.frame(gr)
gr = gr[order(gr[[1]], gr[[2]], gr[[3]]), 1:3]
write.table(as.data.frame(gr), file = qq("@{temp_dir}/@{hash}.bed"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
system(qq("bgzip -c @{temp_dir}/@{hash}.bed > @{temp_dir}/@{hash}.bed.gz"))
cmd = qq("@{giggle_dir}/bin/giggle search -i @{index} -q @{temp_dir}/@{hash}.bed.gz -s")
con = pipe(cmd)
tb = read.table(con, check.names = FALSE, comment.char = "", header = TRUE, stringsAsFactors = FALSE)
file.remove(qq("@{temp_dir}/@{hash}.bed"))
file.remove(qq("@{temp_dir}/@{hash}.bed.gz"))
tb[[1]] = gsub("\\.bed\\.gz$", "", basename(tb[[1]]))
return(tb)
}
do_giggle_in_batch = function(gr_list, index) {
lt = lapply(gr_list, function(gr) {
run_giggle_overlap(gr, index)
})
nm = rep(names(gr_list), times = sapply(lt, nrow))
tb = do.call(rbind, lt)
tb = cbind(query = nm, tb)
tb
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment