Last active
July 18, 2019 01:59
-
-
Save nh13/37a829af9d7ae3a9805caa0151f45211 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Builds UCSC-named reference bundles (hg19, hg38) from the corresponding
# NCBI-named source builds (GRCh37.p13, GRCh38.p12) via build_it below.
#
# Layout: run from a directory that is a sibling of both the source and the
# target build directories; inputs are read from ../<src_build>/ and outputs
# are written into ../<target_build>/.
#
# External tools used: java (fgbio and picard jars), samtools, bwa, awk.

# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Command prefix used to launch fgbio. It is kept as a single string and is
# split on whitespace where it is used, so the jar path must not contain
# spaces.
fgbio_cmd="java -Xmx8G -jar /Users/nhomer/git/fg/fgbio/target/scala-2.12/fgbio-0.8.0.jar"
#######################################
# Private helper for build_it: creates every missing output file in the
# current working directory (expected to be the target build directory).
# Each step is skipped when its output file already exists.
# Arguments:
#   $1   - source build name (inputs are read from ../$1/$1.*)
#   $2   - target build name (prefix of the files written here)
#   $3   - RefSeq assembly accession used in the Picard URI= value
#   $4.. - the fgbio launch command, one word per argument
# Returns:
#   0 on success, otherwise the status of the first step that failed.
#######################################
function _build_it_outputs ()
{
    local src_build=$1
    local target_build=$2
    local refseq_accession=$3
    shift 3
    local -a fgbio=("$@")

    local src_prefix="../${src_build}/${src_build}"
    local mapping="${src_build}_to_${target_build}_mapping.txt"
    local alt_names="${target_build}.alt_names.txt"
    local fasta="${target_build}.fasta"
    local ncbi_name="${refseq_accession}_${src_build}"
    local -a pipe_status

    # Contig-name mapping file, consumed by the GFF and FASTA renaming steps.
    if [[ ! -f "${mapping}" ]]; then
        "${fgbio[@]}" CollectAlternateContigNames \
            -i "${src_prefix}.assembly_report.txt" \
            -o "${mapping}" \
            -p RefSeqAccession \
            -a UcscName \
            -s AssembledMolecule UnlocalizedScaffold UnplacedScaffold \
            || return
    fi

    # Alternate-names file handed to Picard via AN= below. awk emits two
    # lines per input line: "<col2> <col1>" and "<col2> <col3>"
    # (presumably UCSC name -> RefSeq accession and UCSC name -> GenBank
    # accession, given the -p/-a options; confirm against fgbio's output).
    if [[ ! -f "${alt_names}" ]]; then
        # The redirect creates its target before fgbio has produced anything,
        # so write to a scratch file and rename only on success. Otherwise a
        # failed run would leave an empty file that later runs would skip.
        "${fgbio[@]}" CollectAlternateContigNames \
            -i "${src_prefix}.assembly_report.txt" \
            -o /dev/stdout \
            -p RefSeqAccession \
            -a UcscName GenBankAccession \
            -s AssembledMolecule UnlocalizedScaffold UnplacedScaffold \
            --single-line \
            | awk '{print $2, $1; print $2, $3;}' \
            > "${alt_names}.tmp"
        # Inspect both stages explicitly so this works without pipefail too.
        pipe_status=("${PIPESTATUS[@]}")
        if (( pipe_status[0] != 0 || pipe_status[1] != 0 )); then
            rm -f -- "${alt_names}.tmp"
            return 1
        fi
        mv -- "${alt_names}.tmp" "${alt_names}" || return
    fi

    # GFF with contig names rewritten through the mapping file.
    if [[ ! -f "${target_build}.gff.gz" ]]; then
        "${fgbio[@]}" UpdateGffContigNames \
            -i "${src_prefix}.gff.gz" \
            -m "${mapping}" \
            -o "${target_build}.gff.gz" \
            --skip-missing \
            || return
    fi

    # FASTA with contig names rewritten through the mapping file.
    if [[ ! -f "${fasta}" ]]; then
        "${fgbio[@]}" UpdateFastaContigNames \
            -i "${src_prefix}.fasta" \
            -a "${mapping}" \
            -o "${fasta}" \
            --skip-missing \
            || return
    fi

    # FASTA index.
    if [[ ! -f "${fasta}.fai" ]]; then
        samtools faidx "${fasta}" || return
    fi

    # Sequence dictionary. The URI is built from the accession that matches
    # the source build, so each build refers to its own NCBI directory.
    if [[ ! -f "${target_build}.dict" ]]; then
        java -Xmx8G -jar /Users/nhomer/git/picard/build/libs/picard.jar \
            CreateSequenceDictionary \
            R="${fasta}" \
            O="${target_build}.dict" \
            AS="${target_build}" \
            URI="ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/${ncbi_name}/${ncbi_name}_genomic.fna.gz" \
            SP="Homo sapiens" \
            AN="${alt_names}" \
            || return
    fi

    # BWA index; the .pac file is used as the marker that indexing was done.
    if [[ ! -f "${fasta}.pac" ]]; then
        bwa index "${fasta}" || return
    fi
}

#######################################
# Build the <target_build> reference files from the <src_build> files.
#
# Changes into ../<target_build>, creates whichever of these are missing,
# then changes back (also when a step fails):
#   <src>_to_<target>_mapping.txt, <target>.alt_names.txt, <target>.gff.gz,
#   <target>.fasta, <target>.fasta.fai, <target>.dict, bwa index files.
# Globals:
#   fgbio_cmd (read) - whitespace-separated command used to launch fgbio
# Arguments:
#   $1 - source build name, e.g. GRCh37.p13
#   $2 - target build name, e.g. hg19
#   $3 - optional RefSeq assembly accession for the Picard URI=. When
#        omitted it is looked up from $1; unknown builds fall back to
#        GCF_000001405.25 with a warning on stderr.
# Outputs:
#   pushd/popd print the directory stack to stdout; diagnostics go to stderr.
# Returns:
#   0 on success, 2 on bad usage, non-zero if any step fails.
#######################################
function build_it ()
{
    if (( $# < 2 )); then
        printf 'usage: build_it <src_build> <target_build> [refseq_accession]\n' >&2
        return 2
    fi

    local src_build=$1
    local target_build=$2
    local refseq_accession=${3:-}

    # The accession version differs per assembly patch. Using
    # GCF_000001405.25 (GRCh37.p13) for GRCh38.p12 yields a URI that does
    # not exist on the NCBI FTP site.
    if [[ -z "${refseq_accession}" ]]; then
        case "${src_build}" in
            GRCh37.p13) refseq_accession=GCF_000001405.25 ;;
            GRCh38.p12) refseq_accession=GCF_000001405.38 ;;
            *)
                refseq_accession=GCF_000001405.25
                printf 'build_it: warning: no known RefSeq accession for %s; using %s\n' \
                    "${src_build}" "${refseq_accession}" >&2
                ;;
        esac
    fi

    # Split the launch command into words without subjecting it to globbing.
    local -a fgbio
    read -r -a fgbio <<< "${fgbio_cmd}"
    if (( ${#fgbio[@]} == 0 )); then
        printf 'build_it: fgbio_cmd is empty\n' >&2
        return 1
    fi

    pushd "../${target_build}" || return

    # Capture the status instead of returning at once so popd always runs.
    local status=0
    _build_it_outputs "${src_build}" "${target_build}" "${refseq_accession}" "${fgbio[@]}" \
        || status=$?

    popd || return
    return "${status}"
}
# Build both UCSC-named bundles from their NCBI-named source builds.
# Each call reads from ../<src_build>/ and writes into ../<target_build>/.
build_it GRCh37.p13 hg19
build_it GRCh38.p12 hg38
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment