Skip to content

Instantly share code, notes, and snippets.

@nh13
Last active July 18, 2019 01:59
Show Gist options
  • Save nh13/37a829af9d7ae3a9805caa0151f45211 to your computer and use it in GitHub Desktop.
Save nh13/37a829af9d7ae3a9805caa0151f45211 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Builds UCSC-named reference files (contig-name mappings, GFF, FASTA,
# sequence dictionary and aligner indexes) from NCBI-named source assemblies.
#
# The script expects to be run from a directory that is a sibling of the
# source and target build directories (it refers to them as ../<build>).
#
# Environment:
#   FGBIO_CMD - optional; full command used to launch fgbio. It is split on
#               whitespace when used, so it must not contain paths with spaces.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Launcher for fgbio. Defaults to the original local build of fgbio 0.8.0;
# export FGBIO_CMD to point at a different java/jar without editing this file.
fgbio_cmd="${FGBIO_CMD:-java -Xmx8G -jar /Users/nhomer/git/fg/fgbio/target/scala-2.12/fgbio-0.8.0.jar}"
#######################################
# Runs every build step for one reference inside the current directory, which
# must already be the target build directory. Private helper of build_it.
#
# Each step is skipped when its output file already exists. A step that fails
# removes the file its guard checks, so a re-run repeats the step instead of
# mistaking a partial file for a finished one.
#
# Globals:
#   fgbio_cmd (read) - fgbio launch command; split on whitespace.
# Arguments:
#   $1 - source build name; inputs are read from ../$1/$1.*
#   $2 - target build name; outputs are named $2.* in the current directory
#   $3 - URI recorded in the sequence dictionary
# Returns:
#   0 on success, 1 as soon as any step fails.
#######################################
_build_it_steps() {
  local src_build=$1
  local target_build=$2
  local uri=$3

  local src_prefix="../${src_build}/${src_build}"
  local mapping="${src_build}_to_${target_build}_mapping.txt"
  local alt_names="${target_build}.alt_names.txt"
  local gff="${target_build}.gff.gz"
  local fasta="${target_build}.fasta"
  local dict="${target_build}.dict"

  # Split the launcher string into words once, so every later expansion can be
  # quoted without losing the "java -Xmx8G -jar <jar>" word boundaries.
  local -a fgbio
  read -r -a fgbio <<< "${fgbio_cmd}"

  # RefSeq accession -> UCSC name mapping used to rename GFF/FASTA contigs.
  if [[ ! -f "${mapping}" ]]; then
    "${fgbio[@]}" CollectAlternateContigNames \
      -i "${src_prefix}.assembly_report.txt" \
      -o "${mapping}" \
      -p RefSeqAccession \
      -a UcscName \
      -s AssembledMolecule UnlocalizedScaffold UnplacedScaffold \
      || { rm -f -- "${mapping}"; return 1; }
  fi

  # Alternate-names table passed to CreateSequenceDictionary via AN= below.
  # awk emits, per input line, field 2 paired with field 1 and with field 3.
  if [[ ! -f "${alt_names}" ]]; then
    # Stage into a temporary name: the redirection creates its target before
    # the pipeline runs, so writing the final name directly would satisfy the
    # guard above even when fgbio fails. Detecting an fgbio failure here
    # relies on `pipefail`, which this script enables.
    "${fgbio[@]}" CollectAlternateContigNames \
      -i "${src_prefix}.assembly_report.txt" \
      -o /dev/stdout \
      -p RefSeqAccession \
      -a UcscName GenBankAccession \
      -s AssembledMolecule UnlocalizedScaffold UnplacedScaffold \
      --single-line \
      | awk '{print $2, $1; print $2, $3;}' \
      > "${alt_names}.tmp" \
      || { rm -f -- "${alt_names}.tmp"; return 1; }
    mv -f -- "${alt_names}.tmp" "${alt_names}" || return 1
  fi

  if [[ ! -f "${gff}" ]]; then
    "${fgbio[@]}" UpdateGffContigNames \
      -i "${src_prefix}.gff.gz" \
      -m "${mapping}" \
      -o "${gff}" \
      --skip-missing \
      || { rm -f -- "${gff}"; return 1; }
  fi

  if [[ ! -f "${fasta}" ]]; then
    "${fgbio[@]}" UpdateFastaContigNames \
      -i "${src_prefix}.fasta" \
      -a "${mapping}" \
      -o "${fasta}" \
      --skip-missing \
      || { rm -f -- "${fasta}"; return 1; }
  fi

  if [[ ! -f "${fasta}.fai" ]]; then
    samtools faidx "${fasta}" \
      || { rm -f -- "${fasta}.fai"; return 1; }
  fi

  if [[ ! -f "${dict}" ]]; then
    java -Xmx8G -jar /Users/nhomer/git/picard/build/libs/picard.jar \
      CreateSequenceDictionary \
      R="${fasta}" \
      O="${dict}" \
      AS="${target_build}" \
      URI="${uri}" \
      SP="Homo sapiens" \
      AN="${alt_names}" \
      || { rm -f -- "${dict}"; return 1; }
  fi

  # The .pac file is the marker this script uses for "bwa index already ran";
  # drop it on failure so the index is rebuilt next time.
  if [[ ! -f "${fasta}.pac" ]]; then
    bwa index "${fasta}" \
      || { rm -f -- "${fasta}.pac"; return 1; }
  fi
}

#######################################
# Builds the reference files for one target build from one source build:
# contig-name mapping, alternate-names table, renamed GFF and FASTA, FASTA
# index, sequence dictionary and bwa index.
#
# Must be called from a directory that has ../<source build> and
# ../<target build> as siblings. The working directory is restored before
# returning, on success and on failure.
#
# Globals:
#   fgbio_cmd (read) - fgbio launch command; split on whitespace.
# Arguments:
#   $1 - source build name (e.g. GRCh37.p13)
#   $2 - target build name (e.g. hg19)
#   $3 - optional RefSeq assembly accession used in the dictionary URI
#        (e.g. GCF_000001405.38). When omitted it is looked up for the builds
#        this script knows about.
# Outputs:
#   pushd/popd print the directory stack to stdout; errors go to stderr.
# Returns:
#   0 on success, non-zero if the accession is unknown, the target directory
#   cannot be entered, or any build step fails.
#######################################
function build_it ()
{
  local src_build=$1
  local target_build=$2
  local accession=${3:-}

  # The accession is assembly-specific: .25 is GRCh37.p13 and .38 is
  # GRCh38.p12. Using .25 for every build yields a URI that does not exist.
  if [[ -z "${accession}" ]]; then
    case "${src_build}" in
      GRCh37.p13) accession=GCF_000001405.25 ;;
      GRCh38.p12) accession=GCF_000001405.38 ;;
      *)
        printf 'build_it: no RefSeq accession known for %s; pass it as the third argument\n' \
          "${src_build}" >&2
        return 1
        ;;
    esac
  fi

  local assembly="${accession}_${src_build}"
  local uri="ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/${assembly}/${assembly}_genomic.fna.gz"

  pushd "../${target_build}" || return 1

  # Capture the status instead of returning directly so that popd always runs.
  local status=0
  _build_it_steps "${src_build}" "${target_build}" "${uri}" || status=$?

  popd || return 1
  return "${status}"
}
# Build each UCSC-named reference from its NCBI source assembly.
# Each entry is "<source build> <target build>", processed in order.
for build_pair in "GRCh37.p13 hg19" "GRCh38.p12 hg38"; do
  build_it "${build_pair% *}" "${build_pair#* }"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment