cth’s gists

cth / geno_ad_remove.jl

Created February 22, 2016 09:01

	#!/home/fng514/bin/julia
	#$ -S /home/fng514/bin/julia
	#$ -cwd

	# Christian Theil Have, 2016.

	using HypothesisTests
	using StatsBase

	nucleotides = Set{AbstractString}(["A","G","C","T"])

cth / geno_dp_gq.jl

Created February 22, 2016 09:01

	#!/home/fng514/bin/julia
	#$ -S /home/fng514/bin/julia
	#$ -cwd

	# Christian Theil Have, 2016.

	#import GZip

	function process_vcf_line(outfile,line, dpmin, gqmin)
	fields = split(line)

cth / split_multiallelic.jl

Created February 22, 2016 09:00

	#!/home/fng514/bin/julia
	#$ -S /home/fng514/bin/julia
	#$ -cwd
	#
	# Split multi allelic sites:
	# 1. Split multi-allelic sites into multiple VCF lines, s.t., a) each line has one unique alternative allele, and b) if person has an other alternative allele than the one specified for the line in question, then that particular genotype is coded as missing in that line.
	# 2. Run bi-allelic hardy weinberg test for each of these lines.
	#
	# Christian Theil Have, 2016.

cth / split_multiallelic.jl

Created February 22, 2016 09:00

	#!/home/fng514/bin/julia
	#$ -S /home/fng514/bin/julia
	#$ -cwd
	#
	# Split multi allelic sites:
	# 1. Split multi-allelic sites into multiple VCF lines, s.t., a) each line has one unique alternative allele, and b) if person has an other alternative allele than the one specified for the line in question, then that particular genotype is coded as missing in that line.
	# 2. Run bi-allelic hardy weinberg test for each of these lines.
	#
	# Christian Theil Have, 2016.

cth / gq_dp_vs_tstv.jl

Created January 22, 2016 13:37

	#!/home/fng514/bin/julia
	#$ -S /home/fng514/bin/julia
	#$ -cwd
	#$ -pe smp 20

	# Julia script to read a VCF file and calculate tstv by stepwise GQ and DP tresholds
	# Christian Theil Have, 2016.

	import GZip

cth / variant_quality_stats.jl

Created January 19, 2016 14:06

	#!/home/fng514/bin/julia
	#$ -S /home/fng514/bin/julia
	#$ -cwd
	#$ -l pe smp 20
	# Julia script to to read VCF file and calculate quality and depth statistics (in parallel)
	# Christian Theil Have, 2016.

	import GZip

	println(string("Reading ",ARGS[1]))

cth / yuvaraj_info.rb

Created January 13, 2016 14:35

	#!/home/fng514/bin/ruby
	#$ -S /home/fng514/bin/ruby
	#$ -cwd

	# Script to calculate INFO (approx R-squared) and Allele frequency from a VCF file (with dosages).
	# Like the ones produced with https://github.com/cth/vcf-misc-tools
	# Christian Theil Have, 2016.
	# Use as you see fit, but no warranties.
	# Usage:
	# qsub yuvaraj_info.rb my.vcf my.info

cth / README

Last active May 12, 2016 10:36

	strandflip.R version 0.2
	By Christian Theil Have
	with inspiration from SAS script by Aldi Kraja

	DISCLAIMER: NO WARRANTIES WHATSOEVER ARE PROVIDED WITH THIS
	PROGRAM. YOU RUN IT AT YOUR OWN RISK.
	YOU CAN DISTRIBUTE this utility pgm in other collaborations

	purpose: flip dosage if the alleles in original data are
	different when compared to 1000G set of nucmtDNA

cth / power.R

Created November 26, 2015 23:42

	## Very simple power calculation
	## assuming a true odds ratio of 3.1

	pick_eater_obese <- function(n) table(runif(n) < 109/(330+109))
	pick_eater_control <- function(n) table(runif(n) < 37/(37+38))
	p.values <- replicate(1000, fisher.test( matrix(c(pick_eater_obese(50),pick_eater_control(50)),2) )$p )
	significance <- c(5e-1, 5e-2, 5e-3, 5e-4, 5e-5)
	powers <- data.frame(significance, power = sapply(significance,function(x) mean(p.values < x)))

cth / power.R

Created November 26, 2015 23:42

	## Very simple power calculation
	## assuming a true odds ratio of 3.1

	pick_eater_obese <- function(n) table(runif(n) < 109/(330+109))
	pick_eater_control <- function(n) table(runif(n) < 37/(37+38))

	#controls <- data.frame(pheno = rep(1000,"case"), runif(

	p.values <- replicate(1000, fisher.test( matrix(c(pick_eater_obese(50),pick_eater_control(50)),2) )$p )
	significance <- c(5e-1, 5e-2, 5e-3, 5e-4, 5e-5)

Christian Theil Have cth