indapa’s gists

indapa / FactorProductExample.py

Created October 3, 2012 16:00

Factor product example for a graphical model

	#!/usr/bin/env python
	import numpy as np
	from Factor import *
	from FactorOperations import *

	#example from figure 4.3 in Koller and Friedman textbook, 'Probabilistic Graphical Models: Principles and Techniques'
	a1=Factor([ 1, 2], [ 3, 2], [ 0, 0, 0, 0, 0, 0, 0, 0 ] )
	b1=Factor( [2,3], [2,2], [ 0,0] )

	assignmentA=IndexToAssignment( np.arange(0, np.prod( a1.getCard() )), a1.getCard() )

indapa / indice.py

Created October 2, 2012 14:59

NumPy way to set elements of array to a value given a list of indices

	import numpy as np

	indices= [ 9,8,7,6,5,4,3,2,1,0]
	values=np.random.rand(10).tolist()
	zeros=np.zeros(10) #get an array of zeros
	zeros[indices]=values # then assign the indices the values

indapa / indx.m

Created October 2, 2012 14:53

set elements of vector to a value given a list of indices

	val=randn(1,10); % get 10 random values
	indx=[10 9 8 7 6 5 4 3 2 1] % vector representing indices
	value(indx)=val % set the indices of value to value

indapa / enumerateGenotypes.py

Created July 6, 2012 02:51

Enumerate the possible genotypes for a given ploidy

	from itertools import *

	ploidys=[2,3,4]

	for p in ploidys:
	l=[ combo for combo in combinations_with_replacement(['A','C','G','T'],p) ]
	for g in l:
	print "".join( list(g) ), "ploidy: ", p
	print

indapa / ecdf.R

Created June 14, 2012 15:31

Plot empirical CDF function in R

	#plot the empirical CDF

	x=rnorm(100) # 100 random draws from standard normal
	f=ecdf(x) # ecdf returns a function
	percentiles=f(x) # return the percentiles of the data points
	plot(percentiles, x)

	#see ?ecdf for more information

indapa / fastq_summary.py

Created June 3, 2012 21:22

get the mean base composition for a fastq file after running fastq_stats.py from Galaxy

indapa / generator_while.py

Created May 3, 2012 17:06

Python generators and itertools.izip

	"""http://docs.python.org/library/itertools.html#itertools.izip"""
	#thanks Brent Pedersen


	vcf_gen1=vcfobj1.yieldVcfRecordwithGenotypes(vcfh1)
	vcf_gen2=vcfobj1.yieldVcfRecordwithGenotypes(vcfh2)


	for vrec1, vrec2 in itertools.izip(vcf_gen1, vcf_gen2):
	print vrec1.toStringwithGenotypes()

indapa / student_reasoning.py

Created March 17, 2012 17:51

CPD computations for Bayesian network

	#I'm taking the STanford on line course Probabilistic Graphical Models <pgm-class.org>
	# They have a simple example of a Bayesian network for a Student letter of reference
	# the code below was originally posted by a fellow classmate taking the class
	# it really helped me in understanding how to do computations on conditional probability tables (CPDs) that
	# form the heart of Bayesian networks

	from numpy import *
	P_I = array([
	[0.7,0.3]
	])

indapa / reshape.R

Created March 9, 2012 19:31

Using the reshape module on R

	library(reshape)

	#suppose I read a file with the header lines of sample and 13 target ids
	# the values represent median target coverage from an exome capture experiment
	median <- read.table("median.txt", header=T)
	> median
	sample t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12 t13
	1 s1 10 34 31 66 34 61 40 46 117 29 55 96 33
	2 s2 20 29 25 36 27 53 34 26 89 28 40 70 26
	3 s3 8 31 31 66 33 57 31 36 93 36 40 74 27

indapa / ploidy.py

Created March 4, 2012 03:26

Using itertools to count the number of possible genotypes given a certain ploidy

	#!/usr/bin/env python
	from itertools import *




	#print the number of possible genotypes with various ploidies
	ploidys=[2,3,4,5,6,7,8,9,10]
	for p in ploidys:
	print p, len([combo for combo in combinations_with_replacement(['A','C','G','T'],p) ])

Amit Indap indapa