indapa’s gists

indapa / indx.m

Created October 2, 2012 14:53

set elements of vector to a value given a list of indices

	val=randn(1,10); % get 10 random values
	indx=[10 9 8 7 6 5 4 3 2 1] % vector representing indices
	value(indx)=val % set the indices of value to value

indapa / indice.py

Created October 2, 2012 14:59

NumPy way to set elements of array to a value given a list of indices

	import numpy as np

	indices= [ 9,8,7,6,5,4,3,2,1,0]
	values=np.random.rand(10).tolist()
	zeros=np.zeros(10) #get an array of zeros
	zeros[indices]=values # then assign the indices the values

indapa / FactorProductExample.py

Created October 3, 2012 16:00

Factor product example for a graphical model

	#!/usr/bin/env python
	import numpy as np
	from Factor import *
	from FactorOperations import *

	#example from figure 4.3 in Koller and Friedman textbook, 'Probabilistic Graphical Models: Principles and Techniques'
	a1=Factor([ 1, 2], [ 3, 2], [ 0, 0, 0, 0, 0, 0, 0, 0 ] )
	b1=Factor( [2,3], [2,2], [ 0,0] )

	assignmentA=IndexToAssignment( np.arange(0, np.prod( a1.getCard() )), a1.getCard() )

indapa / pileupNamedTuple.py

Created October 23, 2012 16:03

using a Python namedtuple

	import collections
	Pileup = collections.namedtuple('Pileup', ['sample', 'RG', 'alignmentname', 'basecall', 'bq'])
	pileupList=[('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67213', 'C', 45),
	('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67214', 'C', 40
	)]
	pileupList=map(Pileup._make, pileupList)
	for p in pileupList:
	print p.sample, p.basecall, p.bq

indapa / bn_example.py

Created December 17, 2012 20:09

	import numpy as np
	from Factor import *
	from FactorOperations import *
	from PGMcommon import *

	#make Factor objects
	D=Factor( [1], [2], [.6,.4],'Difficulty')
	I=Factor( [5], [2], [.7,.3], 'Intelligence' )
	G=Factor( [2, 5,1], [ 3, 2, 2], [.3,.4,.3,.9,.08,.02,.05,.25,.7,.5,.3,.2], 'Grade', )
	S=Factor( [3,5], [2,2], [.95,.05,.2,.8], 'SAT')

indapa / aggregate_example.R

Last active December 11, 2015 03:09

aggregate function in R Splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.

	# how to use aggregate function in R
	#say I have a data frame where I keep track how much gas I paid for each time to the pump
	read.csv("gas.csv",header=T)
	head(gas)
	# amount month
	#1 36.21 Jun
	#2 30.22 Jun
	#3 35.42 Jul
	#4 34.12 Jul
	#5 36.09 Aug

indapa / gist:5113856

Created March 8, 2013 02:47

Using dtype object and loadtxt command to read file contents into a Numpy array

	#simref.1 653 T TT GT TT
	#simref.1 1216 T CC CC CC
	#simref.1 1440 A AA AG AA
	#simref.1 1470 A CC CC CC
	#simref.1 2070 C AA AA AA
	#simref.1 2489 C GG GG GG
	import numpy as np
	dt = np.dtype( [ ('chrom', 'S8'), ('pos', 'int32'), ('ref','S1'), ('father','S2'), ('mother','S2'), ('child','S2')] )
	pgm_array=np.loadtxt(file,delimiter='\t',dtype=dt)

indapa / flatten.py

Created March 8, 2013 21:14

Flatten a list of lists with itertools

	#see http://stackoverflow.com/a/952952/1735942
	import itertools
	genotypes=['GG', 'GT', 'GG']
	alleles= [ list(tuple(g)) for g in genotypes ]
	print alleles
	alleles=list(itertools.chain.from_iterable(alleles))
	print alleles

indapa / if_else_comprehension.py

Created March 11, 2013 15:08

	pedobjects=[] #list of pedobjects, represents lines in a pedfile
	pedfh=open(options.pedfile, 'r')
	for line in pedfh:
	fields=line.strip().split('\t')
	(fid,iid,pid,mid,sex,phenotype)=fields[0:6]
	phenotype=int(phenotype)
	pedobjects.append( Ped(fid,iid,pid,mid,sex,phenotype) )

	#the phenotype status is set to 2 if the sample is affected: http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
	affecteds=[ pedobj.getid() for pedobj in pedobjects if pedobj.getpheno() == 2 ]

indapa / yieldFastqRecord.py

Created April 23, 2013 03:36

Use itertools.groupby to read a fastq file

	def yieldFastqRecord (fh):
	""" a generator that yields a tuple of (fastq_readname, sequence, qualstring)
	adapted from this http://www.biostars.org/p/67246/#67556
	yields a tuple with (header_name,sequence)
	See http://freshfoo.com/blog/itertools_groupby """

	fqiter=(x[1] for x in itertools.groupby(fh, lambda line: line[0] == '@'))
	#fqiter is made of sub-iterators

	#the first sub-iter is the header

Amit Indap indapa