indapa’s gists

indapa / random_permute.py

Created September 26, 2019 20:31

Shuffle an array with Fisher-Yates shuffle

	Shuffle an array with Fisher-Yates shuffle

	"""
	Given input N generate a random permutation of 1 to N.

	Example: N=5 Output: 1 5 2 4 3

	Based on this post https://www.geeksforgeeks.org/generate-a-random-permutation-of-1-to-n/

	"""

indapa / bwt.py

Created June 7, 2016 22:12

bwt.py

	import sys
	import pdb

	def bwt(s):
	""" Return the burrougs wheele transform and suffix array for string s
	based on Fig2 in Li & Durbin 2009 """
	table_rotation=[]
	# Table of rotations of string
	for i in range(len(s)):
	foo=s[i:]

indapa / Makefile

Created April 28, 2014 14:13

Makefile for compiling LASER v1.03 on Mac

	all: laser
	static: laser-static

	G++FLAG = -g
	G++FLAG = -O3

	laser-static: laser.o
	g++ -static -o laser laser.o \
	/usr/local/lib/libgsl.a \
	/Users/indapa/software/lapack-3.5.0/liblapack.a \

indapa / yieldFastqRecord.py

Created April 23, 2013 03:36

Use itertools.groupby to read a fastq file

	def yieldFastqRecord (fh):
	""" a generator that yields a tuple of (fastq_readname, sequence, qualstring)
	adapted from this http://www.biostars.org/p/67246/#67556
	yields a tuple with (header_name,sequence)
	See http://freshfoo.com/blog/itertools_groupby """

	fqiter=(x[1] for x in itertools.groupby(fh, lambda line: line[0] == '@'))
	#fqiter is made of sub-iterators

	#the first sub-iter is the header

indapa / if_else_comprehension.py

Created March 11, 2013 15:08

	pedobjects=[] #list of pedobjects, represents lines in a pedfile
	pedfh=open(options.pedfile, 'r')
	for line in pedfh:
	fields=line.strip().split('\t')
	(fid,iid,pid,mid,sex,phenotype)=fields[0:6]
	phenotype=int(phenotype)
	pedobjects.append( Ped(fid,iid,pid,mid,sex,phenotype) )

	#the phenotype status is set to 2 if the sample is affected: http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
	affecteds=[ pedobj.getid() for pedobj in pedobjects if pedobj.getpheno() == 2 ]

indapa / flatten.py

Created March 8, 2013 21:14

Flatten a list of lists with itertools

	#see http://stackoverflow.com/a/952952/1735942
	import itertools
	genotypes=['GG', 'GT', 'GG']
	alleles= [ list(tuple(g)) for g in genotypes ]
	print alleles
	alleles=list(itertools.chain.from_iterable(alleles))
	print alleles

indapa / gist:5113856

Created March 8, 2013 02:47

Using dtype object and loadtxt command to read file contents into a Numpy array

	#simref.1 653 T TT GT TT
	#simref.1 1216 T CC CC CC
	#simref.1 1440 A AA AG AA
	#simref.1 1470 A CC CC CC
	#simref.1 2070 C AA AA AA
	#simref.1 2489 C GG GG GG
	import numpy as np
	dt = np.dtype( [ ('chrom', 'S8'), ('pos', 'int32'), ('ref','S1'), ('father','S2'), ('mother','S2'), ('child','S2')] )
	pgm_array=np.loadtxt(file,delimiter='\t',dtype=dt)

indapa / aggregate_example.R

Last active December 11, 2015 03:09

aggregate function in R Splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.

	# how to use aggregate function in R
	#say I have a data frame where I keep track how much gas I paid for each time to the pump
	read.csv("gas.csv",header=T)
	head(gas)
	# amount month
	#1 36.21 Jun
	#2 30.22 Jun
	#3 35.42 Jul
	#4 34.12 Jul
	#5 36.09 Aug

indapa / bn_example.py

Created December 17, 2012 20:09

	import numpy as np
	from Factor import *
	from FactorOperations import *
	from PGMcommon import *

	#make Factor objects
	D=Factor( [1], [2], [.6,.4],'Difficulty')
	I=Factor( [5], [2], [.7,.3], 'Intelligence' )
	G=Factor( [2, 5,1], [ 3, 2, 2], [.3,.4,.3,.9,.08,.02,.05,.25,.7,.5,.3,.2], 'Grade', )
	S=Factor( [3,5], [2,2], [.95,.05,.2,.8], 'SAT')

indapa / pileupNamedTuple.py

Created October 23, 2012 16:03

using a Python namedtuple

	import collections
	Pileup = collections.namedtuple('Pileup', ['sample', 'RG', 'alignmentname', 'basecall', 'bq'])
	pileupList=[('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67213', 'C', 45),
	('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67214', 'C', 40
	)]
	pileupList=map(Pileup._make, pileupList)
	for p in pileupList:
	print p.sample, p.basecall, p.bq

Amit Indap indapa