Skip to content

Instantly share code, notes, and snippets.

View indapa's full-sized avatar

Amit Indap indapa

View GitHub Profile
@indapa
indapa / random_permute.py
Created September 26, 2019 20:31
Shuffle an array with Fisher-Yates shuffle
Shuffle an array with Fisher-Yates shuffle
"""
Given input N generate a random permutation of 1 to N.
Example: N=5 Output: 1 5 2 4 3
Based on this post https://www.geeksforgeeks.org/generate-a-random-permutation-of-1-to-n/
"""
@indapa
indapa / bwt.py
Created June 7, 2016 22:12
bwt.py
import sys
import pdb
def bwt(s):
""" Return the burrougs wheele transform and suffix array for string s
based on Fig2 in Li & Durbin 2009 """
table_rotation=[]
# Table of rotations of string
for i in range(len(s)):
foo=s[i:]
@indapa
indapa / Makefile
Created April 28, 2014 14:13
Makefile for compiling LASER v1.03 on Mac
all: laser
static: laser-static
G++FLAG = -g
G++FLAG = -O3
laser-static: laser.o
g++ -static -o laser laser.o \
/usr/local/lib/libgsl.a \
/Users/indapa/software/lapack-3.5.0/liblapack.a \
@indapa
indapa / yieldFastqRecord.py
Created April 23, 2013 03:36
Use itertools.groupby to read a fastq file
def yieldFastqRecord (fh):
""" a generator that yields a tuple of (fastq_readname, sequence, qualstring)
adapted from this http://www.biostars.org/p/67246/#67556
yields a tuple with (header_name,sequence)
See http://freshfoo.com/blog/itertools_groupby """
fqiter=(x[1] for x in itertools.groupby(fh, lambda line: line[0] == '@'))
#fqiter is made of sub-iterators
#the first sub-iter is the header
pedobjects=[] #list of pedobjects, represents lines in a pedfile
pedfh=open(options.pedfile, 'r')
for line in pedfh:
fields=line.strip().split('\t')
(fid,iid,pid,mid,sex,phenotype)=fields[0:6]
phenotype=int(phenotype)
pedobjects.append( Ped(fid,iid,pid,mid,sex,phenotype) )
#the phenotype status is set to 2 if the sample is affected: http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
affecteds=[ pedobj.getid() for pedobj in pedobjects if pedobj.getpheno() == 2 ]
@indapa
indapa / flatten.py
Created March 8, 2013 21:14
Flatten a list of lists with itertools
#see http://stackoverflow.com/a/952952/1735942
import itertools
genotypes=['GG', 'GT', 'GG']
alleles= [ list(tuple(g)) for g in genotypes ]
print alleles
alleles=list(itertools.chain.from_iterable(alleles))
print alleles
@indapa
indapa / gist:5113856
Created March 8, 2013 02:47
Using dtype object and loadtxt command to read file contents into a Numpy array
#simref.1 653 T TT GT TT
#simref.1 1216 T CC CC CC
#simref.1 1440 A AA AG AA
#simref.1 1470 A CC CC CC
#simref.1 2070 C AA AA AA
#simref.1 2489 C GG GG GG
import numpy as np
dt = np.dtype( [ ('chrom', 'S8'), ('pos', 'int32'), ('ref','S1'), ('father','S2'), ('mother','S2'), ('child','S2')] )
pgm_array=np.loadtxt(file,delimiter='\t',dtype=dt)
@indapa
indapa / aggregate_example.R
Last active December 11, 2015 03:09
aggregate function in R Splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.
# how to use aggregate function in R
#say I have a data frame where I keep track how much gas I paid for each time to the pump
read.csv("gas.csv",header=T)
head(gas)
# amount month
#1 36.21 Jun
#2 30.22 Jun
#3 35.42 Jul
#4 34.12 Jul
#5 36.09 Aug
import numpy as np
from Factor import *
from FactorOperations import *
from PGMcommon import *
#make Factor objects
D=Factor( [1], [2], [.6,.4],'Difficulty')
I=Factor( [5], [2], [.7,.3], 'Intelligence' )
G=Factor( [2, 5,1], [ 3, 2, 2], [.3,.4,.3,.9,.08,.02,.05,.25,.7,.5,.3,.2], 'Grade', )
S=Factor( [3,5], [2,2], [.95,.05,.2,.8], 'SAT')
@indapa
indapa / pileupNamedTuple.py
Created October 23, 2012 16:03
using a Python namedtuple
import collections
Pileup = collections.namedtuple('Pileup', ['sample', 'RG', 'alignmentname', 'basecall', 'bq'])
pileupList=[('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67213', 'C', 45),
('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67214', 'C', 40
)]
pileupList=map(Pileup._make, pileupList)
for p in pileupList:
print p.sample, p.basecall, p.bq