Skip to content

Instantly share code, notes, and snippets.

View indapa's full-sized avatar

Amit Indap indapa

View GitHub Profile
@indapa
indapa / indx.m
Created October 2, 2012 14:53
set elements of vector to a value given a list of indices
val=randn(1,10); % get 10 random values
indx=[10 9 8 7 6 5 4 3 2 1] % vector representing indices
value(indx)=val % set the indices of value to value
@indapa
indapa / indice.py
Created October 2, 2012 14:59
NumPy way to set elements of array to a value given a list of indices
import numpy as np
indices= [ 9,8,7,6,5,4,3,2,1,0]
values=np.random.rand(10).tolist()
zeros=np.zeros(10) #get an array of zeros
zeros[indices]=values # then assign the indices the values
@indapa
indapa / FactorProductExample.py
Created October 3, 2012 16:00
Factor product example for a graphical model
#!/usr/bin/env python
import numpy as np
from Factor import *
from FactorOperations import *
#example from figure 4.3 in Koller and Friedman textbook, 'Probabilistic Graphical Models: Principles and Techniques'
a1=Factor([ 1, 2], [ 3, 2], [ 0, 0, 0, 0, 0, 0, 0, 0 ] )
b1=Factor( [2,3], [2,2], [ 0,0] )
assignmentA=IndexToAssignment( np.arange(0, np.prod( a1.getCard() )), a1.getCard() )
@indapa
indapa / pileupNamedTuple.py
Created October 23, 2012 16:03
using a Python namedtuple
import collections
Pileup = collections.namedtuple('Pileup', ['sample', 'RG', 'alignmentname', 'basecall', 'bq'])
pileupList=[('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67213', 'C', 45),
('51_WGA', 'ZFJDLOT83TE', 'D9WJ65M1:224:BB006PABXX:2:21:8481:67214', 'C', 40
)]
pileupList=map(Pileup._make, pileupList)
for p in pileupList:
print p.sample, p.basecall, p.bq
import numpy as np
from Factor import *
from FactorOperations import *
from PGMcommon import *
#make Factor objects
D=Factor( [1], [2], [.6,.4],'Difficulty')
I=Factor( [5], [2], [.7,.3], 'Intelligence' )
G=Factor( [2, 5,1], [ 3, 2, 2], [.3,.4,.3,.9,.08,.02,.05,.25,.7,.5,.3,.2], 'Grade', )
S=Factor( [3,5], [2,2], [.95,.05,.2,.8], 'SAT')
@indapa
indapa / aggregate_example.R
Last active December 11, 2015 03:09
aggregate function in R Splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.
# how to use aggregate function in R
#say I have a data frame where I keep track how much gas I paid for each time to the pump
read.csv("gas.csv",header=T)
head(gas)
# amount month
#1 36.21 Jun
#2 30.22 Jun
#3 35.42 Jul
#4 34.12 Jul
#5 36.09 Aug
@indapa
indapa / gist:5113856
Created March 8, 2013 02:47
Using dtype object and loadtxt command to read file contents into a Numpy array
#simref.1 653 T TT GT TT
#simref.1 1216 T CC CC CC
#simref.1 1440 A AA AG AA
#simref.1 1470 A CC CC CC
#simref.1 2070 C AA AA AA
#simref.1 2489 C GG GG GG
import numpy as np
dt = np.dtype( [ ('chrom', 'S8'), ('pos', 'int32'), ('ref','S1'), ('father','S2'), ('mother','S2'), ('child','S2')] )
pgm_array=np.loadtxt(file,delimiter='\t',dtype=dt)
@indapa
indapa / flatten.py
Created March 8, 2013 21:14
Flatten a list of lists with itertools
#see http://stackoverflow.com/a/952952/1735942
import itertools
genotypes=['GG', 'GT', 'GG']
alleles= [ list(tuple(g)) for g in genotypes ]
print alleles
alleles=list(itertools.chain.from_iterable(alleles))
print alleles
pedobjects=[] #list of pedobjects, represents lines in a pedfile
pedfh=open(options.pedfile, 'r')
for line in pedfh:
fields=line.strip().split('\t')
(fid,iid,pid,mid,sex,phenotype)=fields[0:6]
phenotype=int(phenotype)
pedobjects.append( Ped(fid,iid,pid,mid,sex,phenotype) )
#the phenotype status is set to 2 if the sample is affected: http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
affecteds=[ pedobj.getid() for pedobj in pedobjects if pedobj.getpheno() == 2 ]
@indapa
indapa / yieldFastqRecord.py
Created April 23, 2013 03:36
Use itertools.groupby to read a fastq file
def yieldFastqRecord (fh):
""" a generator that yields a tuple of (fastq_readname, sequence, qualstring)
adapted from this http://www.biostars.org/p/67246/#67556
yields a tuple with (header_name,sequence)
See http://freshfoo.com/blog/itertools_groupby """
fqiter=(x[1] for x in itertools.groupby(fh, lambda line: line[0] == '@'))
#fqiter is made of sub-iterators
#the first sub-iter is the header