Skip to content

Instantly share code, notes, and snippets.

sxv /
Last active May 14, 2018 17:51
Script that simulates Settlers of Catan board tile setups to find the average number of games played before a duplicate arrangement is encountered.
import sys, random
# board is defined as list of 19 land tiles: Forest (4), Sheep (4), Wheat (4), Brick (3), Rocks (3), Desert (1)
def new_board(board=''):
    """Return ``board`` extended with the 19 land tiles in random order.

    Each tile is a single character: f (Forest, 4), s (Sheep, 4),
    w (Wheat, 4), b (Brick, 3), r (Rocks, 3) and d (Desert, 1).

    Args:
        board: Prefix the tiles are appended to; defaults to the empty string.

    Returns:
        ``board`` followed by the 19 tile characters in a random order.
    """
    available = list('ffffsssswwwwbbbrrrd')
    # Shuffle before dealing: taking tiles from the list in its fixed order
    # yields the same arrangement on every call, which makes a duplicate
    # search meaningless.
    random.shuffle(available)
    return board + ''.join(available)
sxv /
Created November 18, 2017 08:30
Take the ENS gene ID and gene name, and append columns 2 onward (col[2:]) from each input file.
import sys
rows = []
for line in open('gene_names.csv'):
a,b,c = line.replace('\n','').split(',')
for file in sys.argv[1:]:
for i, line in enumerate(open(file)):
sxv /
Created November 18, 2017 08:29
Make FASTQ files, align them, and make the expression table.
# usage:
# python /batch/dir1/ /batch/dir2/ # run all steps on two batches
# python -2 -3 -loadAndKeep /batch/dir # run steps 2 and 3 and use STAR --loadAndKeep
# python /batch/dir1 /batch/dir2/ -13 # run steps 1 and 3 on two batches
import sys, os
# Directory that the paths below are built from.
base_dir = '/home/sxv/code/s7s/'
# Each constant substitutes base_dir into a format string. base_dir already
# ends with '/', so every resulting path contains a doubled slash.
# NOTE(review): MAKE_FASTQ and ALIGN have no file name after the '/', so both
# evaluate to '/home/sxv/code/s7s//' — confirm the intended script names.
MAKE_FASTQ = '%s/' % base_dir
ALIGN = '%s/' % base_dir
MAKE_EXPRESSION_TABLE = '%s/make_expression_table.R' % base_dir
# For every line of /usr/share/dict/words, build a key by lower-casing the line
# and deleting the vowels a, e, i, o, u; then print each key that occurred
# exactly once, followed by the original line that produced it.
gawk '//{x=tolower($0); gsub(/[aeiou]/,"",x); a[x]++; b[x]=$0;}END{for(i in a){ if(a[i]==1){print i, b[i]} }}' /usr/share/dict/words
import sys
opts = {
'window': 100000,
'intrachrom': False
args = sys.argv[1:]
for a, arg in enumerate(args):
if arg.startswith('-'):
if arg.startswith('-w') or arg.startswith('--window'):
sxv /
Last active October 11, 2016 20:53
# define input cases via master_key.txt
# for each run, align fastq -> bam
# if GATK specified, run GATK, variant calling, and annovar
# run matricizer2 on tab output
# todo: add R clustering scripts
## usage
# python --input-dir /path/to/fastqs
## NOTES: input-dir must contain master_key.txt, input fastq.gz files,, UCSC_GENE_NAME3.txt
# define input cases via master_key.txt
# for each run, align fastq -> bam
# if GATK specified, run GATK, variant calling, and annovar
# run matricizer2 on tab output
# todo: add R clustering scripts
## usage
# python --input-dir /path/to/fastqs
## NOTES: input-dir must contain master_key.txt, input fastq.gz files,, UCSC_GENE_NAME3.txt
sxv / index.html
Created June 12, 2016 04:34
<canvas id="canvas"></canvas>
<script src=""></script>
var num = 2000;
var canvas = document.getElementById("canvas");
var width = canvas.width = window.innerWidth;
var height = canvas.height = window.innerHeight;
var ctx = canvas.getContext("2d");
var particles = d3.range(num).map(function(i) {
return [Math.round(width*Math.random()), Math.round(height*Math.random()), 2];
sxv /
Last active August 10, 2016 20:01
import sys
from glob import glob
if (len(sys.argv)<=1): match = '*n0*'
else: match = sys.argv[1]
file = glob(match)[0]
counts = {}
with open(file) as f:
print f.readline()
sxv /
Last active August 10, 2016 20:01
#!/usr/bin/env bash
for f in *.vcf; do \
gawk 'BEGIN{FS="\t"; OFS="\t"; } # separator=tab
## for(i=0; i<NF; i++){ if($i=="FORMAT"){ samples=(NF-i) } } # todo
samples=2; # two samples?
if($(NF)~/[nN]$/ || $(NF)~/normal$/){ swap=1 } # is order t/n?