Matt Shirley mdshw5

Data Scientist @Novartis, CSO Sturge-Weber Foundation. Oncology and Rare Disease Research [email protected]

244 followers · 255 following

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

mdshw5 / answer.py

Created July 27, 2017 03:16

biostars 264821

	import simplesam

	# Build a lookup of read ID -> (UMI, barcode) from a whitespace-delimited text file.
	barcodes = {}
	with open('read_id_barcode_umi.txt') as barcodes_file:
	    for line in barcodes_file:
	        # should check the delimiter in this file. If it's ' ' or \t or ','
	        # (split() with no argument handles spaces and tabs, but not commas)
	        read_id, umi, barcode = line.rstrip().split()
	        # store into the `barcodes` dict, not the per-line `barcode` string
	        barcodes[read_id] = (umi, barcode)
	# reading this entire file could use a TON of memory
	# if you have lots of reads

mdshw5 / answer.py

Created July 19, 2017 14:27

biostars 263478

	from pyfaidx import Fasta
	# Print one "chrom<TAB>pos<TAB>nucleotide" line for every whitespace-separated
	# (chromosome, position) pair listed in pos.txt.
	with Fasta('input.fasta') as genome, open('pos.txt', 'r') as positions:
	    for entry in positions:
	        chrom, pos = entry.rstrip().split()
	        # NOTE(review): the position is used directly as a Python index;
	        # confirm whether pos.txt is 0-based or 1-based.
	        nuc = genome[chrom][int(pos)].seq
	        print("{chrom}\t{pos}\t{nuc}".format(chrom=chrom, pos=pos, nuc=nuc))

mdshw5 / answer.py

Last active July 14, 2017 17:48

biostars 262660

	from pyfaidx import Fasta

	# For each row of file_1 (three whitespace-separated fields), write every
	# record of file_2.fasta whose name contains the second field into the
	# file named by the third field.
	with Fasta("file_2.fasta") as records, open("file_1") as content:
	    for row in content:
	        _, ec, filename = row.rstrip().split()
	        # "w" mode truncates: a filename repeated in file_1 is overwritten
	        with open(filename, "w") as out_file:
	            # NOTE(review): confirm repr(record) yields the FASTA-formatted
	            # entry and not just a short object description.
	            out_file.writelines(
	                repr(record) for record in records if ec in record.name
	            )

mdshw5 / default.grub

Created November 13, 2016 03:29

grub config

GRUB_CMDLINE_LINUX_DEFAULT="rootflags=degraded,subvol=@ intel_iommu=on,igfx_off vfio_iommu_type1.allow_unsafe_interrupts=1 pcie_acs_override=downstream"

mdshw5 / Steam.xml

Created November 13, 2016 01:45

Steam KVM virtual machine for NVIDIA GeForce GTX 950 passthrough

	<domain type='kvm'>
	<name>Steam</name>
	<uuid>90325573-ce4b-4ffc-875e-ca31f2d2f859</uuid>
	<memory unit='KiB'>2097152</memory>
	<currentMemory unit='KiB'>2097152</currentMemory>
	<vcpu placement='static'>4</vcpu>
	<os>
	<type arch='x86_64' machine='pc-q35-2.5'>hvm</type>
	<loader readonly='yes' type='pflash'>/usr/share/OVMF/OVMF_CODE.fd</loader>
	<nvram>/var/lib/libvirt/qemu/nvram/Steam_VARS.fd</nvram>

mdshw5 / rule.py

Created October 10, 2016 17:47

canvas manifest creation

	## This is a rule for use in Snakemake
	rule create_canvas_xml:
	input: fasta=config["mouse_fasta"]
	output: xml="GenomeSize.xml", genome="genome.fa"
	params: runtime="7200", memory="2G"
	run:
	from pyfaidx import Fa

mdshw5 / answer.py

Created July 30, 2016 17:51

biostars 204336

	from pyfaidx import Fasta

	# Interleave the records of two FASTA files pairwise into result.fa:
	# first record of 1st.fa, first record of 2nd.fa, second of 1st.fa, ...
	with Fasta('1st.fa') as first, Fasta('2nd.fa') as second, open('result.fa', 'w') as result:
	    # zip() stops at the shorter input, so unpaired trailing records are dropped
	    for a, b in zip(first, second):
	        for record in (a, b):
	            # terminate the header and sequence lines; without the newlines
	            # the whole output collapses onto a single line
	            result.write('>' + record.name + '\n')
	            result.write(str(record) + '\n')

mdshw5 / example.py

Last active July 25, 2016 15:42

biostars 203117

	from pyfaidx import FastaVariant
	import vcf

	# Read the sample names from the VCF header; the `with` block closes the
	# file handle instead of leaking it.
	with open('calls.vcf.gz', 'r') as vcf_file:
	    samples = vcf.Reader(vcf_file).samples

	# Write one FASTA file per sample, built from that sample's het and hom calls.
	for sample in samples:
	    with FastaVariant('reference.fasta', 'calls.vcf.gz', sample=sample, het=True, hom=True) as consensus:
	        with open(sample + '.fasta', 'w') as sample_fasta:
	            for record in consensus:
	                sample_fasta.write('>' + record.long_name)

mdshw5 / answer.py

Created April 8, 2016 00:57

biostars 183260

	from pyfaidx import FastaVariant
	# For every variant site on every chromosome of the consensus, print the
	# three-base slice [site-2, site+1).
	with FastaVariant('genome.fasta', 'tabix_indexed_variants.vcf.gz', het=True, hom=True) as consensus:
	    for chromosome in consensus:
	        for site in chromosome.variant_sites:
	            window = chromosome[site - 2:site + 1]
	            ## do something with flanking sequence
	            print(window.seq) ## ATG

mdshw5 / answer.sh

Last active March 25, 2016 13:45

biostars 183279

	pip install pyfaidx
	# per-sequence nucleotide composition table
	faidx --transform nucleotide giant.fasta > base_counts.txt
	# keep the name (column 1) of rows whose column 8 (presumably the N count) is 0;
	# a pattern-action rule filters rows, unlike `if (...);` with an empty body
	awk '$8 == 0 {print $1}' base_counts.txt > seqs_without_n.txt
	# extract the selected sequences
	xargs faidx giant.fasta < seqs_without_n.txt

Newer Older