Sean Davis seandavi

- Pediatric oncologist - Cancer researcher - Data scientist - Community organizer

seandavi / prepTargetVCFMetadata.R

Created February 13, 2018 04:03

Preparing VCF file metadata for TARGET Osteosarcoma

	library(dplyr)


	library(RMySQL)
	con = dbConnect(MySQL(),user='USERNAME',password='PASSWORD',host='solexadb.XXXXXXXXXX.us-east-1.rds.amazonaws.com',port=3306,dbname='solexa')
	res = dbGetQuery(con,
	"select study_id,source.name as source_name, sample.name as sample_name, fcb.type as software,
	fcb.dateStamp as basecalldate, fcb.softwareVersion as version, study_id,
	sr.date as run_date, sr.sequencer as sequencer, sm.model, ssr.library_id as library_id,
	sr.ID as run_id, sample.ID as sample_id, nt.value as sample_source,

seandavi / Snakefile

Created February 2, 2018 12:27

Snakemake with s3 and custom profile

	import boto3

	# set the profile name based on ~/.aws/credentials entry
	boto3.setup_default_session(profile_name='s3')

	from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
	s3 = S3RemoteProvider()

	# This simply copies the file from local storage to s3
	rule all:

seandavi / oncoprint_targetOsteo.R

Created January 8, 2018 13:48

TARGET osteosarcoma oncoprint R code

	library(aws.s3)
	aws.signature::use_credentials(profile='s3')
	disco_maf = s3read_using(readr::read_tsv,object="s3://target-osteosarcoma/TargetOsteoDiscovery/summary/strelka.maf.filtered.tab")
	disco_gistic = s3read_using(readr::read_tsv,object="s3://target-osteosarcoma/TargetOsteoDiscovery/all_thresholded.by_genes.txt")
	library(tidyr)
	library(dplyr)
	library(ComplexHeatmap)
	x = disco_gistic %>%
	gather(key = 'Sample', value='CN', -c('Gene Symbol', "Locus ID", "Cytoband")) %>%
	dplyr::select(Sample, Hugo_Symbol = `Gene Symbol`, CN) %>%

seandavi / TCGAtranslateID.R

Last active January 8, 2024 21:12

Translate GDC file_ids to TCGA barcodes

	library(GenomicDataCommons)
	library(magrittr)

	TCGAtranslateID = function(file_ids) {
	info = files() %>%
	GenomicDataCommons::filter( ~ file_id %in% file_ids) %>%
	GenomicDataCommons::select('cases.samples.submitter_id') %>%
	results_all()
	# The mess of code below is to extract TCGA barcodes
	# id_list will contain a list (one item for each file_id)

seandavi / xmlsplitter.py

Created December 22, 2017 12:47

split xml into smaller xmls based on a split "tag"

	#!/usr/bin/env python
	import argparse
	import lxml.etree
	import os, sys
	import bz2

	# Command-line interface: `tag` is required; `n` and `wrapper` are optional.
	# argparse ignores `default` on a positional argument unless nargs is '?' or
	# '*', so nargs='?' is required for the defaults below to ever take effect.
	parser = argparse.ArgumentParser()
	parser.add_argument('tag')
	# type=int keeps a command-line value the same type as the integer default.
	parser.add_argument('n', nargs='?', type=int, default=100000)
	parser.add_argument('wrapper', nargs='?', default=None)

seandavi / add_user_to_linux_noninteractively.sh

Created December 1, 2017 14:49

Add new user to linux machine non-interactively

	# I often want to add new users
	# to AWS machines in a semi-automated
	# script, perhaps via SSH. Doing
	# so noninteractively took some search
	# work.

	# Create the account, then set its password without an interactive prompt.
	# chpasswd reads "user:password" pairs from stdin, so the pair must be piped
	# in with a real (unescaped) pipe; an escaped `\|` would just be echoed.
	sudo useradd USERNAME
	echo "USERNAME:NEWPASSWORD" | sudo chpasswd

seandavi / gist:1308c15707d443f1771c3cadeef78547

Last active November 15, 2017 23:19

script skeleton to mine tweets for software projects

	# Mine tweets from a meeting using the meeting hashtag.
	#
	# Looks for URLs in tweets that match:
	# - github
	# - github pages (docs)
	# - bitbucket
	# - CRAN
	#
	# Results in a tidy data.frame that can be further manipulated

seandavi / Genome_Informatics_2017_software.csv

Last active November 8, 2018 14:33

Software list mined from twitter feed for CSHL Genome Informatics meeting, 2017

url	name	user	type
https://github.com/dewyman/TranscriptClean	TranscriptClean	dewyman	github
https://github.com/dewyman/TALON	TALON	dewyman	github
https://github.com/Illumina/strelka	strelka	Illumina	github
https://github.com/gymreklab/GangSTR	GangSTR	gymreklab	github
https://github.com/dewyman/talon	talon	dewyman	github
https://github.com/haghshenas/PhISCS	PhISCS	haghshenas	github
https://github.com/alshai/r-index	r-index	alshai	github
https://github.com/shenwei356/bwt	bwt	shenwei356	github
https://github.com/gymreklab/gangstr	gangstr	gymreklab	github

seandavi / SRA2R.log

Created August 30, 2017 20:09

SRA2R Mac OS installation log

	sdavis2@Seans-MBP-3:~/Documents/git$ R CMD INSTALL SRA2R
	* installing to library ‘/Library/Frameworks/R.framework/Versions/3.4/Resources/library’
	* installing source package ‘SRA2R’ ...
	** libs
	clang -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/Users/sdavis2/include -I/Users/sdavis2/include -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/Rcpp/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/Biostrings/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/XVector/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/IRanges/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/S4Vectors/include" -I/usr/local/include "-I/usr/local/opt/openssl/include" -fPIC -Wall -g -O2 -c Biostrings_stubs.c -o Biostrings_stubs.o
	clang++ -std=gnu++11 -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/Users/sdavis2/include -I/Users/sdavis2/include -I"/Library/Frameworks/R.framework/Ve

seandavi / app.R

Last active August 10, 2019 11:31

Shiny web-app to generate email to user id mapping (for hosting a course)

	#
	# This is a Shiny web application. You can run the application by clicking
	# the 'Run App' button above.
	#
	# Find out more about building applications with Shiny here:
	#
	# http://shiny.rstudio.com/
	#

	library(shiny)