Skip to content

Instantly share code, notes, and snippets.

View seandavi's full-sized avatar

Sean Davis seandavi

View GitHub Profile
@seandavi
seandavi / prepTargetVCFMetadata.R
Created February 13, 2018 04:03
Preparing VCF file metadata for TARGET Osteosarcoma
library(dplyr)
library(RMySQL)
con = dbConnect(MySQL(),user='USERNAME',password='PASSWORD',host='solexadb.XXXXXXXXXX.us-east-1.rds.amazonaws.com',port=3306,dbname='solexa')
res = dbGetQuery(con,
"select study_id,source.name as source_name, sample.name as sample_name, fcb.type as software,
fcb.dateStamp as basecalldate, fcb.softwareVersion as version, study_id,
sr.date as run_date, sr.sequencer as sequencer, sm.model, ssr.library_id as library_id,
sr.ID as run_id, sample.ID as sample_id, nt.value as sample_source,
@seandavi
seandavi / Snakefile
Created February 2, 2018 12:27
Snakemake with s3 and custom profile
import boto3
# set the profile name based on ~/.aws/credentials entry
boto3.setup_default_session(profile_name='s3')
from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
s3 = S3RemoteProvider()
# This simply copies the file from local storage to s3
rule all:
@seandavi
seandavi / oncoprint_targetOsteo.R
Created January 8, 2018 13:48
TARGET osteosarcoma oncoprint R code
library(aws.s3)
aws.signature::use_credentials(profile='s3')
disco_maf = s3read_using(readr::read_tsv,object="s3://target-osteosarcoma/TargetOsteoDiscovery/summary/strelka.maf.filtered.tab")
disco_gistic = s3read_using(readr::read_tsv,object="s3://target-osteosarcoma/TargetOsteoDiscovery/all_thresholded.by_genes.txt")
library(tidyr)
library(dplyr)
library(ComplexHeatmap)
x = disco_gistic %>%
gather(key = 'Sample', value='CN', -c('Gene Symbol', "Locus ID", "Cytoband")) %>%
dplyr::select(Sample, Hugo_Symbol = `Gene Symbol`, CN) %>%
@seandavi
seandavi / TCGAtranslateID.R
Last active January 8, 2024 21:12
Translate GDC file_ids to TCGA barcodes
library(GenomicDataCommons)
library(magrittr)
TCGAtranslateID = function(file_ids) {
info = files() %>%
GenomicDataCommons::filter( ~ file_id %in% file_ids) %>%
GenomicDataCommons::select('cases.samples.submitter_id') %>%
results_all()
# The mess of code below is to extract TCGA barcodes
# id_list will contain a list (one item for each file_id)
@seandavi
seandavi / xmlsplitter.py
Created December 22, 2017 12:47
Split an XML file into smaller XML files based on a split "tag"
#!/usr/bin/env python
import argparse
import lxml.etree
import os, sys
import bz2
parser = argparse.ArgumentParser()
parser.add_argument('tag')
parser.add_argument('n',default=100000)
parser.add_argument('wrapper', default=None)
@seandavi
seandavi / add_user_to_linux_noninteractively.sh
Created December 1, 2017 14:49
Add new user to linux machine non-interactively
# Add a new user to a Linux machine without any interactive prompts.
#
# I often want to add new users to AWS machines from a semi-automated
# script, perhaps run over SSH, where nobody is present to answer a
# password prompt. Doing so noninteractively took some searching.
#
# USERNAME and NEWPASSWORD are placeholders; substitute real values.
#
# Create the account.
sudo useradd USERNAME
# Set the password by piping a "USERNAME:NEWPASSWORD" line into chpasswd.
echo "USERNAME:NEWPASSWORD" | sudo chpasswd
@seandavi
seandavi / gist:1308c15707d443f1771c3cadeef78547
Last active November 15, 2017 23:19
script skeleton to mine tweets for software projects
# Mine tweets from a meeting using the meeting hashtag.
#
# Looks for URLs in tweets that match:
# - GitHub
# - GitHub Pages (docs)
# - Bitbucket
# - CRAN
#
# Results in a tidy data.frame that can be further manipulated
@seandavi
seandavi / Genome_Informatics_2017_software.csv
Last active November 8, 2018 14:33
Software list mined from the Twitter feed for the CSHL Genome Informatics meeting, 2017
url name user type
https://github.com/dewyman/TranscriptClean TranscriptClean dewyman github
https://github.com/dewyman/TALON TALON dewyman github
https://github.com/Illumina/strelka strelka Illumina github
https://github.com/gymreklab/GangSTR GangSTR gymreklab github
https://github.com/dewyman/talon talon dewyman github
https://github.com/haghshenas/PhISCS PhISCS haghshenas github
https://github.com/alshai/r-index r-index alshai github
https://github.com/shenwei356/bwt bwt shenwei356 github
https://github.com/gymreklab/gangstr gangstr gymreklab github
@seandavi
seandavi / SRA2R.log
Created August 30, 2017 20:09
SRA2R Mac OS installation log
sdavis2@Seans-MBP-3:~/Documents/git$ R CMD INSTALL SRA2R
* installing to library ‘/Library/Frameworks/R.framework/Versions/3.4/Resources/library’
* installing *source* package ‘SRA2R’ ...
** libs
clang -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/Users/sdavis2/include -I/Users/sdavis2/include -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/Rcpp/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/Biostrings/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/XVector/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/IRanges/include" -I"/Library/Frameworks/R.framework/Versions/3.4/Resources/library/S4Vectors/include" -I/usr/local/include "-I/usr/local/opt/openssl/include" -fPIC -Wall -g -O2 -c Biostrings_stubs.c -o Biostrings_stubs.o
clang++ -std=gnu++11 -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/Users/sdavis2/include -I/Users/sdavis2/include -I"/Library/Frameworks/R.framework/Ve
@seandavi
seandavi / app.R
Last active August 10, 2019 11:31
Shiny web-app to generate email to user id mapping (for hosting a course)
#
# This is a Shiny web application. You can run the application by clicking
# the 'Run App' button above.
#
# Find out more about building applications with Shiny here:
#
# http://shiny.rstudio.com/
#
library(shiny)