Skip to content

Instantly share code, notes, and snippets.

@allaway
allaway / gist:4422a306adf5ae9142b18a973d8f09d1
Last active November 10, 2023 17:15
store schematic manifests to synapse (NF tools central)
library(googlesheets4)
library(googledrive)
library(synapser)
library(dplyr)
library(readr)
library(purrr)
library(stringr)
synLogin()
@allaway
allaway / addgene_webscrape.R
Created May 19, 2023 00:28
lazy script to scrape from addgene
library(rvest)
library(dplyr)
library(stringr)
plasmids <- c("Addgene_83189", "Addgene_83188", "Addgene_83187", "Addgene_83186", "Addgene_83185", "Addgene_83184", "Addgene_83183", "Addgene_83182", "Addgene_83181", "Addgene_83180", "Addgene_83179", "Addgene_83178", "Addgene_83177", "Addgene_83176", "Addgene_83175", "Addgene_83174", "Addgene_83173", "Addgene_83172", "Addgene_83171", "Addgene_83170", "Addgene_83169", "Addgene_83168", "Addgene_83167", "Addgene_83166", "Addgene_83165", "Addgene_83164", "Addgene_83163", "Addgene_83162", "Addgene_83161", "Addgene_83160", "Addgene_83159", "Addgene_83158", "Addgene_83157", "Addgene_83156", "Addgene_83155", "Addgene_83154", "Addgene_83153", "Addgene_83152", "Addgene_83151", "Addgene_83150", "Addgene_83149", "Addgene_83148", "Addgene_83147", "Addgene_83146", "Addgene_83145", "Addgene_83144", "Addgene_83143", "Addgene_83142", "Addgene_83141", "Addgene_83140", "Addgene_83139", "Addgene_83138", "Addgene_83137", "Addgene_83136", "Addgene_83135", "Addgene_83134", "Addgene_8
@allaway
allaway / plot_portal_size.R
Last active July 2, 2025 19:01
Plot cumulative size of NF Data Portal over time
library(synapser)
library(tidyverse)
synLogin()
# Build the per-file table behind the cumulative size / count curves.
# Reads the CSV produced by the table query, drops rows with no recorded file
# size, and adds:
#   cumulative_size  - running total of dataFileSizeBytes, divided by 1e12
#   cumulative_count - running number of files
#   date             - createdOn converted to a Date (createdOn / 1000 is passed
#                      as seconds since the epoch; presumably createdOn is epoch
#                      milliseconds - confirm)
metadata <- synTableQuery("select createdOn, dataFileSizeBytes from syn16858331 where type = 'file'")$filepath %>%
  read_csv() %>%
  filter(!is.na(dataFileSizeBytes)) %>%
  # The query has no ORDER BY, so sort by creation time first; otherwise the
  # running totals would follow the arbitrary row order of the result.
  arrange(createdOn) %>%
  mutate(
    cumulative_size = cumsum(dataFileSizeBytes) / 1e12,
    cumulative_count = row_number(),
    date = lubridate::as_date(
      lubridate::as_datetime(createdOn / 1000, origin = lubridate::origin)
    )
  )
@allaway
allaway / synodos_nf2_kinome_baseline.R
Last active December 14, 2022 23:05
Synodos NF2 kinome baseline data
library(synapser)
synLogin()
library(dplyr)
library(readr)
library(ggplot2)
#Looking at my old analysis scripts (bit messy...sorry), it looks like the baseline kinome data is in a different file; the experiment
#was probably done before any drug screening had taken place (hence the drug-perturbed kinome analysis was done separately and later).
#There's also no syn6. My _guess_ is that this cell line was not tested at the time. Note that Syn1/Syn5 are the isogenic CRISPR pair,
@allaway
allaway / check_md5.R
Last active November 18, 2022 22:40
Check if Synapse-listed MD5 matches locally calculated MD5 by downloading files with client
library(synapser)
synLogin()
# Fixture placed first in the test set so the failure path is always exercised.
md5_fail <- "syn45353855" # a known file with a bad md5 in filehandle
# Fetch the `id` column of table syn23664752 as a data frame.
ids <- synTableQuery("select id from syn23664752")$asDataFrame()
# Ids to check: the known-bad file followed by every id from the table.
ids_testing <- c(md5_fail, ids[["id"]])
res <- lapply(ids_testing[1:2], function(x){
@allaway
allaway / index.R
Created November 11, 2022 00:19
Index S3 files on Synapse when md5s are also deposited as text files in the bucket (using R)
# Export AWS credentials as environment variables for this R session.
# NOTE(review): the values below look like placeholders - substitute real
# (temporary) credentials before running, and never commit real secrets.
# Presumably these are picked up by the paws client loaded below - confirm.
Sys.setenv(
AWS_ACCESS_KEY_ID="abcd",
AWS_SECRET_ACCESS_KEY="1a2b3c",
AWS_SESSION_TOKEN="rlylongstring"
)
library(paws)
library(dplyr)
library(nfportalutils)
library(reticulate)
@allaway
allaway / index.sh
Last active November 4, 2022 14:37
Index files that have been copied from EC2 to an S3 bucket on Synapse
#!/bin/bash
# This script assumes you have a copy of files on an EC2 instance, which you have transferred to an S3 bucket, and copies still exist in both locations (local needed for md5sum)
# synindex.py is from https://github.com/Sage-Bionetworks-Workflows/nf-synindex/blob/main/bin/synindex.py
# S3 prefix whose objects are listed below.
bucket_path=s3://nf-syn28545963-s3-bucket-18czl49o70rsc/rnaseq/
# List the prefix and keep only the trailing name part of each row: awk blanks
# the first three whitespace-separated fields (presumably date, time and size
# in the `aws s3 ls` output - confirm) and sed strips the leading whitespace
# left behind. "$bucket_path" is quoted so it always reaches aws as a single
# argument, with no word splitting or globbing.
FILES=$(aws s3 ls "$bucket_path" | awk '{$1=$2=$3=""; print $0}' | sed 's/^[ \t]*//')
# Local directory holding the copy of the files (per the header comment, the
# local copy is needed for md5sum).
LOCAL_FILES_PATH="$HOME/nerves/"
# Regex for names ending in "bam"; NOTE(review): where it is applied is outside
# this view.
pat=".+bam"
for i in $FILES
@allaway
allaway / rename_folders_files_synapse.R
Created September 8, 2022 19:27
Rename folder or files on synapse
library(synapser)
synLogin()
# Ids of the files under syn11601459 whose name contains "28NF".
ids <- synTableQuery(
  "SELECT id FROM syn11601459 where name like '%28NF%' and type = 'file'"
)$asDataFrame()[["id"]]
sapply(ids, function(x){
foo <- synGet(x, downloadFile=F)
nx <- stringr::str_replace(foo$properties$name, "28NF", "28cNF")
foo$properties$name <- nx
synStore(foo)
@allaway
allaway / gist:ffe418f1a428feace3d321b49ba21a20
Created August 20, 2022 18:41
install pyEGA3 on an amazon linux instance (sage service catalog)
# Install the build toolchain and the Python 3 development package.
# NOTE(review): presumably both are needed to build pyega3's dependencies - confirm.
sudo yum groupinstall "Development Tools"
# Installing system packages needs root, so this runs under sudo like the
# groupinstall above (it fails for a non-root user otherwise).
sudo yum install python3-devel
##per https://github.com/EGA-archive/ega-download-client, not sure if sudo is really appropriate
sudo pip3 install pyega3
##test
pyega3 --help