Skip to content

Instantly share code, notes, and snippets.

#!/bin/bash
# Usage: docker_run_abyss_test.sh [ABYSS_DOCKER_TAG]
# ABYSS_DOCKER_TAG is optional; the possible values are listed at
# https://hub.docker.com/r/pegi3s/abyss/tags.
# For example, pass '2.0.2-3' to run Abyss version 2.0.2.
# Exit on errors, on unset variables and on failures inside pipelines, and
# trace every command as it is executed.
set -o errexit -o nounset -o xtrace -o pipefail
library(openxlsx)
library(readr)
library(dplyr)
# Load the column-mapping sheet and the registration sheet, then reduce the
# registration table to the mapped columns, de-duplicated and sorted by
# last name.
mappings <- read.xlsx("mappings.tsv.xlsx") %>% as_tibble()
registration <- read.xlsx("Final Registration List.xlsx")

# Column names in the mapping sheet contain spaces; replace them with "." so
# they match the column names of the imported registration table.
cols <- gsub(" ", ".", mappings$FinalRegistrationColumnname, fixed = TRUE)

# Drop every column whose name starts with "What" before selecting.
# drop = FALSE keeps a data frame even if only a single column remains.
# all_of() makes it explicit that `cols` is an external character vector and
# not a column of `registration`.
registration <- registration[
  , !grepl("^What", colnames(registration)),
  drop = FALSE
] %>%
  select(all_of(cols)) %>%
  distinct() %>%
  arrange(Last.Name) %>%
  as_tibble()
$ cat Dockerfile
# RStudio image extended with R.utils, SDMTools 1.1-221.2 and multtest.
FROM rocker/rstudio
# The package patterns are quoted so the shell hands them to apt-get unchanged
# instead of attempting pathname expansion against the working directory.
RUN apt-get update && apt-get install -y 'libpng*' 'libjpeg*' libhdf5-dev libxml2-dev
RUN install2.r R.utils
# SDMTools is installed from a CRAN archive tarball URL, hence repos = NULL.
RUN R -e "install.packages('https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz', repos = NULL)"
RUN install2.r BiocManager
RUN R -e "BiocManager::install('multtest')"
# Packages installed ahead of Seurat 2.3.4 (presumably its prerequisites).
install.packages("R.utils")
# SDMTools comes from a CRAN archive tarball URL, hence repos = NULL.
install.packages(
  "https://cran.rstudio.com//src/contrib/Archive/SDMTools/SDMTools_1.1-221.2.tar.gz",
  repos = NULL
)
install.packages("BiocManager")
BiocManager::install("multtest")
install.packages("remotes")
# NOTE: this step is flaky. Sometimes it finds the older version and
# sometimes not.
remotes::install_version("Seurat", "2.3.4")

# my sct package installation
install.packages(c("roxygen2", "rversions", "devtools"))
@pcantalupo
pcantalupo / symbol2ensemblid
Last active June 13, 2021 15:55
mapIds gene annotation symbol ensembl
library(org.Hs.eg.db)
(fields = columns(org.Hs.eg.db)) # show annotations available
symbols = c("MYC", "CCNE1", "TP53")
# get full gene annotation for MYC (takes 1 or 2 minutes to run)
geneinfo = lapply (fields, function (f) {
message (f)
# Bioconductor image with the Salmon 1.1.0 Linux release unpacked and an
# Arabidopsis thaliana cDNA FASTA downloaded.
FROM bioconductor/bioconductor_docker
RUN wget https://github.com/COMBINE-lab/salmon/releases/download/v1.1.0/salmon-1.1.0_linux_x86_64.tar.gz && \
    tar -xvzf salmon-1.1.0_linux_x86_64.tar.gz
# ENV uses the key=value form; the space-separated form is legacy syntax.
# NOTE(review): the directory is "salmon-latest_...", not "salmon-1.1.0_..." --
# it must match what the tarball actually unpacks to; confirm.
ENV PATH="$PATH:/salmon-latest_linux_x86_64/bin"
RUN curl ftp://ftp.ensemblgenomes.org/pub/plants/release-28/fasta/arabidopsis_thaliana/cdna/Arabidopsis_thaliana.TAIR10.28.cdna.all.fa.gz -o athal.fa.gz
# RUN salmon index -t athal.fa.gz -i athal_index
library(rtweet)
library(dplyr)
library(tidyr)
library(reactable)
library(glue)
# https://youtu.be/O0gTv9VGRig?t=24
# bonus video for Shiny : https://www.infoworld.com/video/100759/how-to-code-an-interactive-shiny-app-to-search-twitter-do-more-with-r-bonus-video
# Search Twitter for "VirusStalker", requesting 500 tweets with retweets
# included. TRUE is spelled out because T is an ordinary, reassignable variable.
tweet_df <- search_tweets("VirusStalker", n = 500, include_rts = TRUE)
Here I'm aligning 10,000 stranded paired-end reads from a TruSeq stranded RNA-seq library generated by GPCL. The code below shows that mapping the 10K read pairs with either RF or FR strandness results in exactly the same alignments.
pgc92 at supernova in ~/projects/ngs/bkv_rpte_vs_hvec/mappingtoBKV/HVEC2-BK-24/new
$ hisat2 -x ../../NC_001538.1 -1 foo_1k_1.fq -2 foo_1k_2.fq --rna-strandness FR > FR.sam
10000 reads; of these:
10000 (100.00%) were paired; of these:
3 (0.03%) aligned concordantly 0 times
8898 (88.98%) aligned concordantly exactly 1 time
1099 (10.99%) aligned concordantly >1 times
 ██████╗ ███████╗███████╗ ██████╗ █████╗
██╔════╝ ██╔════╝██╔════╝██╔════╝██╔══██╗
██║  ███╗███████╗█████╗  ██║     ███████║
██║   ██║╚════██║██╔══╝  ██║     ██╔══██║
╚██████╔╝███████║███████╗╚██████╗██║  ██║
 ╚═════╝ ╚══════╝╚══════╝ ╚═════╝╚═╝  ╚═╝
[*] loading datasets ...
[*] GSECA running ...
=== FMM and DD ===
[*] NE threshold: 0.01
library(lsa)
# Two gene lists to compare.
list1 <- c("g1", "g2", "g3", "g4")
list2 <- c("g3", "g4", "g5", "g6", "g1")

# All genes that occur in either list.
u <- union(list1, list2)
u

# Indicator vectors over the union: 1 where the gene is in the list, else 0.
a <- as.numeric(is.element(u, list1))
a
b <- as.numeric(is.element(u, list2))
b