This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download a list of problematic regions (aka blacklist) for the GRCm39/mm39 | |
# mouse genome assembly. Defined by the Boyle-Lab/Blacklist | |
# software, High Signal and Low Mappability regions. | |
# See https://github.com/dozmorovlab/excluderanges for more information. | |
suppressMessages(library(httr)) # https://CRAN.R-project.org/package=httr | |
suppressMessages(library(GenomicRanges)) # https://bioconductor.org/packages/GenomicRanges/ | |
# bedbase_id | |
bedbase_id <- "edc716833d4b5ee75c34a0692fc353d5" | |
# Construct output file name |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download a list of problematic regions (aka blacklist) for the T2T-CHM13 | |
# telomere-to-telomere human genome assembly. Defined by the Boyle-Lab/Blacklist | |
# software, High Signal and Low Mappability regions. | |
# See https://github.com/dozmorovlab/excluderanges for more information. | |
suppressMessages(library(httr)) # https://CRAN.R-project.org/package=httr | |
suppressMessages(library(GenomicRanges)) # https://bioconductor.org/packages/GenomicRanges/ | |
# bedbase_id | |
bedbase_id <- "6548a002754cc1e882035293541b59a8" | |
# Construct output file name |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# How to liftOver Paired BED data | |
library(dplyr) | |
library(tidyr) | |
library(readxl) | |
library(rtracklayer) | |
# Landscape of Cohesin-Mediated Chromatin Loops in the Human Genome | |
# https://www.nature.com/articles/s41586-020-2151-x | |
# Supplementary Table 4 | Pan-cell type cohesin-mediated chromatin loops, hg19 coordinates, paired BED data | |
url1 <- "https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2151-x/MediaObjects/41586_2020_2151_MOESM5_ESM.xlsx" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tesseract Intro: https://cran.r-project.org/web/packages/tesseract/vignettes/intro.html | |
# Image source: https://twitter.com/davidsuter_epfl/status/1141959559654846464?s=20 | |
library(tesseract) | |
eng <- tesseract("eng") | |
text <- tesseract::ocr("https://pbs.twimg.com/media/D9kOESnW4AEs90l?format=jpg&name=900x900", engine = eng) | |
cat(text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Exploratory data analysis of SCSig collection: Signatures of Single Cell Identities | |
# http://www.gsea-msigdb.org/gsea/msigdb/supplementary_genesets.jsp | |
library(dplyr) | |
library(clusterProfiler) | |
# Read in gene sets | |
mtx <- read.gmt("http://software.broadinstitute.org/gsea/msigdb/supplemental/scsig.all.v1.0.symbols.gmt") | |
# Number of unique gene signatures | |
mtx$ont %>% unique() %>% length() | |
# Summary statistics on size of gene signatures | |
mtx %>% group_by(ont) %>% summarise(size = n()) %>% select(size) %>% summary() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://www.biostars.org/p/63225/ | |
FILE=VLI9_AA_S60_L008_R1_001.fastq.gz | |
zcat $FILE | head -n 40 | awk '{if(NR%4==0) printf("%s",$0);}' | od -A n -t u1 | awk 'BEGIN{min=100;max=0;}{for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}}END{if(max<=74 && min<59) print "Phred+33"; else if(max>73 && min>=64) print "Phred+64"; else if(min>=59 && min<64 && max>73) print "Solexa+64"; else print "Unknown score encoding";}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PCA: Check for batch effects. Select one batch, to color points by its assignment | |
pca <- mtx %>% varFilter(., var.cutoff = 0.75) %>% scale %>% t %>% prcomp | |
colorby <- "Group" # covariates[2] | |
pt <- ggplot(data = data.frame(pca$x, annot), | |
aes(x = as.numeric(PC1), y = as.numeric(PC2), label = Sample)) + | |
theme(plot.title = element_text(lineheight = 0.8, face="bold")) + | |
ggtitle(paste("PCA with batch, coloring by ", colorby)) + | |
geom_point(aes(color = eval(parse(text = colorby))), size = 3) + | |
geom_text_repel(colour = "black", size = 3) + |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rank | name | country | category | sales | profits | assets | marketvalue | |
---|---|---|---|---|---|---|---|---|
1 | Citigroup | United States | Banking | 94.71 | 17.85 | 1264.03 | 255.3 | |
2 | General Electric | United States | Conglomerates | 134.19 | 15.59 | 626.93 | 328.54 | |
3 | American Intl Group | United States | Insurance | 76.66 | 6.46 | 647.66 | 194.87 | |
4 | ExxonMobil | United States | Oil & gas operations | 222.88 | 20.96 | 166.99 | 277.02 | |
5 | BP | United Kingdom | Oil & gas operations | 232.57 | 10.27 | 177.57 | 173.54 | |
6 | Bank of America | United States | Banking | 49.01 | 10.81 | 736.45 | 117.55 | |
7 | HSBC Group | United Kingdom | Banking | 44.33 | 6.66 | 757.6 | 177.96 | |
8 | Toyota Motor | Japan | Consumer durables | 135.82 | 7.99 | 171.71 | 115.4 | |
9 | Fannie Mae | United States | Diversified financials | 53.13 | 6.48 | 1019.17 | 76.84 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"","E116.11BivFlnk.chromStates15","E116.14ReprPCWk.chromStates15","E116.15Quies.chromStates15","E116.1TssA.chromStates15","E116.2TssAFlnk.chromStates15","E116.3TxFlnk.chromStates15","E116.4Tx.chromStates15","E116.5TxWk.chromStates15","E116.6EnhG.chromStates15","E116.7Enh.chromStates15","E116.8ZNFRpts.chromStates15","E116.9Het.chromStates15","category" | |
"crohns_disease",0.162052705452119,0.0635253490922535,-19.2252181061205,1.06711427791428,9.90758142461494,0.00255822057746797,2.56100484438806,-0.253587329445552,3.38505679108121,11.2870094369073,2.23623723813231,2.68992814221344,"autoimmune" | |
"ulcerative_colitis",0.187763955873977,0.127638469530766,-13.9279062300709,2.52742536677685,2.98729538325691,2.42524539010519,1.11285397153932,0.594831983850718,-0.0732590594795718,4.09316926199843,0.887628082728305,3.77780261525578,"autoimmune" | |
"rheumatoid_arthritis",0.158691039640792,-0.0296551102513595,-15.3251476656711,1.55895110481963,15.8450458804273,0.547902010147141,0.337374925927234,1.52148872631903,1.5879471977881 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source: Additional file 1 from Łabaj, Paweł P., and David P. Kreil. “Sensitivity, Specificity, and Reproducibility of RNA-Seq Differential Expression Calls.” Biology Direct 11, no. 1 (December 20, 2016): 66. doi:10.1186/s13062-016-0169-7. | |
# https://static-content.springer.com/esm/art%3A10.1186%2Fs13062-016-0169-7/MediaObjects/13062_2016_169_MOESM1_ESM.pdf | |
DEfun <- function(counts, design) { | |
DE <- list() | |
## limma | |
gene.dge <- DGEList(counts = counts, group = factor(rep(1:2, each = 4))) | |
gene.dge.norm <- calcNormFactors(gene.dge) | |
gene.dge.norm2 <- gene.dge.norm | |
gene.dge.norm2$counts <- gene.dge.norm2$counts - 0.5 |
NewerOlder