Created
January 24, 2019 00:21
-
-
Save seandavi/11fcee17281632d874cf08c4f491314b to your computer and use it in GitHub Desktop.
TARGET osteosarcoma fusion gene analysis sketch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "fusion genes" | |
output: | |
html_document: | |
self_contained: true | |
--- | |
```{r include=FALSE} | |
library(knitr) | |
opts_chunk$set(message=FALSE, warning=FALSE) | |
``` | |
## Load the data | |
```{r} | |
defuse_file = 'TargetOsteoRNA.results.classify.defuse.tsv' | |
starfusion_file = 'TargetOsteoRNA.star-fusion.fusion_predictions.abridged.tsv' | |
fuseq_file = "TargetOsteoRNA.fusions.FuSeq.tab" | |
system2('md5', defuse_file, stderr = TRUE) | |
system2('md5', starfusion_file, stderr=TRUE) | |
system2('md5', fuseq_file, stderr=TRUE) | |
df_dat = readr::read_tsv(defuse_file) | |
sf_dat = readr::read_tsv(starfusion_file) | |
sf_dat$LeftGeneSymbol = do.call('rbind',strsplit(sf_dat$LeftGene,'\\^'))[,1] | |
sf_dat$RightGeneSymbol = do.call('rbind',strsplit(sf_dat$RightGene,'\\^'))[,1] | |
ff_dat = readr::read_tsv(fuseq_file) | |
``` | |
## TP53 | |
Just a baseline. | |
```{r} | |
sf_samples = unique(sf_dat[sf_dat$LeftGeneSymbol=='TP53' | sf_dat$RightGeneSymbol=='TP53',][[2]]) | |
df_samples = unique(df_dat[df_dat$gene_name2 =='TP53' | df_dat$gene_name1=='TP53',][[2]]) | |
ff_samples = unique(ff_dat[ff_dat$symbol3 =='TP53' | ff_dat$symbol5=='TP53',][[2]]) | |
# star fusion samples | |
length(sf_samples) | |
sf_samples | |
# defuse samples | |
length(df_samples) | |
df_samples | |
# fuseq samples | |
length(ff_samples) | |
ff_samples | |
# overlapping samples | |
intersect(df_samples, sf_samples) | |
intersect(ff_samples, df_samples) | |
intersect(ff_samples, sf_samples) | |
``` | |
VennDiagram to show overlapping samples having TP53 fusions using FuSeq, deFuse and Star-fusion results. | |
```{r} | |
ff_samples <- ff_samples[!is.na(ff_samples)] | |
require("VennDiagram") | |
VENN.LIST.samples <- list("FuSeq" = ff_samples, "DeFuse" = df_samples, "Star-fusion" = sf_samples) | |
venn.plot.samples <- venn.diagram(VENN.LIST.samples , NULL, | |
fill=c("darkmagenta", "darkblue", "red"), | |
alpha=c(0.5,0.5,0.5), cex = 2, cat.fontface=4, | |
category.names=c("FuSeq", "DeFuse", "Star-fusion"), | |
main="TargetOsteoRNA - samples with TP53 fusions") | |
# png(filename = "TargetOsteoRNA_venn_TP53_samples.png") | |
# dev.off() | |
grid.draw(venn.plot.samples) | |
``` | |
Samples with TP53 fusions called by all the three fusion callers | |
```{r} | |
require("gplots") | |
vennObj <- venn(VENN.LIST.samples, show.plot=FALSE) | |
## intersection lists | |
inters <- attr(vennObj,"intersections") | |
## overlapping samples by all the three callers | |
inters$`FuSeq:DeFuse:Star-fusion` | |
``` | |
## Overlapping fusions | |
```{r} | |
# add unique id | |
sorted_genes = t(apply(df_dat[,c('gene_name1', 'gene_name2')], 1, sort)) | |
df_dat$uniq = paste(df_dat$sampleName, sorted_genes[,1], sorted_genes[,2], sep="__") | |
sorted_genes = t(apply(sf_dat[,c('LeftGeneSymbol', 'RightGeneSymbol')], 1, sort)) | |
sf_dat$uniq = paste(sf_dat$sampleName, sorted_genes[,1], sorted_genes[,2], sep="__") | |
tmp_genes=ff_dat[,c('symbol3', 'symbol5')] | |
tmp_genes[is.na(tmp_genes)]='' # fill NAs with '' | |
sorted_genes = t(apply(tmp_genes, 1, sort)) | |
ff_dat$uniq = paste(ff_dat$sampleName, sorted_genes[,1], sorted_genes[,2], sep="__") | |
``` | |
Find the genes involved in overlapping fusions using FuSeq and STAR-Fusion results. The following a sorted table of those genes. | |
```{r} | |
#overlapping | |
length(intersect(sf_dat$uniq, ff_dat$uniq)) | |
intersected_sf = sf_dat[sf_dat$uniq %in% intersect(sf_dat$uniq, ff_dat$uniq),] | |
intersected_sf_gene_counts = table(c(intersected_sf$LeftGeneSymbol, intersected_sf$RightGeneSymbol)) | |
head(sort(intersected_sf_gene_counts, decreasing = TRUE), 50) | |
``` | |
VennDiagram to show overlapping fusions using FuSeq, deFuse and STAR-Fusion results. | |
```{r} | |
VENN.LIST.genes <- list("FuSeq" = unique(ff_dat$uniq), "DeFuse" = unique(df_dat$uniq), "Star-fusion" = unique(sf_dat$uniq)) | |
venn.plot.genes <- venn.diagram(VENN.LIST.genes , NULL, | |
fill=c("darkmagenta", "darkblue", "red"), | |
alpha=c(0.5,0.5,0.5), cex = 2, cat.fontface=4, | |
category.names=c("FuSeq", "DeFuse", "Star-fusion"), | |
main="TargetOsteoRNA - Fusion genes") | |
# png(filename = "TargetOsteoRNA_venn_fusion_genes.png") | |
# dev.off() | |
grid.draw(venn.plot.genes) | |
``` | |
Top genes with fusions called by all the three callers | |
```{r} | |
require("gplots") | |
vennObj_genes <- venn(VENN.LIST.genes, show.plot=FALSE) | |
inters_genes <- attr(vennObj_genes,"intersections") | |
## overlapping fusions called by all the three callers | |
intersected_sf_1 = sf_dat[sf_dat$uniq %in% inters_genes$`FuSeq:DeFuse:Star-fusion`,] | |
intersected_sf_gene_counts_1 = table(c(intersected_sf_1$LeftGeneSymbol, intersected_sf_1$RightGeneSymbol)) | |
head(sort(intersected_sf_gene_counts_1, decreasing = TRUE), 50) | |
``` | |
## COSMIC genes involved in fusions | |
Using the STAR-fusion/FuSeq shared fusions, report the STAR-fusion genes that overlap with COSMIC genes. (Note that _RUNX2_ is not in COSMIC.) | |
```{r} | |
library(TargetOsteoAnalysis) | |
cosmic_genes = cosmic_census() | |
head(sort(intersected_sf_gene_counts[names(intersected_sf_gene_counts) %in% cosmic_genes$`Gene Symbol`], | |
decreasing = TRUE), 50) | |
sf_cosmic = sf_dat[sf_dat$LeftGeneSymbol %in% cosmic_genes$`Gene Symbol` | |
| sf_dat$RightGeneSymbol %in% cosmic_genes$`Gene Symbol` ,] | |
dim(sf_cosmic) | |
df_cosmic = df_dat[df_dat$gene_name1 %in% cosmic_genes$`Gene Symbol` | |
| df_dat$gene_name2 %in% cosmic_genes$`Gene Symbol`, ] | |
dim(df_cosmic) | |
ff_cosmic = ff_dat[ff_dat$symbol3 %in% cosmic_genes$`Gene Symbol` | |
| ff_dat$symbol5 %in% cosmic_genes$`Gene Symbol`, ] | |
dim(ff_cosmic) | |
``` | |
## Filtered fusions | |
The STAR fusion results that overlap with FuSeq results. | |
```{r} | |
library(DT) | |
datatable(intersected_sf) | |
``` | |
## sessionInfo | |
```{r} | |
devtools::session_info() | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment