Skip to content

Instantly share code, notes, and snippets.

@blahah
Created August 20, 2014 14:58
Show Gist options
  • Save blahah/1352b0af9c547a417738 to your computer and use it in GitHub Desktop.
Save blahah/1352b0af9c547a417738 to your computer and use it in GitHub Desktop.
Basic plotting of transrate results for varying read depth Trinity mouse assemblies
Transrate assembly and contig plots.
========================================================
Assemblies of mouse with 10, 20, 50 and 100 million reads.
# Assembly-level stats
## Load data
```{r}
setwd('~/Dropbox/ongoing_projects/feeding_transcriptome/final')
astats <- read.csv('all_10M_assemblies.csv')
astats$num_pairs <- 10e6
astats$percent_mapping <- with(astats, total_mappings / num_pairs * 100)
astats$pc_good_mapping <- with(astats, good_mappings / num_pairs * 100)
astats$assembly <- gsub(astats$assembly, pattern="\\.corr\\.Trinity\\.fasta", replacement="")
library(reshape2)
astats <- melt(astats, id='assembly')
astats <- astats[complete.cases(astats),]
astats$assembly <- factor(x=astats$assembly, levels=c("10M", "20M", "50M", "100M"), ordered=T)
```
## Plot the metrics
```{r fig.width=7, fig.height=40}
library(ggplot2)
ggplot(astats, aes(x=assembly, y=value, fill=assembly)) +
geom_bar(position='dodge', stat='identity') +
facet_grid(variable ~ ., scales="free") +
theme(strip.text.y = element_text(angle=0))
```
# Contig-level stats
## Load data
```{r}
library(reshape2)
setwd('~/Dropbox/ongoing_projects/feeding_transcriptome/final')
cstats <- read.csv('all_10M_10M.corr.Trinity.fasta_contigs.csv')
cstats$millionreads <- 10
cstats20 <- read.csv('all_10M_20M.corr.Trinity.fasta_contigs.csv')
cstats20$millionreads <- 20
cstats <- rbind(cstats, cstats20)
cstats50 <- read.csv('all_10M_50M.corr.Trinity.fasta_contigs.csv')
cstats50$millionreads <- 50
cstats <- rbind(cstats, cstats50)
cstats100 <- read.csv('all_10M_100M.corr.Trinity.fasta_contigs.csv')
cstats100$millionreads <- 100
cstats <- rbind(cstats, cstats100)
```
Plot the metrics
## Contig stats
### Length
```{r fig.width=12, fig.height=8}
library(ggplot2)
ggplot(cstats, aes(x=length, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### GC
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=prop_gc, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### GC skew
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=gc_skew, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### AT skew
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=at_skew, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### CpG count
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=cpg_count, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### Cpg ratio
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=cpg_ratio, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### Orf length
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=orf_length, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)') + scale_x_log10()
```
### Linguistic complexity (k=6)
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=linguistic_complexity_6, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
## Read metrics
### Uncovered bases
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=uncovered_bases, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)') + scale_x_log10()
```
### Mean coverage
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=mean_coverage, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)') + scale_x_log10()
ggplot(cstats, aes(y=mean_coverage, x=factor(millionreads))) +
geom_violin() +
labs(colour='Reads (x10^6)') + scale_y_log10()
```
### In bridges
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=in_bridges, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)') + scale_x_log10()
```
### In bridges
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=in_bridges, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)') + scale_x_log10()
```
### Edit distance per base
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=edit_distance_per_base, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
### Low uniqueness bases
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=low_uniqueness_bases, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)') + scale_x_log10()
```
### Low uniqueness bases (proportion)
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=p_low_uniqueness_bases, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
## Reference-based metrics
### Reference coverage
```{r fig.width=12, fig.height=8}
ggplot(cstats, aes(x=reference_coverage, colour=factor(millionreads))) +
geom_density() +
labs(colour='Reads (x10^6)')
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment