Last active
August 10, 2017 00:26
-
-
Save FloWuenne/f3f0071c4a0ca7a2b14e613ed8bfa102 to your computer and use it in GitHub Desktop.
Create a cellview Rds object from a seurat expression object (updated for Seurat version 2) [MOUSE]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## CellView export of data for user
### Rerun t-SNE with 3 dimensions
```{r}
## Re-run t-SNE on the existing Seurat object, embedding into 3 dimensions
## (dim.embed = 3) from the first 20 dimensions (dims.use = 1:20).
## The result is stored in a new object so `expression_seurat` is untouched.
expression_seurat_cellview <- RunTSNE(
  expression_seurat,
  dims.use = 1:20,
  do.fast = TRUE,
  dim.embed = 3
)
```
### Rename gene names as required and prepare data
```{r}
library(Matrix)
library(dplyr)
library(clusterProfiler)

## Expression matrix taken from the Seurat @data slot. Its row names are used
## as gene symbols and its columns are selected by cell name below.
expression_cellview <- as.matrix(expression_seurat_cellview@data)

## Make sure that only cells that have a t-SNE mapping are included, in the
## same order as the embedding rows.
tsne_cell_names <- rownames(expression_seurat_cellview@dr$tsne@cell.embeddings)
expression_cellview <- expression_cellview[, tsne_cell_names, drop = FALSE]

## Table that keeps track of each gene symbol and its row position in the
## expression matrix. Names are kept as character so they join cleanly with
## the bitr() output.
seurat_gene_ids <- data.frame(
  Name = rownames(expression_cellview),
  Rownumber = seq_len(nrow(expression_cellview)),
  stringsAsFactors = FALSE
)

## Use bitr to map gene symbols to Ensembl gene IDs (mouse annotation).
## Despite its name, `entrez_ids` holds the SYMBOL -> ENSEMBL mapping.
entrez_ids <- bitr(
  seurat_gene_ids$Name,
  fromType = "SYMBOL",
  toType = "ENSEMBL",
  OrgDb = "org.Mm.eg.db"
)
colnames(entrez_ids) <- c("Name", "ENSG")
ens_name_seurat <- dplyr::full_join(entrez_ids, seurat_gene_ids, by = "Name")

## Filter out gene IDs that do not exist in the Seurat data and genes whose
## name was not found in the ENSG table (likely a synonym).
## For now these are dropped; in the future try to remap via other synonyms.
ens_name_seurat <- ens_name_seurat %>%
  dplyr::filter(!is.na(Rownumber), !is.na(ENSG)) %>%
  dplyr::arrange(Rownumber)

## Remove genes that have multiple Ensembl IDs (all copies are dropped).
duplicated_gene_names <-
  ens_name_seurat$Name[duplicated(ens_name_seurat$Name)]
ens_name_seurat_unique <- ens_name_seurat %>%
  dplyr::filter(!(Name %in% duplicated_gene_names))

## Row names of a data frame must be unique, so also drop Ensembl IDs that
## are still reached from more than one remaining gene symbol.
duplicated_ensg <-
  ens_name_seurat_unique$ENSG[duplicated(ens_name_seurat_unique$ENSG)]
ens_name_seurat_unique <- ens_name_seurat_unique %>%
  dplyr::filter(!(ENSG %in% duplicated_ensg))

## Actually use ENSG ids as rownames for the expression table. Rows are
## selected by their tracked position, so the i-th row of `log2cpm` is
## guaranteed to belong to the i-th entry of `ens_name_seurat_unique`.
log2cpm <- expression_cellview[ens_name_seurat_unique$Rownumber, , drop = FALSE]
rownames(log2cpm) <- ens_name_seurat_unique$ENSG
log2cpm <- as.data.frame(log2cpm)
## t-SNE clustering matrix
## Subset metadata to only contain cluster identities (the res.2 column).
## NOTE(review): the object name on this line was unreadable in the source;
## assumed to be expression_seurat_cellview@meta.data -- confirm.
cluster_identities <- expression_seurat_cellview@meta.data %>%
  dplyr::select(res.2)
cluster_identities$rownames <- rownames(cluster_identities)

## Make a new data frame that contains the 3D t-SNE mappings and the cluster
## identities, joined on cell name.
tsne_embedding <- expression_seurat_cellview@dr$tsne@cell.embeddings
tsne_mappings <- as.data.frame(tsne_embedding)
tsne_mappings$rownames <- rownames(tsne_embedding)
tsne.data <- dplyr::full_join(tsne_mappings, cluster_identities,
                              by = "rownames")

## Remove the joining column, but remember the cell names so that rows stay
## identifiable afterwards.
tsne_cell_ids <- tsne.data$rownames
tsne.data <- tsne.data %>%
  dplyr::select(-rownames)

## Rename and transform data in the new t-SNE data frame.
## Exactly three embedding columns plus the cluster column are expected.
stopifnot(ncol(tsne.data) == 4)
colnames(tsne.data) <- c("V1", "V2", "V3", "dbCluster")
tsne.data <- as.data.frame(tsne.data)
rownames(tsne.data) <- tsne_cell_ids
tsne.data$V1 <- as.numeric(tsne.data$V1)
tsne.data$V2 <- as.numeric(tsne.data$V2)
tsne.data$V3 <- as.numeric(tsne.data$V3)

## Convert cluster labels through character: as.numeric() on a factor would
## return the internal level codes instead of the labels themselves.
cluster_labels <- as.character(tsne.data$dbCluster)
cluster_numbers <- suppressWarnings(as.numeric(cluster_labels))
if (anyNA(cluster_numbers[!is.na(cluster_labels)])) {
  ## Labels are not numeric; fall back to integer codes per distinct label.
  cluster_numbers <- as.numeric(factor(cluster_labels))
}
tsne.data$dbCluster <- cluster_numbers
## Featuredata matrix
## Location of the feature annotation table; the first column of the CSV is
## used as row names. Adjust this machine-specific path to your installation.
featuredata_file <- "/media/florian/2da9b7a2-823d-44b6-97f8-993cbc54b240/Programs/CellView-master/Featuredata/MM10_v74_FeatureData.csv"
if (!file.exists(featuredata_file)) {
  stop("Feature data file not found: ", featuredata_file, call. = FALSE)
}
featuredata <- read.table(
  featuredata_file,
  sep = ",",
  header = TRUE,
  row.names = 1
)

## Keep only the features whose ID is present as a row of `log2cpm`.
featuredata <- featuredata[
  rownames(featuredata) %in% rownames(log2cpm), ,
  drop = FALSE
]
```
### Save the CellView object into an Rds file
```{r}
## Write the three objects into a single file. Note that save() produces an
## RData-format file regardless of the ".Rds" extension, so it must be read
## back with load(), not readRDS().
## NOTE(review): presumably this is the format CellView expects -- confirm.
save(log2cpm, featuredata, tsne.data, file = "Seurat_cellview.Rds")
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment