Skip to content

Instantly share code, notes, and snippets.

@FloWuenne
Last active August 10, 2017 00:26
Show Gist options
  • Save FloWuenne/f3f0071c4a0ca7a2b14e613ed8bfa102 to your computer and use it in GitHub Desktop.
Save FloWuenne/f3f0071c4a0ca7a2b14e613ed8bfa102 to your computer and use it in GitHub Desktop.
Create a cellview Rds object from a seurat expression object (updated for Seurat version 2) [MOUSE]
## Cellview export of data for user
### Rerun TSNE with 3 dimensions
```{r}
## Recompute the t-SNE embedding with three output dimensions, using the
## first 20 dimensions of the existing reduction as input.
## TRUE is spelled out because `T` is an ordinary variable that can be
## reassigned.
expression_seurat_cellview <- RunTSNE(
  expression_seurat,
  dims.use = 1:20,
  do.fast = TRUE,
  dim.embed = 3
)
```
### Rename gene names as required and prepare data
```{r}
library(Matrix)
library(dplyr)
library(clusterProfiler)
## Cells vs genes expression matrix
expression_cellview <- as.matrix(expression_seurat_cellview@data)

## Make sure that only cells that have a t-SNE mapping are included.
## drop = FALSE keeps a matrix even when a single cell is selected.
tsne_cells <- rownames(expression_seurat_cellview@dr$tsne@cell.embeddings)
expression_cellview <- expression_cellview[, tsne_cells, drop = FALSE]

## Create a table to keep track of the Seurat gene names and the row each
## one occupies in the expression matrix. Names are kept as character so the
## join below does not have to coerce a factor.
seurat_gene_ids <- data.frame(
  Name = rownames(expression_cellview),
  Rownumber = seq_len(nrow(expression_cellview)),
  stringsAsFactors = FALSE
)

## Use bitr to translate gene symbols to Ensembl gene IDs.
## NOTE: despite its name, `entrez_ids` holds SYMBOL -> ENSEMBL mappings.
entrez_ids <- bitr(
  seurat_gene_ids$Name,
  fromType = "SYMBOL",
  toType = "ENSEMBL",
  OrgDb = "org.Mm.eg.db"
)
colnames(entrez_ids) <- c("Name", "ENSG")
ens_name_seurat <- full_join(entrez_ids, seurat_gene_ids, by = "Name")

## Filter out gene IDs that do not exist in the Seurat data (no Rownumber)
## and genes whose name was not found in the Ensembl mapping (likely a
## synonym). For now these are dropped; in the future try to remap them via
## other synonyms.
ens_name_seurat <- ens_name_seurat %>%
  dplyr::filter(!is.na(Rownumber), !is.na(ENSG)) %>%
  dplyr::arrange(Rownumber)

## Remove genes that map to multiple Ensembl IDs (every copy is dropped,
## not just the repeated ones)
duplicated_gene_names <- ens_name_seurat[duplicated(ens_name_seurat$Name), ]$Name
ens_name_seurat_unique <- ens_name_seurat %>%
  dplyr::filter(!(Name %in% duplicated_gene_names))

## Use the Ensembl IDs as rownames for the expression table. Rows are picked
## by their recorded position so that each ENSG label is guaranteed to land
## on the row it was derived from.
log2cpm <- expression_cellview[ens_name_seurat_unique$Rownumber, , drop = FALSE]
rownames(log2cpm) <- ens_name_seurat_unique$ENSG
log2cpm <- as.data.frame(log2cpm)
## t-SNE clustering matrix
## Subset metadata to only contain cluster identities (column res.2)
## NOTE(review): the object name on this line was reconstructed from a
## garbled source; confirm it should be expression_seurat_cellview.
cluster_identities <- expression_seurat_cellview@meta.data %>%
  dplyr::select(res.2)
cluster_identities$rownames <- rownames(cluster_identities)

## Make a new data frame that contains the 3D t-SNE mappings and the
## cluster identities, joined on the cell name
tsne_mappings <- as.data.frame(expression_seurat_cellview@dr$tsne@cell.embeddings)
tsne_mappings$rownames <- rownames(expression_seurat_cellview@dr$tsne@cell.embeddings)
tsne.data <- full_join(tsne_mappings, cluster_identities, by = "rownames")

## Remove the joining column
tsne.data <- tsne.data %>%
  dplyr::select(-rownames)

## Rename and transform data in the new t-SNE data frame.
## Exactly three embedding columns plus the cluster column are expected.
stopifnot(ncol(tsne.data) == 4)
colnames(tsne.data) <- c("V1", "V2", "V3", "dbCluster")
tsne.data <- as.data.frame(tsne.data)
tsne.data$V1 <- as.numeric(tsne.data$V1)
tsne.data$V2 <- as.numeric(tsne.data$V2)
tsne.data$V3 <- as.numeric(tsne.data$V3)
## Go through character first: as.numeric() on a factor returns the internal
## level codes rather than the cluster labels.
tsne.data$dbCluster <- as.numeric(as.character(tsne.data$dbCluster))
## Featuredata matrix
## Location of the feature data table; the first column is used as the
## row names. Adjust this path to your own installation.
featuredata_path <- "/media/florian/2da9b7a2-823d-44b6-97f8-993cbc54b240/Programs/CellView-master/Featuredata/MM10_v74_FeatureData.csv"
if (!file.exists(featuredata_path)) {
  stop("Feature data file not found: ", featuredata_path, call. = FALSE)
}
featuredata <- read.table(
  featuredata_path,
  sep = ",",
  header = TRUE,
  row.names = 1
)

## Keep only the features that are present in the expression table.
## drop = FALSE keeps a data frame even if a single column remains.
featuredata <- featuredata[rownames(featuredata) %in% rownames(log2cpm), , drop = FALSE]
```
### Save the Cellview object into an Rds file
```{r}
## Bundle the three objects into one file. save() writes the RData format,
## so despite the ".Rds" extension the file is read back with load(),
## not readRDS().
save(
  list = c("log2cpm", "featuredata", "tsne.data"),
  file = "Seurat_cellview.Rds"
)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment