FloWuenne · August 10, 2017 00:26
diff --git a/gistfile1.txt b/gistfile1.txt
 ## Cellview export of data for user

 ### Rerun TSNE with 3 dimensions

 ```{r}
 ## Re-run t-SNE on dimensions 1 to 20 and embed the cells in three dimensions.
 ## TRUE is spelled out because `T` is an ordinary variable that can be
 ## reassigned.
 expression_seurat_cellview <- RunTSNE(expression_seurat,
                                       dims.use = 1:20,
                                       do.fast = TRUE,
                                       dim.embed = 3)
 ```

 ### Rename gene names as required and prepare data

 ```{r}
 library(Matrix)
 library(dplyr)
 library(clusterProfiler)

 ## Expression matrix taken from the @data slot: genes in rows, cells in columns
 expression_cellview <- as.matrix(expression_seurat_cellview@data)

 ## Make sure that only cells that have a t-SNE mapping are included.
 ## In Seurat v2 objects the embedding matrix lives in
 ## object@dr$tsne@cell.embeddings, with one row per cell.
 ## drop = FALSE keeps a matrix even when only a single cell is selected.
 expression_cellview <- expression_cellview[
   , rownames(expression_seurat_cellview@dr$tsne@cell.embeddings), drop = FALSE]

 ## Table that records every Seurat gene symbol together with its row position
 ## in expression_cellview. Symbols are kept as character so the join below
 ## does not have to coerce a factor; seq_len() stays correct for zero rows.
 seurat_gene_ids <- data.frame(Name = rownames(expression_cellview),
                               Rownumber = seq_len(nrow(expression_cellview)),
                               stringsAsFactors = FALSE)

 ## Use bitr to translate gene symbols to Ensembl gene IDs
 ## (the variable keeps its historical name although it holds Ensembl IDs)
 entrez_ids <- bitr(seurat_gene_ids$Name,
                    fromType = "SYMBOL",
                    toType = "ENSEMBL",
                    OrgDb = "org.Mm.eg.db")
 colnames(entrez_ids) <- c("Name", "ENSG")

 ens_name_seurat <- full_join(entrez_ids, seurat_gene_ids, by = "Name")

 ## Filter out gene IDs that do not exist in the Seurat data.
 ## Filter out genes whose name was not found in the ENSG table (likely a
 ## synonym). For now these are dropped; in the future try to remap them to
 ## another synonym.
 ens_name_seurat <- ens_name_seurat %>%
   dplyr::filter(!is.na(Rownumber), !is.na(ENSG)) %>%
   dplyr::arrange(Rownumber)

 ## Remove gene symbols that map to more than one Ensembl ID
 duplicated_gene_names <- ens_name_seurat$Name[duplicated(ens_name_seurat$Name)]
 ens_name_seurat_unique <- ens_name_seurat %>%
   dplyr::filter(!(Name %in% duplicated_gene_names))

 ## Use the Ensembl IDs as row names of the exported expression table.
 ## Rows are selected by their recorded row number, so each expression row is
 ## explicitly paired with its Ensembl ID instead of relying on matching order.
 ## NOTE(review): two symbols may share one Ensembl ID -- confirm whether
 ## duplicated ENSG values need to be removed before export.
 log2cpm <- expression_cellview[ens_name_seurat_unique$Rownumber, , drop = FALSE]
 rownames(log2cpm) <- ens_name_seurat_unique$ENSG
 log2cpm <- as.data.frame(log2cpm)

 ## TSNE clustering matrix

 ## Subset metadata to only contain the cluster identities (column res.2).
 ## NOTE(review): the object name on this line was unreadable in the source and
 ## has been reconstructed as the Seurat v2 meta.data slot -- confirm.
 cluster_identities <- expression_seurat_cellview@meta.data %>%
   dplyr::select(res.2)

 cluster_identities$rownames <- rownames(cluster_identities)

 ## Make a new data frame that contains the 3D t-SNE mappings, keyed by the
 ## row names of the embedding matrix
 tsne_mappings <- as.data.frame(
   expression_seurat_cellview@dr$tsne@cell.embeddings)
 tsne_mappings$rownames <- rownames(
   expression_seurat_cellview@dr$tsne@cell.embeddings)

 ## Join coordinates and cluster identities on the shared key column. The join
 ## takes exactly two tables; an extra positional table would be matched to
 ## the `copy` argument.
 tsne.data <- full_join(tsne_mappings, cluster_identities, by = "rownames")

 ## Remove joining column rownames
 tsne.data <- tsne.data %>%
   dplyr::select(-rownames)

 ## Rename and transform data in the new tsne data frame
 colnames(tsne.data) <- c("V1", "V2", "V3", "dbCluster")
 tsne.data <- as.data.frame(tsne.data)
 tsne.data$V1 <- as.numeric(tsne.data$V1)
 tsne.data$V2 <- as.numeric(tsne.data$V2)
 tsne.data$V3 <- as.numeric(tsne.data$V3)
 ## Convert via character so that a factor column yields its cluster labels
 ## rather than the internal level codes.
 tsne.data$dbCluster <- as.numeric(as.character(tsne.data$dbCluster))

 ## Featuredata matrix
 ## Read the feature annotation table; its first column becomes the row names.
 ## TRUE is spelled out because `T` can be reassigned.
 featuredata <- read.table("/media/florian/2da9b7a2-823d-44b6-97f8-993cbc54b240/Programs/CellView-master/Featuredata/MM10_v74_FeatureData.csv",
                           sep = ",",
                           header = TRUE,
                           row.names = 1)

 ## Keep only the features whose row name is also a row name of log2cpm
 featuredata <- featuredata %>%
   subset(rownames(featuredata) %in% rownames(log2cpm))

 ```

 ### Save the Cellview object into an Rds file

 ```{r}
 ## Write the three exported objects into one file. save() uses R's save/load
 ## format regardless of the .Rds extension, so the file is read back with
 ## load(), not readRDS().
 save(list = c("log2cpm", "featuredata", "tsne.data"),
      file = "Seurat_cellview.Rds")
 ```
	## Cellview export of data for user

	### Rerun TSNE with 3 dimensions

	```{r}
	## Re-run t-SNE on dimensions 1 to 20 and embed the cells in three dimensions.
	## TRUE is spelled out because `T` is an ordinary variable that can be
	## reassigned.
	expression_seurat_cellview <- RunTSNE(expression_seurat,
	                                      dims.use = 1:20,
	                                      do.fast = TRUE,
	                                      dim.embed = 3)
	```

	### Rename gene names as required and prepare data

	```{r}
	library(Matrix)
	library(dplyr)
	library(clusterProfiler)

	## Expression matrix taken from the @data slot: genes in rows, cells in columns
	expression_cellview <- as.matrix(expression_seurat_cellview@data)

	## Make sure that only cells that have a t-SNE mapping are included.
	## In Seurat v2 objects the embedding matrix lives in
	## object@dr$tsne@cell.embeddings, with one row per cell.
	## drop = FALSE keeps a matrix even when only a single cell is selected.
	expression_cellview <- expression_cellview[
	  , rownames(expression_seurat_cellview@dr$tsne@cell.embeddings), drop = FALSE]

	## Table that records every Seurat gene symbol together with its row position
	## in expression_cellview. Symbols are kept as character so the join below
	## does not have to coerce a factor; seq_len() stays correct for zero rows.
	seurat_gene_ids <- data.frame(Name = rownames(expression_cellview),
	                              Rownumber = seq_len(nrow(expression_cellview)),
	                              stringsAsFactors = FALSE)

	## Use bitr to translate gene symbols to Ensembl gene IDs
	## (the variable keeps its historical name although it holds Ensembl IDs)
	entrez_ids <- bitr(seurat_gene_ids$Name,
	                   fromType = "SYMBOL",
	                   toType = "ENSEMBL",
	                   OrgDb = "org.Mm.eg.db")
	colnames(entrez_ids) <- c("Name", "ENSG")

	ens_name_seurat <- full_join(entrez_ids, seurat_gene_ids, by = "Name")

	## Filter out gene IDs that do not exist in the Seurat data.
	## Filter out genes whose name was not found in the ENSG table (likely a
	## synonym). For now these are dropped; in the future try to remap them to
	## another synonym.
	ens_name_seurat <- ens_name_seurat %>%
	  dplyr::filter(!is.na(Rownumber), !is.na(ENSG)) %>%
	  dplyr::arrange(Rownumber)

	## Remove gene symbols that map to more than one Ensembl ID
	duplicated_gene_names <- ens_name_seurat$Name[duplicated(ens_name_seurat$Name)]
	ens_name_seurat_unique <- ens_name_seurat %>%
	  dplyr::filter(!(Name %in% duplicated_gene_names))

	## Use the Ensembl IDs as row names of the exported expression table.
	## Rows are selected by their recorded row number, so each expression row is
	## explicitly paired with its Ensembl ID instead of relying on matching order.
	## NOTE(review): two symbols may share one Ensembl ID -- confirm whether
	## duplicated ENSG values need to be removed before export.
	log2cpm <- expression_cellview[ens_name_seurat_unique$Rownumber, , drop = FALSE]
	rownames(log2cpm) <- ens_name_seurat_unique$ENSG
	log2cpm <- as.data.frame(log2cpm)

	## TSNE clustering matrix

	## Subset metadata to only contain the cluster identities (column res.2).
	## NOTE(review): the object name on this line was unreadable in the source and
	## has been reconstructed as the Seurat v2 meta.data slot -- confirm.
	cluster_identities <- expression_seurat_cellview@meta.data %>%
	  dplyr::select(res.2)

	cluster_identities$rownames <- rownames(cluster_identities)

	## Make a new data frame that contains the 3D t-SNE mappings, keyed by the
	## row names of the embedding matrix
	tsne_mappings <- as.data.frame(
	  expression_seurat_cellview@dr$tsne@cell.embeddings)
	tsne_mappings$rownames <- rownames(
	  expression_seurat_cellview@dr$tsne@cell.embeddings)

	## Join coordinates and cluster identities on the shared key column. The join
	## takes exactly two tables; an extra positional table would be matched to
	## the `copy` argument.
	tsne.data <- full_join(tsne_mappings, cluster_identities, by = "rownames")

	## Remove joining column rownames
	tsne.data <- tsne.data %>%
	  dplyr::select(-rownames)

	## Rename and transform data in the new tsne data frame
	colnames(tsne.data) <- c("V1", "V2", "V3", "dbCluster")
	tsne.data <- as.data.frame(tsne.data)
	tsne.data$V1 <- as.numeric(tsne.data$V1)
	tsne.data$V2 <- as.numeric(tsne.data$V2)
	tsne.data$V3 <- as.numeric(tsne.data$V3)
	## Convert via character so that a factor column yields its cluster labels
	## rather than the internal level codes.
	tsne.data$dbCluster <- as.numeric(as.character(tsne.data$dbCluster))

	## Featuredata matrix
	## Read the feature annotation table; its first column becomes the row names.
	## TRUE is spelled out because `T` can be reassigned.
	featuredata <- read.table("/media/florian/2da9b7a2-823d-44b6-97f8-993cbc54b240/Programs/CellView-master/Featuredata/MM10_v74_FeatureData.csv",
	                          sep = ",",
	                          header = TRUE,
	                          row.names = 1)

	## Keep only the features whose row name is also a row name of log2cpm
	featuredata <- featuredata %>%
	  subset(rownames(featuredata) %in% rownames(log2cpm))

	```

	### Save the Cellview object into an Rds file

	```{r}
	## Write the three exported objects into one file. save() uses R's save/load
	## format regardless of the .Rds extension, so the file is read back with
	## load(), not readRDS().
	save(list = c("log2cpm", "featuredata", "tsne.data"),
	     file = "Seurat_cellview.Rds")
	```