MarcinKosinski · July 5, 2017 18:00
diff --git a/provide_genomic_analysis_on_tcga_mutations_data.R b/provide_genomic_analysis_on_tcga_mutations_data.R
 extractSurvival <- function(cohorts){
  
  survivalData <- list()
  for(i in cohorts){
    get(paste0(i, ".clinical"), envir = .GlobalEnv) %>%
 				select(patient.bcr_patient_barcode,
 							 patient.vital_status,
 							 patient.days_to_last_followup,
 							 patient.days_to_death ) %>%
 				mutate(bcr_patient_barcode = toupper(patient.bcr_patient_barcode),
 				       patient.vital_status = ifelse(patient.vital_status %>%
 												 as.character() =="dead",1,0),
    		       barcode = patient.bcr_patient_barcode %>%
 					 	             as.character(),
      		     times = ifelse( !is.na(patient.days_to_last_followup),
                      patient.days_to_last_followup %>%
                     	as.character() %>%
                     	as.numeric(),
               patient.days_to_death %>%
                     	as.character() %>%
                     	as.numeric() )
 					 ) %>%
   filter(!is.na(times)) -> survivalData[[i]]
  
  
  }
  do.call(rbind,survivalData) %>%
    select(bcr_patient_barcode, patient.vital_status, times) %>%
    unique  

 }


 extractMutations <- function(cohorts, prc){
  mutationsData <- list()
  for(i in cohorts){
    get(paste0(i, ".mutations"), envir = .GlobalEnv) %>%
      select(Hugo_Symbol, bcr_patient_barcode) %>%
      filter(nchar(bcr_patient_barcode)==15) %>%
      filter(substr(bcr_patient_barcode, 14, 15)=="01") %>%
      unique -> mutationsData[[i]]
  }
  do.call(rbind,mutationsData) %>% unique -> mutationsData
  
  mutationsData %>%
    group_by(Hugo_Symbol) %>%
    summarise(count = n()) %>%
    arrange(desc(count)) %>%
    mutate(count_prc = count/length(unique(mutationsData$bcr_patient_barcode))) %>%
    filter_(paste0("count_prc > ",prc)) %>%
    select(Hugo_Symbol) %>%
    unlist -> topGenes
  
  mutationsData %>%
    filter(Hugo_Symbol %in% topGenes) -> mutationsData_top
  
  
  mutationsData_top %>%
    dplyr::group_by(bcr_patient_barcode) %>%
    dplyr::summarise(count = n()) %>%
    group_by(count) %>% 
    summarise(total = n()) %>%
    arrange(desc(count))
 #   
 #   mutationsData_top %>%
 #     spread(Hugo_Symbol, bcr_patient_barcode) -> mutationsData_top_sp
  
  as.data.table(mutationsData_top) -> mutationsData_top_DT
  dcast.data.table(mutationsData_top_DT, bcr_patient_barcode ~ Hugo_Symbol , fill = 0) %>%
    as.data.frame -> mutationsData_top_dcasted
  
  mutationsData_top_dcasted[,-1][mutationsData_top_dcasted[,-1] != "0"] <- 1

  mutationsData_top_dcasted -> result
  names(result) <- gsub(names(result),pattern = "-",  replacement = "")
  result
 }


 extractCohortIntersection <- function(){
  
  data(package = "RTCGA.mutations")$results[,3] %>%
    gsub(".mutations", "", x = .) -> mutations_data
  data(package = "RTCGA.clinical")$results[,3] %>%
    gsub(".clinical", "", x = .) -> clinical_data
  
  intersect(mutations_data, clinical_data)
 }

 prepareCoxDataSplit <- function(mutationsData, survivalData, groups, seed = 4561){
  mutationsData %>%
  mutate(bcr_patient_barcode = substr(bcr_patient_barcode,1,12)) %>%
  left_join(survivalData,
            by = "bcr_patient_barcode") -> coxData

  coxData <- coxData[, -c(1,2)]
  
  coxData %>%
    filter(times > 0) %>%
    filter(!is.na(times)) -> coxData
  
  apply(coxData[,-c(1092, 1093)], MARGIN = 2,function(x){
    as.numeric(as.character(x))
  }) -> coxData[,-c(1092, 1093)]
  
  set.seed(seed)
  sample(groups, replace = TRUE, size = 6085) -> groups
  split(coxData, groups) #coxData_split
 }

 prepareForumlaSGD <- function(coxData){
  as.formula(paste("Surv(times, patient.vital_status) ~ ",
                   paste(names(coxData[[1]])[-c(1092, 1093)],
                         collapse="+"), collapse = ""))
 }


 full_cox_loglik_matrix <- function(beta, x, censored){
  order(x$times) -> order2
  x[order2, ] -> xORD
  censored[order2] -> censORD
  sum(censORD*(beta%*%x[, -which(names(x)=='times')] -
                       log(cumsum(exp(beta1*rev(x1) + beta2*rev(x2))))))
 }



 library(dplyr)
 library(RTCGA.clinical)
 library(RTCGA.mutations)
 library(data.table)
 library(coxphSGD)
 library(archivist)
 (extractCohortIntersection() -> cohorts)
 head(extractSurvival(cohorts) -> survivalData)
 extractMutations(cohorts, 0.02) -> mutationsData
 mutationsData[1:8, c(1,4,56,100,207,801)]
 set.seed(4561)
 prepareCoxDataSplit(mutationsData,survivalData, groups = 100) -> coxData_split
 head(coxData_split[[1]][c(1,10), c(210,302,356,898,911,1092:1093)])
 prepareForumlaSGD(coxData_split) -> formulaSGD
 coxData_split[99:100] -> testCox
 coxData_split[1:98] -> trainCox
 coxphSGD(formulaSGD, data = trainCox, max.iter = 490) -> model_1_over_t
 coxphSGD(formulaSGD, data = trainCox, max.iter = 490,
         learningRates = function(t){1/(50*sqrt(t))}) -> model_1_over_50sqrt_t
 coxphSGD(formulaSGD, data = trainCox, max.iter = 490,
         learningRates = function(t){1/(100*sqrt(t))}) -> model_1_over_100sqrt_t
 saveToRepo(testCox)
 saveToRepo(trainCox)
 saveToRepo(model_1_over_t)
 saveToRepo(model_1_over_50sqrt_t)
 saveToRepo(model_1_over_100sqrt_t)
 model_1_over_t$coefficients[c(98, 196, 294, 392, 490)] -> coeffs_1_overt
 do.call(rbind, coeffs_1_overt) %>%
  as.data.frame %>%
  mutate(epoka = 1:5) %>%
  tidyr::gather(key=epoka) ->gat
 names(gat)[2]<- "gen"
 library(ggplot2)
 gat %>%
  ggplot(aes(value)) + geom_histogram(fill=4, col = 1, binwidth =  0.01) +
  facet_grid(epoka~.) +
  theme_classic(base_size = 15) +
  ylab('liczność') + xlab('wartości współczynników modelu') +
  ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') + coord_cartesian(xlim = c(-0.5,0.5)) -> fig1


 model_1_over_50sqrt_t$coefficients[c(98, 196, 294, 392, 490)] -> coeffs_over_50sqrt_t
 do.call(rbind, coeffs_over_50sqrt_t) %>%
  as.data.frame %>%
  mutate(epoka = 1:5) %>%
  tidyr::gather(key=epoka) ->gat2
 names(gat2)[2]<- "gen"
 library(ggplot2)
 gat2 %>%
  ggplot(aes(value)) + geom_histogram(fill=5, col = 1, binwidth =  0.01) +
  facet_grid(epoka~.) +
  theme_classic(base_size = 15) +
  ylab('liczność') + xlab('wartości współczynników modelu') +
  ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') + coord_cartesian(xlim = c(-0.25,0.25)) -> fig2

 model_1_over_100sqrt_t$coefficients[c(98, 196, 294, 392, 490)] -> coeffs_over_100sqrt_t
 do.call(rbind, coeffs_over_100sqrt_t) %>%
  as.data.frame %>%
  mutate(epoka = 1:5) %>%
  tidyr::gather(key=epoka) ->gat3
 names(gat3)[2]<- "gen"
 gat3 %>%
  ggplot(aes(value)) + geom_histogram(fill=6, col = 1, binwidth =  0.01) +
  facet_grid(epoka~.) +
  theme_classic(base_size = 15) +
  ylab('liczność') + xlab('wartości współczynników modelu') +
  ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') + coord_cartesian(xlim = c(-0.1,0.1)) -> fig3



 gat %>%
  mutate(model = "model1") %>%
  bind_rows(gat2 %>%
              mutate(model = "model2")) %>% 
  bind_rows(gat3 %>%
              mutate(model = "model3")) %>% 
  ggplot(aes(value)) + geom_histogram(fill=4, col = 1, binwidth = 0.001) +
  facet_grid(epoka~model) +
  theme_classic(base_size = 15) +
  ylab('liczność') + xlab('wartość współczynnika modelu') +
  ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') -> fig4

 do.call(rbind, coeffs_over_100sqrt_t) %>% t %>%  as.data.frame -> model3
 do.call(rbind, coeffs_over_50sqrt_t) %>% t %>%  as.data.frame -> model2
 do.call(rbind, coeffs_1_overt) %>% t %>%  as.data.frame -> model1
 names(model3) <- c("epoka1", "epoka2","epoka3", "epoka4", "epoka5")
 names(model2) <- c("epoka1", "epoka2","epoka3", "epoka4", "epoka5")
 names(model1) <- c("epoka1", "epoka2","epoka3", "epoka4", "epoka5")

 library(GGally)
 ggpairs(  title = 'Różnice między współczynnikami w kolejnych epokach',
  model1, 
  upper = "blank",
  lower = list(continuous = "points")
 ) -> fig5 


 saveToRepo(fig1) # 3dc2e14a037bbaae9e892dd255150c28
 saveToRepo(fig2) # 6e5065abdbeae888a2836c1e59460424
 saveToRepo(fig3) # ac9a06e9c6428581cdacb2c1e56e5b57
 saveToRepo(fig4) # 60e327d310c600493c6c661f7330b868
 saveToRepo(fig5) # db5b77ff56b7128244e88d905173cf22

 pdf('hist_overt_t.pdf', width = 10, height = 8 )
 fig2
 dev.off()

 alink('3dc2e14a037bbaae9e892dd255150c28', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
 alink('6e5065abdbeae888a2836c1e59460424', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
 alink('ac9a06e9c6428581cdacb2c1e56e5b57', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
 alink('60e327d310c600493c6c661f7330b868', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
 alink('db5b77ff56b7128244e88d905173cf22', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')



 names(tail(sort(abs(tail(model_1_over_t$coefficients,1)[[1]])),40)) -> top40
 tail(model_1_over_t$coefficients,1)[[1]][which(names(tail(model_1_over_t$coefficients,1)[[1]]) %in% top40)] -> real_top40
 data.frame(beta = sort(real_top40, decreasing = TRUE),
           exp_beta = exp(sort(real_top40, decreasing = TRUE))) %>% xtable::xtable()


 do.call(rbind, trainCox) -> trainCox_df
 colMeans(trainCox_df[, -c(1092:1093)]) -> srednie_trainCox_df

 data.frame( beta =tail(model_1_over_t$coefficients,1)[[1]],
 beta_star = srednie_trainCox_df) -> wyniki

 data.frame( gen = rownames(wyniki),
            beta = round(wyniki[, 1], 3),
            exp_beta = round(exp(wyniki[, 1]),2),
            srednia = round(wyniki[, 2],3),
            beta_star = round(wyniki[, 1]/wyniki[, 2], 2) ) %>%
  arrange(desc(beta_star)) %>%
  .[1:40,] %>% xtable::xtable()


 aoptions('repoDir', getwd())
 loadFromLocalRepo('446ac4dcb7d65bf39057bb341b296f1a')

 do.call(rbind, model_1_over_t$coefficients) -> wspolczynniki
 qplot(1:491,wspolczynniki[ ,which(colnames(wspolczynniki) == "BRAF")]) +
  xlab('Krok algorytmu') +
  ylab('Wartość współczynnika') +
  theme_minimal(base_size = 20) +
  ggtitle('Wartości współczynnika genu BRAF') -> fig8
 qplot(1:491,wspolczynniki[ ,which(colnames(wspolczynniki) == "EGFR")]) +
 xlab('Krok algorytmu') +
  ylab('Wartość współczynnika') +
  theme_minimal(base_size = 20) +
  ggtitle('Wartości współczynnika genu EGFR') -> fig9

 saveToRepo(fig8) # 3cd8e1f6da766d3028fc2d8f5fef220f
 alink('3cd8e1f6da766d3028fc2d8f5fef220f', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
 saveToRepo(fig9) # 0f5e8e306cc2cba11fceb2bb5f34cbeb
 alink('0f5e8e306cc2cba11fceb2bb5f34cbeb', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')

 pdf('fig8.pdf', width = 10, height = 8 )
 fig8
 dev.off()
 pdf('fig9.pdf', width = 10, height = 8 )
 fig9
 dev.off()

 library(archivist)
 setLocalRepo(getwd())
 loadFromLocalRepo('1a06bef4a60a237bb65ca3e2f3f23515')
 loadFromLocalRepo('3eebc99bd231b16a3ea4dbeec9ab5edb')

 do.call(rbind,testCox) -> testCox_binded
 do.call(rbind,trainCox) -> trainCox_binded
 library(dplyr)
 library(survMisc)
 trainCox_binded %>%
  select(patient.vital_status, times, BRAF, EGFR) %>%
  survfit( Surv(times, patient.vital_status) ~ BRAF, data = .) %>%
  survMisc:::autoplot.survfit( titleSize=12, type="CI", palette = "Set1",
                               alpha = 0.7, survLineSize=2,
                               pX = 0.3, sigP =3, title = "Przeżycie a mutacja genu BRAF" ) -> km_plot

 xlims <- c(0,5000)
 library(ggplot2)
 km_plot[[1]] <- km_plot[[1]] +
  coord_cartesian(xlim = c(xlims[1],xlims[2])) +
  theme_light(base_size = 22) 
 km_plot[[2]] <- km_plot[[2]] +
   coord_cartesian(xlim = xlims)+
  theme_light(base_size = 22) 
 pdf("BRAF.pdf", width = 10, height = 8)
 survMisc::autoplot(km_plot)
 dev.off()
	extractSurvival <- function(cohorts){

	survivalData <- list()
	for(i in cohorts){
	get(paste0(i, ".clinical"), envir = .GlobalEnv) %>%
	select(patient.bcr_patient_barcode,
	patient.vital_status,
	patient.days_to_last_followup,
	patient.days_to_death ) %>%
	mutate(bcr_patient_barcode = toupper(patient.bcr_patient_barcode),
	patient.vital_status = ifelse(patient.vital_status %>%
	as.character() =="dead",1,0),
	barcode = patient.bcr_patient_barcode %>%
	as.character(),
	times = ifelse( !is.na(patient.days_to_last_followup),
	patient.days_to_last_followup %>%
	as.character() %>%
	as.numeric(),
	patient.days_to_death %>%
	as.character() %>%
	as.numeric() )
	) %>%
	filter(!is.na(times)) -> survivalData[[i]]


	}
	do.call(rbind,survivalData) %>%
	select(bcr_patient_barcode, patient.vital_status, times) %>%
	unique

	}


	extractMutations <- function(cohorts, prc){
	mutationsData <- list()
	for(i in cohorts){
	get(paste0(i, ".mutations"), envir = .GlobalEnv) %>%
	select(Hugo_Symbol, bcr_patient_barcode) %>%
	filter(nchar(bcr_patient_barcode)==15) %>%
	filter(substr(bcr_patient_barcode, 14, 15)=="01") %>%
	unique -> mutationsData[[i]]
	}
	do.call(rbind,mutationsData) %>% unique -> mutationsData

	mutationsData %>%
	group_by(Hugo_Symbol) %>%
	summarise(count = n()) %>%
	arrange(desc(count)) %>%
	mutate(count_prc = count/length(unique(mutationsData$bcr_patient_barcode))) %>%
	filter_(paste0("count_prc > ",prc)) %>%
	select(Hugo_Symbol) %>%
	unlist -> topGenes

	mutationsData %>%
	filter(Hugo_Symbol %in% topGenes) -> mutationsData_top


	mutationsData_top %>%
	dplyr::group_by(bcr_patient_barcode) %>%
	dplyr::summarise(count = n()) %>%
	group_by(count) %>%
	summarise(total = n()) %>%
	arrange(desc(count))
	#
	# mutationsData_top %>%
	# spread(Hugo_Symbol, bcr_patient_barcode) -> mutationsData_top_sp

	as.data.table(mutationsData_top) -> mutationsData_top_DT
	dcast.data.table(mutationsData_top_DT, bcr_patient_barcode ~ Hugo_Symbol , fill = 0) %>%
	as.data.frame -> mutationsData_top_dcasted

	mutationsData_top_dcasted[,-1][mutationsData_top_dcasted[,-1] != "0"] <- 1

	mutationsData_top_dcasted -> result
	names(result) <- gsub(names(result),pattern = "-", replacement = "")
	result
	}


	extractCohortIntersection <- function(){

	data(package = "RTCGA.mutations")$results[,3] %>%
	gsub(".mutations", "", x = .) -> mutations_data
	data(package = "RTCGA.clinical")$results[,3] %>%
	gsub(".clinical", "", x = .) -> clinical_data

	intersect(mutations_data, clinical_data)
	}

	prepareCoxDataSplit <- function(mutationsData, survivalData, groups, seed = 4561){
	mutationsData %>%
	mutate(bcr_patient_barcode = substr(bcr_patient_barcode,1,12)) %>%
	left_join(survivalData,
	by = "bcr_patient_barcode") -> coxData

	coxData <- coxData[, -c(1,2)]

	coxData %>%
	filter(times > 0) %>%
	filter(!is.na(times)) -> coxData

	apply(coxData[,-c(1092, 1093)], MARGIN = 2,function(x){
	as.numeric(as.character(x))
	}) -> coxData[,-c(1092, 1093)]

	set.seed(seed)
	sample(groups, replace = TRUE, size = 6085) -> groups
	split(coxData, groups) #coxData_split
	}

	prepareForumlaSGD <- function(coxData){
	as.formula(paste("Surv(times, patient.vital_status) ~ ",
	paste(names(coxData[[1]])[-c(1092, 1093)],
	collapse="+"), collapse = ""))
	}


	full_cox_loglik_matrix <- function(beta, x, censored){
	order(x$times) -> order2
	x[order2, ] -> xORD
	censored[order2] -> censORD
	sum(censORD(beta%%x[, -which(names(x)=='times')] -
	log(cumsum(exp(beta1rev(x1) + beta2rev(x2))))))
	}



	library(dplyr)
	library(RTCGA.clinical)
	library(RTCGA.mutations)
	library(data.table)
	library(coxphSGD)
	library(archivist)
	(extractCohortIntersection() -> cohorts)
	head(extractSurvival(cohorts) -> survivalData)
	extractMutations(cohorts, 0.02) -> mutationsData
	mutationsData[1:8, c(1,4,56,100,207,801)]
	set.seed(4561)
	prepareCoxDataSplit(mutationsData,survivalData, groups = 100) -> coxData_split
	head(coxData_split[[1]][c(1,10), c(210,302,356,898,911,1092:1093)])
	prepareForumlaSGD(coxData_split) -> formulaSGD
	coxData_split[99:100] -> testCox
	coxData_split[1:98] -> trainCox
	coxphSGD(formulaSGD, data = trainCox, max.iter = 490) -> model_1_over_t
	coxphSGD(formulaSGD, data = trainCox, max.iter = 490,
	learningRates = function(t){1/(50*sqrt(t))}) -> model_1_over_50sqrt_t
	coxphSGD(formulaSGD, data = trainCox, max.iter = 490,
	learningRates = function(t){1/(100*sqrt(t))}) -> model_1_over_100sqrt_t
	saveToRepo(testCox)
	saveToRepo(trainCox)
	saveToRepo(model_1_over_t)
	saveToRepo(model_1_over_50sqrt_t)
	saveToRepo(model_1_over_100sqrt_t)
	model_1_over_t$coefficients[c(98, 196, 294, 392, 490)] -> coeffs_1_overt
	do.call(rbind, coeffs_1_overt) %>%
	as.data.frame %>%
	mutate(epoka = 1:5) %>%
	tidyr::gather(key=epoka) ->gat
	names(gat)[2]<- "gen"
	library(ggplot2)
	gat %>%
	ggplot(aes(value)) + geom_histogram(fill=4, col = 1, binwidth = 0.01) +
	facet_grid(epoka~.) +
	theme_classic(base_size = 15) +
	ylab('liczność') + xlab('wartości współczynników modelu') +
	ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') + coord_cartesian(xlim = c(-0.5,0.5)) -> fig1


	model_1_over_50sqrt_t$coefficients[c(98, 196, 294, 392, 490)] -> coeffs_over_50sqrt_t
	do.call(rbind, coeffs_over_50sqrt_t) %>%
	as.data.frame %>%
	mutate(epoka = 1:5) %>%
	tidyr::gather(key=epoka) ->gat2
	names(gat2)[2]<- "gen"
	library(ggplot2)
	gat2 %>%
	ggplot(aes(value)) + geom_histogram(fill=5, col = 1, binwidth = 0.01) +
	facet_grid(epoka~.) +
	theme_classic(base_size = 15) +
	ylab('liczność') + xlab('wartości współczynników modelu') +
	ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') + coord_cartesian(xlim = c(-0.25,0.25)) -> fig2

	model_1_over_100sqrt_t$coefficients[c(98, 196, 294, 392, 490)] -> coeffs_over_100sqrt_t
	do.call(rbind, coeffs_over_100sqrt_t) %>%
	as.data.frame %>%
	mutate(epoka = 1:5) %>%
	tidyr::gather(key=epoka) ->gat3
	names(gat3)[2]<- "gen"
	gat3 %>%
	ggplot(aes(value)) + geom_histogram(fill=6, col = 1, binwidth = 0.01) +
	facet_grid(epoka~.) +
	theme_classic(base_size = 15) +
	ylab('liczność') + xlab('wartości współczynników modelu') +
	ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') + coord_cartesian(xlim = c(-0.1,0.1)) -> fig3



	gat %>%
	mutate(model = "model1") %>%
	bind_rows(gat2 %>%
	mutate(model = "model2")) %>%
	bind_rows(gat3 %>%
	mutate(model = "model3")) %>%
	ggplot(aes(value)) + geom_histogram(fill=4, col = 1, binwidth = 0.001) +
	facet_grid(epoka~model) +
	theme_classic(base_size = 15) +
	ylab('liczność') + xlab('wartość współczynnika modelu') +
	ggtitle('Histogramy wartości współczynników modelu po kolejnych epokach') -> fig4

	do.call(rbind, coeffs_over_100sqrt_t) %>% t %>% as.data.frame -> model3
	do.call(rbind, coeffs_over_50sqrt_t) %>% t %>% as.data.frame -> model2
	do.call(rbind, coeffs_1_overt) %>% t %>% as.data.frame -> model1
	names(model3) <- c("epoka1", "epoka2","epoka3", "epoka4", "epoka5")
	names(model2) <- c("epoka1", "epoka2","epoka3", "epoka4", "epoka5")
	names(model1) <- c("epoka1", "epoka2","epoka3", "epoka4", "epoka5")

	library(GGally)
	ggpairs( title = 'Różnice między współczynnikami w kolejnych epokach',
	model1,
	upper = "blank",
	lower = list(continuous = "points")
	) -> fig5


	saveToRepo(fig1) # 3dc2e14a037bbaae9e892dd255150c28
	saveToRepo(fig2) # 6e5065abdbeae888a2836c1e59460424
	saveToRepo(fig3) # ac9a06e9c6428581cdacb2c1e56e5b57
	saveToRepo(fig4) # 60e327d310c600493c6c661f7330b868
	saveToRepo(fig5) # db5b77ff56b7128244e88d905173cf22

	pdf('hist_overt_t.pdf', width = 10, height = 8 )
	fig2
	dev.off()

	alink('3dc2e14a037bbaae9e892dd255150c28', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
	alink('6e5065abdbeae888a2836c1e59460424', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
	alink('ac9a06e9c6428581cdacb2c1e56e5b57', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
	alink('60e327d310c600493c6c661f7330b868', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
	alink('db5b77ff56b7128244e88d905173cf22', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')



	names(tail(sort(abs(tail(model_1_over_t$coefficients,1)[[1]])),40)) -> top40
	tail(model_1_over_t$coefficients,1)[[1]][which(names(tail(model_1_over_t$coefficients,1)[[1]]) %in% top40)] -> real_top40
	data.frame(beta = sort(real_top40, decreasing = TRUE),
	exp_beta = exp(sort(real_top40, decreasing = TRUE))) %>% xtable::xtable()


	do.call(rbind, trainCox) -> trainCox_df
	colMeans(trainCox_df[, -c(1092:1093)]) -> srednie_trainCox_df

	data.frame( beta =tail(model_1_over_t$coefficients,1)[[1]],
	beta_star = srednie_trainCox_df) -> wyniki

	data.frame( gen = rownames(wyniki),
	beta = round(wyniki[, 1], 3),
	exp_beta = round(exp(wyniki[, 1]),2),
	srednia = round(wyniki[, 2],3),
	beta_star = round(wyniki[, 1]/wyniki[, 2], 2) ) %>%
	arrange(desc(beta_star)) %>%
	.[1:40,] %>% xtable::xtable()


	aoptions('repoDir', getwd())
	loadFromLocalRepo('446ac4dcb7d65bf39057bb341b296f1a')

	do.call(rbind, model_1_over_t$coefficients) -> wspolczynniki
	qplot(1:491,wspolczynniki[ ,which(colnames(wspolczynniki) == "BRAF")]) +
	xlab('Krok algorytmu') +
	ylab('Wartość współczynnika') +
	theme_minimal(base_size = 20) +
	ggtitle('Wartości współczynnika genu BRAF') -> fig8
	qplot(1:491,wspolczynniki[ ,which(colnames(wspolczynniki) == "EGFR")]) +
	xlab('Krok algorytmu') +
	ylab('Wartość współczynnika') +
	theme_minimal(base_size = 20) +
	ggtitle('Wartości współczynnika genu EGFR') -> fig9

	saveToRepo(fig8) # 3cd8e1f6da766d3028fc2d8f5fef220f
	alink('3cd8e1f6da766d3028fc2d8f5fef220f', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')
	saveToRepo(fig9) # 0f5e8e306cc2cba11fceb2bb5f34cbeb
	alink('0f5e8e306cc2cba11fceb2bb5f34cbeb', repo = 'coxphSGD', user = 'MarcinKosinski', format = 'latex')

	pdf('fig8.pdf', width = 10, height = 8 )
	fig8
	dev.off()
	pdf('fig9.pdf', width = 10, height = 8 )
	fig9
	dev.off()

	library(archivist)
	setLocalRepo(getwd())
	loadFromLocalRepo('1a06bef4a60a237bb65ca3e2f3f23515')
	loadFromLocalRepo('3eebc99bd231b16a3ea4dbeec9ab5edb')

	do.call(rbind,testCox) -> testCox_binded
	do.call(rbind,trainCox) -> trainCox_binded
	library(dplyr)
	library(survMisc)
	trainCox_binded %>%
	select(patient.vital_status, times, BRAF, EGFR) %>%
	survfit( Surv(times, patient.vital_status) ~ BRAF, data = .) %>%
	survMisc:::autoplot.survfit( titleSize=12, type="CI", palette = "Set1",
	alpha = 0.7, survLineSize=2,
	pX = 0.3, sigP =3, title = "Przeżycie a mutacja genu BRAF" ) -> km_plot

	xlims <- c(0,5000)
	library(ggplot2)
	km_plot[[1]] <- km_plot[[1]] +
	coord_cartesian(xlim = c(xlims[1],xlims[2])) +
	theme_light(base_size = 22)
	km_plot[[2]] <- km_plot[[2]] +
	coord_cartesian(xlim = xlims)+
	theme_light(base_size = 22)
	pdf("BRAF.pdf", width = 10, height = 8)
	survMisc::autoplot(km_plot)
	dev.off()
No results found