Dr Nathan Green n8thangreen

Senior Researcher in Statistics and Health Economics in the Department of Statistical Science @stats-ucl.

n8thangreen / plot_CI_Natsal.R

Last active December 15, 2016 09:53

plot confidence interval plots of Natsal-3 variables against testing proportions

	library("plotrix", lib.loc="C:/Program Files/R/R-3.1.0/library")
	library(Hmisc)

	NATSAL.dat <<- read.csv("~/Chlamydia/data/NATSAL/Natsal-3_extract_2April2014.csv")


	plotCInatsal <- function(varname, save=TRUE){

	tab <- table(NATSAL.dat$cttestly=="yes", NATSAL.dat[,varname])
	tab.df <- data.frame(addmargins(tab))

n8thangreen / forestplot.R

Last active December 15, 2016 09:54

forest plot horizontal lines grouped

	library("rmeta", lib.loc="C:/Program Files/R/R-3.1.0/library")

	names <- c("Premature", "Not premature",
	NA,"Surgical","Not surgical",
	NA,"Congenital","Not congenital",
	NA,"Catheter","Not catheter",
	NA,"Other","Not other",
	NA,"Tai","Not Tai",
	NA,"Cancer","Not cancer")

n8thangreen / ReadinExcelWorkbook.R

Last active December 15, 2016 09:54

Read-in the whole of an Excel workbook and then extract whichever fields wanted

	## read from Excel workbook
	require(XLConnect)
	wb = loadWorkbook("C:/Users/ngreen1/Documents/IDEA/raw_data/TB_database_patientdata_300614.xlsx")
	tab.names <- getSheets(wb)
	TBlist <- sapply(1:length(tab.names), function(x) readWorksheet(wb, sheet=x, header=TRUE))
	names(TBlist) <- tab.names

	## extract only columns of interest
	require(plyr)
	extractNames <- readLines("C:/Users/ngreen1/Documents/IDEA/raw_data/relevant_fields.csv")

n8thangreen / covariate shift.R

Last active May 28, 2018 01:53

empirical and model based (logistic) training sample adjustment


	covariateShift <- function(data, resla, riskfac, ssize=10000){
	## importance sampling approach
	## when different distributions for the
	## training and test data

	require(plyr)

	Natsal.riskfac.table <- DistnTable(data, riskfac)
	Natsal.riskfac.table <- colNameReplace(Natsal.riskfac.table, "(all)", "Natsalfreq")

n8thangreen / TB simple regression.R

Last active December 15, 2016 09:54

	\documentclass{article}
	\usepackage[margin=0.5in]{geometry}

	\begin{document}
	\Sexpr{cat(sub("_", "\\\\\\_", version$platform))}

	% !Rnw root = TB_root.Rnw

	%% some of the output is missing when compiled to PDF
	%% when the regression is fit to models with lots of variable levels

n8thangreen / map_tb.R

Last active December 15, 2016 09:55

Plot UK choropleth maps developed using LA and ETS data but can be applied more generally

	map.tb <- function(map.data, g=NA, brks=NA, title="", value="tb", file=FALSE){
	## plot UK choropleth maps
	## developed using LA and ETS data but
	## can be applied more generally
	##
	## map.data: map object loaded by readOGR()
	## g: number of groups for values; max(g)=9
	## brks: given break points to group values
	## title: plot title
	## value: variable name of colouring value (string)

n8thangreen / Esther's LoS plotting script.R

Last active December 15, 2016 09:54

Length of stay plotting script

	data_long = melt(CDIdata, id.vars=c("rowID", "ribotype", "severe", "LoS.total", "LoS.preinf", "LoS.postinf", "discharge.status"))
	data_long$value = as.numeric(as.character(data_long$value))

	# Change discharge to death in hospital
	data_long$variable = factor(data_long$variable, levels=c(unique(as.character(data_long$variable)), "Death in hospital"))
	d = data_long$variable=="Discharged" & data_long$discharge.status == "3"
	data_long$variable[d] = "Death in hospital"

	#write.csv(data_long, file="data_long.csv")

n8thangreen / probs.logit.covs

Created November 11, 2013 14:17

Vector of conditional probabilities from a multinomial logistic regression given particular covariate values

	probs.logit_covs <- function(groups, cov, data){
	##
	## given a (set of) covariate values, calculates the class membership probabilities
	##
	## groups: response field name of interest in NATSAL e.g. het1yr (string)
	## cov: explanatory covariate name e.g. age_shift (string)
	## data: e.g. NATSAL.dat (array)

	## ordered sequence of covariate levels
	## copy those covered in data but could include intermediate values too

n8thangreen / probs.logit

Created November 11, 2013 14:15

Stochastic array of multinomial logistic regression parameter fits

	probs.logit <- function(grps.seq, cov, coeff, x){
	##
	## multinomial logistic regression model fit for a given covariate
	## e.g. probability sums per age =1
	##
	## x: covariate value e.g. age=16
	## grps: vector of class labels for NATSAL groups e.g. het5yrs
	## cov: covariate name to regress against (string)
	## coeff: coefficients from the mlogit() fit

n8thangreen / multinom_lik

Created November 11, 2013 14:14

Likelihood calculation using a multinomial logistic regression

	multinom_loglik <- function(props, size.melt, cov){
	##
	## multinomial logistic regression likelihood
	## over ALL groups in one batch
	##
	## props: array of covariate by group probabilities
	## size.melt: e.g. agesize.melt
	## cov: covariate name (string) e.g. "age"

	ncol.size.melt <- ncol(size.melt)