jobonaf · April 19, 2021 16:00
diff --git a/mean-vs-rankN.R b/mean-vs-rankN.R

 mean_vs_rankN <- function(poll = "PM10",
 			  nrank = 36,
 			  threshold = 50,
 			  fy = 2009,
 			  ly = 2018) {
 	library(dplyr)

 	Dat <- readRDS(paste0("data/",poll,"_daily_",fy,"-",ly,".rds"))
 	Dat %>%
 	group_by(Year=format(Time,"%Y"),Point) %>%
 	arrange(desc(Value), .by_group=T) %>%
 	summarize(Y_ave=mean(Value,na.rm=T),
 		  nValid=sum(!is.na(Value)),
 		  Y_rank=nth(Value,nrank)) %>%
 	filter(nValid>=365*0.9) %>%
 	select(-nValid) %>%
 	ungroup()-> dat

 	fit <- lm(data=dat, formula=Y_ave~Y_rank)
 	thr <- predict(newdata=data.frame(Y_rank=threshold), object=fit)
 	ii <- signif(fit$coefficients[1],3) ; names(ii) <- NULL
 	ss <- signif(fit$coefficients[2],3) ; names(ss) <- NULL
 	ll <- bquote("best fit:" ~ .(poll)["y.ave"] %~~% .(ii) ~ 
 		     "+" ~ .(ss) %.% .(poll)["rank"*.(nrank)])

 	source("/u/arpa/bonafeg/src/rutilante/R/gg_themes.R")
 	library(ggplot2)
 	library(scales)
 	ggplot(data=dat, aes(x=Y_rank, y=Y_ave)) +
 	geom_point(col="grey70") +
 	scale_x_continuous(limits=c(0,max(c(dat$Y_rank,threshold),na.rm=T))) +
 	scale_y_continuous(limits=c(0,max(c(dat$Y_ave,thr),na.rm=T))) +
 	geom_abline(slope=fit$coefficients[2],
 		    intercept=fit$coefficients[1],
 		    lty=2,col=muted("orange")) +
 	geom_vline(xintercept=threshold,col=muted("blue")) +
 	geom_hline(yintercept=thr,col=muted("blue")) +
 	annotate("text",x=0,y=thr*1.01,label=signif(thr,3),
 		 vjust=0,family="Decima WE",col=muted("blue")) +
 	annotate("text", y=0, x=threshold*1.01, label=threshold,
 		 hjust=0,vjust=1,family="Decima WE",col=muted("blue")) +
 	theme_fvg() +
 	ggtitle(paste0(poll, " (",fy,"-",ly,")"),
 		subtitle=ll) +
 	theme(panel.grid=element_blank()) +
 	xlab(bquote(.(poll)["rank"*.(nrank)] ~ (mu*g/m^3))) +
 	ylab(bquote(.(poll)["y.ave"] ~ (mu*g/m^3))) -> p
 	
 	ggsave_fvg(p, 
 	           filename=paste0(poll,".mean-vs-rank",nrank,
 	                           ".",fy,"-",ly,".pdf"),
 	           width=5, height=5)
 	saveRDS(list(fitted_model=fit, N=nrank, thresholds_Nth=threshold, thresholds_yave=unname(thr)), 
 	           file=paste0(poll,".mean-vs-rank",nrank,".",fy,"-",ly,".rds"))
 }
 mean_vs_rankN(poll = "PM10", nrank = 36, threshold = 50, fy = 2009, ly = 2018) 
 mean_vs_rankN(poll = "PM10", nrank = 4, threshold = c(150,100,75,50), fy = 2009, ly = 2018) 
 mean_vs_rankN(poll = "PM2.5", nrank = 4, threshold = c(75,50,37.5,25), fy = 2009, ly = 2018)

	mean_vs_rankN <- function(poll = "PM10",
	nrank = 36,
	threshold = 50,
	fy = 2009,
	ly = 2018) {
	library(dplyr)

	Dat <- readRDS(paste0("data/",poll,"_daily_",fy,"-",ly,".rds"))
	Dat %>%
	group_by(Year=format(Time,"%Y"),Point) %>%
	arrange(desc(Value), .by_group=T) %>%
	summarize(Y_ave=mean(Value,na.rm=T),
	nValid=sum(!is.na(Value)),
	Y_rank=nth(Value,nrank)) %>%
	filter(nValid>=365*0.9) %>%
	select(-nValid) %>%
	ungroup()-> dat

	fit <- lm(data=dat, formula=Y_ave~Y_rank)
	thr <- predict(newdata=data.frame(Y_rank=threshold), object=fit)
	ii <- signif(fit$coefficients[1],3) ; names(ii) <- NULL
	ss <- signif(fit$coefficients[2],3) ; names(ss) <- NULL
	ll <- bquote("best fit:" ~ .(poll)["y.ave"] %~~% .(ii) ~
	"+" ~ .(ss) %.% .(poll)["rank"*.(nrank)])

	source("/u/arpa/bonafeg/src/rutilante/R/gg_themes.R")
	library(ggplot2)
	library(scales)
	ggplot(data=dat, aes(x=Y_rank, y=Y_ave)) +
	geom_point(col="grey70") +
	scale_x_continuous(limits=c(0,max(c(dat$Y_rank,threshold),na.rm=T))) +
	scale_y_continuous(limits=c(0,max(c(dat$Y_ave,thr),na.rm=T))) +
	geom_abline(slope=fit$coefficients[2],
	intercept=fit$coefficients[1],
	lty=2,col=muted("orange")) +
	geom_vline(xintercept=threshold,col=muted("blue")) +
	geom_hline(yintercept=thr,col=muted("blue")) +
	annotate("text",x=0,y=thr*1.01,label=signif(thr,3),
	vjust=0,family="Decima WE",col=muted("blue")) +
	annotate("text", y=0, x=threshold*1.01, label=threshold,
	hjust=0,vjust=1,family="Decima WE",col=muted("blue")) +
	theme_fvg() +
	ggtitle(paste0(poll, " (",fy,"-",ly,")"),
	subtitle=ll) +
	theme(panel.grid=element_blank()) +
	xlab(bquote(.(poll)["rank".(nrank)] ~ (mug/m^3))) +
	ylab(bquote(.(poll)["y.ave"] ~ (mu*g/m^3))) -> p

	ggsave_fvg(p,
	filename=paste0(poll,".mean-vs-rank",nrank,
	".",fy,"-",ly,".pdf"),
	width=5, height=5)
	saveRDS(list(fitted_model=fit, N=nrank, thresholds_Nth=threshold, thresholds_yave=unname(thr)),
	file=paste0(poll,".mean-vs-rank",nrank,".",fy,"-",ly,".rds"))
	}
	mean_vs_rankN(poll = "PM10", nrank = 36, threshold = 50, fy = 2009, ly = 2018)
	mean_vs_rankN(poll = "PM10", nrank = 4, threshold = c(150,100,75,50), fy = 2009, ly = 2018)
	mean_vs_rankN(poll = "PM2.5", nrank = 4, threshold = c(75,50,37.5,25), fy = 2009, ly = 2018)