Created
January 24, 2017 18:04
-
-
Save actuaryactually/419806f8f9a9f0062e833b4205db2c31 to your computer and use it in GitHub Desktop.
i'm a cowboy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#preliminaries: | |
wkg.dir = "C:/OldWorld/Blog/DataWrangling" # define where we'll work, save files, etc. for this post | |
setwd(wkg.dir) | |
#Install a helper piece of code (pacman) and then load required code libraries in one-go: | |
install.packages("pacman") | |
pacman::p_load(ChainLadder,ggplot2,data.table,tidyr) | |
#Step 1: load some data and view | |
RAA | |
plot(RAA/1e3,lattice=TRUE, main="RAA Data by Accident Year (USD000s)",xlab="Dev Yrs") | |
#Step 1b: additional output needed for blog only; commented out in production copy on GitHub: | |
png("RAA.Dev.png") | |
plot(RAA/1e3,lattice=TRUE, main="RAA Data by Accident Year (USD000s)",xlab="Dev Yrs") | |
dev.off() | |
#Step 2: Manuipulation of data | |
#zeroes begone... | |
print(RAA, na.print="") | |
#convert to incremental | |
( x = cum2incr(RAA) ) | |
print(x, na.print="") | |
( plot.x = plot(x/1e3,lattice=TRUE, main="RAA Incremental Data by Accident Year (USD000s)",xlab="Dev Yrs") ) | |
#for blog outputs only - dropped in final post to site: | |
png("RAA.IncrementalDev.png") | |
plot.x | |
dev.off() | |
#convert incremental to cumulative: | |
( y = incr2cum(x) ) | |
#convert triangles to flat data format | |
(pivot.format = as.data.frame(RAA)) | |
#check - that our manipulations all tie up - i.e. this should return a triangle of zeroes, which it does: | |
( RAA-y ) | |
# Step 3: Finesse with ggplot | |
#Let's download some example auto claims data taken from CAS website: | |
#the following line is commented out. In principle, it should allow directly download from the website into R. | |
#however, I have left it comments, as you may get identified (wrongly) as a bot by CAS website. | |
#As a result, it's easier to manually download the file to a local drive and proceed as shown: | |
#mydat = fread('http://www.casact.org/research/reserve_data/ppauto_pos.csv') | |
mydat=read.csv("ppauto_pos.csv") #assuming you've saved the csv to the wkg.dir location | |
#Home-in on one company and pluck-out columns of interest: | |
StateFarm = subset(mydat,subset = mydat$GRNAME=="State Farm Mut Grp") | |
StateFarm.Data = StateFarm[,c(2:3,5:8,11)] | |
StateFarm.Data$AccidentYear = as.numeric(StateFarm.Data$AccidentYear) | |
StateFarm.Data$DevelopmentLag = as.numeric(StateFarm.Data$DevelopmentLag) | |
head(StateFarm.Data) # check results - this has the minimum we need, shows first 6 rows only | |
#now we convert this into a format that ChainLadder likes... | |
(paid.tri = as.triangle(StateFarm.Data,origin="AccidentYear",dev="DevelopmentLag",value="CumPaidLoss_B")) | |
(inc.tri = as.triangle(StateFarm.Data,origin="AccidentYear",dev="DevelopmentLag",value="IncurLoss_B")) | |
#lastly overwrite data values so it appears as if we're working with data as at 1997 year end - i.e. the normal situation | |
paid.tri[row(paid.tri)+col(paid.tri)>nrow(paid.tri)+1] = NA | |
inc.tri[row(inc.tri)+col(inc.tri)>nrow(inc.tri)+1] = NA | |
#convert triangles to frame format and purge N/As | |
paid.frame = as.data.frame(paid.tri) | |
paid.frame$type = "paid" | |
paid.frame = subset(paid.frame,subset = paid.frame$value!="NA") | |
inc.frame = as.data.frame(inc.tri) | |
inc.frame$type = "incurred" | |
inc.frame = subset(inc.frame,subset = inc.frame$value!="NA") | |
#in practical situations we are likely to have a view on the ultimate loss cost. FOr this blogpost, let's suppose that we want to run with a 90% loss ratio for all years | |
ULR = 0.9 | |
net.prem = unique(StateFarm.Data$EarnedPremNet_B) | |
net.ult = data.frame(cbind(AccidentYear=1988:1997,Ult=ULR*net.prem)) | |
#create a frame showing the ultimates is the same format as the paid and incurred information - good tip here on vlookup approach, https://www.rforexcelusers.com/vlookup-in-r/ | |
ult.frame = merge(net.ult[, c("AccidentYear", "Ult")], | |
inc.frame[, c("AccidentYear", "DevelopmentLag","type")]) | |
#reorder and rename columns, then override the type | |
ult.frame = ult.frame[,c(1,3,2,4)] | |
colnames(ult.frame) = colnames(inc.frame) | |
ult.frame$type = "Ult" | |
ult.frame = ult.frame[ order(ult.frame$AccidentYear, ult.frame$DevelopmentLag), ] | |
#combine the three frames together: | |
data.combined = data.frame(rbind(paid.frame,inc.frame,ult.frame)) | |
#clean up temp variables | |
rm(paid.frame,inc.frame,ult.frame) | |
#now plot (finally!) | |
p = ggplot(data=data.combined, aes(x=DevelopmentLag, y = value/1e6, color=type)) + | |
geom_point()+geom_line() + facet_wrap(~AccidentYear) | |
#ok, let's improve the output in a few ways | |
#axes limits and ticks | |
p = p+ylim(0,20) | |
p = p+ scale_x_continuous(breaks=seq(0, 10, 2)) # Ticks from 0-10, every 2 | |
p = p+labs(title = "State Farm Mutual - Motor Loss Development", | |
subtitle = "Incurred losses show downward development, highlighting redundancy of case reserves", | |
caption = "Source: http://www.casact.org/research/reserve_data/ppauto_pos.csv", | |
x = "Development Yr", y = "USDm") | |
p = p+theme(axis.text=element_text(size=8), | |
axis.title=element_text(size=10,face="italic")) | |
#color scale | |
p = p+ scale_colour_brewer(palette = "Dark2") #other options are available here: https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/colorPaletteCheatsheet.pdf | |
#write to LAN: | |
png("Prettier_Output.png") | |
p | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment