Skip to content

Instantly share code, notes, and snippets.

@actuaryactually
Created January 24, 2017 18:04
Show Gist options
  • Save actuaryactually/419806f8f9a9f0062e833b4205db2c31 to your computer and use it in GitHub Desktop.
Save actuaryactually/419806f8f9a9f0062e833b4205db2c31 to your computer and use it in GitHub Desktop.
i'm a cowboy
#preliminaries:
wkg.dir = "C:/OldWorld/Blog/DataWrangling" # define where we'll work, save files, etc. for this post
setwd(wkg.dir)
#Install a helper piece of code (pacman) and then load required code libraries in one-go:
install.packages("pacman")
pacman::p_load(ChainLadder,ggplot2,data.table,tidyr)
#Step 1: load some data and view
RAA
plot(RAA/1e3,lattice=TRUE, main="RAA Data by Accident Year (USD000s)",xlab="Dev Yrs")
#Step 1b: additional output needed for blog only; commented out in production copy on GitHub:
png("RAA.Dev.png")
plot(RAA/1e3,lattice=TRUE, main="RAA Data by Accident Year (USD000s)",xlab="Dev Yrs")
dev.off()
#Step 2: Manuipulation of data
#zeroes begone...
print(RAA, na.print="")
#convert to incremental
( x = cum2incr(RAA) )
print(x, na.print="")
( plot.x = plot(x/1e3,lattice=TRUE, main="RAA Incremental Data by Accident Year (USD000s)",xlab="Dev Yrs") )
#for blog outputs only - dropped in final post to site:
png("RAA.IncrementalDev.png")
plot.x
dev.off()
#convert incremental to cumulative:
( y = incr2cum(x) )
#convert triangles to flat data format
(pivot.format = as.data.frame(RAA))
#check - that our manipulations all tie up - i.e. this should return a triangle of zeroes, which it does:
( RAA-y )
# Step 3: Finesse with ggplot
#Let's download some example auto claims data taken from CAS website:
#the following line is commented out. In principle, it should allow directly download from the website into R.
#however, I have left it comments, as you may get identified (wrongly) as a bot by CAS website.
#As a result, it's easier to manually download the file to a local drive and proceed as shown:
#mydat = fread('http://www.casact.org/research/reserve_data/ppauto_pos.csv')
mydat=read.csv("ppauto_pos.csv") #assuming you've saved the csv to the wkg.dir location
#Home-in on one company and pluck-out columns of interest:
StateFarm = subset(mydat,subset = mydat$GRNAME=="State Farm Mut Grp")
StateFarm.Data = StateFarm[,c(2:3,5:8,11)]
StateFarm.Data$AccidentYear = as.numeric(StateFarm.Data$AccidentYear)
StateFarm.Data$DevelopmentLag = as.numeric(StateFarm.Data$DevelopmentLag)
head(StateFarm.Data) # check results - this has the minimum we need, shows first 6 rows only
#now we convert this into a format that ChainLadder likes...
(paid.tri = as.triangle(StateFarm.Data,origin="AccidentYear",dev="DevelopmentLag",value="CumPaidLoss_B"))
(inc.tri = as.triangle(StateFarm.Data,origin="AccidentYear",dev="DevelopmentLag",value="IncurLoss_B"))
#lastly overwrite data values so it appears as if we're working with data as at 1997 year end - i.e. the normal situation
paid.tri[row(paid.tri)+col(paid.tri)>nrow(paid.tri)+1] = NA
inc.tri[row(inc.tri)+col(inc.tri)>nrow(inc.tri)+1] = NA
#convert triangles to frame format and purge N/As
paid.frame = as.data.frame(paid.tri)
paid.frame$type = "paid"
paid.frame = subset(paid.frame,subset = paid.frame$value!="NA")
inc.frame = as.data.frame(inc.tri)
inc.frame$type = "incurred"
inc.frame = subset(inc.frame,subset = inc.frame$value!="NA")
#in practical situations we are likely to have a view on the ultimate loss cost. FOr this blogpost, let's suppose that we want to run with a 90% loss ratio for all years
ULR = 0.9
net.prem = unique(StateFarm.Data$EarnedPremNet_B)
net.ult = data.frame(cbind(AccidentYear=1988:1997,Ult=ULR*net.prem))
#create a frame showing the ultimates is the same format as the paid and incurred information - good tip here on vlookup approach, https://www.rforexcelusers.com/vlookup-in-r/
ult.frame = merge(net.ult[, c("AccidentYear", "Ult")],
inc.frame[, c("AccidentYear", "DevelopmentLag","type")])
#reorder and rename columns, then override the type
ult.frame = ult.frame[,c(1,3,2,4)]
colnames(ult.frame) = colnames(inc.frame)
ult.frame$type = "Ult"
ult.frame = ult.frame[ order(ult.frame$AccidentYear, ult.frame$DevelopmentLag), ]
#combine the three frames together:
data.combined = data.frame(rbind(paid.frame,inc.frame,ult.frame))
#clean up temp variables
rm(paid.frame,inc.frame,ult.frame)
#now plot (finally!)
p = ggplot(data=data.combined, aes(x=DevelopmentLag, y = value/1e6, color=type)) +
geom_point()+geom_line() + facet_wrap(~AccidentYear)
#ok, let's improve the output in a few ways
#axes limits and ticks
p = p+ylim(0,20)
p = p+ scale_x_continuous(breaks=seq(0, 10, 2)) # Ticks from 0-10, every 2
p = p+labs(title = "State Farm Mutual - Motor Loss Development",
subtitle = "Incurred losses show downward development, highlighting redundancy of case reserves",
caption = "Source: http://www.casact.org/research/reserve_data/ppauto_pos.csv",
x = "Development Yr", y = "USDm")
p = p+theme(axis.text=element_text(size=8),
axis.title=element_text(size=10,face="italic"))
#color scale
p = p+ scale_colour_brewer(palette = "Dark2") #other options are available here: https://www.nceas.ucsb.edu/~frazier/RSpatialGuides/colorPaletteCheatsheet.pdf
#write to LAN:
png("Prettier_Output.png")
p
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment