Last active
April 12, 2018 11:36
-
-
Save AngelBerihuete/7e88d55845044ce04a9e61edcd5954f2 to your computer and use it in GitHub Desktop.
File to reproduce examples in paper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproducible examples with rtip package | |
# --------------------------------------- | |
# Install from CRAN | |
# install.packages("rtip") | |
library(rtip) | |
set.seed(1972) | |
# Datasets | |
# -------- | |
# A synthetic dataset generated from real Austrian EU-SILC: | |
data(eusilc2) | |
head(eusilc2, 3) | |
# The Spanish National Statistics Institute (INE in spanish) | |
# release for the living conditions survey in 2014: | |
data(LCS2014) | |
head(LCS2014,3) | |
# Setup the dataset | |
# ----------------- | |
dataset <- setupDataset(eusilc2, country = "AT", region = NULL, | |
s = NULL, deflator = NULL, pppr = NULL) | |
head(dataset,3) | |
# dataset contains some transformed variables from the EU-SILC survey | |
# which permit users to calculate indicators and curves for inequality, welfare | |
# and poverty | |
# Indicators for poverty, inequality and welfare | |
# ---------------------------------------------- | |
## At-risk-of-poverty threshold: | |
arpt(dataset, ipuc = "ipuc", hhcsw = "DB090", hhsize = "HX040", | |
pz = 0.6, ci = NULL, rep = 500, verbose = FALSE) # Default values | |
## At-risk-of-poverty rate: | |
# Percentage of persons with an equivalised disposable income below 60% of the | |
# median equivalised disposable income: | |
arpt60 <- arpt(dataset) | |
arpr(dataset, arpt.value = arpt60) | |
arpr(dataset, arpt.value = arpt60, ci = 0.98) # confidence interval | |
# Percentage of persons with an equivalised disposable income below 40% of the | |
# median equivalised disposable income: | |
arpt40 <- arpt(dataset, pz = 0.4) | |
arpr(dataset, arpt.value = arpt40) | |
## Maximum of TIP curve: a measure of the intensity of poverty. | |
# It is equal to the mean poverty gap, if norm = FALSE, or the normalised mean | |
# poverty gap if norm = TRUE. | |
s1(dataset, arpt.value = arpt60, norm = TRUE) | |
## Other measurements: | |
# gini(dataset, ci = 0.99) | |
rmpg(dataset, ci = NULL) | |
# qsr(dataset, ci = 0.99) | |
# mip(dataset, ci = 0.99) | |
# mih(dataset, ci = c(0.90, 0.99)) | |
# miuc(dataset, ci = c(0.90, 0.99)) | |
# s2(dataset, arpt.value = arpt(dataset), norm = TRUE, ci = 0.99) | |
# Curves for inequality, welfare and poverty | |
# ------------------------------------------ | |
## Lorenz curve: | |
Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland") | |
lorenz_curve <- lc(Burgenland, generalized = FALSE, plot=TRUE) | |
# lorenz_curve is a data.frame containing abscissae, cumulated proportion of | |
# population (p), and ordinates, values of the Lorenz (if generalized=FALSE) | |
# or the Generalized Lorenz curve (if generalized=TRUE). | |
# The same plot as setting up plot=TRUE in lc function, but using an | |
# alternative theme in ggplot2: | |
p1 <- ggplot(lorenz_curve, aes(x.lg, y.lg)) + geom_line() + | |
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") + | |
scale_x_continuous(expression(p)) + | |
scale_y_continuous(expression(L(p))) + theme_bw() | |
print(p1) | |
## TIP curve: | |
tip_curve <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = TRUE) | |
# tip_curve contains abscissae, cumulated proportion of population, and | |
# ordinates, values of normalised TIP curve (norm=TRUE). | |
# An alternative plot of normalised TIP curve: | |
p2 <- ggplot(tip_curve, aes(x.tip, y.tip)) + geom_line() + | |
scale_x_continuous(expression(p)) + | |
scale_y_continuous(expression(TIP(p, z))) + theme_bw() | |
print(p2) | |
# Dominance tests | |
# --------------- | |
## Generalized Lorenz dominance: | |
Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland") | |
Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia") | |
# To test the null hypothesis that the income distribution of Burgenland | |
# dominates the income distribution of Carinthia in the generalized Lorenz | |
# sense: | |
testGL(Burgenland, Carinthia, generalized = TRUE, samplesize = 10, alpha = 0.05) | |
# To test the null hypothesis that the income distribution of Carinthia | |
# dominates the income distribution of Burgenland in the Lorenz | |
# sense (generalized =FALSE): | |
testGL(Carinthia, Burgenland, generalized = FALSE, samplesize = 10, alpha = 0.05) | |
# Plotting Lorenz curves (generalized = FALSE) in the same graph adding | |
# 95% confidence intervals. | |
# Let's observe that the standard errors in this case are negligible and | |
# we cannot distinguish the confidence intervals in the graph. | |
Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland") | |
Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia") | |
Tyrol <- setupDataset(eusilc2, country = "AT", region = "Tyrol") | |
nsample = 10 | |
dat1 <- OmegaGL(Burgenland, samplesize = nsample, generalized = FALSE) | |
dat2 <- OmegaGL(Carinthia, samplesize = nsample, generalized = FALSE) | |
data1 <- as.data.frame(dat1[2:3]) | |
data2 <- as.data.frame(dat2[2:3]) | |
data1<-rbind(c(0,0), data1) | |
data2<-rbind(c(0,0), data2) | |
data1$se <- c(NA, sqrt(diag(dat1$Omega)), NA) | |
data2$se <- c(NA, sqrt(diag(dat2$Omega)), NA) | |
p3 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) + | |
geom_line() + | |
geom_ribbon(aes(ymin=gl.curve-se*qnorm(0.975), ymax=gl.curve+se*qnorm(0.975)), fill = "grey70", alpha=0.3) + | |
geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) + | |
geom_ribbon(data = data2, aes(ymin=gl.curve-se*qnorm(0.975), ymax=gl.curve+se*qnorm(0.975)), fill = "grey70", alpha=0.3) + | |
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") + | |
scale_x_continuous(expression(p)) + | |
scale_y_continuous(expression(L(p))) + | |
scale_linetype_discrete(name = "Region") + | |
theme_bw() + | |
theme(legend.background = element_rect(colour = "black"), | |
legend.position=c(0.825,0.15)) | |
print(p3) | |
# A plot without confidence intervals will be better in this case: | |
p4 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) + | |
geom_line() + | |
geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) + | |
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") + | |
scale_x_continuous(expression(p)) + | |
scale_y_continuous(expression(L(p))) + | |
scale_linetype_discrete(name = "Region") + | |
theme_bw() + | |
theme(legend.background = element_rect(colour = "black"), | |
legend.position=c(0.825,0.15)) | |
print(p4) | |
## TIP dominance | |
# The null hypothesis that the normalised TIP curve of Carinthia dominates the | |
# normalised TIP curve of Burgerland: | |
testTIP(Carinthia, Burgenland, norm = TRUE, samplesize = 50, alpha = 0.05) | |
# Plotting the TIP curves in the same graph: | |
tip1 <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = FALSE) | |
tip2 <- tip(Carinthia, arpt.value = arpt(Carinthia), norm = TRUE, plot = FALSE) | |
p5 <- ggplot(data = tip1, aes(x.tip, y.tip, linetype = "Burgenland")) + | |
geom_line() + | |
geom_line(data = tip2, aes(x.tip, y.tip, linetype = "Carinthia")) + | |
scale_x_continuous(expression(p), limits = c(0, 0.3)) + | |
scale_y_continuous(expression(TIP(p,z[i]))) + | |
scale_linetype_discrete(name = "Region") + | |
theme_bw() + | |
theme(legend.background = element_rect(colour = "black"), | |
legend.position=c(0.825,0.15)) | |
print(p5) | |
## Testing samplesize testGL | |
testGL(Carinthia, Burgenland, samplesize = 80) | |
testGL(Carinthia, Tyrol, samplesize = 80) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment