Skip to content

Instantly share code, notes, and snippets.

@AngelBerihuete
Last active April 12, 2018 11:36
Show Gist options
  • Save AngelBerihuete/7e88d55845044ce04a9e61edcd5954f2 to your computer and use it in GitHub Desktop.
Save AngelBerihuete/7e88d55845044ce04a9e61edcd5954f2 to your computer and use it in GitHub Desktop.
File to reproduce examples in paper
# Reproducible examples with rtip package
# ---------------------------------------
# Install from CRAN
# install.packages("rtip")
library(rtip)
set.seed(1972)
# Datasets
# --------
# A synthetic dataset generated from real Austrian EU-SILC:
data(eusilc2)
head(eusilc2, 3)
# The Spanish National Statistics Institute (INE in spanish)
# release for the living conditions survey in 2014:
data(LCS2014)
head(LCS2014,3)
# Setup the dataset
# -----------------
dataset <- setupDataset(eusilc2, country = "AT", region = NULL,
s = NULL, deflator = NULL, pppr = NULL)
head(dataset,3)
# dataset contains some transformed variables from the EU-SILC survey
# which permit users to calculate indicators and curves for inequality, welfare
# and poverty
# Indicators for poverty, inequality and welfare
# ----------------------------------------------
## At-risk-of-poverty threshold:
arpt(dataset, ipuc = "ipuc", hhcsw = "DB090", hhsize = "HX040",
pz = 0.6, ci = NULL, rep = 500, verbose = FALSE) # Default values
## At-risk-of-poverty rate:
# Percentage of persons with an equivalised disposable income below 60% of the
# median equivalised disposable income:
arpt60 <- arpt(dataset)
arpr(dataset, arpt.value = arpt60)
arpr(dataset, arpt.value = arpt60, ci = 0.98) # confidence interval
# Percentage of persons with an equivalised disposable income below 40% of the
# median equivalised disposable income:
arpt40 <- arpt(dataset, pz = 0.4)
arpr(dataset, arpt.value = arpt40)
## Maximum of TIP curve: a measure of the intensity of poverty.
# It is equal to the mean poverty gap, if norm = FALSE, or the normalised mean
# poverty gap if norm = TRUE.
s1(dataset, arpt.value = arpt60, norm = TRUE)
## Other measurements:
# gini(dataset, ci = 0.99)
rmpg(dataset, ci = NULL)
# qsr(dataset, ci = 0.99)
# mip(dataset, ci = 0.99)
# mih(dataset, ci = c(0.90, 0.99))
# miuc(dataset, ci = c(0.90, 0.99))
# s2(dataset, arpt.value = arpt(dataset), norm = TRUE, ci = 0.99)
# Curves for inequality, welfare and poverty
# ------------------------------------------
## Lorenz curve:
Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
lorenz_curve <- lc(Burgenland, generalized = FALSE, plot=TRUE)
# lorenz_curve is a data.frame containing abscissae, cumulated proportion of
# population (p), and ordinates, values of the Lorenz (if generalized=FALSE)
# or the Generalized Lorenz curve (if generalized=TRUE).
# The same plot as setting up plot=TRUE in lc function, but using an
# alternative theme in ggplot2:
p1 <- ggplot(lorenz_curve, aes(x.lg, y.lg)) + geom_line() +
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
scale_x_continuous(expression(p)) +
scale_y_continuous(expression(L(p))) + theme_bw()
print(p1)
## TIP curve:
tip_curve <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = TRUE)
# tip_curve contains abscissae, cumulated proportion of population, and
# ordinates, values of normalised TIP curve (norm=TRUE).
# An alternative plot of normalised TIP curve:
p2 <- ggplot(tip_curve, aes(x.tip, y.tip)) + geom_line() +
scale_x_continuous(expression(p)) +
scale_y_continuous(expression(TIP(p, z))) + theme_bw()
print(p2)
# Dominance tests
# ---------------
## Generalized Lorenz dominance:
Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia")
# To test the null hypothesis that the income distribution of Burgenland
# dominates the income distribution of Carinthia in the generalized Lorenz
# sense:
testGL(Burgenland, Carinthia, generalized = TRUE, samplesize = 10, alpha = 0.05)
# To test the null hypothesis that the income distribution of Carinthia
# dominates the income distribution of Burgenland in the Lorenz
# sense (generalized =FALSE):
testGL(Carinthia, Burgenland, generalized = FALSE, samplesize = 10, alpha = 0.05)
# Plotting Lorenz curves (generalized = FALSE) in the same graph adding
# 95% confidence intervals.
# Let's observe that the standard errors in this case are negligible and
# we cannot distinguish the confidence intervals in the graph.
Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia")
Tyrol <- setupDataset(eusilc2, country = "AT", region = "Tyrol")
nsample = 10
dat1 <- OmegaGL(Burgenland, samplesize = nsample, generalized = FALSE)
dat2 <- OmegaGL(Carinthia, samplesize = nsample, generalized = FALSE)
data1 <- as.data.frame(dat1[2:3])
data2 <- as.data.frame(dat2[2:3])
data1<-rbind(c(0,0), data1)
data2<-rbind(c(0,0), data2)
data1$se <- c(NA, sqrt(diag(dat1$Omega)), NA)
data2$se <- c(NA, sqrt(diag(dat2$Omega)), NA)
p3 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) +
geom_line() +
geom_ribbon(aes(ymin=gl.curve-se*qnorm(0.975), ymax=gl.curve+se*qnorm(0.975)), fill = "grey70", alpha=0.3) +
geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) +
geom_ribbon(data = data2, aes(ymin=gl.curve-se*qnorm(0.975), ymax=gl.curve+se*qnorm(0.975)), fill = "grey70", alpha=0.3) +
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
scale_x_continuous(expression(p)) +
scale_y_continuous(expression(L(p))) +
scale_linetype_discrete(name = "Region") +
theme_bw() +
theme(legend.background = element_rect(colour = "black"),
legend.position=c(0.825,0.15))
print(p3)
# A plot without confidence intervals will be better in this case:
p4 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) +
geom_line() +
geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) +
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
scale_x_continuous(expression(p)) +
scale_y_continuous(expression(L(p))) +
scale_linetype_discrete(name = "Region") +
theme_bw() +
theme(legend.background = element_rect(colour = "black"),
legend.position=c(0.825,0.15))
print(p4)
## TIP dominance
# The null hypothesis that the normalised TIP curve of Carinthia dominates the
# normalised TIP curve of Burgerland:
testTIP(Carinthia, Burgenland, norm = TRUE, samplesize = 50, alpha = 0.05)
# Plotting the TIP curves in the same graph:
tip1 <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = FALSE)
tip2 <- tip(Carinthia, arpt.value = arpt(Carinthia), norm = TRUE, plot = FALSE)
p5 <- ggplot(data = tip1, aes(x.tip, y.tip, linetype = "Burgenland")) +
geom_line() +
geom_line(data = tip2, aes(x.tip, y.tip, linetype = "Carinthia")) +
scale_x_continuous(expression(p), limits = c(0, 0.3)) +
scale_y_continuous(expression(TIP(p,z[i]))) +
scale_linetype_discrete(name = "Region") +
theme_bw() +
theme(legend.background = element_rect(colour = "black"),
legend.position=c(0.825,0.15))
print(p5)
## Testing samplesize testGL
testGL(Carinthia, Burgenland, samplesize = 80)
testGL(Carinthia, Tyrol, samplesize = 80)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment