AngelBerihuete · April 12, 2018 11:36
diff --git a/rtip-examples.R b/rtip-examples.R
 # Reproducible examples with rtip package
 # ---------------------------------------

 # Install from CRAN
 # install.packages("rtip")

 library(rtip)
 set.seed(1972)

 # Datasets
 # --------

 # A synthetic dataset generated from real Austrian EU-SILC:

 data(eusilc2)
 head(eusilc2, 3)

 # The Spanish National Statistics Institute (INE in spanish)
 # release for the living conditions survey in 2014:

 data(LCS2014)
 head(LCS2014,3)

 # Setup the dataset
 # -----------------

 dataset <- setupDataset(eusilc2, country = "AT", region = NULL,
                        s = NULL, deflator = NULL, pppr = NULL)
 head(dataset,3)

 # dataset contains some transformed variables from the EU-SILC survey
 # which permit users to calculate indicators and curves for inequality, welfare
 # and poverty


 # Indicators for poverty, inequality and welfare
 # ----------------------------------------------

 ## At-risk-of-poverty threshold:

 arpt(dataset, ipuc = "ipuc", hhcsw = "DB090", hhsize = "HX040",
     pz = 0.6, ci = NULL, rep = 500, verbose = FALSE) # Default values

 ## At-risk-of-poverty rate:

 # Percentage of persons with an equivalised disposable income below 60% of the
 # median equivalised disposable income:
 arpt60 <- arpt(dataset)
 arpr(dataset, arpt.value = arpt60)

 arpr(dataset, arpt.value = arpt60, ci = 0.98) #  confidence interval

 # Percentage of persons with an equivalised disposable income below 40% of the
 # median equivalised disposable income:
 arpt40 <- arpt(dataset, pz = 0.4)
 arpr(dataset, arpt.value = arpt40)

 ## Maximum of TIP curve: a measure of the intensity of poverty.
 # It is equal to the mean poverty gap, if norm = FALSE, or the normalised mean
 # poverty gap if norm = TRUE.
 s1(dataset, arpt.value = arpt60, norm = TRUE)

 ## Other measurements:

 # gini(dataset, ci = 0.99)
 rmpg(dataset, ci = NULL)
 # qsr(dataset, ci = 0.99)
 # mip(dataset, ci = 0.99)
 # mih(dataset, ci = c(0.90, 0.99))
 # miuc(dataset, ci = c(0.90, 0.99))
 # s2(dataset, arpt.value = arpt(dataset), norm = TRUE, ci = 0.99)

 # Curves for inequality, welfare and poverty
 # ------------------------------------------

 ## Lorenz curve:

 Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
 lorenz_curve <- lc(Burgenland, generalized = FALSE, plot=TRUE)

 # lorenz_curve is a data.frame containing abscissae, cumulated proportion of
 # population (p), and ordinates, values of the Lorenz (if generalized=FALSE)
 # or the Generalized Lorenz curve (if generalized=TRUE).

 # The same plot as setting up plot=TRUE in lc function, but using an
 # alternative theme in ggplot2:

 p1 <- ggplot(lorenz_curve, aes(x.lg, y.lg)) + geom_line() +
 geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
 scale_x_continuous(expression(p)) +
 scale_y_continuous(expression(L(p))) + theme_bw()
 print(p1)

 ## TIP curve:

 tip_curve <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = TRUE)

 # tip_curve contains abscissae, cumulated proportion of population, and
 # ordinates, values of normalised TIP curve (norm=TRUE).

 # An alternative plot of normalised TIP curve:

 p2 <- ggplot(tip_curve, aes(x.tip, y.tip)) + geom_line() +
 scale_x_continuous(expression(p)) +
 scale_y_continuous(expression(TIP(p, z))) + theme_bw()
 print(p2)

 # Dominance tests
 # ---------------

 ## Generalized Lorenz dominance:

 Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
 Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia")

 # To test the null hypothesis that the income distribution of Burgenland
 # dominates the income distribution of Carinthia in the generalized Lorenz
 # sense:

 testGL(Burgenland, Carinthia, generalized = TRUE, samplesize = 10, alpha = 0.05)

 # To test the null hypothesis that the income distribution of Carinthia
 # dominates the income distribution of Burgenland in the Lorenz
 # sense (generalized =FALSE):

 testGL(Carinthia, Burgenland, generalized = FALSE, samplesize = 10, alpha = 0.05)

 # Plotting Lorenz curves (generalized = FALSE) in the same graph adding
 # 95% confidence intervals.
 # Let's observe that the standard errors in this case are negligible and
 # we cannot distinguish the confidence intervals in the graph.

 Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
 Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia")
 Tyrol <- setupDataset(eusilc2, country = "AT", region = "Tyrol")

 nsample = 10

 dat1 <- OmegaGL(Burgenland, samplesize = nsample, generalized = FALSE)
 dat2 <- OmegaGL(Carinthia, samplesize =  nsample, generalized = FALSE)

 data1 <- as.data.frame(dat1[2:3])
 data2 <- as.data.frame(dat2[2:3])
 data1<-rbind(c(0,0), data1)
 data2<-rbind(c(0,0), data2)

 data1$se <- c(NA, sqrt(diag(dat1$Omega)), NA)
 data2$se <- c(NA, sqrt(diag(dat2$Omega)), NA)

 p3 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) +
  geom_line() +
  geom_ribbon(aes(ymin=gl.curve-se*qnorm(0.975), ymax=gl.curve+se*qnorm(0.975)), fill = "grey70", alpha=0.3) +
  geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) +
  geom_ribbon(data = data2, aes(ymin=gl.curve-se*qnorm(0.975), ymax=gl.curve+se*qnorm(0.975)), fill = "grey70", alpha=0.3) +
  geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
  scale_x_continuous(expression(p)) +
  scale_y_continuous(expression(L(p))) +
  scale_linetype_discrete(name = "Region") +
  theme_bw() +
  theme(legend.background = element_rect(colour = "black"),
        legend.position=c(0.825,0.15))
 print(p3)

 # A plot without confidence intervals will be better in this case:

 p4 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) +
  geom_line() +
  geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) +
  geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
  scale_x_continuous(expression(p)) +
  scale_y_continuous(expression(L(p))) +
  scale_linetype_discrete(name = "Region") +
  theme_bw() +
  theme(legend.background = element_rect(colour = "black"),
        legend.position=c(0.825,0.15))
 print(p4)


 ## TIP dominance

 # The null hypothesis that the normalised TIP curve of Carinthia dominates the
 # normalised TIP curve of Burgerland:

 testTIP(Carinthia, Burgenland, norm = TRUE, samplesize = 50, alpha = 0.05)

 # Plotting the TIP curves in the same graph:

 tip1 <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = FALSE)
 tip2 <- tip(Carinthia, arpt.value = arpt(Carinthia),  norm = TRUE, plot = FALSE)

 p5 <- ggplot(data = tip1, aes(x.tip, y.tip, linetype = "Burgenland")) +
  geom_line() +
  geom_line(data = tip2, aes(x.tip, y.tip,  linetype = "Carinthia")) +
  scale_x_continuous(expression(p), limits = c(0, 0.3)) +
  scale_y_continuous(expression(TIP(p,z[i]))) +
  scale_linetype_discrete(name = "Region") +
  theme_bw() +
  theme(legend.background = element_rect(colour = "black"),
        legend.position=c(0.825,0.15))
 print(p5)


 ## Testing samplesize testGL

 testGL(Carinthia, Burgenland, samplesize = 80)
 testGL(Carinthia, Tyrol, samplesize = 80)
	# Reproducible examples with rtip package
	# ---------------------------------------

	# Install from CRAN
	# install.packages("rtip")

	library(rtip)
	set.seed(1972)

	# Datasets
	# --------

	# A synthetic dataset generated from real Austrian EU-SILC:

	data(eusilc2)
	head(eusilc2, 3)

	# The Spanish National Statistics Institute (INE in spanish)
	# release for the living conditions survey in 2014:

	data(LCS2014)
	head(LCS2014,3)

	# Setup the dataset
	# -----------------

	dataset <- setupDataset(eusilc2, country = "AT", region = NULL,
	s = NULL, deflator = NULL, pppr = NULL)
	head(dataset,3)

	# dataset contains some transformed variables from the EU-SILC survey
	# which permit users to calculate indicators and curves for inequality, welfare
	# and poverty


	# Indicators for poverty, inequality and welfare
	# ----------------------------------------------

	## At-risk-of-poverty threshold:

	arpt(dataset, ipuc = "ipuc", hhcsw = "DB090", hhsize = "HX040",
	pz = 0.6, ci = NULL, rep = 500, verbose = FALSE) # Default values

	## At-risk-of-poverty rate:

	# Percentage of persons with an equivalised disposable income below 60% of the
	# median equivalised disposable income:
	arpt60 <- arpt(dataset)
	arpr(dataset, arpt.value = arpt60)

	arpr(dataset, arpt.value = arpt60, ci = 0.98) # confidence interval

	# Percentage of persons with an equivalised disposable income below 40% of the
	# median equivalised disposable income:
	arpt40 <- arpt(dataset, pz = 0.4)
	arpr(dataset, arpt.value = arpt40)

	## Maximum of TIP curve: a measure of the intensity of poverty.
	# It is equal to the mean poverty gap, if norm = FALSE, or the normalised mean
	# poverty gap if norm = TRUE.
	s1(dataset, arpt.value = arpt60, norm = TRUE)

	## Other measurements:

	# gini(dataset, ci = 0.99)
	rmpg(dataset, ci = NULL)
	# qsr(dataset, ci = 0.99)
	# mip(dataset, ci = 0.99)
	# mih(dataset, ci = c(0.90, 0.99))
	# miuc(dataset, ci = c(0.90, 0.99))
	# s2(dataset, arpt.value = arpt(dataset), norm = TRUE, ci = 0.99)

	# Curves for inequality, welfare and poverty
	# ------------------------------------------

	## Lorenz curve:

	Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
	lorenz_curve <- lc(Burgenland, generalized = FALSE, plot=TRUE)

	# lorenz_curve is a data.frame containing abscissae, cumulated proportion of
	# population (p), and ordinates, values of the Lorenz (if generalized=FALSE)
	# or the Generalized Lorenz curve (if generalized=TRUE).

	# The same plot as setting up plot=TRUE in lc function, but using an
	# alternative theme in ggplot2:

	p1 <- ggplot(lorenz_curve, aes(x.lg, y.lg)) + geom_line() +
	geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
	scale_x_continuous(expression(p)) +
	scale_y_continuous(expression(L(p))) + theme_bw()
	print(p1)

	## TIP curve:

	tip_curve <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = TRUE)

	# tip_curve contains abscissae, cumulated proportion of population, and
	# ordinates, values of normalised TIP curve (norm=TRUE).

	# An alternative plot of normalised TIP curve:

	p2 <- ggplot(tip_curve, aes(x.tip, y.tip)) + geom_line() +
	scale_x_continuous(expression(p)) +
	scale_y_continuous(expression(TIP(p, z))) + theme_bw()
	print(p2)

	# Dominance tests
	# ---------------

	## Generalized Lorenz dominance:

	Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
	Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia")

	# To test the null hypothesis that the income distribution of Burgenland
	# dominates the income distribution of Carinthia in the generalized Lorenz
	# sense:

	testGL(Burgenland, Carinthia, generalized = TRUE, samplesize = 10, alpha = 0.05)

	# To test the null hypothesis that the income distribution of Carinthia
	# dominates the income distribution of Burgenland in the Lorenz
	# sense (generalized =FALSE):

	testGL(Carinthia, Burgenland, generalized = FALSE, samplesize = 10, alpha = 0.05)

	# Plotting Lorenz curves (generalized = FALSE) in the same graph adding
	# 95% confidence intervals.
	# Let's observe that the standard errors in this case are negligible and
	# we cannot distinguish the confidence intervals in the graph.

	Burgenland <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
	Carinthia <- setupDataset(eusilc2, country = "AT", region = "Carinthia")
	Tyrol <- setupDataset(eusilc2, country = "AT", region = "Tyrol")

	nsample = 10

	dat1 <- OmegaGL(Burgenland, samplesize = nsample, generalized = FALSE)
	dat2 <- OmegaGL(Carinthia, samplesize = nsample, generalized = FALSE)

	data1 <- as.data.frame(dat1[2:3])
	data2 <- as.data.frame(dat2[2:3])
	data1<-rbind(c(0,0), data1)
	data2<-rbind(c(0,0), data2)

	data1$se <- c(NA, sqrt(diag(dat1$Omega)), NA)
	data2$se <- c(NA, sqrt(diag(dat2$Omega)), NA)

	p3 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) +
	geom_line() +
	geom_ribbon(aes(ymin=gl.curve-seqnorm(0.975), ymax=gl.curve+seqnorm(0.975)), fill = "grey70", alpha=0.3) +
	geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) +
	geom_ribbon(data = data2, aes(ymin=gl.curve-seqnorm(0.975), ymax=gl.curve+seqnorm(0.975)), fill = "grey70", alpha=0.3) +
	geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
	scale_x_continuous(expression(p)) +
	scale_y_continuous(expression(L(p))) +
	scale_linetype_discrete(name = "Region") +
	theme_bw() +
	theme(legend.background = element_rect(colour = "black"),
	legend.position=c(0.825,0.15))
	print(p3)

	# A plot without confidence intervals will be better in this case:

	p4 <- ggplot(data = data1, aes(p, gl.curve, linetype = "Burgenland")) +
	geom_line() +
	geom_line(data = data2, aes(p, gl.curve, linetype = "Carinthia")) +
	geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), linetype = "dotted", color = "grey") +
	scale_x_continuous(expression(p)) +
	scale_y_continuous(expression(L(p))) +
	scale_linetype_discrete(name = "Region") +
	theme_bw() +
	theme(legend.background = element_rect(colour = "black"),
	legend.position=c(0.825,0.15))
	print(p4)


	## TIP dominance

	# The null hypothesis that the normalised TIP curve of Carinthia dominates the
	# normalised TIP curve of Burgerland:

	testTIP(Carinthia, Burgenland, norm = TRUE, samplesize = 50, alpha = 0.05)

	# Plotting the TIP curves in the same graph:

	tip1 <- tip(Burgenland, arpt.value = arpt(Burgenland), norm = TRUE, plot = FALSE)
	tip2 <- tip(Carinthia, arpt.value = arpt(Carinthia), norm = TRUE, plot = FALSE)

	p5 <- ggplot(data = tip1, aes(x.tip, y.tip, linetype = "Burgenland")) +
	geom_line() +
	geom_line(data = tip2, aes(x.tip, y.tip, linetype = "Carinthia")) +
	scale_x_continuous(expression(p), limits = c(0, 0.3)) +
	scale_y_continuous(expression(TIP(p,z[i]))) +
	scale_linetype_discrete(name = "Region") +
	theme_bw() +
	theme(legend.background = element_rect(colour = "black"),
	legend.position=c(0.825,0.15))
	print(p5)


	## Testing samplesize testGL

	testGL(Carinthia, Burgenland, samplesize = 80)
	testGL(Carinthia, Tyrol, samplesize = 80)