Skip to content

Instantly share code, notes, and snippets.

@djhurio
Last active November 22, 2016 15:45
Show Gist options
  • Save djhurio/dfb6953284ccd86cdf0606c146dcfc6f to your computer and use it in GitHub Desktop.
Save djhurio/dfb6953284ccd86cdf0606c146dcfc6f to your computer and use it in GitHub Desktop.
Mate5054 - Apsekojumu statistika (2KP). Ceturtais praktiskais darbs
### Apsekojumu statistika
### Praktiskie darbi 4
# Bibliotekas ####
require(data.table)
require(sampling)
require(foreach)
require(ggplot2)
require(tools)
# Reset workspace ####
rm(list = ls())
gc()
# Datu fails
file.data <- file.path("http://home.lu.lv/~pm90015/work/LU",
"ApsekojumuStatistika/Data/Population.Rdata")
# Nolādē datu failu, ja nepieciešams
download.file(file.data, "Population.Rdata", mode = "wb")
list.files()
# Pārbaude vai datu nolāde ir veiksmīga
md5sum("Population.Rdata") == "d035c64414527a3dd581104e06ebeb34"
# Ielādē datu failu
load("Population.Rdata")
pop
names(pop)
dim(pop)
# Pārkodē J100
pop[, .N, keyby = J100]
pop[is.na(J100) | J100 == 9, J100 := 2]
pop[, .N, keyby = J100]
# Izpētes mainīgie (Y) ####
# Ekonomiskā aktivitāte
pop[, .N, keyby = eka]
pop[, y1 := as.integer(eka == 1)]
pop[, y2 := as.integer(eka == 2)]
pop[, y3 := as.integer(eka == 3)]
# Y mainīgo nosaukumi
ynames <- paste0("y", 1:3)
ynames
# Palīginformācijas mainīgie (X) ####
# Konstante
pop[, x1 := 1L]
# Dzimums
pop[, .N, keyby = DZIMUMS]
pop[, x2 := as.integer(DZIMUMS == 1)]
# J100
pop[, .N, keyby = J100]
pop[, x3 := as.integer(J100 == 1)]
# X mainīgo nosaukumi
xnames <- paste0("x", 1:3)
xnames
# X summārās vērtības
totals <- pop[, lapply(.SD, sum), .SDcols = xnames]
totals
# SRS ####
# Izlases apjoms
n <- 2000
# Izlasē iekļaušanas varbūtības
pop[, pik := n / .N]
pop[, sum(pik)]
# SRS
pop[, s := srswor(n, .N)]
pop[, sum(s)]
# Sample
s <- pop[s == 1L]
# Dizaina svari
s[, d := 1 / pik]
s[, sum(d)]
all.equal(s[, sum(d)], pop[, .N])
# HT novērtējumi
Y_HT <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * d))]
Y_HT
# Svaru kalibrācija - matricu formā
omega <- diag(s[, d])
dim(omega)
omega[1:5, 1:5]
X <- as.matrix(s[, xnames, with = F])
dim(X)
X[1:5, ]
totals
tot <- unlist(totals)
one <- rep(1, n)
g <- as.vector(t(one) + (t(tot) - t(one) %*% omega %*% X) %*%
solve(t(X) %*% omega %*% X) %*% t(X))
head(g)
# Svaru kalibrācija - ar sampling bibliotēku
# Margins for plot
par("mar")
par(mar=rep(2, 4))
s[, g := calib(.SD[, xnames, with = F], d, tot,
method = "linear", description = T)]
# Salīdzinam
all.equal(g, s[, g])
# Kalibrētie svari
s[, w := d * g]
s[, sum(w)]
# Kalibrētie novērtējumi
Y_cal <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * w))]
Y_cal
# Patiesās Y summārās vērtības
Y_true <- pop[, lapply(.SD, sum), .SDcols = ynames]
Y_true
Y_true[, estimator := "true"]
Y_HT[, estimator := "HT"]
Y_cal[, estimator := "cal"]
Y_est <- rbindlist(list(Y_true, Y_HT, Y_cal))
Y_est
# Simulācija ####
# Simulāciju skaits
I <- 1000
sim <- foreach(i = 1:I, .combine = rbind) %do% {
# SRS
pop[, s := srswor(n, .N)]
# Sample
s <- pop[s == 1L]
# Dizaina svari
s[, d := s / pik]
# HT novērtējumi
Y_HT <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * d))]
# Svaru kalibrācija
s[, g := calib(.SD[, xnames, with = F], d, unlist(totals),
method = "linear", description = F)]
# Kalibrētie svari
s[, w := d * g]
# Kalibrētie novērtējumi
Y_cal <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * w))]
Y_cal
setnames(Y_HT, paste0(names(Y_HT), "_HT"))
setnames(Y_cal, paste0(names(Y_cal), "_cal"))
cbind(i = i, Y_HT, Y_cal)
}
sim
sim[, lapply(.SD[, -1, with = F], mean)]
sim[, lapply(.SD[, -1, with = F], sd)]
ggplot() +
geom_density(aes(y1_HT), data = sim) +
geom_density(aes(y1_cal), data = sim, colour = "red") +
theme_bw()
ggplot() +
geom_density(aes(y2_HT), data = sim) +
geom_density(aes(y2_cal), data = sim, colour = "red") +
theme_bw()
ggplot() +
geom_density(aes(y3_HT), data = sim) +
geom_density(aes(y3_cal), data = sim, colour = "red") +
theme_bw()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment