Last active
November 22, 2016 15:45
-
-
Save djhurio/dfb6953284ccd86cdf0606c146dcfc6f to your computer and use it in GitHub Desktop.
Mate5054 - Apsekojumu statistika (2KP). Ceturtais praktiskais darbs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Apsekojumu statistika | |
### Praktiskie darbi 4 | |
# Bibliotekas #### | |
require(data.table) | |
require(sampling) | |
require(foreach) | |
require(ggplot2) | |
require(tools) | |
# Reset workspace #### | |
rm(list = ls()) | |
gc() | |
# Datu fails | |
file.data <- file.path("http://home.lu.lv/~pm90015/work/LU", | |
"ApsekojumuStatistika/Data/Population.Rdata") | |
# Nolādē datu failu, ja nepieciešams | |
download.file(file.data, "Population.Rdata", mode = "wb") | |
list.files() | |
# Pārbaude vai datu nolāde ir veiksmīga | |
md5sum("Population.Rdata") == "d035c64414527a3dd581104e06ebeb34" | |
# Ielādē datu failu | |
load("Population.Rdata") | |
pop | |
names(pop) | |
dim(pop) | |
# Pārkodē J100 | |
pop[, .N, keyby = J100] | |
pop[is.na(J100) | J100 == 9, J100 := 2] | |
pop[, .N, keyby = J100] | |
# Izpētes mainīgie (Y) #### | |
# Ekonomiskā aktivitāte | |
pop[, .N, keyby = eka] | |
pop[, y1 := as.integer(eka == 1)] | |
pop[, y2 := as.integer(eka == 2)] | |
pop[, y3 := as.integer(eka == 3)] | |
# Y mainīgo nosaukumi | |
ynames <- paste0("y", 1:3) | |
ynames | |
# Palīginformācijas mainīgie (X) #### | |
# Konstante | |
pop[, x1 := 1L] | |
# Dzimums | |
pop[, .N, keyby = DZIMUMS] | |
pop[, x2 := as.integer(DZIMUMS == 1)] | |
# J100 | |
pop[, .N, keyby = J100] | |
pop[, x3 := as.integer(J100 == 1)] | |
# X mainīgo nosaukumi | |
xnames <- paste0("x", 1:3) | |
xnames | |
# X summārās vērtības | |
totals <- pop[, lapply(.SD, sum), .SDcols = xnames] | |
totals | |
# SRS #### | |
# Izlases apjoms | |
n <- 2000 | |
# Izlasē iekļaušanas varbūtības | |
pop[, pik := n / .N] | |
pop[, sum(pik)] | |
# SRS | |
pop[, s := srswor(n, .N)] | |
pop[, sum(s)] | |
# Sample | |
s <- pop[s == 1L] | |
# Dizaina svari | |
s[, d := 1 / pik] | |
s[, sum(d)] | |
all.equal(s[, sum(d)], pop[, .N]) | |
# HT novērtējumi | |
Y_HT <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * d))] | |
Y_HT | |
# Svaru kalibrācija - matricu formā | |
omega <- diag(s[, d]) | |
dim(omega) | |
omega[1:5, 1:5] | |
X <- as.matrix(s[, xnames, with = F]) | |
dim(X) | |
X[1:5, ] | |
totals | |
tot <- unlist(totals) | |
one <- rep(1, n) | |
g <- as.vector(t(one) + (t(tot) - t(one) %*% omega %*% X) %*% | |
solve(t(X) %*% omega %*% X) %*% t(X)) | |
head(g) | |
# Svaru kalibrācija - ar sampling bibliotēku | |
# Margins for plot | |
par("mar") | |
par(mar=rep(2, 4)) | |
s[, g := calib(.SD[, xnames, with = F], d, tot, | |
method = "linear", description = T)] | |
# Salīdzinam | |
all.equal(g, s[, g]) | |
# Kalibrētie svari | |
s[, w := d * g] | |
s[, sum(w)] | |
# Kalibrētie novērtējumi | |
Y_cal <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * w))] | |
Y_cal | |
# Patiesās Y summārās vērtības | |
Y_true <- pop[, lapply(.SD, sum), .SDcols = ynames] | |
Y_true | |
Y_true[, estimator := "true"] | |
Y_HT[, estimator := "HT"] | |
Y_cal[, estimator := "cal"] | |
Y_est <- rbindlist(list(Y_true, Y_HT, Y_cal)) | |
Y_est | |
# Simulācija #### | |
# Simulāciju skaits | |
I <- 1000 | |
sim <- foreach(i = 1:I, .combine = rbind) %do% { | |
# SRS | |
pop[, s := srswor(n, .N)] | |
# Sample | |
s <- pop[s == 1L] | |
# Dizaina svari | |
s[, d := s / pik] | |
# HT novērtējumi | |
Y_HT <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * d))] | |
# Svaru kalibrācija | |
s[, g := calib(.SD[, xnames, with = F], d, unlist(totals), | |
method = "linear", description = F)] | |
# Kalibrētie svari | |
s[, w := d * g] | |
# Kalibrētie novērtējumi | |
Y_cal <- s[, lapply(.SD[, ynames, with = F], function(y) sum(y * w))] | |
Y_cal | |
setnames(Y_HT, paste0(names(Y_HT), "_HT")) | |
setnames(Y_cal, paste0(names(Y_cal), "_cal")) | |
cbind(i = i, Y_HT, Y_cal) | |
} | |
sim | |
sim[, lapply(.SD[, -1, with = F], mean)] | |
sim[, lapply(.SD[, -1, with = F], sd)] | |
ggplot() + | |
geom_density(aes(y1_HT), data = sim) + | |
geom_density(aes(y1_cal), data = sim, colour = "red") + | |
theme_bw() | |
ggplot() + | |
geom_density(aes(y2_HT), data = sim) + | |
geom_density(aes(y2_cal), data = sim, colour = "red") + | |
theme_bw() | |
ggplot() + | |
geom_density(aes(y3_HT), data = sim) + | |
geom_density(aes(y3_cal), data = sim, colour = "red") + | |
theme_bw() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment