Skip to content

Instantly share code, notes, and snippets.

@patperu
Created February 29, 2016 18:50
Bevölkerungsprognose Berlin, Ebene PGR
---
title: "Bevölkerungsprognose Berlin, Ebene PGR"
author: "Patrick Hausmann"
date: "29 Februar 2016"
# always_allow_html: yes
# output: rmarkdown::github_document
output: html_document
---
```{r setup, include=TRUE}
library('readr')
library('reshape2')
library('ggplot2')
library('dplyr')
library('htmltools')
library('vegalite')
knitr::opts_chunk$set(echo = TRUE,
message = FALSE,
warning = FALSE,
fig.width=7, fig.height=7)
options(stringsAsFactors = FALSE)
```
```{r ag}
set_ag_gr <- function(df, ag_gr) {
if (!any(colnames(df) %in% "Alter")) {
stop("Dataframe enthält keine Spalte 'Alter'.", call. = FALSE)
}
z <- cut(df$Alter,
breaks = ag_gr,
include.lowest = TRUE,
right = FALSE)
z <- as.character(z)
z <- gsub("99", "99+", z)
z[is.na(z)] <- "Insgesamt"
z
}
```
### Import
```{r Import, eval=TRUE}
files <- list.files("input/data")
x <- lapply(files, function(file) {
yr <- c(2014, 2030)
x <- read_csv2(file.path("input/data", file), skip = 6, n_max = 102)
ix <- grep("[0-9]{4}", names(x))
ix <- lapply(ix, function(v) v + c(0:diff(yr)))
z <- lapply(ix, function(v) x[, v])
m <- list()
for(i in seq_along(z)) {
colnames(z[[i]]) <- paste("PGR", substr(colnames(z[[i]])[1], 1, 4), c(yr[1]:yr[2]), sep = "_")
m[[i]] <- z[[i]][-1,]
}
res <- do.call("cbind", m)
res <- data.frame(Alter = 0:100, res, stringsAsFactors = FALSE)
res <- melt(res, id.var = "Alter")
res$variable <- as.character(res$variable)
id <- do.call("rbind", strsplit(res$variable, "_"))
res <- data.frame(id, res, stringsAsFactors = FALSE)
res$variable <- NULL
colnames(res) <- c("Ebene", "raumid", "Jahr", "Alter", "Anzahl")
res$Jahr <- as.numeric(res$Jahr)
res <- res[order(res$raumid, res$Jahr, res$Alter), ]
res
})
x <- do.call("rbind", x)
x$ag_gr1 <- set_ag_gr(x, ag_gr = c(0, 18, 65, 99))
x$ag_gr2 <- set_ag_gr(x, ag_gr = c(0, 6, 18, 45, 65, 99))
write.csv2(x, file ="bev_pgr.csv", row.names = FALSE)
```
```{r Summarise}
x <- read_csv2(file ="bev_pgr.csv")
x$ag_gr2 <- factor(x$ag_gr2,
levels = c("[0,6)", "[6,18)", "[18,45)", "[45,65)", "[65,99+]"),
ordered = TRUE)
x1 <- subset(x, subset = ag_gr2 != "Insgesamt" & !raumid %in% c("0406"))
x1 <- tbl_df(x1) %>%
mutate(raumid = as.character(raumid)) %>%
group_by(Ebene, raumid, Jahr, ag_gr2) %>%
summarise(Anzahl = sum(Anzahl)) %>%
ungroup()
```
```{r Plot, eval = TRUE, include = TRUE}
p0 <- ggplot(x1, aes(x= Jahr, y = Anzahl)) + geom_line(aes(group = raumid, color = raumid))
p0 <- p0 + ggplot2::facet_grid( ~ ag_gr2)
p0
```
Using `vegalite`....
```{r}
x2 <- subset(x1, subset = !raumid %in% c("0406"))
x2$Ebene <- NULL
vegalite(viewport_height = 400) %>%
cell_size(300, 300) %>%
add_data(x2) %>%
encode_x("Jahr", "temporal") %>%
encode_y("Anzahl", "quantitative") %>%
encode_color("raumid", "nominal") %>%
facet_col("ag_gr2", "ordinal") %>%
mark_line()
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment