# Project setup: working directory, packages and fonts.

# Edit this path so that it points at the directory holding the data files
# (stats-esr.csv, vacataires.tsv, ...); all paths below are relative to it.
setwd("~/Documents/ESR/CJC/2013-DPG")

# library() stops immediately when a package is missing, whereas require()
# only warns and returns FALSE, which lets the script fail later on.
library(Amelia)
# NOTE(review): geocode() as called further down is exported by ggmap; a
# standalone 'geocode' package may not be installable. Kept as a soft
# dependency (require) so that its absence does not abort the script.
require(geocode)
library(ggmap)
library(ggplot2)
library(RColorBrewer)
library(reshape)
library(scales)
library(zoo)

# Requires Gill Sans MT.
library(extrafont)
# font_import()
loadfonts()

## DOCTORAL ENROLMENT AND DEFENCES (EFFECTIFS DOC/DT)

# Reads stats-esr.csv, drops its first data row and keeps columns 1, 5 and 7.
# NOTE(review): presumably year / enrolment growth / defence growth, as the
# labels assigned below suggest -- confirm against the CSV layout.
x <- read.csv("stats-esr.csv")[-1, c(1, 5, 7)]
names(x) <- c(
  "Année",
  "Taux de croissance annuel du doctorat (% inscriptions) ",
  "Taux de croissance annuel des docteurs (% soutenances)"
)
# Long format: one row per (year, series), series name in `variable`.
x <- melt(x, id = "Année")

ggplot(data = x, aes(x = Année, y = value)) +
  # Zero-growth reference line. The intercept has to be passed as
  # `yintercept`; `y = 0` is not a geom_hline() parameter.
  geom_hline(yintercept = 0, linetype = "dotted") +
  geom_line(aes(color = variable)) +
  # Large white discs drawn over the line so the value labels stay readable.
  geom_point(color = "white", size = 16) +
  # Signed value labels; the font family is a constant, so it is set outside
  # aes() rather than mapped as an aesthetic.
  geom_text(
    aes(
      color = variable,
      label = paste(ifelse(value > 0, "+", ""), round(value, 1))
    ),
    family = "Gill Sans MT"
  ) +
  scale_color_brewer("", palette = "Set1") +
  scale_y_continuous(breaks = -5:10) +
  # One tick per year present in the data.
  scale_x_continuous(breaks = min(x$Année):max(x$Année)) +
  labs(y = NULL, x = NULL) + #, title = "Figure 1. Taux de croissance des effectifs doctorants et docteurs") +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold"),
    text = element_text(family = "Gill Sans MT", size = 12),
    panel.border = element_rect(color = "white"),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    # The y axis text is hidden: values are printed on the points themselves.
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "top",
    legend.text = element_text(face = "bold", size = 12),
    legend.direction = "vertical"
  )

# Saves the last plot built above.
ggsave("stats-esr.svg", width = 6, height = 4)

## JCE DISPARITIES (DISPARITES JCE)

# Display labels for the three columns of the hand-entered table below.
jce_labels <- c(
  "Préfecture",
  "% de doctorants avec carte de séjour étudiant",
  "% de doctorants avec carte de séjour scientifiques-chercheurs"
)

# Percentages per prefecture, entered by hand, then relabelled and melted to
# long format (one row per prefecture and permit type).
y <- data.frame(
  P = c("Paris", "Évry", "Strasbourg", "Créteil"),
  E = c(76, 46, 51, 74),
  S = c(13, 46, 35, 3)
)
names(y) <- jce_labels
y <- melt(y, id = jce_labels[1])

# Theme shared by the whole figure: white-bordered panel, no vertical grid,
# no x axis text, legend stacked vertically on top.
jce_theme <- theme_bw() +
  theme(
    plot.title = element_text(face = "bold"),
    text = element_text(family = "Gill Sans MT", size = 12),
    panel.border = element_rect(color = "white"),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 12),
    axis.ticks = element_blank(),
    legend.position = "top",
    legend.text = element_text(face = "bold", size = 12),
    legend.direction = "vertical"
  )

# Dodged bars with the value printed next to each bar; coord_flip() turns the
# chart into horizontal bars.
ggplot(y, aes(Préfecture, value, fill = variable)) +
  geom_bar(aes(group = variable), stat = "identity", position = "dodge") +
  geom_text(
    aes(group = variable, label = value, family = "Gill Sans MT"),
    hjust = -1,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_brewer("", palette = "Set1") +
  scale_y_continuous(limits = c(0, 80)) +
  labs(y = NULL) +
  jce_theme +
  coord_flip()

# Saves the last plot built above.
ggsave("stats-esr-jce.svg", width = 6, height = 4)

## VACATAIRES

# Data.
z <- read.table(
  "vacataires.tsv",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

# LOCB academy values: rows whose Etablissement contains "Academie" carry the
# academy label; every other row takes the label of the next such row below it
# (fromLast = TRUE). The labelled rows themselves are then dropped.
z$academie <- ifelse(grepl("Academie", z$Etablissement), z$Etablissement, NA)
# na.rm = FALSE keeps the result the same length as the column: by default
# na.locf() drops NAs it cannot fill (here, rows after the last academy row),
# which would make this assignment fail on a length mismatch.
z$academie <- na.locf(z$academie, fromLast = TRUE, na.rm = FALSE)
z <- subset(z, !grepl("Academie", Etablissement))

# Mark missing values: after the next line `truezero` is TRUE where the flag
# was NOT set in the file, i.e. where a zero count is to be read as missing.
z$truezero <- is.na(z$truezero)
z$total[z$total == 0 & z$truezero] <- NA
z$inf96h[z$inf96h == 0 & z$truezero] <- NA
z$sup96h[z$sup96h == 0 & z$truezero] <- NA
z$truezero <- NULL

# Subset to metropolitan.
z <- z[!grepl("ANTILLES|LA REUNION|POLYNESIE", z$academie), ]
# Subset to measured PCNP and ETECA.
z <- z[!(is.na(z$PCNP) | is.na(z$ETECA)), ]
# Factor units.
z$Etablissement <- factor(z$Etablissement)
z$academie <- factor(z$academie)
# Check result.
str(z)

# Imputing vacs. by PCNP and ETECA: 1000 imputed datasets built from columns
# 1 and 3 to 6 of z, with Etablissement kept as an identifier only. Each row
# of `bounds` is (column index, lower, upper): columns 2 and 3 of the data
# passed to amelia() are constrained to [0, Inf).
# NOTE(review): those two columns are presumably inf96h and sup96h -- confirm
# against the column order of vacataires.tsv.
a.out <- amelia(
  x = z[, c(1, 3:6)],
  idvars = "Etablissement",
  m = 10^3,
  bounds = matrix(c(2, 3, 0, 0, Inf, Inf), nrow = 2)
)

save(a.out, file = "vacataires.amelia.Rda")
# cbind(z[, c(1, 3)], sapply(1:10, FUN = function(x) { a.out$imputations[[x]]$inf96h }))

# Maximal lon/lat information. The geocoding result is cached on disk and the
# lookup is only run when the cache file does not exist yet.
file <- "vacataires-fullgeo.Rda"
if (!file.exists(file)) {
  vacataires.fullgeo <- geocode(
    paste("Universite", z$Etablissement, "France"),
    output = "all"
  )
  save(vacataires.fullgeo, file = file)
}

# Minimal lon/lat information, cached the same way next to a copy of z.
file <- "vacataires-geo.txt"
if (!file.exists(file)) {
  l <- geocode(paste("Universite", z$Etablissement, "France"))
  write.csv(data.frame(z, l), file)
}

## RESULTS

# Reload the prepared data together with its cached coordinates.
# NOTE(review): the rows are assumed to be in the same order as the data given
# to amelia() above, i.e. the cache file was written from the same z.
z <- read.csv("vacataires-geo.txt")

# mark imputed (must happen before the observed NAs are overwritten below)
z$inf96h.imputed <- is.na(z$inf96h)
z$sup96h.imputed <- is.na(z$sup96h)
z$total.imputed <- z$inf96h.imputed | z$sup96h.imputed

# apply imputed: each value becomes its mean over all imputed datasets. The
# number of datasets is read from a.out instead of being hard-coded, and
# vapply() enforces one numeric vector of nrow(z) values per dataset.
imputation_ids <- seq_along(a.out$imputations)
z$inf96h <- rowMeans(vapply(
  imputation_ids,
  function(i) a.out$imputations[[i]]$inf96h,
  numeric(nrow(z))
))
z$sup96h <- rowMeans(vapply(
  imputation_ids,
  function(i) a.out$imputations[[i]]$sup96h,
  numeric(nrow(z))
))
z$total <- z$inf96h + z$sup96h

# imputation ratio:
prop.table(table(z$total.imputed))

# sums and ratios: totals split by observed (FALSE) / imputed (TRUE), then the
# imputed share. The imputed cell is selected by name, so the share is not
# silently taken from the wrong cell when only one of the two groups exists.
# The commented figures are outputs recorded from an earlier run.

inf96h <- tapply(z$inf96h, z$inf96h.imputed, sum)
#    FALSE     TRUE
# 56129.00 30695.45
inf96h["TRUE"] / sum(inf96h)
#      TRUE
# 0.3535346
sup96h <- tapply(z$sup96h, z$sup96h.imputed, sum)
#    FALSE     TRUE
#  9813.00 16689.24
sup96h["TRUE"] / sum(sup96h)
#      TRUE
# 0.6297294
total <- tapply(z$total, z$total.imputed, sum)
#    FALSE     TRUE
# 65615.00 47711.69
total["TRUE"] / sum(total)
#      TRUE
# 0.4210102
total
#    FALSE     TRUE
# 65615.00 47711.69

# overtime ratio (share of sup96h in the total)
sum(z$sup96h) / sum(z$total)

# ratio by academy, printed from highest to lowest
academies <- data.frame(tapply(z$total, z$academie, sum), tapply(z$sup96h, z$academie, sum))
academies$ratio <- academies[, 2] / academies[, 1]
academies[rev(order(academies$ratio)), 1:3]

# mean counts per row of z, and overall sup96h share:

sum(total) / nrow(z)
# [1] 786.9909
sum(sup96h) / nrow(z)
# [1] 184.0433
sum(sup96h) / sum(total)
# [1] 0.233857

## MAP

# Inputs for the map: the France outline, the rows with a positive total, the
# rows with a positive sup96h count, and the first colour of the Set1 palette.
france_outline <- map_data("france")
rows_with_total <- z[z$total > 0, ]
rows_with_sup96h <- z[z$sup96h > 0, ]
sup96h_colour <- brewer.pal(3, "Set1")[1]

# Bare map theme: no axes, no grid, white panel border and legend keys.
map_theme <- theme_bw() +
  theme(
    text = element_text(family = "Gill Sans MT", size = 12),
    panel.border = element_rect(color = "white"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    legend.text = element_text(face = "bold", size = 12),
    legend.key = element_rect(colour = "white")
  )

# Two layers of translucent points sized by area on a shared size scale:
# dark grey for the total, coloured for the sup96h count drawn on top.
ggplot(france_outline) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    fill = "grey95",
    colour = "grey50"
  ) +
  geom_point(
    aes(x = lon, y = lat, size = total),
    data = rows_with_total,
    colour = "grey10",
    alpha = .5
  ) +
  geom_point(
    aes(x = lon, y = lat, size = sup96h),
    data = rows_with_sup96h,
    colour = sup96h_colour,
    alpha = .5
  ) +
  scale_size_area("Vacataires\n", max_size = 12) +
  labs(y = NULL, x = NULL, title = NULL) + # "Vacations en-dessous et au-dessus de 96h ETD")
  map_theme

# Saves the last plot built above.
ggsave("stats-esr-vac.svg", width = 6, height = 4)