Last active
April 26, 2023 14:42
-
-
Save berkorbay/f394bf2ea8775cf7b5316aeafd27c482 to your computer and use it in GitHub Desktop.
YOK Foreign Students Data Starter Code for Tidyverse (dplyr + ggplot2) and Quarto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Yabancı Öğrenci Raporu (Örnek)"
author: "MEF MGMT 553 2022-2023 Bahar Dönemi"
format: pptx
editor: visual
execute:
  echo: false
  message: false
  warning: false
---
```{r,echo=FALSE,message=FALSE,warning=FALSE}
## Setup: attach the tidyverse, read the Excel workbook, tidy the data frame.
library(tidyverse)

# Read the workbook by relative path (resolved against the working directory
# at render time) instead of a machine-specific absolute path, so the report
# can be rendered on any computer that has the file next to the document.
raw_df <- readxl::read_excel("foreign_students_by_nationality_2021_2022.xlsx")

# Give the first seven columns short names, selected by position.
raw_df <- raw_df %>%
  rename(
    univ_name = 1,
    univ_type = 2,
    city_name = 3,
    country = 4,
    male = 5,
    female = 6,
    total = 7
  )

# Coerce the three count columns to numeric; values that cannot be parsed
# as numbers become NA.
the_df <- raw_df %>%
  mutate(across(c(male, female, total), as.numeric))
```
```{r}
## Example chart: list the 10 countries that send the most foreign students.
## Prepare the data.
df1 <- the_df %>%
  # Keep only rows that have no missing (NA) values.
  filter(complete.cases(.)) %>%
  # One group per country.
  group_by(country) %>%
  # Total number of students per country.
  summarise(total = sum(total)) %>%
  # Largest totals first.
  arrange(desc(total))

# Bar chart of the first 10 rows of df1; bars are ordered by descending total
# and the country names are tilted so they do not overlap.
ggplot(
  data = df1 %>% slice(seq_len(10)),
  mapping = aes(x = reorder(country, -total), y = total)
) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    x = "",
    y = "Öğrenci Sayısı",
    title = "En Fazla Yabancı Öğrencinin Geldiği Ülkeler",
    subtitle = "Kaynak: 2021-2022 YÖK Verileri"
  )
```
```{r}
## Fonksiyon hazırla | |
# Build a horizontal stacked bar chart of the universities that host the most
# students from one country, split into male and female counts.
#
# Arguments:
#   the_df       Data frame with columns univ_name, univ_type, country, male,
#                female and total.
#   the_country  Country to plot; only the first element is used for filtering.
#   n_top        Number of universities to show (default 10, the value the
#                original implementation hard-coded).
#
# Returns: a ggplot object (not printed here).
generate_country_plot <- function(the_df, the_country, n_top = 10) {
  country_df <- the_df %>%
    filter(complete.cases(.) & country == the_country[1]) %>%
    arrange(desc(total)) %>%
    # Pick the top universities BEFORE reshaping, so the limit is expressed in
    # universities rather than in (male, female) row pairs.
    slice(seq_len(n_top)) %>%
    select(univ_name, male, female, total, univ_type) %>%
    # Long format: one row per university and gender (columns name / value).
    pivot_longer(cols = c(male, female))

  ggplot(
    country_df,
    aes(x = reorder(univ_name, total), y = value, fill = name)
  ) +
    geom_col(position = "stack") +
    coord_flip() +
    theme_minimal() +
    labs(
      title = paste0(the_country),
      subtitle = paste0(
        "Ülkeden gelen öğrencilerin en fazla yerleştiği ",
        n_top,
        " üniversite."
      ),
      # After coord_flip() the y aesthetic (student counts) is drawn on the
      # horizontal axis, so the count label belongs to y and the
      # university-name axis (x) stays unlabelled.
      y = "Öğrenci Sayısı",
      x = "",
      fill = ""
    ) +
    theme(legend.position = "top")
}
```
```{r,output='asis'}
# Emit one section per top country: a level-2 heading, the student count and
# the per-country chart. The loop is capped at nrow(df1) so it cannot run past
# the end of df1 when fewer than 10 countries are available.
for (i in seq_len(min(10, nrow(df1)))) {
  # df1 holds one row per country, so row i carries both the name and the total.
  country_row <- df1 %>% slice(i)
  the_country <- country_row$country
  total_students <- country_row$total

  cat(paste0("## ", the_country), "\n\n")
  cat(paste0("Öğrenci sayısı: ", total_students, "\n\n"))
  # Values are not auto-printed inside a for loop, so render the plot explicitly.
  knitr::knit_print(generate_country_plot(the_df, the_country))
  cat("\n\n")
}
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Yabancı Öğrenci Raporu (Üniversiteler)"
author: "MEF MGMT 553 2022-2023 Bahar Dönemi"
format: html
editor: visual
execute:
  echo: false
  message: false
  warning: false
---
```{r,echo=FALSE,message=FALSE,warning=FALSE}
## Setup: attach the tidyverse, read the Excel workbook, tidy the data frame.
library(tidyverse)

# Number of universities shown in the overview chart and in the report loop.
n_results <- 20

# Read the workbook and give its first seven columns short names (by position).
raw_df <- readxl::read_excel("foreign_students_by_nationality_2021_2022.xlsx") %>%
  rename(
    univ_name = 1,
    univ_type = 2,
    city_name = 3,
    country = 4,
    male = 5,
    female = 6,
    total = 7
  )

# Same data with the three count columns coerced to numeric.
the_df <- mutate(raw_df, across(c(male, female, total), as.numeric))
```
```{r}
## Example chart: the n_results universities of type "VAKIF" that host the
## most foreign students.
## Prepare the data.
df1 <- the_df %>%
  filter(univ_type == "VAKIF") %>%
  # Keep only rows that have no missing (NA) values.
  filter(complete.cases(.)) %>%
  # One group per university (univ_type is kept so it can drive the fill).
  group_by(univ_name, univ_type) %>%
  # Total students per university; drop the grouping explicitly so df1 is a
  # plain ungrouped tibble for the code that slices and filters it later.
  summarise(total = sum(total), .groups = "drop") %>%
  # Largest totals first.
  arrange(desc(total))

# Horizontal bar chart of the first n_results rows of df1.
ggplot(
  df1 %>% slice(seq_len(n_results)),
  aes(
    x = reorder(univ_name, total),
    y = total,
    fill = univ_type
  )
) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    title = "En Fazla Yabancı Öğrencinin Bulunduğu Üniversiteler",
    subtitle = "Kaynak: 2021-2022 YÖK Verileri",
    y = "Öğrenci Sayısı",
    x = ""
  ) +
  coord_flip()
```
```{r}
## Fonksiyon hazırla | |
# Build a horizontal stacked bar chart showing, for one university, which
# countries its foreign students come from, as male/female shares of the
# university's total.
#
# Arguments:
#   the_df          Data frame with columns univ_name, country, male, female
#                   and total.
#   the_university  University to plot; only the first element is used for
#                   filtering.
#
# Returns: a ggplot object (not printed here).
generate_university_plot <- function(the_df, the_university) {
  # Complete rows of the requested university, largest countries first.
  univ_rows <- the_df %>%
    filter(complete.cases(.) & univ_name == the_university[1]) %>%
    arrange(desc(total))

  # Turn the male/female counts into shares of the university's overall total,
  # then reshape to one row per country and gender (columns name / value).
  share_long <- univ_rows %>%
    mutate(male = male / sum(total), female = female / sum(total)) %>%
    select(country, male, female, total) %>%
    pivot_longer(., -c(country, total)) %>%
    arrange(desc(total))

  # The first 20 long rows are the (male, female) pairs of the first 10
  # countries.
  top_rows <- slice(share_long, 1:20)

  ggplot(top_rows, aes(x = reorder(country, total), y = value, fill = name)) +
    geom_col(position = "stack") +
    coord_flip() +
    theme_minimal() +
    labs(
      title = paste0(the_university),
      subtitle = "Üniversiteye gelen öğrencilerin en fazla geldikleri 10 ülke.",
      y = "Öğrenci Yüzdesi",
      x = "",
      fill = ""
    ) +
    theme(legend.position = "top") +
    scale_y_continuous(labels = scales::percent)
}
```
```{r,output='asis',eval=TRUE}
# Emit one section per top university: a level-2 heading, the student count
# and the per-university chart. The loop is capped at nrow(df1) so it cannot
# run past the end of df1 when fewer than n_results universities are available.
for (i in seq_len(min(n_results, nrow(df1)))) {
  # df1 is already restricted to "VAKIF" universities and holds one row per
  # university, so row i carries both the name and the total.
  university_row <- df1 %>% slice(i)
  the_university <- university_row$univ_name
  total_students <- university_row$total

  cat(paste0("## ", the_university), "\n\n")
  cat(paste0("Öğrenci sayısı: ", total_students, "\n\n"))
  # Values are not auto-printed inside a for loop, so render the plot explicitly.
  knitr::knit_print(generate_university_plot(the_df, the_university))
  cat("\n\n")
}
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-time setup: uncomment to install the required packages. The package
# names must be passed as a single character vector; a second positional
# argument would be interpreted as the library directory (`lib`).
# install.packages(c("tidyverse", "readxl"))
library(tidyverse)

# Data source:
# https://github.com/berkorbay/datasets/raw/master/foreign_students/foreign_students_by_nationality_2021_2022.xlsx
raw_df <- readxl::read_excel("foreign_students_by_nationality_2021_2022.xlsx")

# Give the first seven columns short names, selected by position.
raw_df <- raw_df %>%
  rename(
    univ_name = 1,
    univ_type = 2,
    city_name = 3,
    country = 4,
    male = 5,
    female = 6,
    total = 7
  )

# Coerce the three count columns to numeric.
the_df <- raw_df %>%
  mutate(across(c(male, female, total), as.numeric))

# Total students per country, largest first (printed when run as a script).
the_df %>%
  filter(complete.cases(.)) %>%
  group_by(country) %>%
  summarise(total = sum(total)) %>%
  arrange(desc(total))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment