Created
March 15, 2020 13:22
-
-
Save apoorv74/2b40cdd8184613d5d98410c0da136dc0 to your computer and use it in GitHub Desktop.
Processing changelog for the Annual Budget 2020-2021
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
# Input and output locations ----
# Change These
file_path <- "~/Downloads/budget_changelog_2020/changelog_2.csv"
file_export_path <- "~/Downloads/budget_changelog_2020/changelog_updated.csv"

# Load the raw changelog ----
# Columns `77c77`, X9 and X10 are dropped; X2-X4 are kept as text and X5-X8
# as numbers, leaving seven columns.
# NOTE(review): the header `77c77` is presumably the first diff marker of the
# source file being picked up as a column name -- confirm against the CSV.
changelog <- read_csv(
  file_path,
  col_types = cols(
    # skipped columns
    `77c77` = col_skip(),
    X9 = col_skip(),
    X10 = col_skip(),
    # descriptive columns
    X2 = col_character(),
    X3 = col_character(),
    X4 = col_character(),
    # amount columns
    X5 = col_double(),
    X6 = col_double(),
    X7 = col_double(),
    X8 = col_double()
  )
)

# Give the seven remaining columns readable labels.
changelog <- setNames(
  changelog,
  c(
    "Deparment/Ministry",
    "Sub-allocation",
    "Sub-sub-allocation",
    "Actuals (2018-19)",
    "Budget Estimates (2019-20)",
    "Revised Estimates (2019-20)",
    "Budget Estimates (2020-21)"
  )
)

# Departments / ministries to keep ----
all_departments <- c(
  "Department of Commerce",
  "Department of Economic Affairs",
  "Interest Payments",
  "Ministry of Minority Affairs",
  "Ministry of Tribal Affairs",
  "Department of Empowerment of Persons with Disabilities",
  "Pensions",
  "Department of Financial Services",
  "Indirect Taxes",
  "Ministry of Housing and Urban Affairs",
  "Department of Expenditure",
  "Ministry of Micro, Small and Medium Enterprises",
  "Department of Food and Public Distribution",
  "Ministry of Defence (Civil)",
  "Ministry of Textiles",
  "Department of Health and Family Welfare",
  "Dadra and Nagar Haveli and Daman and Diu",
  "Department of Social Justice and Empowerment",
  "Transfers to States",
  "Ministry of Personnel, Public Grievances and Pensions",
  "Ministry of Food Processing Industries",
  "Ministry of Planning"
)

# Keep only rows for the listed departments.
changelog <- dplyr::filter(
  changelog,
  .data[["Deparment/Ministry"]] %in% all_departments
)

# Build a "<department>-<sub-allocation>-<sub-sub-allocation>" key per row.
changelog <- dplyr::mutate(
  changelog,
  key = paste(
    .data[["Deparment/Ministry"]],
    .data[["Sub-allocation"]],
    .data[["Sub-sub-allocation"]],
    sep = "-"
  )
)

# Every distinct key, in order of first appearance.
all_unique_keys <- unique(changelog$key)
# Build the side-by-side changelog rows for a single key.
#
# Reads the global `changelog` table and keeps the rows whose `key` equals
# `data_key`.
#   * Even number of rows: the first half is treated as the base rows and the
#     second half as their updated counterparts. Columns 4-7 of the updated
#     rows are converted to text, blank where missing, and attached to the
#     base rows with an "_updated" suffix.
#   * Odd number of rows: every row is treated as a base row and the four
#     "_updated" columns are left blank.
# In both cases missing values in the base rows are replaced with 0.
#
# Args:
#   data_key: a single key string, e.g.
#     'Ministry of Planning-Other General Economic Services-Total Other General Economic Services'
#
# Returns:
#   The base rows (including the `key` column) with four extra "_updated"
#   columns appended.
create_log <- function(data_key) {
  key_df <- changelog %>% filter(.data$key == data_key)
  rows_to_change <- nrow(key_df)

  if (rows_to_change %% 2 == 0) {
    base_rows <- rows_to_change / 2

    # seq_len() keeps the selection empty when no row matches the key.
    base_df <- key_df[seq_len(base_rows), ]
    base_df[is.na(base_df)] <- 0

    # Second half of the rows, amount columns only. The offset is added to an
    # explicit 1..base_rows sequence: `base_rows + 1:n` would parse as
    # `base_rows + (1:n)` and index past the end of the table.
    updated_cols <- key_df[base_rows + seq_len(base_rows), 4:7]

    # lapply() always yields one element per column; sapply() would collapse
    # to a plain vector when there is only a single updated row.
    updated_cols[] <- lapply(updated_cols, as.character)
    updated_cols[is.na(updated_cols)] <- ""
    names(updated_cols) <- paste0(names(updated_cols), "_updated")
  } else {
    base_df <- key_df
    base_df[is.na(base_df)] <- 0

    # One blank placeholder per base row so the column bind never depends on
    # recycling; keep the blanks as character rather than factor.
    blank <- rep("", rows_to_change)
    updated_cols <- data.frame(
      "Actuals (2018-19)_updated" = blank,
      "Budget Estimates (2019-20)_updated" = blank,
      "Revised Estimates (2019-20)_updated" = blank,
      "Budget Estimates (2020-21)_updated" = blank,
      check.names = FALSE,
      stringsAsFactors = FALSE
    )
  }

  dplyr::bind_cols(base_df, updated_cols)
}
# Assemble the processed changelog ----
# Run create_log() once per unique key and stack the per-key results.
build_changelog <- dplyr::bind_rows(lapply(all_unique_keys, create_log))

# The helper key was only needed for grouping; drop it before export.
build_changelog$key <- NULL

# Export processed changelog
# The destination is passed positionally: the second argument of write_csv()
# is the output location under both its older name (`path`) and its newer
# name (`file`), so this call works across readr versions.
readr::write_csv(build_changelog, file_export_path, col_names = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Details here: https://medium.com/civicdatalab/inconsistencies-within-the-union-budget-2020-5961d74ec98e