Last active
August 25, 2024 05:33
-
-
Save Tadge-Analytics/b9377455891f1f02750dc0c3681f1063 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Expense Analyser" | |
output: | |
flexdashboard::flex_dashboard: | |
navbar: | |
- { title: "About", href: "https://www.tadge-analytics.com.au", align: right } | |
runtime: shiny | |
--- | |
```{r setup, include=FALSE} | |
library(tidyverse) | |
library(plotly) | |
library(lubridate) | |
library(openxlsx) | |
library(DT) | |
library(flexdashboard) | |
# library(rhandsontable) | |
library(shinyWidgets) | |
library(googlesheets4) | |
sheets_auth(email = "[email protected]") | |
# might need to update the widgets where possible | |
# shinyWidgets::shinyWidgetsGallery() | |
``` | |
Transactions {data-orientation=rows} | |
===================================== | |
SideBar {.sidebar} | |
--- | |
```{r} | |
# change this to a slider | |
fileInput("cba", | |
"CBA", | |
multiple = FALSE, | |
accept = c("text/csv", | |
"text/comma-separated-values, text/plain", | |
".csv")) | |
fileInput("ing", | |
"ING", | |
multiple = FALSE, | |
accept = c("text/csv", | |
"text/comma-separated-values, text/plain", | |
".csv")) | |
# fileInput("up_bank", | |
# "UpBank", | |
# multiple = FALSE, | |
# accept = c("text/csv", | |
# "text/comma-separated-values, text/plain", | |
# ".csv")) | |
fileInput("paypal", | |
"Paypal", | |
multiple = FALSE, | |
accept = c("text/csv", | |
"text/comma-separated-values, text/plain", | |
".csv")) | |
# | |
# | |
# fileInput("cash", | |
# "Cash", | |
# multiple = FALSE, | |
# accept = c("text/csv", | |
# "text/comma-separated-values, text/plain", | |
# ".csv")) | |
# | |
# | |
# fileInput("manual", | |
# "Manual entry", | |
# multiple = FALSE, | |
# accept = c("text/csv", | |
# "text/comma-separated-values, text/plain", | |
# ".csv")) | |
############################################## | |
# use these below by default | |
fileInput("categorisation_file", | |
"Retrieve from Excel", | |
multiple = FALSE, | |
accept = c(".xlsx")) | |
downloadHandler( | |
filename = function() { | |
"Categorisations done.xlsx" | |
}, | |
content = function(file) { | |
my_workbook <- createWorkbook() | |
addWorksheet( | |
wb = my_workbook, | |
sheetName = "Categorisations" | |
) | |
writeDataTable( | |
my_workbook, | |
sheet = "Categorisations", | |
new_and_already_done_row_items(), | |
tableStyle = "TableStyleMedium2", withFilter = T | |
) | |
setColWidths(my_workbook, "Categorisations", cols = c(1:7), widths = c(8,14,12,74,20,20,20)) | |
addWorksheet( | |
wb = my_workbook, | |
sheetName = "Categories" | |
) | |
writeDataTable( | |
my_workbook, | |
sheet = "Categories", | |
keywords_newly_created(), | |
tableStyle = "TableStyleMedium2", withFilter = T | |
) | |
setColWidths(my_workbook, "Categories", cols = c(1:4), widths = c(22)) | |
addWorksheet( | |
wb = my_workbook, | |
sheetName = "forecast rates" | |
) | |
writeDataTable( | |
my_workbook, | |
sheet = "forecast rates", | |
rates_for_saving(), | |
tableStyle = "TableStyleMedium2", withFilter = T | |
) | |
setColWidths(my_workbook, "forecast rates", cols = c(1:9), widths = c(22)) | |
addWorksheet( | |
wb = my_workbook, | |
sheetName = "forecast values" | |
) | |
writeDataTable( | |
my_workbook, | |
sheet = "forecast values", | |
forecast_table_for_saving() %>% | |
mutate(Amount = round(Amount, 2)), | |
tableStyle = "TableStyleMedium2", withFilter = T | |
) | |
setColWidths(my_workbook, "forecast values", cols = c(1:7), widths = c(15,15,30,20,12,30,30)) | |
saveWorkbook(my_workbook, file) | |
}) | |
br() | |
br() | |
#################################### | |
# add a toggle | |
materialSwitch("use_google_sheets", "Source from Googlesheets", status = "success") | |
textInput("googlesheets_id_entry", | |
"Googlesheets ID", | |
value = "19t1yTFn3k7GL2FFPCSbW8Of98P4fItp9wDLUF9b6paM") | |
actionButton("retrieve_from_google_sheets", "Retrieve (from GS)", icon = icon("download", "fa")) | |
br() | |
br() | |
amount_col_name <- as.symbol("Amount (if different going forward)") | |
reference_date_col_name <- as.symbol("A reference date -past or future") | |
limit_prev_col_name <- as.symbol("Irregular schedule transaction limit for forecasting") | |
blank_rates_table <- tibble(`Category Level1` = NA_character_, | |
`Category Level2` = NA_character_, | |
`Category Level3` = NA_character_, | |
`No plans for it to reoccur` = NA_integer_, | |
!!limit_prev_col_name := NA_integer_, | |
`Shedule|Number` = NA_integer_, | |
`Shedule|Timeframe` = NA_character_, | |
!!amount_col_name := NA_integer_, | |
!!reference_date_col_name := NA_integer_) | |
# upload button | |
actionButton("upload_to_google_sheets", "Upload (to GS)", icon = icon("upload", "fa")) | |
# upload back to google sheets | |
observeEvent( | |
input$upload_to_google_sheets, { | |
req(new_and_already_done_row_items()) # take this object and upload it back to googlesheets | |
req(keywords_newly_created()) | |
req(rates_for_saving()) | |
req(forecast_table_for_saving()) | |
if (nrow(new_and_already_done_row_items()) == 0) { | |
googlesheets4::write_sheet( | |
tibble( | |
data_source = NA_character_, | |
Date = NA_character_, | |
Amount = NA_character_, | |
Description = NA_character_, | |
Keyword = NA_character_, | |
`Suggested Category` = NA_character_, | |
`Category Level1` = NA_character_) %>% | |
mutate(Date = as.Date(Date), | |
Amount = as.numeric(Amount)), | |
ss = input$googlesheets_id_entry, sheet = "Categorisations") | |
} else { | |
googlesheets4::write_sheet( new_and_already_done_row_items(), ss = input$googlesheets_id_entry, sheet = "Categorisations") | |
} | |
if (nrow(keywords_newly_created()) == 0) { | |
googlesheets4::write_sheet( | |
tibble(type = NA_character_, | |
Keyword = NA_character_, | |
`Category Level1` = NA_character_, | |
warning = NA_character_), | |
ss = input$googlesheets_id_entry, sheet = "Categories") | |
} else { | |
googlesheets4::write_sheet( keywords_newly_created(), ss = input$googlesheets_id_entry, sheet = "Categories") | |
} | |
if (nrow(rates_for_saving()) == 0) { | |
googlesheets4::write_sheet( | |
blank_rates_table, | |
ss = input$googlesheets_id_entry, sheet = "forecast rates") | |
} else { | |
googlesheets4::write_sheet( rates_for_saving(), ss = input$googlesheets_id_entry, sheet = "forecast rates") | |
} | |
if (nrow(forecast_table_for_saving()) == 0) { | |
googlesheets4::write_sheet(tibble( | |
data_source = NA_character_, | |
Date = NA_character_, | |
Amount = NA_character_, | |
Description = NA_character_, | |
Keyword = NA_character_, | |
`Suggested Category` = NA_character_, | |
`Category Level1` = NA_character_, | |
`Category Level2` = NA_character_, | |
`Category Level3` = NA_character_), | |
ss = input$googlesheets_id_entry, sheet = "forecast values") } else { | |
googlesheets4::write_sheet( forecast_table_for_saving(), ss = input$googlesheets_id_entry, sheet = "forecast values") | |
} | |
}) | |
# switch for only Cats done table | |
# to make upload / download faster | |
# googlesheets4::write_sheet(new_and_already_done_row_items(), ss = input$googlesheets_id_entry, sheet = "Categorisations") | |
# observeEvent( | |
# input$upload_to_google_sheets, { | |
# req(new_and_already_done_row_items()) | |
# googlesheets4::write_sheet(new_and_already_done_row_items(), ss = input$googlesheets_id_entry, sheet = "Categorisations") | |
# }) | |
``` | |
```{r} | |
blank_table <- tibble(Date = NA_character_, | |
Amount = NA_character_, | |
Description = NA_character_, | |
data_source = NA_character_) %>% | |
mutate(Date = as.Date(Date), | |
Amount = as.numeric(Amount)) %>% | |
slice(0) | |
# cba | |
cba <- reactive(if (is.null(input$cba)){ | |
blank_table | |
} else { | |
read_csv(input$cba$datapath, col_names = F) %>% | |
rename(Date = 1, Amount = 2, Description = 3, Balance = 4) %>% | |
mutate(data_source = "cba", | |
Date = dmy(Date)) %>% | |
select(-Balance) | |
}) | |
# ing | |
ing <- reactive(if (is.null(input$ing)){ | |
blank_table | |
} else { | |
read_csv(input$ing$datapath) %>% | |
mutate(Credit = as.numeric(Credit), | |
Debit = as.numeric(Debit), | |
Amount = coalesce(Credit, Debit), | |
data_source = "ing", | |
Date = dmy(Date)) %>% | |
select(-Credit, -Debit, -Balance) | |
}) | |
# paypal | |
paypal <- reactive(if (is.null(input$paypal)){ | |
blank_table | |
} else { | |
paypal_import <- read_csv(input$paypal$datapath) %>% | |
mutate(data_source = "paypal", | |
Date = dmy(Date)) %>% | |
rename(Description = Name) %>% | |
select(-Balance, -`Time zone`, -`Receipt ID`) | |
# work out international payments aud amounts | |
aud_amounts <- paypal_import %>% | |
filter(Currency != "AUD" | Type == "General Currency Conversion") %>% | |
filter(is.na(Description)) %>% | |
filter(Currency == "AUD") %>% # these are the AUD values for above | |
select(Date, Time, Currency, Amount) | |
# work out international payments desciprtions | |
descriptions_with_aud_amounts <- paypal_import %>% | |
filter(Currency != "AUD" | Type == "General Currency Conversion") %>% | |
filter(!is.na(Description)) %>% | |
select(-Amount, -Currency) %>% | |
left_join(aud_amounts, by = c("Date", "Time")) %>% | |
distinct(Date, Time, .keep_all = T) | |
# combine non-international with internationals | |
paypal_import %>% | |
filter(!(Currency != "AUD" | Type == "General Currency Conversion")) %>% | |
filter(Type != "Transfer to PayPal account") %>% | |
filter(Description != "PayPal") %>% | |
bind_rows(descriptions_with_aud_amounts) %>% | |
mutate(Description = paste0(Description, " (", Type, ")")) %>% | |
select(-Time, -Currency, -Type, -Status) | |
}) | |
# up_bank | |
# | |
# up_bank <- reactive(if (is.null(input$up_bank)){ | |
# | |
# blank_table | |
# | |
# } else { | |
# | |
# read_csv(input$up_bank$datapath) %>% | |
# transmute(Date = as.Date(Time), | |
# Description = paste0(Payee, "|", `Transaction Type`), | |
# Amount = `Total (AUD)`, | |
# data_source = "upbank") | |
# | |
# }) | |
# cash | |
# | |
# cash <- reactive(if (is.null(input$cash)){ | |
# | |
# blank_table | |
# | |
# } else { | |
# | |
# read_csv(input$cba$datapath) %>% | |
# mutate(Date = dmy(Date), | |
# data_source = "cash") | |
# | |
# }) | |
# manual entry | |
# | |
# manual <- reactive(if (is.null(input$manual)){ | |
# | |
# blank_table | |
# | |
# } else { | |
# | |
# read_csv(input$manual$datapath) %>% | |
# mutate(Date = dmy(Date), | |
# data_source = "manual") | |
# }) | |
# combine all transaction sources | |
combined_import1 <- reactive(bind_rows(cba(), | |
ing(), | |
paypal(), | |
# cash(), | |
# up_bank() | |
)) | |
# create seperate rows for cashouts that happened with purchases | |
purchase_cash_outs <- reactive(combined_import1() %>% | |
filter(str_detect(Description, "Cash Out")) %>% | |
{if (nrow(.) == 0) {.} else { | |
mutate(., row_id = row_number()) %>% | |
uncount(2) %>% | |
group_by(row_id) %>% | |
mutate(cash_out_or_purchase = c("cash_out", "purchase")) %>% | |
ungroup() %>% | |
separate(Description, "\\$", into = c("cash_out_amount", "second"), remove = F, extra = "merge") %>% | |
separate(second, "\\.", into = c("cash_out_amount", "second"), extra = "merge") %>% | |
mutate(cash_out_amount = if_else(is.na(cash_out_amount), Amount, -as.numeric(cash_out_amount)), | |
purchase_amount = Amount - cash_out_amount, | |
Amount = if_else(cash_out_or_purchase == "cash_out", cash_out_amount, purchase_amount), | |
Description = paste0("(", cash_out_or_purchase, ") ", Description)) %>% | |
select(Date, Description, Amount, data_source) %>% | |
filter(Amount != 0) | |
}}) | |
combined_import <- reactive(bind_rows(combined_import1() %>% filter(!str_detect(Description, "Cash Out")), | |
purchase_cash_outs())) | |
blank_categorisations_done <- tibble( | |
data_source = NA_character_, | |
Date = NA_character_, | |
Amount = NA_character_, | |
Description = NA_character_, | |
Keyword = NA_character_, | |
`Suggested Category` = NA_character_, | |
`Category Level1` = NA_character_) %>% | |
mutate(Date = as.Date(Date), | |
Amount = as.numeric(Amount)) %>% | |
slice(0) | |
# retrieval of the googlesheets data | |
google_sheet_cats_done_data <- eventReactive(c(input$retrieve_from_google_sheets, | |
input$retrieve_from_google_sheets_2), { | |
if (input$retrieve_from_google_sheets == 0) {NULL} else { | |
read_sheet(ss = input$googlesheets_id_entry, sheet = "Categorisations", col_types = "c") %>% | |
mutate(Date = as.Date(Date), | |
Amount = as.numeric(Amount), | |
`Suggested Category` = as.character(`Suggested Category`), | |
`Category Level1` = as.character(`Category Level1`), | |
`Category Level1` = if_else(`Category Level1` == "1", `Suggested Category`, `Category Level1`), | |
Keyword = trimws(toupper(Keyword)), | |
`Category Level1` = trimws(toupper(`Category Level1`)))}}) | |
# make this run only when there are values for input$categorisation_file | |
excel_cats_done_data <- reactive( | |
if (is.null(input$categorisation_file)) {NULL} else { | |
readxl::read_excel(input$categorisation_file$datapath, sheet = "Categorisations", col_types = "text") %>% | |
mutate(Date = as.Date(as.numeric(Date), origin = "1899-12-30"), | |
Amount = as.numeric(Amount), | |
`Category Level1` = if_else(`Category Level1` == "1", `Suggested Category`, `Category Level1`), | |
Keyword = trimws(toupper(Keyword)), | |
`Category Level1` = trimws(toupper(`Category Level1`))) | |
}) | |
# logic for assigning object(s) | |
categorisations_done_upload <- reactive( | |
if (is.null(input$categorisation_file) & input$retrieve_from_google_sheets == 0) { | |
blank_categorisations_done} else { | |
if (!is.null(input$categorisation_file) & !input$use_google_sheets) { | |
excel_cats_done_data() | |
} else { | |
google_sheet_cats_done_data() | |
}}) | |
blank_categories_data <- tibble( | |
Keyword = NA_character_, | |
`Category Level1` = NA_character_) %>% | |
slice(0) | |
excel_categories_data <- reactive( | |
if (is.null(input$categorisation_file)) {NULL} else { | |
readxl::read_excel(input$categorisation_file$datapath, sheet = "Categories", col_types = "text") %>% | |
filter(!is.na(Keyword)) %>% | |
select(-type) %>% | |
mutate(Keyword = trimws(toupper(Keyword)), | |
`Category Level1` = trimws(toupper(`Category Level1`))) | |
}) | |
googlesheets_categories_data <- eventReactive(c(input$retrieve_from_google_sheets, | |
input$retrieve_from_google_sheets_2), { | |
if (input$retrieve_from_google_sheets == 0) {NULL} else { | |
read_sheet(ss = input$googlesheets_id_entry, sheet = "Categories", col_types = "c") %>% | |
filter(!is.na(Keyword)) %>% | |
select(-type) %>% | |
mutate(Keyword = trimws(toupper(Keyword)), | |
`Category Level1` = trimws(toupper(`Category Level1`)))}}) | |
categories_upload <- reactive( | |
if (is.null(input$categorisation_file) & input$retrieve_from_google_sheets == 0) { | |
blank_categories_data} else { | |
if (!is.null(input$categorisation_file) & !input$use_google_sheets) { | |
excel_categories_data() | |
} else { | |
googlesheets_categories_data() | |
}}) | |
excel_rates_table <- reactive( | |
if (is.null(input$categorisation_file)) {NULL} else { | |
readxl::read_excel(input$categorisation_file$datapath, sheet = "forecast rates", col_types = "text") %>% | |
mutate(`Category Level1` = trimws(toupper(`Category Level1`)), | |
`Category Level2` = trimws(toupper(`Category Level2`)), | |
`Category Level3` = trimws(toupper(`Category Level3`)))}) | |
googlesheets_rates_table <- eventReactive(c(input$retrieve_from_google_sheets, | |
input$retrieve_from_google_sheets_2), { | |
if (input$retrieve_from_google_sheets == 0) {NULL} else { | |
read_sheet(ss = input$googlesheets_id_entry, sheet = "forecast rates", col_types = "c") %>% | |
mutate(`Category Level1` = trimws(toupper(`Category Level1`)), | |
`Category Level2` = trimws(toupper(`Category Level2`)), | |
`Category Level3` = trimws(toupper(`Category Level3`)))}}) | |
rates_import <- reactive( | |
if (is.null(input$categorisation_file) & input$retrieve_from_google_sheets == 0) { | |
blank_rates_table} else { | |
if (!is.null(input$categorisation_file) & !input$use_google_sheets) { | |
excel_rates_table() | |
} else { | |
googlesheets_rates_table() | |
}}) | |
# process uploaded files | |
combine_transactions_with_historical <- function(combined_import, | |
categorisations_done_upload, | |
categories_upload) { | |
# preparing the update of the keywords + categories table (second sheet) | |
# still present in cats done | |
existing_keywords <- categories_upload %>% | |
distinct(Keyword) %>% | |
semi_join(categorisations_done_upload, by = "Keyword") %>% | |
mutate(type = "existing keyword") | |
# No longer present in cats done sheet? | |
historical_keywords <- categories_upload %>% | |
distinct(Keyword) %>% | |
anti_join(categorisations_done_upload, by = "Keyword") %>% | |
mutate(type = "historical keyword") | |
# new (only in cats done records) | |
new_keywords <- categorisations_done_upload %>% | |
distinct(Keyword) %>% | |
filter(!is.na(Keyword)) %>% | |
anti_join(categories_upload, by = "Keyword") %>% | |
mutate(type = "new keyword") | |
initial_all_keywords <- bind_rows(existing_keywords, | |
historical_keywords, | |
new_keywords) %>% | |
arrange(Keyword) | |
initial_previous_keywords <- initial_all_keywords %>% | |
distinct(Keyword) %>% | |
group_by(Keyword) %>% | |
{if (nrow(.) == 0) mutate(., test = NA) else crossing(., test = .$Keyword)} %>% | |
{if (nrow(.) == 1) mutate(., contained = NA) else | |
filter(., Keyword != test) %>% | |
mutate(contained = str_detect(test, fixed(Keyword)))} %>% | |
mutate(seconded = if_else( | |
any(contained) | | |
str_length(Keyword) < 4, | |
1, | |
0)) %>% | |
select(-test, -contained) %>% | |
distinct(Keyword, .keep_all = T) # we don't want to have keywords with multiple categories | |
# what are going to be the suggested categories for each keyword | |
cats_done_with_level1 <- | |
categorisations_done_upload %>% | |
filter(!is.na(`Category Level1`)) %>% | |
count(Keyword, `Category Level1`) %>% | |
group_by(Keyword) %>% | |
mutate(percent_of_total = n/sum(n)) %>% | |
arrange(desc(percent_of_total)) %>% | |
slice(1) %>% | |
mutate(warning = if_else(percent_of_total != 1, "Multiple Categories", "Single Category")) %>% | |
select(-n, -percent_of_total) | |
categories_with_level1 <- | |
categories_upload %>% | |
filter(!is.na(`Category Level1`)) %>% | |
group_by(Keyword) %>% | |
mutate(total_rows = n()) %>% | |
slice(1) %>% # take the one with the level 2, if it exists | |
mutate(warning = if_else(total_rows != 1, "Multiple Categories", "Single Category")) %>% | |
select(-total_rows) %>% | |
bind_rows(cats_done_with_level1) %>% | |
distinct(Keyword, .keep_all = T) %>% | |
ungroup() | |
all_keywords <- initial_all_keywords %>% | |
select(type, Keyword) %>% | |
left_join(categories_with_level1, by = "Keyword") | |
previous_keywords <- initial_previous_keywords %>% | |
left_join(categories_with_level1 %>% | |
transmute(Keyword, `Suggested Category` = `Category Level1`) | |
, by = "Keyword") | |
rm(list=setdiff(ls(), c("all_keywords", # previously established Level2 input | |
"previous_keywords", # keywords used for detection in the new | |
"combined_import", # imported new transactions | |
"categorisations_done_upload" # the imported cats done | |
))) | |
## 3. detect presence of keywords.R (~110 lines) ---- | |
# categorisations which have had cats defined | |
done_and_have_defined_cats <- categorisations_done_upload %>% | |
filter(!is.na(`Category Level1`)) | |
# categorisations with no cats defined | |
done_but_no_defined_cats <- categorisations_done_upload %>% | |
filter(is.na(`Category Level1`)) | |
# do any of our transaction descriptions include "category keywords" | |
items_with_keyword_options <- combined_import %>% | |
anti_join(done_and_have_defined_cats, | |
by = c("Date", "Amount", "Description", "data_source")) %>% | |
# remove any that already appear in the cats done document | |
# but weren't defined yet, as there may have been details added | |
anti_join(done_but_no_defined_cats, | |
by = c("Date", "Amount", "Description", "data_source")) %>% | |
bind_rows(done_but_no_defined_cats %>% | |
select(-Keyword, -`Suggested Category`, -`Category Level1`)) %>% # because we are going to be re-estimating these | |
mutate(row_id = row_number()) %>% # we want to be careful not to duplicate rows in this process | |
{if(nrow(previous_keywords) == 0) | |
mutate(., Keyword = "~~~test", seconded = NA) else # need to put in a "something" which will certainly not be detected | |
crossing(., previous_keywords %>% select(Keyword, seconded))} %>% | |
mutate(contains_test = str_detect(toupper(Description), fixed(Keyword)), | |
contains_test = if_else(is.na(contains_test), FALSE, contains_test)) | |
# no keyword matches | |
no_keyword_matches <- items_with_keyword_options %>% | |
group_by(row_id) %>% | |
filter(!any(contains_test == TRUE)) %>% | |
distinct(row_id, .keep_all = T) %>% | |
ungroup() %>% | |
select(-Keyword, -seconded, -contains_test, -row_id) | |
# received keyword matches | |
keyword_match <- items_with_keyword_options %>% | |
filter(contains_test == TRUE) %>% | |
select(-contains_test) %>% | |
group_by(row_id) %>% | |
mutate(count = n(), | |
sum_of_seconded = sum(seconded, na.rm = T), | |
no_worries = count - sum_of_seconded == 1) | |
# single keyword match | |
single_keyword_match <- keyword_match %>% | |
filter(count == 1) %>% | |
ungroup() %>% | |
select(-no_worries, -seconded, -count, -sum_of_seconded, -row_id) | |
# Has mutiple keyword matches | |
# only 1 non-Seconded | |
mutiple_keyword_matches_but_1_good <- keyword_match %>% | |
filter(count > 1 & no_worries & seconded != 1) %>% | |
ungroup() %>% | |
select(-no_worries, -seconded, -count, -sum_of_seconded, -row_id) | |
# no clear winning match | |
# (i.e. multiple secondeds or multiple non-secondeds) | |
# in theory this should be rare | |
# here... add in:: pick the longest keyword.... | |
no_clear_winning_match <- keyword_match %>% | |
filter(count > 1 & !no_worries) %>% | |
select(-no_worries) %>% | |
distinct(row_id, .keep_all = T) %>% | |
ungroup() %>% | |
mutate(Keyword = "_Multiple_") %>% | |
select(-seconded, -count, -sum_of_seconded, -row_id) | |
# append them all together | |
new_and_already_done_row_items <- | |
bind_rows(no_keyword_matches, # bind all the new rows with their relevant detected keywords | |
single_keyword_match, | |
mutiple_keyword_matches_but_1_good, | |
no_clear_winning_match) %>% | |
left_join(previous_keywords %>% | |
select(Keyword, `Suggested Category`), | |
by = "Keyword") %>% | |
# then I connect these with all the other data | |
# i.e the previously categorised in cats done | |
bind_rows(categorisations_done_upload %>% | |
filter(!is.na(`Category Level1`))) %>% | |
arrange(desc(Date)) %>% | |
select(data_source, Date, Amount, Description, `Suggested Category`, Keyword, `Category Level1`, everything()) | |
rm(list=setdiff(ls(), c("all_keywords", | |
"new_and_already_done_row_items"))) | |
return(new_and_already_done_row_items) | |
} | |
# process uploaded files | |
generate_keywords_sheet <- function(categorisations_done_upload, | |
categories_upload) { | |
# preparing the update of the keywords + categories table (second sheet) | |
# still present in cats done | |
existing_keywords <- categories_upload %>% | |
distinct(Keyword) %>% | |
semi_join(categorisations_done_upload, by = "Keyword") %>% | |
mutate(type = "existing keyword") | |
# No longer present in cats done sheet? | |
historical_keywords <- categories_upload %>% | |
distinct(Keyword) %>% | |
anti_join(categorisations_done_upload, by = "Keyword") %>% | |
mutate(type = "historical keyword") | |
# new (only in cats done records) | |
new_keywords <- categorisations_done_upload %>% | |
distinct(Keyword) %>% | |
filter(!is.na(Keyword)) %>% | |
anti_join(categories_upload, by = "Keyword") %>% | |
mutate(type = "new keyword") | |
initial_all_keywords <- | |
bind_rows(existing_keywords, | |
historical_keywords, | |
new_keywords) %>% | |
arrange(Keyword) | |
# what are going to be the suggested categories for each keyword | |
cats_done_with_level1 <- | |
categorisations_done_upload %>% | |
filter(!is.na(`Category Level1`)) %>% | |
count(Keyword, `Category Level1`) %>% | |
group_by(Keyword) %>% | |
mutate(percent_of_total = n/sum(n)) %>% | |
arrange(desc(percent_of_total)) %>% | |
slice(1) %>% | |
mutate(warning = if_else(percent_of_total != 1, "Multiple Categories", "Single Category")) %>% | |
select(-n, -percent_of_total) | |
categories_with_level1 <- | |
categories_upload %>% | |
filter(!is.na(`Category Level1`)) %>% | |
group_by(Keyword) %>% | |
mutate(total_rows = n()) %>% | |
slice(1) %>% # take the one with the level 2, if it exists | |
mutate(warning = if_else(total_rows != 1, "Multiple Categories", "Single Category")) %>% | |
select(-total_rows) %>% | |
bind_rows(cats_done_with_level1) %>% | |
distinct(Keyword, .keep_all = T) %>% | |
ungroup() | |
all_keywords <- initial_all_keywords %>% | |
select(type, Keyword) %>% | |
left_join(categories_with_level1, by = "Keyword") | |
return(all_keywords) | |
} | |
############################################################ | |
# Forecasting functions | |
generate_rates_table <- function(new_and_already_done_row_items, rates_import, forecast_horison_days = 370) { | |
amount_col_name <- as.symbol("Amount (if different going forward)") | |
reference_date_col_name <- as.symbol("A reference date -past or future") | |
limit_prev_col_name <- as.symbol("Irregular schedule transaction limit for forecasting") | |
# get columns to the right type and add forcast horizon | |
# based on the latest date from all the transactions | |
rates_import %>% | |
right_join( | |
new_and_already_done_row_items %>% | |
distinct(`Category Level1`) %>% | |
filter(`Category Level1` != "IGNORE") | |
, by = "Category Level1") %>% | |
distinct(`Category Level1`, .keep_all = T) %>% | |
mutate(`Shedule|Number` = as.numeric(`Shedule|Number`), | |
`No plans for it to reoccur` = as.numeric(`No plans for it to reoccur`), | |
!!reference_date_col_name := as.Date(as.numeric(!!reference_date_col_name), origin = "1899-12-30"), | |
!!amount_col_name := as.numeric(!!amount_col_name), | |
`Shedule|Number` = if_else(is.na(`Shedule|Number`), 1, `Shedule|Number`), | |
forecast_horizon_final_date = max(new_and_already_done_row_items$Date) + forecast_horison_days) | |
} | |
regenerate_schedule_table <- function(rates, forecast_horison_days = 370) { | |
# certain forecast values will have to only utilise a set number or | |
# (from a certain date) of historical transactions | |
# to determine their on-going rates. | |
amount_col_name <- as.symbol("Amount (if different going forward)") | |
reference_date_col_name <- as.symbol("A reference date -past or future") | |
limit_prev_col_name <- as.symbol("Irregular schedule transaction limit for forecasting") | |
limiting_transactions <- rates %>% | |
filter(!is.na(!!limit_prev_col_name)) %>% | |
mutate(what_is_it = case_when(is.na(as.numeric(!!limit_prev_col_name)) ~ "not a number", | |
as.numeric(!!limit_prev_col_name) <= 1000 ~ "less than 1000", | |
as.numeric(!!limit_prev_col_name) > 1000 ~ "greater then 1000", | |
TRUE ~ "other"), | |
limit_by_date = case_when(what_is_it == "not a number" ~ dmy(!!limit_prev_col_name), | |
what_is_it == "greater then 1000" ~ as.Date(as.numeric(!!limit_prev_col_name), origin = "1899-12-30")), | |
limit_by_last_transactions = if_else(what_is_it == "less than 1000", as.numeric(!!limit_prev_col_name), NA_real_)) %>% | |
select(`Category Level1`, limit_by_last_transactions, limit_by_date) | |
rates %>% | |
left_join( | |
limiting_transactions %>% | |
mutate(replacement_col = if_else(is.na(limit_by_last_transactions), format(limit_by_date, "%d/%m/%Y"), as.character(limit_by_last_transactions))) %>% | |
select(`Category Level1`, replacement_col) | |
, by = "Category Level1") %>% | |
{if (nrow(.) == 0) {.} else {mutate(., !!limit_prev_col_name := if_else(!is.na(replacement_col), replacement_col, !!limit_prev_col_name))}} %>% | |
select(-replacement_col, -forecast_horizon_final_date) %>% | |
mutate(`Shedule|Number` = if_else(`Shedule|Number` == 1, NA_real_, `Shedule|Number`)) %>% | |
arrange(`Shedule|Timeframe`, `No plans for it to reoccur`, `Category Level1`) | |
} | |
############################################################################## | |
create_forecast_values <- function(new_and_already_done_row_items, rates, forecast_horison_days = 370) { | |
# take all transactions | |
# filter out the IGNORE cats, exclude those cats that have been specified to not recur | |
# if the cat1 isn't entered, label it UNCATEGORISED | |
amount_col_name <- as.symbol("Amount (if different going forward)") | |
reference_date_col_name <- as.symbol("A reference date -past or future") | |
limit_prev_col_name <- as.symbol("Irregular schedule transaction limit for forecasting") | |
initial_summary <- new_and_already_done_row_items %>% | |
# don't think we need this anymore | |
mutate(`Category Level1` = case_when(is.na(`Category Level1`) & Amount < 0 ~ "UNCATEGORISED EXPENSE", | |
is.na(`Category Level1`) & Amount > 0 ~ "UNCATEGORISED INCOME", | |
TRUE ~ `Category Level1`)) %>% | |
# nor this.... | |
filter(`Category Level1` != "IGNORE") %>% | |
filter(data_source != "cash") %>% | |
anti_join(rates %>% filter(!is.na(`No plans for it to reoccur`)), by = "Category Level1") %>% | |
arrange(desc(Date)) %>% | |
group_by(`Category Level1`) %>% | |
mutate(min_date = min(Date), | |
max_date = max(Date), | |
days_between_min_and_max = as.numeric(max_date - min_date), | |
new_cat_level1 = if_else(days_between_min_and_max == 0, | |
"CAT1 BUCKET", `Category Level1`)) %>% | |
ungroup() | |
# specified forecasts that exist in the transactions | |
# restrict to only those with shedules entered, already. | |
# keep only the latest entry (by the distinct()) | |
specified_forecasts <- initial_summary %>% | |
inner_join(rates %>% filter(!is.na(`Shedule|Timeframe`)), by = "Category Level1") %>% | |
distinct(`Category Level1`, .keep_all = T) %>% | |
mutate(on_going_amount = if_else(!is.na(!!amount_col_name), !!amount_col_name, Amount), | |
latest_date = if_else(!is.na(!!reference_date_col_name), !!reference_date_col_name, Date), | |
latest_date = if_else(latest_date >= Date, latest_date, Date)) %>% | |
select(latest_date, `Category Level1`, `Shedule|Number`, `Shedule|Timeframe`, on_going_amount, forecast_horizon_final_date) %>% | |
# but what if our scheduled item didn't exist in the transactions? | |
# Let's make it so we can add these future schedules in | |
bind_rows( | |
rates %>% | |
filter(!is.na(!!reference_date_col_name)) %>% | |
anti_join(initial_summary, by = "Category Level1") %>% | |
mutate(latest_date = !!reference_date_col_name, | |
on_going_amount = !!amount_col_name) %>% | |
select(latest_date, `Category Level1`, `Shedule|Number`, `Shedule|Timeframe`, on_going_amount, forecast_horizon_final_date) | |
) %>% | |
mutate(Date = map2(latest_date + 1, forecast_horizon_final_date, ~seq.Date(.x, .y, 1))) %>% | |
unnest(Date) %>% | |
group_by(`Category Level1`) %>% | |
mutate(months_out = map2_int(latest_date, Date, ~length(seq(.x, .y, by = "month"))-1L), | |
total_days = row_number()) %>% | |
group_by(`Category Level1`, months_out) %>% | |
mutate(days_out = row_number()) %>% | |
group_by(`Category Level1`) %>% | |
mutate(weeks_out = total_days %/% 7, | |
days_out = if_else(months_out == 0, days_out, days_out - 1L), | |
day_in_week = total_days - weeks_out*7) %>% | |
mutate(keep_row = case_when(`Shedule|Timeframe` == "Year" ~ months_out == 12 & days_out == 0, | |
`Shedule|Timeframe` == "Month" ~ days_out == 0, | |
`Shedule|Timeframe` == "Week" ~ day_in_week == 0, | |
`Shedule|Timeframe` == "Day" ~ TRUE)) %>% | |
filter(keep_row) %>% | |
mutate(multiple_of_rate = row_number() %/% `Shedule|Number`) %>% | |
group_by(`Category Level1`, multiple_of_rate) %>% | |
mutate(keep_row = multiple_of_rate != 0 & row_number() == 1) %>% | |
filter(keep_row) %>% | |
ungroup() %>% | |
# here is where the cat levels are being restricted | |
select(Date, `Category Level1`, Amount = on_going_amount) %>% | |
mutate(data_source = "FORECAST", | |
Description = "FIXED SCHEDULE") | |
# certain forecast values will have to only utilise a set number or (from a certain date) of historical transactions | |
# to determine their on-going rates. | |
limiting_transactions <- rates %>% | |
filter(!is.na(!!limit_prev_col_name)) %>% | |
mutate(what_is_it = case_when(is.na(as.numeric(!!limit_prev_col_name)) ~ "not a number", | |
as.numeric(!!limit_prev_col_name) <= 1000 ~ "less than 1000", | |
as.numeric(!!limit_prev_col_name) > 1000 ~ "greater then 1000", | |
TRUE ~ "other"), | |
limit_by_date = case_when(what_is_it == "not a number" ~ dmy(!!limit_prev_col_name), | |
what_is_it == "greater then 1000" ~ as.Date(as.numeric(!!limit_prev_col_name), origin = "1899-12-30")), | |
limit_by_last_transactions = if_else(what_is_it == "less than 1000", as.numeric(!!limit_prev_col_name), NA_real_)) %>% | |
select(`Category Level1`, limit_by_last_transactions, limit_by_date) | |
# now let's handle those transactions that do not have schedules | |
# first filter out those transactions which the forecast is not going to be based on | |
grouped_amounts <- initial_summary %>% | |
anti_join(specified_forecasts, by = "Category Level1") %>% | |
left_join(limiting_transactions, by = "Category Level1") %>% # do I need to add a group_by here, for the row_number step below? | |
mutate(within_range_filter = case_when(!is.na(limit_by_date) ~ Date >= limit_by_date, | |
!is.na(limit_by_last_transactions) ~ row_number() <= limit_by_last_transactions, | |
TRUE ~ TRUE)) %>% | |
filter(within_range_filter) %>% | |
select(-within_range_filter) %>% | |
group_by(new_cat_level1) %>% | |
mutate(is_max_date = Date == max(Date)) %>% | |
summarise(total_spend_prior_to_latest_day = sum(Amount[is_max_date == F]), | |
min_date = min(Date), | |
max_date = max(Date), | |
days_between_min_and_max = as.numeric(max_date - min_date), | |
spend_per_day = total_spend_prior_to_latest_day/days_between_min_and_max) | |
total_for_bucket <- grouped_amounts %>% | |
filter(new_cat_level1 == "CAT1 BUCKET") %>% | |
select(new_cat_level1, spend_per_day) | |
# what are the overall contributions of each cat level 1 to the CAT1 Bucket? | |
bucket_cat1_proportions <- initial_summary %>% | |
anti_join(specified_forecasts, by = "Category Level1") %>% | |
filter(new_cat_level1 == "CAT1 BUCKET") %>% | |
group_by(new_cat_level1, `Category Level1`) %>% | |
summarise(amount_per_cat1 = sum(Amount)) %>% | |
left_join(total_for_bucket, by = "new_cat_level1") %>% | |
mutate(total_in_bucket = sum(amount_per_cat1), | |
proportion = amount_per_cat1/total_in_bucket, | |
proportional_spend_per_day = spend_per_day * proportion) %>% | |
ungroup() %>% | |
select(new_cat_level1, `Category Level1`, proportional_spend_per_day) | |
grouped_amounts %>% | |
left_join(bucket_cat1_proportions, by = "new_cat_level1") %>% | |
mutate(`Category Level1` = if_else(is.na(`Category Level1`), new_cat_level1, `Category Level1`), | |
Amount = if_else(is.na(proportional_spend_per_day), spend_per_day, proportional_spend_per_day)) %>% | |
mutate(data_source = "FORECAST", | |
Description = if_else(new_cat_level1 == "CAT1 BUCKET", "BUCKETED ESTIMATE", "CATEGORY ESTIMATE"), | |
max_of_all_dates = max(max_date), | |
Date = map2(max_date + 1, max_of_all_dates + forecast_horison_days, ~seq.Date(.x, .y, 1))) %>% | |
unnest(Date) %>% | |
select(data_source, Date, `Category Level1`, Description, Amount) %>% | |
bind_rows(specified_forecasts) %>% | |
arrange(Date) | |
} | |
generate_time_period_dates_table <- function(period_of_timeframe, timeframe_type, max_date) { | |
tibble(review_timeframe_options = period_of_timeframe, timeframe_type_options = timeframe_type) %>% | |
mutate(current_end_date = case_when(timeframe_type_options != 2 ~ max_date, | |
review_timeframe_options == 7 | review_timeframe_options == 14 ~ floor_date(max_date, "week", week_start = 1)-1, | |
review_timeframe_options == 30 ~ floor_date(max_date, "month")-1), | |
current_start_date = case_when(timeframe_type_options == 1 ~ current_end_date-(review_timeframe_options-1), | |
review_timeframe_options == 7 ~ floor_date(current_end_date, "week", week_start = 1), | |
review_timeframe_options == 14 ~ floor_date(current_end_date, "week", week_start = 1)-7, | |
review_timeframe_options == 30 ~ floor_date(current_end_date, "month")), | |
current_difference = as.integer(current_end_date - current_start_date), | |
comparison_end_date = if_else(timeframe_type_options != 3, current_start_date - 1, | |
case_when(review_timeframe_options == 7 ~ floor_date(current_start_date - 1, "week", week_start = 1) + current_difference, | |
review_timeframe_options == 14 ~ floor_date(current_start_date - 1, "week", week_start = 1)-7 + current_difference, | |
review_timeframe_options == 30 ~ floor_date(current_start_date - 1, "month") + current_difference)), | |
comparison_start_date = case_when(timeframe_type_options == 1 ~ comparison_end_date-(review_timeframe_options-1), | |
review_timeframe_options == 7 ~ floor_date(comparison_end_date, "week", week_start = 1), | |
review_timeframe_options == 14 ~ floor_date(comparison_end_date, "week", week_start = 1)-7, | |
review_timeframe_options == 30 ~ floor_date(comparison_end_date, "month")) | |
, comparison_difference = as.integer(comparison_end_date - comparison_start_date)) %>% | |
select(-review_timeframe_options, -timeframe_type_options) %>% | |
gather(key, value, -current_difference, -comparison_difference) | |
} | |
new_and_already_done_row_items <- | |
reactive( | |
combined_import() %>% | |
{if (is.null(input$categorisation_file) & input$retrieve_from_google_sheets == 0) | |
{ | |
mutate(., 'Suggested Category' = NA_character_, Keyword = NA_character_, `Category Level1` = NA_character_) | |
} else | |
{ | |
combine_transactions_with_historical(., categorisations_done_upload(), categories_upload()) | |
} %>% | |
select(data_source, everything()) | |
}) | |
keywords_newly_created <- | |
reactive( | |
generate_keywords_sheet(categorisations_done_upload(), | |
categories_upload())) | |
``` | |
Row | |
------------------------------------- | |
### # Transactions (by day) | |
```{r} | |
all_transactions_plot <- reactive(new_and_already_done_row_items() %>% | |
mutate(week_date = floor_date(Date, "week", week_start = 1), | |
week_and_source = paste0(week_date, "|", data_source)) %>% | |
count(Date, data_source) %>% | |
ggplot() + | |
aes(Date, n, fill = data_source, key = Date) + | |
geom_bar(stat = "identity") + | |
labs(x = NULL, y = NULL) + | |
{if (length(min_date_selection()) != 0) | |
{geom_bar(data = . %>% filter(Date < min_date_selection()[[1]]), stat = "identity", alpha = 0.4, fill = "white", show.legend = FALSE)} else {NULL}} | |
+ | |
{if (length(min_date_selection()) != 0) | |
{geom_vline(xintercept = as.numeric(as.Date(min_date_selection()[[1]])), linetype = "dashed")} else {NULL}}) | |
renderPlotly( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
all_transactions_plot() %>% | |
ggplotly(source = "all_transactions_plot") %>% | |
config(displayModeBar = F) %>% | |
layout(xaxis=list(fixedrange=TRUE)) %>% | |
layout(yaxis=list(fixedrange=TRUE)) | |
}}) | |
``` | |
Row | |
------------------------------------- | |
### # Transactions (by week) | |
```{r} | |
min_date_selection <- reactive(event_data("plotly_click", source = "all_transactions_plot")["key"]) | |
line_chart_by_week_plot <- reactive( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
new_and_already_done_row_items() %>% | |
mutate(week_date = floor_date(Date, "week", week_start = 1), | |
week_and_source = paste0(week_date, "|", data_source)) %>% | |
{if (length(min_date_selection()) != 0) {filter(., Date >= min_date_selection()[[1]])} else {.}} %>% | |
mutate(Date = week_date) %>% | |
count(week_and_source, Date, data_source) %>% | |
ggplot() + | |
aes(Date, n, col = data_source) + | |
geom_line(stat = "identity") + | |
geom_point(aes(Date, n, col = data_source, key = week_and_source), stat = "identity") + # see if you can add a slight jitter, so dots on top of each otehr can be clicked | |
theme(legend.position = "none") + | |
labs(x = NULL, y = NULL) | |
}) | |
renderPlotly( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
line_chart_by_week_plot() %>% | |
ggplotly(source = "line_chart_by_week") %>% | |
config(displayModeBar = F) %>% | |
layout(xaxis=list(fixedrange=TRUE)) %>% | |
layout(yaxis=list(fixedrange=TRUE)) | |
}}) | |
``` | |
### Transactions | |
```{r} | |
min_date_selection_for_transactions <- reactive(event_data("plotly_click", source = "line_chart_by_week")["key"]) | |
# if a click event in the second chart has happend... further restrict | |
# if Cats done dfile loaded, remove IGNORE rows | |
data_going_forward <- reactive( | |
new_and_already_done_row_items() %>% | |
mutate(week_date = floor_date(Date, "week", week_start = 1), | |
week_and_source = paste0(week_date, "|", data_source)) %>% | |
{if (length(min_date_selection()) != 0) {filter(., Date >= min_date_selection()[[1]])} else {.}} %>% | |
{if (!is.null(input$categorisation_file) | !input$retrieve_from_google_sheets == 0) | |
{filter(., is.na(`Category Level1`) | `Category Level1` != "IGNORE") %>% | |
left_join(rates_for_saving() %>% | |
select(`Category Level1`, `Category Level2`, `Category Level3`) | |
, by = "Category Level1") %>% | |
mutate(`Category Level1` = case_when(is.na(`Category Level1`) & Amount < 0 ~ "UNCATEGORISED EXPENSE", | |
is.na(`Category Level1`) & Amount > 0 ~ "UNCATEGORISED INCOME", | |
TRUE ~ `Category Level1`), | |
`Category Level2` = case_when(is.na(`Category Level2`) & Amount < 0 ~ "UNCATEGORISED EXPENSE", | |
is.na(`Category Level2`) & Amount > 0 ~ "UNCATEGORISED INCOME", | |
TRUE ~ `Category Level2`), | |
`Category Level3` = case_when(is.na(`Category Level3`) & Amount < 0 ~ "EXPENSE", | |
is.na(`Category Level3`) & Amount > 0 ~ "INCOME", | |
TRUE ~ `Category Level3`)) } else { | |
mutate(., `Category Level1` = case_when(Amount < 0 ~ "UNCATEGORISED EXPENSE", | |
Amount > 0 ~ "UNCATEGORISED INCOME"), | |
`Category Level2` = case_when(Amount < 0 ~ "UNCATEGORISED EXPENSE", | |
Amount > 0 ~ "UNCATEGORISED INCOME"), | |
`Category Level3` = case_when(Amount < 0 ~ "EXPENSE", | |
Amount > 0 ~ "INCOME")) | |
}} | |
) | |
table_for_first_DT <- reactive( | |
data_going_forward() %>% | |
{if (length(min_date_selection_for_transactions()) != 0) {filter(., week_and_source == min_date_selection_for_transactions()[[1]])} else {.}} %>% | |
select(Date, Description, Amount) %>% | |
arrange(desc(abs(Amount)))) | |
renderDataTable( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
datatable(table_for_first_DT(), | |
rownames = F, | |
options = | |
list( | |
# dom = "t, | |
pageLength = 20, scrollY = "300px")) %>% | |
formatDate("Date", "toDateString") %>% | |
formatCurrency("Amount", digits = 0) %>% | |
formatStyle('Amount', | |
background = styleColorBar(table_for_first_DT()$Amount, 'steelblue'), | |
backgroundSize = '100% 90%', | |
backgroundRepeat = 'no-repeat', | |
backgroundPosition = 'center')}} | |
) | |
``` | |
Expenses | |
===================================== | |
SideBar {.sidebar} | |
--- | |
```{r} | |
br() | |
actionButton("retrieve_from_google_sheets_2", "Retrieve (from GS)", icon = icon("download", "fa")) | |
review_timeframe_options <- c("Week" = 7, "Fortnight" = 14, "Month or 30 days" = 30, "(All records)" = 0) | |
timeframe_type_options <- c("Latest" = 1, "Current (up till now)" = 3, "Last complete" = 2) | |
metric_options <- c("Weekly" = "avg_weekly_spend", "Daily" = "avg_daily_spend") | |
radioButtons("review_timeframe", | |
"Review timeframe", | |
choices = review_timeframe_options, | |
selected = review_timeframe_options[1]) | |
radioButtons("timeframe_type", | |
"Timeframe type", | |
choices = timeframe_type_options, | |
selected = timeframe_type_options[1]) | |
radioButtons("avg_metric_selection", | |
"Avg expenditure metric", | |
choices = metric_options, | |
selected = metric_options[1]) | |
br() | |
br() | |
br() | |
br() | |
Level3_options <- reactive( | |
if (nrow(new_and_already_done_row_items()) == 0) {"(All)"} else { | |
c("(All)", sort(unique(data_going_forward()$`Category Level3`)))} | |
) | |
renderUI( | |
radioButtons("Level3_options", | |
"Level3_options", | |
choices = Level3_options(), | |
selected = Level3_options()[1]) | |
) | |
Level2_options <- reactive(if (input$Level3_options == "(All)") { | |
data_going_forward() %>% | |
pull(`Category Level2`) %>% | |
unique() %>% | |
sort()} else { | |
data_going_forward() %>% | |
filter(`Category Level3` == input$Level3_options) %>% | |
pull(`Category Level2`) %>% | |
unique() %>% | |
sort() | |
}) | |
# this select all box can replace the one below.. | |
# just need to learn all the different options to make it look better. | |
# | |
# renderUI( | |
# if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
# | |
# pickerInput( | |
# inputId = "Level2_options", | |
# label = "Level2_options", | |
# choices = Level2_options(), | |
# options = list( | |
# `actions-box` = TRUE | |
# ), | |
# multiple = TRUE | |
# )}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
actionLink("selectall_1","Select All") | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
checkboxGroupInput("Level2_options", | |
"Level2_options", | |
choices = Level2_options(), | |
selected = Level2_options()) | |
}) | |
observe({ | |
req(input$selectall_1) | |
req(Level2_options()) | |
if (input$selectall_1 == 0) {NULL} else {if (input$selectall_1 %% 2 == 0) { | |
updateCheckboxGroupInput(session, "Level2_options","Level2_options", choices = Level2_options(), selected = Level2_options()) | |
} else { | |
updateCheckboxGroupInput(session, "Level2_options","Level2_options", choices = Level2_options(), selected = NULL) | |
}} | |
}) | |
br() | |
Level1_options <- reactive(if (input$Level3_options == "(All)" & (length(input$Level2_options) == 0 | length(input$Level2_options) == length(Level2_options()))) { | |
data_going_forward() %>% | |
pull(`Category Level1`) %>% | |
unique() %>% | |
sort()} else { | |
data_going_forward() %>% | |
filter(., `Category Level2` %in% input$Level2_options) %>% | |
pull(`Category Level1`) %>% | |
unique() %>% | |
sort() | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
actionLink("selectall_2","Select All") | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
checkboxGroupInput("Level1_options", | |
"Level1_options", | |
choices = Level1_options(), | |
selected = Level1_options()) | |
}) | |
observe({ | |
req(input$selectall_2) | |
req(Level1_options()) | |
if (input$selectall_2 == 0) {NULL} else {if (input$selectall_2 %% 2 == 0) { | |
updateCheckboxGroupInput(session, "Level1_options","Level1_options", choices = Level1_options(), selected = Level1_options()) | |
} else { | |
updateCheckboxGroupInput(session, "Level1_options","Level1_options", choices = Level1_options(), selected = NULL) | |
}} | |
}) | |
``` | |
```{r} | |
# let's get the data all prepped | |
data_for_period <- reactive(data_going_forward() %>% | |
filter(is.na(`Category Level1`) | `Category Level1` != "INCOME") %>% | |
filter(data_source != "cash")) | |
period_of_timeframe <- reactive(as.numeric(input$review_timeframe)) | |
timeframe_type <- reactive(as.numeric(input$timeframe_type)) | |
max_date <- reactive(max(data_for_period()$Date)) | |
min_date <- reactive(min(data_for_period()$Date)) | |
dates_table_first <- reactive(generate_time_period_dates_table(period_of_timeframe(), timeframe_type(), max_date())) | |
dates_table <- reactive( | |
dates_table_first() %>% | |
select(-current_difference, -comparison_difference) %>% | |
deframe()) | |
cumulative_spending_table <- reactive( | |
data_for_period() %>% | |
{if (input$Level3_options == "(All)") {.} else | |
{filter(., `Category Level3` == input$Level3_options)} } %>% | |
{if (length(input$Level2_options) == 0 | length(input$Level2_options) == length(Level2_options())) {.} else | |
{filter(., `Category Level2` %in% input$Level2_options)} } %>% | |
{if (length(input$Level1_options) == 0 | length(input$Level1_options) == length(Level1_options())) {.} else | |
{filter(., `Category Level1` %in% input$Level1_options)} } %>% | |
group_by(Date) %>% | |
summarise(total_expenses = -sum(Amount)) %>% | |
ungroup() %>% | |
right_join(tibble(Date = seq.Date(min_date(), max_date(), by = "day")), by = "Date") %>% | |
arrange(Date) %>% | |
mutate(total_expenses = if_else(is.na(total_expenses), 0, total_expenses), | |
analysis_period = case_when(Date >= dates_table()["current_start_date"] & Date <= dates_table()["current_end_date"] ~ "current", | |
Date >= dates_table()["comparison_start_date"] & Date <= dates_table()["comparison_end_date"] ~ "comparison", | |
TRUE ~ "Other"), | |
cumulative_expenses = cumsum(total_expenses), | |
lag_expenses = lag(cumulative_expenses, period_of_timeframe()), | |
dif_from_lag_expenses = cumulative_expenses - lag_expenses, | |
timeframe = period_of_timeframe(), | |
avg_daily_spend = dif_from_lag_expenses/timeframe, | |
avg_weekly_spend = avg_daily_spend*7) | |
) | |
``` | |
Row {data-width=600} | |
------ | |
### Expenses | |
```{r} | |
daily_actuals_plot <- reactive( | |
cumulative_spending_table() %>% | |
ggplot() + | |
aes(Date, total_expenses, fill = analysis_period, label = total_expenses, key = Date) + | |
geom_bar(stat = "identity") + | |
labs(x = NULL, y = NULL) + | |
theme(legend.position = "none") | |
# ggrepel::geom_text_repel() # format as $, only show occaisional ones | |
) | |
renderPlotly( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
daily_actuals_plot() %>% | |
ggplotly(source = "daily_actuals_plot") %>% | |
config(displayModeBar = F) %>% | |
layout(xaxis=list(fixedrange=TRUE)) %>% | |
layout(yaxis=list(fixedrange=TRUE)) | |
}}) | |
``` | |
### Transactions | |
```{r} | |
daily_actuals_plot_value <- reactive(event_data("plotly_click", source = "daily_actuals_plot")["key"]) | |
new_data_for_period <- reactive( | |
data_for_period() %>% | |
{if (input$Level3_options == "(All)") {.} else | |
{filter(., `Category Level3` == input$Level3_options)} } %>% | |
{if (length(input$Level2_options) == 0 | length(input$Level2_options) == length(Level2_options())) {.} else | |
{filter(., `Category Level2` %in% input$Level2_options)} } %>% | |
{if (length(input$Level1_options) == 0 | length(input$Level1_options) == length(Level1_options())) {.} else | |
{filter(., `Category Level1` %in% input$Level1_options)} } %>% | |
{if (length(daily_actuals_plot_value()) != 0) | |
{filter(., Date == daily_actuals_plot_value()[[1]])} else {.}} %>% | |
select(Date, Description, Amount) %>% | |
arrange(desc(abs(Amount)))) | |
renderDataTable( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
datatable(new_data_for_period(), | |
rownames = F, | |
options = | |
list( | |
# dom = "t, | |
pageLength = 20, scrollY = "300px")) %>% | |
formatDate("Date", "toDateString") %>% | |
formatCurrency("Amount", digits = 0) %>% | |
formatStyle('Amount', | |
background = styleColorBar(new_data_for_period()$Amount, 'steelblue'), | |
backgroundSize = '100% 90%', | |
backgroundRepeat = 'no-repeat', | |
backgroundPosition = 'center')}} | |
) | |
``` | |
Row {data-width=400} | |
------ | |
### Cumulative expenses | |
```{r} | |
data_for_cumulations <- reactive( | |
cumulative_spending_table() %>% | |
{if (input$review_timeframe == 0) { | |
mutate(., enough_dates_col = TRUE) | |
} else { | |
filter(., analysis_period != "Other") %>% | |
mutate(enough_dates_col = case_when(min_date() <= dates_table()["comparison_start_date"] ~ TRUE, | |
min_date() <= dates_table()["current_start_date"] ~ analysis_period == "current", | |
min_date() > dates_table()["current_start_date"] ~ FALSE))}} %>% | |
group_by(analysis_period) %>% | |
mutate(cumulative_expenses = cumsum(total_expenses)) %>% | |
ungroup() | |
) | |
only_period_start_points <- reactive( | |
data_for_cumulations() %>% | |
group_by(analysis_period) %>% | |
slice(1) %>% | |
ungroup() %>% | |
select(analysis_period, Date, cumulative_expenses, enough_dates_col) %>% | |
bind_rows(mutate(., cumulative_expenses = 0)) | |
) | |
cumulative_expenses_plot <- reactive( | |
data_for_cumulations() %>% | |
ggplot() + | |
aes(Date, cumulative_expenses, col = analysis_period) + | |
{if (nrow(filter(data_for_cumulations(), enough_dates_col)) == 0) {NULL} else | |
{geom_line(data = . %>% filter(enough_dates_col), stat = "identity")}} + | |
{if (nrow(filter(data_for_cumulations(), enough_dates_col)) == 0) {NULL} else | |
{geom_line(data = filter(only_period_start_points(), enough_dates_col), stat = "identity")}} + | |
{if (nrow(filter(data_for_cumulations(), !enough_dates_col)) == 0) {NULL} else | |
{geom_line(data = filter(data_for_cumulations(), !enough_dates_col), stat = "identity", linetype = "dashed")}} + | |
{if (nrow(filter(data_for_cumulations(), !enough_dates_col)) == 0) {NULL} else | |
{geom_line(data = filter(only_period_start_points(), !enough_dates_col), stat = "identity", linetype = "dashed")}} + | |
geom_point(data = . %>% group_by(analysis_period) %>% filter(Date == max(Date)), stat = "identity") + | |
labs(x = NULL, y = NULL) + | |
theme(legend.position = "none")) | |
renderPlotly( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
cumulative_expenses_plot() %>% | |
ggplotly() %>% | |
config(displayModeBar = F) %>% | |
layout(xaxis=list(fixedrange=TRUE)) %>% | |
layout(yaxis=list(fixedrange=TRUE)) | |
}}) | |
``` | |
```{r} | |
# data for value boxes | |
value_box_values <- reactive(data_for_cumulations() %>% | |
group_by(enough_dates_col, analysis_period) %>% | |
summarise(total_spend = sum(total_expenses), | |
avg_daily_spend = mean(total_expenses)) %>% | |
mutate(avg_weekly_spend = avg_daily_spend*7) %>% | |
gather(metric, value, -analysis_period, -enough_dates_col) %>% | |
mutate(value = scales::dollar(value, accuracy = 1), | |
value = if_else(enough_dates_col, value, paste0(value,"*"))) %>% | |
spread(metric, value) | |
) | |
``` | |
### Current period (total spend) | |
```{r} | |
renderValueBox( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
valueBox( | |
value_box_values() %>% | |
{if (input$review_timeframe == 0) {.} else {filter(., analysis_period == "current")}} %>% | |
pull(total_spend) | |
, icon = "fa-pencil") | |
} ) | |
``` | |
### Comparison period (total spend) | |
```{r} | |
renderValueBox( | |
if (nrow(new_and_already_done_row_items()) == 0 | input$review_timeframe == 0) {NULL} else { | |
valueBox( | |
value_box_values() %>% | |
filter(analysis_period == "comparison") %>% | |
pull(total_spend) | |
, icon = "fa-pencil") | |
} ) | |
``` | |
### Avg expenditure | |
```{r} | |
rolling_avg_expenditure <- reactive( | |
data_for_cumulations() %>% | |
{if (input$review_timeframe == 0) { | |
mutate(., avg_daily_spend = sum(total_expenses) / n(), | |
avg_weekly_spend = avg_daily_spend*7)} else {.}} %>% | |
ggplot() + | |
aes_string("Date", input$avg_metric_selection, col = "analysis_period") + | |
geom_line(stat = "identity") + | |
geom_point(data = . %>% group_by(analysis_period) %>% filter(Date == max(Date)), stat = "identity") + | |
labs(x = NULL, y = NULL) + | |
theme(legend.position = "none") | |
) | |
renderPlotly( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
rolling_avg_expenditure() %>% | |
ggplotly() %>% | |
config(displayModeBar = F) %>% | |
layout(xaxis=list(fixedrange=TRUE)) %>% | |
layout(yaxis=list(fixedrange=TRUE)) | |
}}) | |
``` | |
### Current Period (spend rate) | |
```{r} | |
renderValueBox( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
valueBox( | |
value_box_values() %>% | |
{if (input$review_timeframe == 0) {.} else {filter(., analysis_period == "current")}} %>% | |
pull(!! as.symbol(input$avg_metric_selection)) | |
, caption = if (input$avg_metric_selection == "avg_weekly_spend") {"Current Period (Weekly spend rate)"} else {"Current Period (Daily spend rate)"} | |
, icon = "fa-pencil") | |
} ) | |
``` | |
### Comparison period (spend rate) | |
```{r} | |
renderValueBox( | |
if (nrow(new_and_already_done_row_items()) == 0 | input$review_timeframe == 0) {NULL} else { | |
valueBox( | |
value_box_values() %>% | |
filter(analysis_period == "comparison") %>% | |
pull(!! as.symbol(input$avg_metric_selection)) | |
, caption = if (input$avg_metric_selection == "avg_weekly_spend") {"Comparison Period (Weekly spend rate)"} else {"Comparison Period (Daily spend rate)"} | |
, icon = "fa-pencil") | |
} ) | |
``` | |
Forecast | |
===================================== | |
SideBar {.sidebar} | |
--- | |
```{r} | |
# start date | |
miniumum_forecast_value <- reactive( | |
if (nrow(new_and_already_done_row_items()) == 0) | |
{Sys.Date()} else | |
{max(data_going_forward()$Date) + 1} | |
) | |
renderUI( | |
dateInput("min_date_of_forecast", | |
label = "Forecast start:", | |
value = miniumum_forecast_value(), | |
min = miniumum_forecast_value(), | |
max = miniumum_forecast_value() + 300) | |
) | |
# months_into the future... | |
sliderInput("months_ahead", | |
"Forecast horizon:", | |
min = 1, max = 12, | |
post = " months", | |
value = 6, | |
step = 1) | |
level_options <- rev(c("Level 1" = "Category Level1", | |
"Level 2" = "Category Level2", | |
"Level 3" = "Category Level3")) | |
radioButtons("display_level_id", | |
"Display level:", | |
choices = level_options, | |
selected = level_options[1]) | |
renderUI( | |
if (input$display_level_id != level_options[1]) {NULL} else { | |
# savings box | |
numericInput("savings_id", "Savings in the bank", 0) | |
}) | |
Level3_options_2 <- reactive( | |
if (nrow(new_and_already_done_row_items()) == 0) {"(All)"} else { | |
c("(All)", sort(unique(forecast_table_for_saving()$`Category Level3`)))} | |
) | |
renderUI( | |
radioButtons("Level3_options_2", | |
"Level3_options", | |
choices = Level3_options_2(), | |
selected = input$Level3_options[1]) | |
) | |
Level2_options_2 <- reactive(if (input$Level3_options_2 == "(All)") { | |
forecast_table_for_saving() %>% | |
pull(`Category Level2`) %>% | |
unique() %>% | |
sort()} else { | |
forecast_table_for_saving() %>% | |
filter(`Category Level3` == input$Level3_options_2) %>% | |
pull(`Category Level2`) %>% | |
unique() %>% | |
sort() | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
actionLink("selectall_A","Select All") | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
checkboxGroupInput("Level2_options_2", | |
"Level2_options", | |
choices = Level2_options_2(), | |
selected = Level2_options_2()) | |
}) | |
observe({ | |
req(input$selectall_A) | |
req(Level2_options_2()) | |
if (input$selectall_A == 0) {NULL} else {if (input$selectall_A %% 2 == 0) { | |
updateCheckboxGroupInput(session, "Level2_options_2","Level2_options", choices = Level2_options_2(), selected = Level2_options_2()) | |
} else { | |
updateCheckboxGroupInput(session, "Level2_options_2","Level2_options", choices = Level2_options_2(), selected = NULL) | |
}} | |
}) | |
Level1_options_2 <- reactive(if (input$Level3_options_2 == "(All)" & (length(input$Level2_options_2) == 0 | length(input$Level2_options_2) == length(Level2_options_2()))) { | |
forecast_table_for_saving() %>% | |
pull(`Category Level1`) %>% | |
unique() %>% | |
sort()} else { | |
forecast_table_for_saving() %>% | |
filter(`Category Level2` %in% input$Level2_options_2) %>% | |
pull(`Category Level1`) %>% | |
unique() %>% | |
sort() | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
actionLink("selectall_B","Select All") | |
}) | |
renderUI( | |
if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
checkboxGroupInput("Level1_options_2", | |
"Level1_options", | |
choices = Level1_options_2(), | |
selected = Level1_options_2()) | |
}) | |
observe({ | |
req(input$selectall_B) | |
req(Level1_options_2()) | |
if (input$selectall_B == 0) {NULL} else {if (input$selectall_B %% 2 == 0) { | |
updateCheckboxGroupInput(session, "Level1_options_2","Level1_options", choices = Level1_options_2(), selected = Level1_options_2()) | |
} else { | |
updateCheckboxGroupInput(session, "Level1_options_2","Level1_options", choices = Level1_options_2(), selected = NULL) | |
}} | |
}) | |
``` | |
```{r} | |
# let's prep the data | |
forecast_horison_days <- 370 | |
rates <- reactive( generate_rates_table(new_and_already_done_row_items(), rates_import(), forecast_horison_days)) | |
rates_for_saving <- reactive( regenerate_schedule_table(rates(), forecast_horison_days)) | |
forecast_date_display_limit <- reactive( | |
as.Date(input$min_date_of_forecast + duration(input$months_ahead, "months")) | |
) | |
forecast_table_for_saving <- reactive( | |
create_forecast_values(data_going_forward(), rates(), forecast_horison_days) %>% | |
left_join(rates_for_saving() %>% | |
select(`Category Level1`, `Category Level2`, `Category Level3`) %>% | |
bind_rows( | |
tibble(`Category Level1` = c("UNCATEGORISED EXPENSE", "UNCATEGORISED INCOME"), | |
`Category Level2` = c("UNCATEGORISED EXPENSE", "UNCATEGORISED INCOME"), | |
`Category Level3` = c("EXPENSE", "INCOME"))) %>% | |
distinct(`Category Level1`, .keep_all = TRUE) | |
, by = "Category Level1") %>% | |
filter(Date >= miniumum_forecast_value()) | |
) | |
``` | |
Rest of the page {data-width=600} | |
--- | |
### Chart A | |
```{r} | |
forecasted_expenses_plot_data <- reactive( | |
forecast_table_for_saving() %>% | |
filter(Date <= forecast_date_display_limit()) %>% | |
{if (input$Level3_options_2 == "(All)") {.} else | |
{filter(., `Category Level3` == input$Level3_options_2)} } %>% | |
{if (length(input$Level2_options_2) == 0 | length(input$Level2_options_2) == length(Level2_options_2())) {.} else | |
{filter(., `Category Level2` %in% input$Level2_options_2)} } %>% | |
{if (length(input$Level1_options_2) == 0 | length(input$Level1_options_2) == length(Level1_options_2())) {.} else | |
{filter(., `Category Level1` %in% input$Level1_options_2)} } %>% | |
group_by(Date, !! as.symbol(input$display_level_id)) %>% | |
summarise(total_expenses = sum(Amount, na.rm = T)) %>% | |
{if (input$display_level_id != "Category Level3") {.} else { | |
bind_rows(., group_by(., Date) %>% | |
summarise(total_expenses = sum(total_expenses) | |
) %>% | |
mutate(!! as.symbol(input$display_level_id) := "NET POSITION") | |
) }} %>% | |
mutate(total_expenses = if_else(!! as.symbol(input$display_level_id) == "NET POSITION", | |
total_expenses, | |
abs(total_expenses))) %>% | |
group_by(!! as.symbol(input$display_level_id)) %>% | |
mutate(cumulative_expenses = if_else(!! as.symbol(input$display_level_id) == "NET POSITION", | |
cumsum(total_expenses) + input$savings_id, | |
cumsum(total_expenses))) %>% | |
ungroup()) | |
forecasted_expenses_plot <- reactive( | |
forecasted_expenses_plot_data() %>% | |
ggplot() + | |
aes(Date, cumulative_expenses, col = !! as.symbol(input$display_level_id), key = Date, | |
label = paste0(!! as.symbol(input$display_level_id), "\n", scales::dollar(cumulative_expenses, accuracy = 1))) + | |
geom_line(data = . %>% filter(!! as.symbol(input$display_level_id) != "NET POSITION"), stat = "identity") + | |
{if (nrow(filter(forecasted_expenses_plot_data(), !! as.symbol(input$display_level_id) == "NET POSITION")) == 0) {NULL} else | |
{geom_line(data = . %>% filter(!! as.symbol(input$display_level_id) == "NET POSITION"), stat = "identity", linetype = "dashed")}} + | |
{if (length(forecast_date_selection()) != 0) | |
{geom_vline(xintercept = as.numeric(as.Date(forecast_date_selection()[[1]])), linetype = "dashed")} else {NULL}} + | |
# add labels for these points | |
{if (length(forecast_date_selection()) != 0) | |
{geom_text(data = forecasted_expenses_plot_data() %>% | |
filter(Date == forecast_date_selection()[[1]]), nudge_x = 0.1 | |
)} else {NULL}} + | |
labs(x = NULL, y = NULL) | |
) | |
renderPlotly( | |
{if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
forecasted_expenses_plot() %>% | |
ggplotly(source = "forecast_transactions_plot") %>% | |
config(displayModeBar = F) %>% | |
layout(xaxis=list(fixedrange=TRUE)) %>% | |
layout(yaxis=list(fixedrange=TRUE)) | |
}}) | |
forecast_date_selection <- reactive(event_data("plotly_click", source = "forecast_transactions_plot")["key"]) | |
``` | |
Row {data-width=400} | |
------ | |
### Chart B | |
```{r} | |
# renderDataTable( | |
# | |
# {if (nrow(new_and_already_done_row_items()) == 0) {NULL} else { | |
# | |
# datatable(forecasted_expenses_plot_data() | |
# # some kind of grouping and summarising to go here | |
# , rownames = F | |
# , extensions="Buttons" | |
# , options = | |
# list( | |
# dom = 'Bfrtip', | |
# buttons = c('copy', 'csv', 'excel', 'pdf', 'print'), | |
# pageLength = 20, scrollY = "300px")) %>% | |
# formatDate("Date", "toDateString") %>% | |
# formatCurrency("cumulative_expenses", digits = 0) | |
# }}) | |
``` | |
### Chart C | |
```{r} | |
``` | |
Instructions | |
===================================== | |
SideBar {.sidebar} | |
--- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment