Skip to content

Instantly share code, notes, and snippets.

@jcpsantiago
Created May 18, 2020 12:33
Show Gist options
  • Save jcpsantiago/e119a53199379a438c14e1c33f651b93 to your computer and use it in GitHub Desktop.
Save jcpsantiago/e119a53199379a438c14e1c33f651b93 to your computer and use it in GitHub Desktop.
# Workflow plan for the order analysis, declared with drake.
#
# Each named argument of `drake_plan()` is a target; drake works out the build
# order from the target names referenced inside each command. The helpers
# called here (`get_*()`, `enrich_orders()`, `engineer_*()`,
# `create_df_for_eda()`, `density_plot()`, `roc_plot()`) are defined elsewhere
# in the project.
#
# Plot helpers are called as `density_plot(data, column, title, axis_label)`
# and `roc_plot(data, column, title)` -- argument meaning is presumed from the
# call sites; confirm against the helper definitions.
the_plan <-
  drake::drake_plan(
    # cached data from previous analyses
    orders = get_orders(),
    matches = get_matches(cols_in_orders = names(orders)),
    nearby = get_nearby(),
    emailage = get_emailage(orders$order_id),
    plz_and_area = get_plz_and_area(),
    legal_form_mapping = get_legal_form_mapping(),

    # fresh data from database, keeps only relevant ids
    order_terms = get_order_terms(orders$order_id),
    debtor_external_data = get_debtor_external_data(orders$order_id),
    order_postal_codes = get_orders_postal_codes(orders$order_id),
    latest_payment_date = get_latest_payment_date(orders$order_id),
    enriched_orders = enrich_orders(
      orders, order_terms, matches, nearby, emailage,
      debtor_external_data, order_postal_codes,
      latest_payment_date
    ),

    # engineer features
    order_features = engineer_order_features(enriched_orders, plz_and_area),
    # NOTE(review): `n_outstanding_orders` is not a target of this plan, so it
    # must either be a bare column name consumed by the helper or an object in
    # the calling environment -- confirm which.
    company_features = engineer_company_features(
      enriched_orders, n_outstanding_orders
    ),
    df_for_eda = create_df_for_eda(
      enriched_orders, order_features, company_features, "My online store"
    ), # 170k rows, 176 cols

    # exploring features
    #### N requests ####
    n_requests_density_plot = density_plot(
      df_for_eda, n_requests, "Number of requests", "Number of requests"
    ) +
      ggplot2::scale_x_log10(),
    n_requests_roc_plot = roc_plot(
      df_for_eda, n_requests, "Number of requests"
    ),

    #### T between first and latest invoice ####
    created_at_diff_density_plot = density_plot(
      df_for_eda, t_first_latest_order,
      "Time between first and latest order", "Days"
    ),

    #### T since first order ####
    t_since_first_order_density_plot = density_plot(
      df_for_eda, t_since_first_order, "Time since first order", "Minutes"
    ),
    t_since_first_order_roc_plot = roc_plot(
      df_for_eda, t_since_first_order, "Time since first order"
    ),

    #### T since last order ####
    # NOTE(review): the density plot says "previous" while the ROC plot says
    # "last"; left as-is, pick one wording if they are meant to match.
    t_since_last_order_density_plot = density_plot(
      df_for_eda, t_since_last_order, "Time since previous order", "Minutes"
    ),
    t_since_last_order_roc_plot = roc_plot(
      df_for_eda, t_since_last_order, "Time since last order"
    ),

    #### Requests per day ####
    reqs_per_day_density_plot = density_plot(
      df_for_eda, reqs_per_day, "Requests per day", "N requests per day"
    ),
    reqs_per_day_roc_plot = roc_plot(
      df_for_eda, reqs_per_day, "Requests per day"
    ),

    #### N outstanding orders ####
    # Labels previously duplicated the "Requests per day" ones (copy-paste),
    # which made these plots indistinguishable from the block above.
    n_outstanding_orders_density_plot = density_plot(
      df_for_eda, n_outstanding_orders,
      "Outstanding orders", "N outstanding orders"
    ),
    n_outstanding_orders_roc_plot = roc_plot(
      df_for_eda, n_outstanding_orders, "Outstanding orders"
    ),

    #### Report ####
    # `knitr_in()` registers the Rmd as an input file of this target and
    # `file_out()` registers the HTML as its output file. The HTML path is not
    # passed to `render()`, so this relies on render's default output location
    # matching "doc/analysis.html".
    report = target(
      command = {
        rmarkdown::render(knitr_in("doc/analysis.Rmd"))
        file_out("doc/analysis.html")
      }
    )
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment