This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def write_concatenated_df(): | |
| url_2011 = "https://www.run2pix.com/report/report_w.php?EventCode=20111218&Race=MA&sn=3" | |
| url_2012 = "https://www.run2pix.com/report/report_w.php?EventCode=20121216&Race=MA&sn=32" | |
| url_2013 = "https://www.run2pix.com/report/report_w.php?EventCode=20131215&Race=MA&sn=57" | |
| url_2014 = "https://www.run2pix.com/report/report_w.php?EventCode=20141221&Race=MA&sn=86" | |
| url_2015 = "https://www.run2pix.com/report/report_w.php?EventCode=20151220&Race=MA&sn=111" | |
| url_2016 = "https://www.run2pix.com/report/report_w.php?EventCode=20161218&Race=MA&sn=136" | |
| url_2017 = "https://www.run2pix.com/report/report_w.php?EventCode=20171217&Race=MA&sn=161" | |
| url_list = [url_2011, url_2012, url_2013, url_2014, url_2015, url_2016, url_2017] | |
| df_list = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| get_time_group <- function(x) { | |
| sub_2_half <- 2.5 * 3600 | |
| sub_3_hour <- 3 * 3600 | |
| sub_3_half <- 3.5 * 3600 | |
| sub_4_hour <- 4 * 3600 | |
| if (x < sub_2_half) { | |
| return("sub 2:30") | |
| } else if (x < sub_3_hour) { | |
| return("sub 3:00") | |
| } else if (x < sub_3_half) { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used for data wrangling and plotting.
library(ggplot2)
library(dplyr)
library(magrittr)
library(ggthemes) # fixed: the package is named `ggthemes`, not `ggtheme`
library(scales) # fixed: the package is named `scales`, not `scale`
library(plotly)

# Count the number of rows (finishers) for every year/gender combination.
# `df` must already exist and provide `year` and `gender` columns.
runner_by_year_gender <- df %>%
  group_by(year, gender) %>%
  summarise(finishers = n())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# get_specdata()
#
# Download the specdata zip archive and extract it.
#
# Args:
#   dest_file: Path the downloaded zip archive is written to.
#   ex_dir:    Directory the archive is extracted into.
#
# Returns:
#   Whatever unzip() returns for the extracted archive.
get_specdata <- function(dest_file, ex_dir) {
  specdata_url <- "https://storage.googleapis.com/jhu_rprg/specdata.zip"
  # download.file() does not create missing folders, so make sure the
  # folder that will hold the archive exists first.
  dest_dir <- dirname(dest_file)
  if (!dir.exists(dest_dir)) {
    dir.create(dest_dir, recursive = TRUE)
  }
  # A zip archive has to be transferred in binary mode on every platform.
  status <- download.file(specdata_url, destfile = dest_file, mode = "wb")
  if (status != 0) {
    stop("Download failed for ", specdata_url, call. = FALSE)
  }
  unzip(dest_file, exdir = ex_dir)
}

get_specdata("~/Downloads/specdata.zip", "~")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # pollutantmean() | |
| pollutantmean <- function(directory, pollutant, id = 1:332) { | |
| csv_files <- list.files(directory) # 使用內建函數 list.files() 建立出 CSV 檔案路徑 | |
| csv_file_paths <- paste0(directory, csv_files) | |
| csv_file_paths <- csv_file_paths[id] # 依照輸入的 id 參數選擇性讀入 | |
| df_list <- list() | |
| pollutant_vector <- vector() | |
| for (i in 1:length(csv_file_paths)) { | |
| df_list[[i]] <- read.csv(csv_file_paths[i]) | |
| pollutant_vector <- c(pollutant_vector, df_list[[i]][, pollutant]) # 將讀入測站的污染物資料合併起來 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # complete() | |
| complete <- function(directory, id = 1:332) { | |
| csv_files <- list.files(directory) # 使用內建函數 list.files() 建立出 CSV 檔案路徑 | |
| csv_file_paths <- paste0(directory, csv_files) | |
| csv_file_paths <- csv_file_paths[id] # 依照輸入的 id 參數選擇性讀入 | |
| df_list <- list() | |
| nobs <- vector() | |
| for (i in 1:length(csv_file_paths)) { | |
| df_list[[i]] <- read.csv(csv_file_paths[i]) | |
| is_complete <- complete.cases(df_list[[i]]) # 直接引用 complete.cases() 函數 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # corr() | |
| corr <- function(directory, threshold = 0) { | |
| nobs <- complete(directory)[, "nobs"] # 使用上一題已經定義好的 complete() 函數 | |
| if (threshold > max(nobs)) { # 如果使用者輸入的門檻值超過所有測站的最大完整觀測值 | |
| return(NULL) | |
| } else { | |
| df_to_read <- nobs > threshold | |
| csv_files <- list.files(directory) # 使用內建函數 list.files() 建立出 CSV 檔案路徑 | |
| csv_file_paths <- paste0(directory, csv_files) | |
| csv_file_paths <- csv_file_paths[df_to_read] # 利用邏輯值選出大於等於門檻的測站 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# get_hospital_data()
#
# Download the hospital data zip archive and extract it.
#
# Args:
#   dest_file: Path the downloaded zip archive is written to.
#   ex_dir:    Directory the archive is extracted into.
#
# Returns:
#   Whatever unzip() returns for the extracted archive.
get_hospital_data <- function(dest_file, ex_dir) {
  hospital_data_url <- "https://storage.googleapis.com/jhu_rprg/hospital_data.zip"
  # download.file() does not create missing folders, so make sure the
  # folder that will hold the archive exists first.
  dest_dir <- dirname(dest_file)
  if (!dir.exists(dest_dir)) {
    dir.create(dest_dir, recursive = TRUE)
  }
  # A zip archive has to be transferred in binary mode on every platform.
  status <- download.file(hospital_data_url, destfile = dest_file, mode = "wb")
  if (status != 0) {
    stop("Download failed for ", hospital_data_url, call. = FALSE)
  }
  unzip(dest_file, exdir = ex_dir)
}

get_hospital_data("~/Downloads/hospital_data.zip", "~/hospital_data")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # To draw a histogram | |
| library(ggplot2) | |
| file_path <- "~/hospital_data/outcome-of-care-measures.csv" | |
| outcome_of_care_measures <- read.csv(file_path, stringsAsFactors = FALSE) | |
| outcome_of_care_measures[, 11] <- as.numeric(outcome_of_care_measures[, 11]) | |
| hist(outcome_of_care_measures[, 11], xlab = "30 Day Death Mortality Rates From Heart Attack", col = rgb(1, 0, 0, 0.5), main = "", breaks = 40) # base plotting system | |
| ggplot(outcome_of_care_measures, aes(x = Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack)) + | |
| geom_histogram(bins = 40, fill = rgb(1, 0, 0, 0.5)) + | |
| xlab("30 Day Death Mortality Rates From Heart Attack") + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # best() | |
| best <- function(state, outcome) { | |
| library(dplyr) | |
| library(magrittr) | |
| file_path <- "~/hospital_data/outcome-of-care-measures.csv" | |
| outcome_of_care_measures <- read.csv(file_path, stringsAsFactors = FALSE) | |
| outcome_col_idx <- c(11, 17, 23) | |
| for (i in outcome_col_idx) { | |
| outcome_of_care_measures[, i] <- suppressWarnings(as.numeric(outcome_of_care_measures[, i])) |