Skip to content

Instantly share code, notes, and snippets.

View yaojenkuo's full-sized avatar

Yao-Jen Kuo yaojenkuo

View GitHub Profile
# rankhospital()
rankhospital <- function(state, outcome, num = "best") {
library(dplyr)
library(magrittr)
file_path <- "~/hospital_data/outcome-of-care-measures.csv"
outcome_of_care_measures <- read.csv(file_path, stringsAsFactors = FALSE)
outcome_col_idx <- c(11, 17, 23)
for (i in outcome_col_idx) {
outcome_of_care_measures[, i] <- suppressWarnings(as.numeric(outcome_of_care_measures[, i]))
# rankall()
rankall <- function(outcome, num = "best") {
library(dplyr)
library(magrittr)
file_path <- "~/hospital_data/outcome-of-care-measures.csv"
outcome_of_care_measures <- read.csv(file_path, stringsAsFactors = FALSE)
outcome_col_idx <- c(11, 17, 23)
for (i in outcome_col_idx) {
outcome_of_care_measures[, i] <- suppressWarnings(as.numeric(outcome_of_care_measures[, i]))
# run_analysis.R
library(dplyr)
library(magrittr)
## get_data: Get required data into a list
get_data <- function(dest_file, ex_dir) {
data_url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
download.file(data_url, destfile = dest_file) # 下載壓縮檔
unzip(dest_file, exdir = ex_dir) # 解壓縮
file_names <- c("train/X_train.txt", "train/y_train.txt", "train/subject_train.txt", "test/X_test.txt", "test/y_test.txt", "test/subject_test.txt", "features.txt")
# This dictionary maps two-letter acronyms (keys) to full state names (values)
states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Mi
def get_recession_flag():
"""
Returns a DataFrame with Recession Starts Flag
"""
df = pd.read_excel('https://storage.googleapis.com/um_ds_intro/gdplev.xls', skiprows=220, header=None)
df = df.iloc[:, [4, 6]]
df.columns = ['Quarterly', 'GDP in billions of chained 2009 dollars']
# Build the lagged data frame ------------
df_shift = df.shift(1)
df_concat = pd.concat([df, df_shift['GDP in billions of chained 2009 dollars']], axis = 1)
def get_recession_end():
"""
Returns the year and quarter of the recession end time as a
string value in a format such as 2005q3
"""
df = get_recession_flag()
recession_start_idx = df[df['is_recession_start'] == True]['Quarterly'].values[0]
df = df.set_index('Quarterly')
# Keep only the observations from the recession start onward -------------
df_after_recession = df.loc[recession_start_idx:, :].drop('is_recession_start', axis = 1)
def get_recession_bottom():
"""
Returns the year and quarter of the recession bottom time as a
string value in a format such as 2005q3
"""
df = pd.read_excel('https://storage.googleapis.com/um_ds_intro/gdplev.xls', skiprows=220, header=None)
df = df.iloc[:, [4, 6]]
df.columns = ['Quarterly', 'GDP in billions of chained 2009 dollars']
df = df.set_index('Quarterly')
recession_starts = get_recession_start()
def convert_housing_data_to_quarters():
"""
Converts the housing data to quarters and returns it as mean
values in a dataframe. This dataframe should be a dataframe with
columns for 2000q1 through 2016q3, and should have a multi-index
in the shape of ["State","RegionName"].
Note: Quarters are defined in the assignment description, they are
not arbitrary three month periods.
def run_ttest():
"""
First creates new data showing the decline or growth of housing prices
between the recession start and the recession bottom. Then runs a ttest
comparing the university town values to the non-university towns values,
return whether the alternative hypothesis (that the two groups are different)
is true or not as well as the p-value of the confidence.
Return the tuple (different, p, better) where different=True if the t-test is
True at a p<0.01 (we reject the null hypothesis), or different=False if
# Scrape an IMDb title page with a headless Firefox session driven by Selenium:
# print the page's rating, then collect the elements under the subtext line.
# NOTE(review): set_headless(), the firefox_options=/executable_path= keywords and
# the find_element(s)_by_* helpers look like Selenium 3-era APIs -- confirm they
# still exist in the Selenium version this script is run with.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Ask Firefox to run without opening a visible browser window.
options = Options()
options.set_headless(headless=True)
# The geckodriver location is hard-coded to an absolute path on the host machine.
driver = webdriver.Firefox(firefox_options=options, executable_path='/home/ubuntu/geckodriver')
driver.get("http://www.imdb.com/title/tt3783958/")
# The text of the first element matching 'strong span' is printed as the rating.
elem = driver.find_element_by_css_selector('strong span')
print("Rating: {}".format(elem.text))
# `elem` is rebound here to the result of the plural find_elements_* lookup for
# '.subtext .itemprop' (presumably a list of elements -- verify against Selenium docs).
elem = driver.find_elements_by_css_selector('.subtext .itemprop')