Created
October 13, 2015 23:25
-
-
Save tomquisel/51116662e0ac3c237360 to your computer and use it in GitHub Desktop.
R linear regression in ipython notebook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%reload_ext rpy2.ipython | |
%R -n library(dplyr) | |
%%R | |
# Fit a linear model of `mpr` on the given features plus a gender-by-age
# interaction, print the model summary, and return that summary invisibly.
#
# mpr:      name of the response column (string).
# features: character vector of predictor column names.
#
# The data is read from "activity_quantity_df.csv"; mem_gender and mem_age
# are converted to factors so they are modelled as categorical variables.
do_regression = function(mpr, features) {
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    df = read.csv("activity_quantity_df.csv", header=TRUE)
    df$mem_gender = as.factor(df$mem_gender)
    df$mem_age = as.factor(df$mem_age)

    # Build the formula text once so the printed formula is exactly the one
    # that gets fitted.
    feature_str = paste(features, collapse=" + ")
    formula_str = paste(mpr, "~", feature_str, "+ mem_gender*mem_age")
    print(formula_str)

    model = lm(as.formula(formula_str), data=df)
    model_summary = summary(model)
    print(model_summary)
    # Returned invisibly so a bare top-level call does not print it twice.
    invisible(model_summary)
}
def do_regression(mpr, condition, activity, df=None, features=['active_ratio_adjusted']): | |
if df is None: | |
df = dfs[condition] | |
df = df[df.activity == activity].copy() | |
df = df[df.days_possible > 10].copy() | |
desired_columns = ['mem_gender', 'mem_age', mpr] + features | |
df = df[desired_columns] | |
for col in df.columns: | |
# scale values so that the regression coefficients are more interpretable | |
if col == 'steps_per_active_week': | |
df[col] = df[col] / 14000 # measure impact of increasing steps by 5k/week | |
if col == 'active_ratio_adjusted': | |
df[col] = df[col] * 2 # measure impact of increasing active ratio by 50% | |
df.to_csv('activity_quantity_df.csv', index=False) | |
print mpr, condition, activity, len(df) | |
%R -i mpr,features -o result result=do_regression(mpr, features) | |
return result |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment