Skip to content

Instantly share code, notes, and snippets.

@tomquisel
Created October 13, 2015 23:25
Show Gist options
  • Save tomquisel/51116662e0ac3c237360 to your computer and use it in GitHub Desktop.
Save tomquisel/51116662e0ac3c237360 to your computer and use it in GitHub Desktop.
R linear regression in ipython notebook
%reload_ext rpy2.ipython
%R -n library(dplyr)
%%R
# Fit a linear model of `mpr` on `features` plus a gender-by-age interaction.
#
# Arguments:
#   mpr       Name of the response column (character scalar).
#   features  Character vector of predictor column names; the names are
#             joined with " + " to form the right-hand side of the formula.
#   csv_path  Path of the CSV file to read the data from. Defaults to
#             "activity_quantity_df.csv", so two-argument calls behave as
#             before.
#
# Side effects: prints the formula string and the model summary.
# Value: the result of print(summary(model)), i.e. the summary object.
do_regression = function(mpr, features, csv_path="activity_quantity_df.csv") {
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    df = read.csv(csv_path, header=TRUE)

    # Gender and age enter the model as categorical factors, not numbers.
    df$mem_gender = as.factor(df$mem_gender)
    df$mem_age = as.factor(df$mem_age)

    # Build the formula text once so that what is printed is exactly what
    # is fitted.
    feature_str = paste(features, collapse=" + ")
    formula_str = paste(mpr, "~", feature_str, "+ mem_gender*mem_age")
    print(formula_str)

    model = lm(as.formula(formula_str), data=df)
    print(summary(model))
}
def do_regression(mpr, condition, activity, df=None, features=['active_ratio_adjusted']):
if df is None:
df = dfs[condition]
df = df[df.activity == activity].copy()
df = df[df.days_possible > 10].copy()
desired_columns = ['mem_gender', 'mem_age', mpr] + features
df = df[desired_columns]
for col in df.columns:
# scale values so that the regression coefficients are more interpretable
if col == 'steps_per_active_week':
df[col] = df[col] / 14000 # measure impact of increasing steps by 5k/week
if col == 'active_ratio_adjusted':
df[col] = df[col] * 2 # measure impact of increasing active ratio by 50%
df.to_csv('activity_quantity_df.csv', index=False)
print mpr, condition, activity, len(df)
%R -i mpr,features -o result result=do_regression(mpr, features)
return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment