tomquisel · October 13, 2015 23:25
diff --git a/r_lm.py b/r_lm.py
 %reload_ext rpy2.ipython
 %R -n library(dplyr)

 %%R
 # Fit a linear model of `mpr` on the given features plus a
 # mem_gender-by-mem_age interaction, print it, and return its summary.
 #
 # Args:
 #   mpr:      name of the response column (left-hand side of the formula).
 #   features: collection of predictor column names; they are joined
 #             with " + " to form the right-hand side.
 #
 # Reads its data from "activity_quantity_df.csv" in the current working
 # directory, so that file must have been written before this is called.
 #
 # Returns (invisibly) the summary.lm object for the fitted model.
 do_regression = function(mpr, features) {
    df = read.csv("activity_quantity_df.csv", header=TRUE)
    # Force gender and age to be treated as categorical, so lm() estimates
    # per-level effects rather than a single numeric slope.
    df$mem_gender = as.factor(df$mem_gender)
    df$mem_age = as.factor(df$mem_age)
    feature_str = paste(features, collapse=" + ")
    # Build the formula text once so the printed formula is exactly the
    # one that gets fitted.
    formula_str = paste(mpr, "~", feature_str, "+ mem_gender*mem_age")
    print(formula_str)
    model = lm(as.formula(formula_str), data=df)
    model_summary = summary(model)
    print(model_summary)
    # Explicit return value (the caller captures it with `result=...`);
    # invisible() avoids printing the summary a second time.
    invisible(model_summary)
 }

 def do_regression(mpr, condition, activity, df=None, features=['active_ratio_adjusted']):
    if df is None:
        df = dfs[condition]
    df = df[df.activity == activity].copy()
    df = df[df.days_possible > 10].copy()
    desired_columns = ['mem_gender', 'mem_age', mpr] + features
    df = df[desired_columns]
    for col in df.columns:
        # scale values so that the regression coefficients are more interpretable
        if col == 'steps_per_active_week':
            df[col] = df[col] / 14000 # measure impact of increasing steps by 5k/week
        if col == 'active_ratio_adjusted':
            df[col] = df[col] * 2 # measure impact of increasing active ratio by 50%
    df.to_csv('activity_quantity_df.csv', index=False)
    print mpr, condition, activity, len(df)
    %R -i mpr,features -o result result=do_regression(mpr, features)
    return result
	%reload_ext rpy2.ipython
	%R -n library(dplyr)

	%%R
	# Fit a linear model of `mpr` on the given features plus a
	# mem_gender-by-mem_age interaction, print it, and return its summary.
	#
	# Args:
	#   mpr:      name of the response column (left-hand side of the formula).
	#   features: collection of predictor column names; they are joined
	#             with " + " to form the right-hand side.
	#
	# Reads its data from "activity_quantity_df.csv" in the current working
	# directory, so that file must have been written before this is called.
	#
	# Returns (invisibly) the summary.lm object for the fitted model.
	do_regression = function(mpr, features) {
	    df = read.csv("activity_quantity_df.csv", header=TRUE)
	    # Force gender and age to be treated as categorical, so lm() estimates
	    # per-level effects rather than a single numeric slope.
	    df$mem_gender = as.factor(df$mem_gender)
	    df$mem_age = as.factor(df$mem_age)
	    feature_str = paste(features, collapse=" + ")
	    # Build the formula text once so the printed formula is exactly the
	    # one that gets fitted.
	    formula_str = paste(mpr, "~", feature_str, "+ mem_gender*mem_age")
	    print(formula_str)
	    model = lm(as.formula(formula_str), data=df)
	    model_summary = summary(model)
	    print(model_summary)
	    # Explicit return value (the caller captures it with `result=...`);
	    # invisible() avoids printing the summary a second time.
	    invisible(model_summary)
	}

	def do_regression(mpr, condition, activity, df=None, features=['active_ratio_adjusted']):
	if df is None:
	df = dfs[condition]
	df = df[df.activity == activity].copy()
	df = df[df.days_possible > 10].copy()
	desired_columns = ['mem_gender', 'mem_age', mpr] + features
	df = df[desired_columns]
	for col in df.columns:
	# scale values so that the regression coefficients are more interpretable
	if col == 'steps_per_active_week':
	df[col] = df[col] / 14000 # measure impact of increasing steps by 5k/week
	if col == 'active_ratio_adjusted':
	df[col] = df[col] * 2 # measure impact of increasing active ratio by 50%
	df.to_csv('activity_quantity_df.csv', index=False)
	print mpr, condition, activity, len(df)
	%R -i mpr,features -o result result=do_regression(mpr, features)
	return result