Skip to content

Instantly share code, notes, and snippets.

@Beomi
Created August 22, 2016 07:35
Show Gist options
  • Save Beomi/228cf35bf9e99c5db1243780e8c17174 to your computer and use it in GitHub Desktop.
Save Beomi/228cf35bf9e99c5db1243780e8c17174 to your computer and use it in GitHub Desktop.
import statsmodels.api as stapi
import numpy
def main():
    """Run the regression pipeline: load data, fit OLS, report results.

    Prints the fitted model's summary, then the list returned by
    ``get_effective_variables``.
    """
    # N is returned by read_data() and passed through to the regression step.
    (N, X, Y) = read_data()
    results = do_multivariate_regression(N, X, Y)
    print(results.summary())
    effective_variables = get_effective_variables(results)
    print(effective_variables)
def do_multivariate_regression(N, X, Y):
    """Fit an ordinary least squares model of Y on X and return the fit.

    Args:
        N: Number of samples. Not used by the fit; kept so existing callers
            keep working.
        X: 2-D array-like of explanatory variables, one row per sample.
        Y: 1-D array-like of responses, one entry per row of X.

    Returns:
        The fitted results object produced by ``stapi.OLS(Y, X).fit()``.
    """
    # Accept plain nested lists as well as arrays.
    X = numpy.array(X)
    # NOTE: X is passed as given; no intercept column is added here.
    results = stapi.OLS(Y, X).fit()
    print(results)
    return results
def get_effective_variables(results):
    """Return the 1-based positions of the significant variables.

    A variable counts as significant when its p-value is strictly below
    0.05.

    Args:
        results: Fitted regression results exposing a ``pvalues`` sequence
            with one p-value per explanatory variable, in column order.

    Returns:
        A list of 1-based variable indices whose p-value is < 0.05.
    """
    # Iterate the p-values themselves; the results object is not a
    # sequence of p-values and cannot be looped over directly.
    return [
        index
        for index, pvalue in enumerate(results.pvalues, start=1)
        if pvalue < 0.05
    ]
def read_data(path="students.txt"):
    """Load the regression data set from a whitespace-separated text file.

    The first line of the file is skipped. Every following non-blank line
    must hold numeric fields: all but the last are the explanatory
    variables of one sample, and the last is its response.

    Args:
        path: File to read. Defaults to ``"students.txt"``.

    Returns:
        A tuple ``(N, X, Y)`` where ``N`` is the number of samples read,
        ``X`` is a numpy array with one row per sample and ``Y`` is a
        1-dimensional numpy array of responses.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    X = []
    Y = []
    with open(path) as f:
        # Skip the first line; the default keeps an empty file from raising.
        next(f, None)
        for line in f:
            # split() with no argument tolerates repeated spaces and tabs.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            numeric_data = [float(field) for field in fields]
            X.append(numeric_data[:-1])
            Y.append(numeric_data[-1])
    N = len(Y)
    # X must be a 2-D numpy array (samples x variables; 30 x 5 for the
    # original data set) and Y a 1-dimensional numpy array.
    X = numpy.array(X)
    Y = numpy.array(Y)
    return (N, X, Y)
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@chemineer
Copy link

chemineer commented Oct 5, 2020

def get_effective_variables(results):
    """Return the names of the variables whose p-value is below 0.05.

    The coefficient table of ``results.summary()`` (``tables[1]``) is read
    row by row: column 0 holds the variable name and column 4 the p-value.
    The first row of the table is the header and is skipped.

    Args:
        results: Fitted regression results exposing ``summary()``.

    Returns:
        A list with the first-column entry (variable name) of every row
        whose p-value column parses to a float < 0.05.
    """
    # NOTE(review): the summary table stores display-formatted strings, so
    # these p-values are presumably rounded; prefer results.pvalues when
    # exact thresholds matter.
    rows = results.summary().tables[1].data[1:]
    # Take the name from the SAME row as the p-value being tested.
    return [row[0] for row in rows if float(row[4]) < 0.05]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment