scott-lydon · April 11, 2020 13:50
diff --git a/LinearRegression.py b/LinearRegression.py
 # Fit a simple linear regression of "waiting" on "eruptions" for a small
 # sample, then use the fitted model to predict waiting values for new inputs.
 #
 # A dependent column must hold continuous data for a linear regression model
 # to be appropriate; "waiting" is the dependent column here.

 # Raw observations, one list per column.
 eruptions = {
     "eruptions": [3.6, 1.8, 3.333, 2.283, 4.533, 2.883],
     "waiting": [79, 54, 74, 62, 85, 55],
 }

 # Convert the dictionary into a dataframe.
 edf = pd.DataFrame(eruptions)

 # Plot the raw data.
 edfPlot = plt.plot(edf["eruptions"], edf["waiting"])

 # Only fit a model when the two columns are clearly positively correlated.
 # The coefficient is looked up by label: pandas deprecates treating an
 # integer key as a position on a label-indexed Series, which is what
 # `edf.corr()["eruptions"][1]` relied on.
 if edf.corr().loc["waiting", "eruptions"] > 0.5:

     # Features are kept as a one-column dataframe (2-D); the target is a Series.
     x = edf[["eruptions"]]
     y = edf["waiting"]

     # Create the linear regression object.
     m = LinearRegression()

     # Train the model: y = m*x + c, choosing the line with the lowest
     # summed squared error.
     m.fit(x, y)

     # Slope (one coefficient per feature, so an array) and intercept.
     slope = m.coef_
     intercept = m.intercept_

     # Manual prediction for x = 100 straight from the line equation.
     yvalue = slope * 100 + intercept
     print(yvalue)

     # Same prediction through the model. A dataframe with the training
     # column name is passed so scikit-learn does not warn about missing
     # feature names (the model was fitted on a named column).
     single = m.predict(pd.DataFrame({"eruptions": [100]}))
     print(single)

     # Predict for several values at once via a dataframe.
     pr = pd.DataFrame({"eruptions": [30, 400, 990, 2349]})
     waiting = m.predict(pr)
     print(waiting)

     # Fitted values for the training inputs, drawn over the raw data
     # plotted above so the regression line can be compared with it.
     prd = m.predict(x)
     plt.plot(edf["eruptions"], prd)
	# Fit a simple linear regression of "waiting" on "eruptions" for a small
	# sample, then use the fitted model to predict waiting values for new inputs.
	#
	# A dependent column must hold continuous data for a linear regression model
	# to be appropriate; "waiting" is the dependent column here.

	# Raw observations, one list per column.
	eruptions = {
		"eruptions": [3.6, 1.8, 3.333, 2.283, 4.533, 2.883],
		"waiting": [79, 54, 74, 62, 85, 55],
	}

	# Convert the dictionary into a dataframe.
	edf = pd.DataFrame(eruptions)

	# Plot the raw data.
	edfPlot = plt.plot(edf["eruptions"], edf["waiting"])

	# Only fit a model when the two columns are clearly positively correlated.
	# The coefficient is looked up by label: pandas deprecates treating an
	# integer key as a position on a label-indexed Series, which is what
	# `edf.corr()["eruptions"][1]` relied on.
	if edf.corr().loc["waiting", "eruptions"] > 0.5:

		# Features are kept as a one-column dataframe (2-D); the target is a Series.
		x = edf[["eruptions"]]
		y = edf["waiting"]

		# Create the linear regression object.
		m = LinearRegression()

		# Train the model: y = m*x + c, choosing the line with the lowest
		# summed squared error.
		m.fit(x, y)

		# Slope (one coefficient per feature, so an array) and intercept.
		slope = m.coef_
		intercept = m.intercept_

		# Manual prediction for x = 100 straight from the line equation.
		yvalue = slope * 100 + intercept
		print(yvalue)

		# Same prediction through the model. A dataframe with the training
		# column name is passed so scikit-learn does not warn about missing
		# feature names (the model was fitted on a named column).
		single = m.predict(pd.DataFrame({"eruptions": [100]}))
		print(single)

		# Predict for several values at once via a dataframe.
		pr = pd.DataFrame({"eruptions": [30, 400, 990, 2349]})
		waiting = m.predict(pr)
		print(waiting)

		# Fitted values for the training inputs, drawn over the raw data
		# plotted above so the regression line can be compared with it.
		prd = m.predict(x)
		plt.plot(edf["eruptions"], prd)