# Skip to content
#
# Instantly share code, notes, and snippets.

#importing necessary modules/libraries
import numpy as np
import time
# Lines starting with '#' are comments in Python.
# Generate the data for the x feature (the input data).
# For a handful of values the built-in range(start, end, step) is enough:
# start defaults to 0, step defaults to 1, and end itself is excluded, so
# range(1, 15, 1) is the same as range(1, 15).
# For millions of values a NumPy array is much faster to create and process,
# so x is built directly with np.arange; no empty-list placeholder is needed.
# Record the start time. perf_counter() is a monotonic, high-resolution clock
# meant for measuring elapsed time, whereas time.time() follows the wall
# clock and can jump if the system clock is adjusted.
timestamp1 = time.perf_counter()
# Generate the integers 0 .. 999,999 (1 million values) as a NumPy array.
x = np.arange(1000000)
# Record the end time.
timestamp2 = time.perf_counter()
# Report the time taken to generate 1 million values.
print("This took %.2f seconds" % (timestamp2 - timestamp1))
# Multiply every element of x by 11 using a list comprehension.
y = [i * 11 for i in x]
# The comprehension above is equivalent to the explicit loop below, which
# rebuilds the same list one element at a time. The loop body must be
# indented; without the indentation Python raises an IndentationError.
y = []
for i in x:
    multiplication = i * 11
    y.append(multiplication)
# The list comprehension does in one line what the loop needs four lines for.
# A literal with a decimal point is a float; a bare whole number is an int.
a, b = 1.01, 1
print(type(a))
# output: <class 'float'>
print(type(b))
# output: <class 'int'>
# Imports for this step: pandas for the DataFrame, and from scikit-learn the
# train_test_split function and the LinearRegression class.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Dictionary: all values of x go under the key 'X', all values of y under 'y'.
dictionary = {'X': x, 'y': y}
# Build a DataFrame from the dictionary and keep it in a variable so it can
# be accessed whenever it is needed.
df = pd.DataFrame(data=dictionary)
# Randomly shuffle the X and y columns into training data (70 percent) and
# testing data (30 percent). Calling with the same random_state (15 here)
# keeps the same train/test data between runs, so evaluations are comparable.
X_train, X_test, y_train, y_test = train_test_split(
    df[['X']],
    df['y'],
    test_size=0.3,
    random_state=15,
)
print('Training input data: ', X_train)
print('Testing input data: ', X_test)
import random
# Seeding the generator fixes the pseudo-random sequence, so every run of
# the script draws the same numbers.
random.seed(3)
# a: ten integers drawn from 1 to 100 (both ends included) after seeding.
a = [random.randint(1, 100) for _ in range(10)]
print(a)
# Recorded output, first run:  [61, 35, 85, 68, 86, 45, 19, 49, 2, 48]
# Recorded output, second run: [61, 35, 85, 68, 86, 45, 19, 49, 2, 48]
# @Manikant92
# Manikant92 / test.py
# Last active September 4, 2018 09:42
# NOTE(review): `lr` is not defined in the visible part of this file;
# presumably it is a LinearRegression model already fitted on
# X_train / y_train -- confirm before running.
# X_test is our test set (the 30 percent split). Pass it to the model's
# predict method.
y_prediction = lr.predict(X_test)
# Print out the predictions made by the model.
print(y_prediction)
#output: [ 154. 33. 110. 55. 22.]
# (sample output above appears to come from a smaller data set -- verify)
# Import the metrics module from sklearn.
from sklearn import metrics
# R^2 score of the predictions against the true test targets, as a percentage.
print(metrics.r2_score(y_test, y_prediction)*100)
#output: 100.0
# predict() expects a 2-D array-like of shape (n_samples, n_features); a bare
# scalar is rejected with a ValueError, so each new input is wrapped as one
# sample with one feature.
# Predict by providing the new input -18.
print(lr.predict([[-18]]))
#output: [-198.]
#-18 * 11 = -198
# Predict with 89 as input.
print(lr.predict([[89]]))