Last active
October 22, 2017 11:31
-
-
Save prafullakumar/91a9e9b61c71420977edd5b94d549c3a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/Users/prafulla/miniconda2/bin/python
# -*- coding: utf-8 -*-
#
# Train a multiple linear regression model on the "50 Startups" data set and
# export it as a Core ML model for use in an iOS app.
#
# The shebang above is the author's local interpreter path: point it at your
# own Python and give this script execute permission to run it directly.
#
# Data file: https://drive.google.com/open?id=0B_P-kZ2NQv5aZWNSYUR2b2VrbFE

# pandas reads tabular files (CSV, Excel, etc.).
import pandas

import coremltools
from sklearn.linear_model import LinearRegression

try:
    # train_test_split lives here since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location, removed in scikit-learn 0.20; kept for old installs.
    from sklearn.cross_validation import train_test_split

dataset = pandas.read_csv('50_Startups.csv')

# Features: the variables in each row that the result depends on
# (every column except the last one).
X = dataset.iloc[:, :-1].values

# Label: what we are going to predict for future data (the last column).
# NOTE(review): assumes the CSV holds exactly the three numeric feature
# columns followed by the profit column -- confirm against the data file.
y = dataset.iloc[:, -1].values

# Encoding categorical data (linear regression only deals with numbers, so
# any string column must be converted to numeric values if it is needed).
# Left disabled, as in the original script:
#from sklearn.preprocessing import LabelEncoder,OneHotEncoder
#encode = LabelEncoder() # assign 0,1 value per label in row
#X[:,3] = encode.fit_transform(X[:,3])
#hotencode = OneHotEncoder(categorical_features=[3])
#X = hotencode.fit_transform(X).toarray()

# Avoiding the dummy variable trap: per the original author this is handled
# by the scikit-learn regression itself, so the column is not dropped here.
#X = X[:,1:]

# The training/testing split enables two things:
# 1. The training set is used to create the model (70% of the data set).
# 2. The testing set is used to evaluate the model's prediction accuracy
#    (30% of the data set).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Fit multiple linear regression to the training set; the fitted regressor
# is the model.
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# To test the model, predict on the test split and compare with the actual
# results:
#y_pred = regressor.predict(X_test)

# Show the learned coefficients (bare expressions print nothing in a script).
print("Intercept: {}".format(regressor.intercept_))
print("Coefficients: {}".format(regressor.coef_))

# Now deploy the model: convert it to a Core ML model ...
coreml_model = coremltools.converters.sklearn.convert(
    regressor,
    input_features=["R&D Spend", "Administration", "Marketing Spend"],
    output_feature_names="Profit")

# ... and save it. This saved model is what the iOS app will use.
coreml_model.save("ProfitPredictor.mlmodel")

# You can create various models with different algorithms and deploy them
# for your app in the same way.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment