Last active
May 28, 2024 10:59
-
-
Save rajagurunath/1f227880ccf9afed3fe643cb2f859857 to your computer and use it in GitHub Desktop.
Linear Regression sample code using streamlit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import streamlit as st | |
| from datetime import time | |
| from datetime import date | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.figure_factory as ff | |
| import plotly.graph_objs as go | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import mean_squared_error, r2_score | |
| from math import sqrt | |
| import numpy as np | |
# Apply the seaborn "darkgrid" theme; it affects the matplotlib figures
# drawn by the sections further down the script.
sns.set_style("darkgrid")

# Static introduction shown under the page title. Kept in a named variable so
# the rendering call below stays short.
intro_markdown = """
The data set contains information about money spent on advertisement and their generated sales. Money
was spent on TV, radio and newspaper ads.
## Problem Statement
Sales (in thousands of units) for a particular product as a function of advertising budgets (in thousands of
dollars) for TV, radio, and newspaper media. Suppose that in our role as statistical consultants we are
asked to suggest.
Here are a few important questions that you might seek to address:
- Is there a relationship between advertising budget and sales?
- How strong is the relationship between the advertising budget and sales?
- Which media contribute to sales?
- How accurately can we estimate the effect of each medium on sales?
- How accurately can we predict future sales?
- Is the relationship linear?
We want to find a function that given input budgets for TV, radio and newspaper predicts the output sales
and visualize the relationship between the features and the response using scatter plots.
The objective is to use linear regression to understand how advertisement spending impacts sales.
### Data Description
TV
Radio
Newspaper
Sales
"""

st.title("Advertisement and Sales Data")
st.markdown(intro_markdown)
# Sidebar: one checkbox per optional section of the page. Every box starts
# unchecked, so a fresh page load shows only the title and the introduction.
st.sidebar.title("Operations on the Dataset")

# Labels are listed in the order the checkboxes appear in the sidebar; the
# unpacking below binds each checkbox state to the flag the later sections read.
_toggle_labels = (
    "show table",
    "show plots",
    "show hist plots",
    "Train model",
    "DO KFold",
    "Dist View",
    "3D plots",
    "Linechart",
)
(
    w1,
    plot,
    plothist,
    trainmodel,
    dokfold,
    distView,
    _3dplot,
    linechart,
) = (st.sidebar.checkbox(label, False) for label in _toggle_labels)
@st.cache
def read_data():
    """Load the advertising CSV and return only the spend and sales columns.

    The file path is relative to the process working directory. The result
    is wrapped by ``st.cache``.

    Returns:
        A DataFrame with the columns ``TV``, ``radio``, ``newspaper`` and
        ``sales``, in that order.
    """
    wanted_columns = ["TV", "radio", "newspaper", "sales"]
    raw = pd.read_csv("../ML_Python_LinearR_Case_2_Advertising_Data.csv")
    # Selecting by list fixes the column order regardless of the file layout.
    return raw[wanted_columns]
# Load the dataset once; every section below reads from this frame.
df = read_data()

# Optional raw-table view of the whole dataset.
if w1:
    st.dataframe(data=df, width=2000, height=500)

# Optional line chart of the whole frame.
if linechart:
    st.subheader("Line chart")
    st.line_chart(data=df)
# Optional histogram of a single, user-selected column.
if plothist:
    st.subheader("Distributions of each columns")
    column_choices = ("TV", "radio", "newspaper", "sales")
    # index=1 preselects the second entry ("radio").
    sel_cols = st.selectbox("select columns", column_choices, index=1)
    st.write(sel_cols)
    # A bare Histogram trace is passed to Streamlit wrapped in a list.
    histogram_trace = go.Histogram(x=df[sel_cols], nbinsx=50)
    st.plotly_chart([histogram_trace])
# Optional scatter plot of one user-selected column against sales.
if plot:
    st.subheader("correlation between sales and Ad compaigns")
    options = ("TV", "radio", "newspaper", "sales")
    # Third positional argument is the preselected index (1 -> "radio").
    w7 = st.selectbox("Ad medium", options, 1)
    st.write(w7)
    f = plt.figure()
    plt.scatter(df[w7], df["sales"])
    plt.xlabel(w7)
    plt.ylabel("sales")
    plt.title(f"{w7} vs Sales")
    # NOTE(review): a matplotlib figure is handed to st.plotly_chart here;
    # confirm the installed Streamlit still converts matplotlib figures, or
    # switch to st.pyplot.
    st.plotly_chart(f)
    # pyplot keeps every figure alive until it is explicitly closed. The
    # script is re-executed on each widget interaction, so without this the
    # figures would accumulate for the lifetime of the process.
    plt.close(f)
# Optional combined distribution plot of the three advertising media.
if distView:
    st.subheader("Combined distribution viewer")
    media_columns = ("TV", "radio", "newspaper")
    # One array per medium, in the same order as the legend labels below.
    hist_data = [df[column].values for column in media_columns]
    group_labels = ["TV", "Radio", "newspaper"]
    # One bin size per group, matched by position with hist_data.
    fig = ff.create_distplot(
        hist_data,
        group_labels,
        bin_size=[0.1, 0.25, 0.5],
    )
    st.plotly_chart(fig)
# Optional 3D scatter of TV and radio spend against sales.
if _3dplot:
    # st.multiselect takes (label, options, default). The original call had
    # the last two swapped, so the default listed values that were not among
    # the options. All four columns are now selectable and TV/radio are the
    # preselected pair.
    options = st.multiselect(
        'Enter columns to plot',
        ('TV', 'radio', 'newspaper', 'sales'),
        ('TV', 'radio'),
    )
    # The selection is only echoed back; the plot below always uses
    # TV / radio / sales, as its subheader states.
    st.write('You selected:', options)
    st.subheader("TV & Radio vs Sales")
    trace1 = go.Scatter3d(
        x=df["TV"].values,
        y=df["radio"].values,
        z=df["sales"].values,
        mode="markers",
        marker=dict(
            size=8,
            colorscale="Viridis",
        ),
    )
    # Zero margins so the scene fills the available chart area.
    layout = go.Layout(margin=dict(l=0, r=0, b=0, t=0))
    fig = go.Figure(data=[trace1], layout=layout)
    st.write(fig)
# Optional single train/test fit of a linear regression on all three media.
if trainmodel:
    st.header("Modeling")
    feature_names = ["TV", "radio", "newspaper"]
    X = df[feature_names].values
    y = df.sales
    # 30% of the rows are held out; no seed is given, so the split (and the
    # reported metrics) differ from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    lrgr = LinearRegression().fit(X_train, y_train)
    pred = lrgr.predict(X_test)
    mse = mean_squared_error(y_test, pred)
    rmse = sqrt(mse)
    # Built piecewise so the text sent to Streamlit matches the original
    # triple-quoted block line for line.
    report = (
        "\n"
        "Linear Regression model trained :\n"
        f"- MSE:{mse}\n"
        f"- RMSE:{rmse}\n"
    )
    st.markdown(report)
    st.success('Model trained successfully')
# Optional 10-fold cross-validation of the linear regression.
if dokfold:
    st.subheader("KFOLD Random sampling Evalution")
    my_bar = st.progress(0)
    from sklearn.model_selection import KFold

    # read_data() returns the columns as TV, radio, newspaper, sales, so the
    # features are every column but the last and the target is the last one.
    # The original built X from the last column too, which made the model
    # predict sales from sales.
    X = df.values[:, :-1]
    y = df.values[:, -1]

    n_splits = 10
    kf = KFold(n_splits=n_splits)
    mse_list = []
    rmse_list = []
    r2_list = []
    fig = plt.figure()
    for idx, (train_index, test_index) in enumerate(kf.split(X), start=1):
        # Progress is a percentage derived from the fold count, so it still
        # ends at 100 if n_splits is changed.
        my_bar.progress(idx * 100 // n_splits)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        lrgr = LinearRegression()
        lrgr.fit(X_train, y_train)
        pred = lrgr.predict(X_test)
        mse = mean_squared_error(y_test, pred)
        mse_list.append(mse)
        rmse_list.append(sqrt(mse))
        r2_list.append(r2_score(y_test, pred))
        # One line per fold: the predictions for that fold's held-out rows.
        plt.plot(pred, label=f"dataset-{idx}")
    plt.legend()
    plt.xlabel("Data points")
    plt.ylabel("PRedictions")
    # No plt.show(): the figure is rendered by Streamlit, not a GUI window.
    st.plotly_chart(fig)
    # Close the figure so repeated script reruns do not accumulate figures
    # in pyplot's registry.
    plt.close(fig)

    # Per-fold metrics, one row per fold.
    res = pd.DataFrame(
        {"MSE": mse_list, "RMSE": rmse_list, "r2_SCORE": r2_list}
    )
    st.write(res)
    st.balloons()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment