In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
#Reading the dataset
#dataset = pd.read_csv("https://raw.githubusercontent.com/Satyajeet-IITDelhi/sales/main/SLRSales.csv")
In [3]:
#Reading the dataset
dataset = pd.read_csv("C:/NeuralNetwork/MRMSL861/SLRSales.csv")
In [4]:
dataset.head()
Out[4]: Sales Adv_Exp
0 43.6 13.9
1 38.0 12.0
2 30.1 9.3
3 35.3 9.7
4 46.4 12.3
In [5]:
#Model Building
#Simple Linear Regresion
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
In [6]:
#Setting the value for X and Y
x = dataset[['Adv_Exp']]
y = dataset['Sales']
In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 100)
In [8]:
slr= LinearRegression()
slr.fit(x_train, y_train)
Out[8]: LinearRegression()
In [9]:
#Printing the model coefficients
print('Intercept: ', slr.intercept_)
print('Coefficient:', slr.coef_)
Intercept: 14.462716405605931
Coefficient: [2.08367683]
In [10]:
print('Regression Equation: Sales = 14.46 + 2.08 * Adv_Exp')
Regression Equation: Sales = 14.46 + 2.08 * Adv_Exp
In [11]:
import statsmodels.api as sm
In [12]:
#fit linear regression model
model = sm.OLS(y, x).fit()
In [13]:
#view model summary
print(model.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: Sales R-squared (uncentered): 0.990
Model: OLS Adj. R-squared (uncentered): 0.990
Method: Least Squares F-statistic: 1140.
Date: Thu, 06 Jul 2023 Prob (F-statistic): 1.84e-12
Time: 16:19:36 Log-Likelihood: -32.310
No. Observations: 12 AIC: 66.62
Df Residuals: 11 BIC: 67.11
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Adv_Exp 3.2395 0.096 33.762 0.000 3.028 3.451
==============================================================================
Omnibus: 0.341 Durbin-Watson: 2.699
Prob(Omnibus): 0.843 Jarque-Bera (JB): 0.445
Skew: 0.288 Prob(JB): 0.801
Kurtosis: 2.253 Cond. No. 1.00
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\Users\Satyajeet\anaconda3\lib\site-packages\scipy\stats\_stats_py.py:1736: UserWarning: kurtosistest only valid for n>=20 ... c
ontinuing anyway, n=12
warnings.warn("kurtosistest only valid for n>=20 ... continuing "
Multiple Linear Regression (MLR)
In [14]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [15]:
#Reading the dataset
dataset = pd.read_csv("https://raw.githubusercontent.com/Harshita0109/Sales-Prediction/master/advertising.csv")
In [16]:
dataset.head()
Out[16]: TV Radio Newspaper Sales
0 230.1 37.8 69.2 22.1
1 44.5 39.3 45.1 10.4
2 17.2 45.9 69.3 12.0
3 151.5 41.3 58.5 16.5
TV Radio Newspaper Sales
4 180.8 10.8 58.4 17.9
In [17]:
#Exploratory Data Analysis
#Distribution of the target variable
sns.distplot(dataset['Sales']);
C:\Users\Satyajeet\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function a
nd will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexi
bility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
In [18]:
#Exploratory Data Analysis
#Distribution of the Independent variable(IV)
sns.distplot(dataset['TV']);
C:\Users\Satyajeet\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function a
nd will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexi
bility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
In [19]:
#Exploratory Data Analysis
#Distribution of the Independent variable(IV)
sns.distplot(dataset['Radio']);
C:\Users\Satyajeet\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function a
nd will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexi
bility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
In [20]:
#Exploratory Data Analysis
#Distribution of the Independent variable(IV)
sns.distplot(dataset['Newspaper']);
C:\Users\Satyajeet\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function a
nd will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexi
bility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
In [21]:
#Heatmap
sns.heatmap(dataset.corr(), annot = True)
plt.show()
In [22]:
#Multiple Linear Regression(MLR)
#Equation: Sales = β0 + (β1 * TV) + (β2 * Radio) + (β3 * Newspaper)
#Setting the value for X and Y
x = dataset[['TV', 'Radio', 'Newspaper']]
y = dataset['Sales']
In [23]:
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.3, random_state=100)
In [24]:
mlr= LinearRegression()
mlr.fit(x_train, y_train)
Out[24]: LinearRegression()
In [25]:
#Printing the model coefficients
print(mlr.intercept_)
# pair the feature names with the coefficients
list(zip(x, mlr.coef_))
4.334595861728431
Out[25]: [('TV', 0.053829108667250075),
('Radio', 0.11001224388558056),
('Newspaper', 0.006289950146130346)]
In [26]:
import statsmodels.api as sm
In [27]:
#fit linear regression model
model = sm.OLS(y, x).fit()
In [28]:
#view model summary
print(model.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: Sales R-squared (uncentered): 0.977
Model: OLS Adj. R-squared (uncentered): 0.977
Method: Least Squares F-statistic: 2826.
Date: Thu, 06 Jul 2023 Prob (F-statistic): 1.35e-161
Time: 16:22:13 Log-Likelihood: -460.08
No. Observations: 200 AIC: 926.2
Df Residuals: 197 BIC: 936.1
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
TV 0.0671 0.002 42.078 0.000 0.064 0.070
Radio 0.1600 0.011 14.154 0.000 0.138 0.182
Newspaper 0.0284 0.008 3.545 0.000 0.013 0.044
==============================================================================
Omnibus: 0.114 Durbin-Watson: 1.949
Prob(Omnibus): 0.945 Jarque-Bera (JB): 0.025
Skew: 0.026 Prob(JB): 0.987
Kurtosis: 3.020 Cond. No. 12.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [ ]:
In [ ]: