import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the dataset from the working directory.
data_no_outliers = pd.read_csv(r'creditcard.csv')

# 'Time' is the regression target; every other column is used as a predictor.
y = data_no_outliers['Time']
X = data_no_outliers.drop(labels=['Time'], axis='columns')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a multiple linear regression on the training portion
# (fit() returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out targets and score the predictions against the truth.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Trailing bare expression: echoes both metrics as a tuple.
(mse, r2)
(1201653291.0275302, 0.467113975083428)
# Separate the 'Time' column (target) from all remaining columns (features).
y = data_no_outliers['Time']  # Target variable
X = data_no_outliers.drop(labels=['Time'], axis='columns')  # Features
# Bare names for inspection; the recorded output that follows is the `y` Series.
X
y
0 0.0
1 0.0
2 1.0
3 1.0
4 2.0
...
284802 172786.0
284803 172787.0
284804 172788.0
284805 172788.0
284806 172792.0
Name: Time, Length: 284807, dtype: float64
# Partition features and target into train/test subsets: 20% of the rows are
# held out for testing, and random_state pins the shuffle so the split repeats.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Bare names for inspection of each subset.
X_train
y_train
X_test
y_test
43428 41505.0
49906 44261.0
29474 35484.0
276481 167123.0
278846 168473.0
...
75723 56223.0
252263 155726.0
221246 142491.0
81910 59157.0
59490 48864.0
Name: Time, Length: 56962, dtype: float64
# Build the multiple linear regression estimator and train it in one step;
# fit() returns the estimator itself, so `model` is the fitted instance.
model = LinearRegression().fit(X_train, y_train)
# Bare name: echoes the estimator's repr.
model
LinearRegression()
# Run the fitted model over the held-out features to get predicted targets.
y_pred = model.predict(X=X_test)
# Bare name: echoes the prediction array.
y_pred
array([ 71280.97126156, 90283.36672967, 81321.57754383, ...,
194003.70265184, 69533.54343132, 104049.50409021])
# Score the predictions against the held-out targets:
# mean squared error (in squared target units) and coefficient of determination.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
# Trailing bare expression: echoes both metrics as a tuple.
(mse, r2)
(1201653291.0275302, 0.467113975083428)
from sklearn import metrics

# Report the mean absolute error between held-out targets and predictions.
print(
    "Mean Absolute Error : ",
    metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred),
)
Mean Absolute Error : 27076.807030947213
# Report the root mean squared error between held-out targets and predictions.
# mean_squared_error() returns the *squared* error, so its square root must be
# taken explicitly; printing it unrooted under this label reports the MSE
# (1201653291.03 in the earlier `mse, r2` output) instead of the RMSE.
# `** 0.5` is used rather than a version-specific scikit-learn keyword/helper
# so the cell works regardless of the installed scikit-learn release.
print(
    "Root Mean Squared Error : ",
    metrics.mean_squared_error(y_test, y_pred) ** 0.5,
)
Root Mean Squared Error : 1201653291.0275302