import pandas as pd
import numpy as np
import time
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Load the worksheet, skipping its first row (skiprows=1).
data = pd.read_excel('input_file.xlsx', skiprows=1)

# Column layout used below: the first N_FEATURES columns are the predictors
# and the column right after them (index N_FEATURES) is the target.
# NOTE(review): assumes the sheet has at least 33 columns -- confirm.
N_FEATURES = 32
X = data.iloc[:, :N_FEATURES]
y = data.iloc[:, N_FEATURES]

# scikit-learn expects uniformly typed (string) feature names.
X.columns = X.columns.astype(str)

# Impute missing values with the per-column mean.
# NOTE(review): the means are taken over the full data set *before* the
# split, so validation/test rows influence the imputed training values.
feature_means = X.mean()
target_mean = y.mean()
X = X.fillna(feature_means)
y = y.fillna(target_mean)

# 80 % train; the remaining 20 % is halved into validation and test sets
# (10 % each). Both splits use the same fixed seed for reproducibility.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)
# Single-target Lasso (L1-regularised linear regression); alpha controls the
# strength of the L1 penalty.
lasso = Lasso(alpha=0.1)

# Train on the training split, timing only the fit call itself.
start_time = time.time()
lasso.fit(X_train, y_train)
elapsed_time = time.time() - start_time

# Predictions for the validation split and for the held-out test split.
y_val_pred = lasso.predict(X_val)
y_pred = lasso.predict(X_test)

# Validation metrics: mean squared error and R^2 (as returned by score()).
r2_val = lasso.score(X_val, y_val)
mse_val = mean_squared_error(y_true=y_val, y_pred=y_val_pred)
print("Validation Mean Squared Error:", mse_val)
print("Validation R-squared Score:", r2_val)
# Export the test-set predictions and the matching ground-truth values to two
# sheets of a single workbook.
pred_df = pd.DataFrame(y_pred)
actuals_df = pd.DataFrame(y_test)

# Use the writer as a context manager so the workbook is always saved and
# closed. A bare `writer.save` attribute reference never calls the method
# (and `ExcelWriter.save` no longer exists in pandas >= 2.0), which leaves the
# output file unwritten or raises AttributeError.
with pd.ExcelWriter('output_file.xlsx', engine='xlsxwriter') as writer:
    pred_df.to_excel(writer, sheet_name='prediction', index=False)
    actuals_df.to_excel(writer, sheet_name='actuals', index=False)
# Evaluate the fitted model on the held-out test split and report the
# training time measured around the fit call.
mse = mean_squared_error(y_test, y_pred)
r2 = lasso.score(X_test, y_test)  # R^2 on the test split
print("Mean Squared Error:", mse)
print("R-squared Score:", r2)
print("Time elapsed:", elapsed_time, "seconds")

# Learned linear coefficients of the fitted model, plus a copy rounded to
# four decimals for display.
coefficients = lasso.coef_
roundedcoefficient_matrix = np.round(coefficients, decimals=4)
#print("Coefficients:", roundedcoefficient_matrix)

# Coefficient magnitudes (sign dropped).
abs_coefficients = np.abs(coefficients)