Python Programs

Ex 5:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the Pima Indians Diabetes dataset from UCI (CSV)


url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
'DiabetesPedigreeFunction', 'Age', 'Outcome']
data = pd.read_csv(url, header=None, names=columns)

# Univariate Analysis: Descriptive Statistics


print("Descriptive Statistics:\n", data.describe())

# Univariate Analysis: Skewness, Kurtosis, and Mode


print("\nSkewness:\n", data.skew())
print("\nKurtosis:\n", data.kurt())
print("\nMode:\n", data.mode().iloc[0])

# Bivariate Analysis: Linear Regression (Predict 'Glucose')


X = data[['Pregnancies', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
'DiabetesPedigreeFunction', 'Age']]
y = data['Glucose']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_pred = lin_reg.predict(X_test)
print("\nLinear Regression MSE:", mean_squared_error(y_test, y_pred))

# Bivariate Analysis: Logistic Regression (Predict 'Outcome')


X = data[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
'DiabetesPedigreeFunction', 'Age']]
y = data['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
log_reg = LogisticRegression(max_iter=10000)
log_reg.fit(X_train, y_train)
y_pred_class = log_reg.predict(X_test)
print("\nLogistic Regression Classification Report:\n", classification_report(y_test,
y_pred_class))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_class))

# Multiple Regression: Predict 'Outcome' (Using all features for prediction)


X = data.drop(columns=['Outcome'])
y = data['Outcome']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
lin_reg_multiple = LinearRegression()
lin_reg_multiple.fit(X_scaled, y)
print("\nMultiple Regression Coefficients:\n", lin_reg_multiple.coef_)

# Visualizations: Histograms for Univariate Analysis


data.hist(figsize=(10, 8))
plt.tight_layout()
plt.show()

Output:
Descriptive Statistics:
          Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin         BMI  DiabetesPedigreeFunction         Age     Outcome
count      768.000000  768.000000     768.000000     768.000000  768.000000  768.000000                768.000000  768.000000  768.000000
mean         3.845052  120.894531      69.105469      20.536458   79.799479   31.992578                  0.471876   33.240885    0.348958
std          3.369578   31.972618      19.355807      11.211053  115.248711    7.884160                  0.331722   11.760232    0.476951
min          0.000000    0.000000       0.000000       0.000000    0.000000   18.200000                  0.078000   21.000000    0.000000
25%          1.000000   99.000000      62.000000       0.000000    0.000000   27.300000                  0.243000   24.000000    0.000000
50%          3.000000  117.000000      72.000000      23.000000   30.500000   32.000000                  0.372000   29.000000    0.000000
75%          6.000000  140.000000      80.000000      32.000000  127.250000   36.600000                  0.626000   41.000000    1.000000
max         17.000000  199.000000     122.000000      99.000000  846.000000   67.100000                  2.420000   81.000000    1.000000

Skewness:
Pregnancies 0.900848
Glucose 0.172272
BloodPressure 0.169831
SkinThickness 1.214581
Insulin 2.264120
BMI 0.521497
DiabetesPedigreeFunction 1.105264
Age 0.560308
Outcome 0.639479
dtype: float64

Kurtosis:
Pregnancies 2.116012
Glucose -0.010561
BloodPressure -0.602501
SkinThickness 1.440893
Insulin 6.476368
BMI -0.118526
DiabetesPedigreeFunction 1.420690
Age -0.104247
Outcome -1.830631
dtype: float64

Ex 6:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm
from mpl_toolkits.mplot3d import Axes3D

# Load the Pima Indians Diabetes dataset from UCI (CSV)


url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
'DiabetesPedigreeFunction', 'Age', 'Outcome']
data = pd.read_csv(url, header=None, names=columns)
# 1. Normal Curves (for a selected feature, e.g., 'Glucose')
plt.figure(figsize=(8, 6))
sns.histplot(data['Glucose'], kde=True, stat='density', linewidth=2)
plt.title("Normal Curve for Glucose")
plt.xlabel("Glucose Level")
plt.ylabel("Density")
plt.show()
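
# Optional sketch: overlay a fitted normal PDF on the Glucose histogram using
# the already-imported scipy.stats.norm (mean and std estimated from the data),
# which makes the comparison to a true normal curve explicit.
mu, sigma = norm.fit(data['Glucose'])
x_vals = np.linspace(data['Glucose'].min(), data['Glucose'].max(), 200)
plt.figure(figsize=(8, 6))
sns.histplot(data['Glucose'], stat='density', linewidth=2)
plt.plot(x_vals, norm.pdf(x_vals, mu, sigma), 'r-',
         label=f'Normal fit (mu={mu:.1f}, sigma={sigma:.1f})')
plt.title("Glucose with Fitted Normal Curve")
plt.xlabel("Glucose Level")
plt.ylabel("Density")
plt.legend()
plt.show()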

# 2. Density and Contour Plots


# Plotting density for two features, e.g., 'Glucose' and 'BMI'
plt.figure(figsize=(8, 6))
sns.kdeplot(x=data['Glucose'], y=data['BMI'], cmap='Blues', fill=True, thresh=0.05)
plt.title("Density Plot between Glucose and BMI")
plt.xlabel("Glucose Level")
plt.ylabel("BMI")
plt.show()
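
# Optional sketch: a contour-line version of the same bivariate density, since
# this step is titled "Density and Contour Plots". The number of levels is an
# arbitrary choice for illustration.
plt.figure(figsize=(8, 6))
sns.kdeplot(x=data['Glucose'], y=data['BMI'], color='navy', levels=10)
plt.title("Contour Plot between Glucose and BMI")
plt.xlabel("Glucose Level")
plt.ylabel("BMI")
plt.show()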

# 3. Correlation and Scatter Plots


# Scatter plot and correlation heatmap
plt.figure(figsize=(8, 6))
sns.scatterplot(x=data['Glucose'], y=data['BMI'], hue=data['Outcome'], palette='coolwarm')
plt.title("Scatter Plot of Glucose vs BMI")
plt.xlabel("Glucose Level")
plt.ylabel("BMI")
plt.show()

# Correlation heatmap
corr = data.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()
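
# Optional sketch: list each feature's correlation with 'Outcome' in sorted
# order, as a numeric companion to the heatmap above.
print("Correlation with Outcome:\n",
      corr['Outcome'].drop('Outcome').sort_values(ascending=False))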

# 4. Histograms (for all features)


data.hist(figsize=(10, 8), bins=20)
plt.suptitle("Histograms of Features", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

# 5. Three Dimensional Plotting (for three features, e.g., 'Glucose', 'BMI', and 'Age')
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

ax.scatter(data['Glucose'], data['BMI'], data['Age'], c=data['Outcome'], cmap='coolwarm')


ax.set_xlabel('Glucose')
ax.set_ylabel('BMI')
ax.set_zlabel('Age')
ax.set_title('3D Scatter Plot of Glucose, BMI, and Age')
plt.show()
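
# Optional variant (an assumption, not part of the original listing): capture
# the scatter handle so a colorbar can indicate the Outcome classes.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
sc = ax.scatter(data['Glucose'], data['BMI'], data['Age'], c=data['Outcome'], cmap='coolwarm')
fig.colorbar(sc, ax=ax, label='Outcome (0 = no diabetes, 1 = diabetes)')
ax.set_xlabel('Glucose')
ax.set_ylabel('BMI')
ax.set_zlabel('Age')
ax.set_title('3D Scatter Plot with Outcome Colorbar')
plt.show()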

Output:

Ex 7:

Refer to the program given in the lab manual.
