PYTHON CODE
Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
Loading dataset
# Read the inventory CSV into a DataFrame. The path is relative, so the file
# must be in the current working directory; the file name (including its
# spelling) has to match the file on disk exactly.
data = pd.read_csv("Medical Inventory Optimaization Dataset.csv")
Information about Dataset
# Print a concise summary of the DataFrame: column names, non-null counts,
# dtypes and memory usage.
data.info()
Description of Dataset
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns. The result is a bare expression, so it is only displayed when this
# line is the last statement of a notebook cell.
data.describe()
Finding Measures of Central Tendency
Mean
# Column-wise arithmetic mean. numeric_only=True restricts the calculation to
# numeric columns: pandas >= 2.0 no longer drops non-numeric columns silently
# and raises TypeError when any are present.
# NOTE(review): assumes the CSV contains non-numeric columns — confirm.
data.mean(numeric_only=True)
Median
# Column-wise median. numeric_only=True restricts the calculation to numeric
# columns: pandas >= 2.0 no longer drops non-numeric columns silently and
# raises TypeError when any are present.
# NOTE(review): assumes the CSV contains non-numeric columns — confirm.
data.median(numeric_only=True)
Mode
# Most frequent value(s) of every column. The DataFrame is bound to the
# lowercase name `data`; the capitalised `Data` is undefined and raised
# NameError.
data.mode()
Measures of Dispersion
Variance
# Column-wise sample variance. numeric_only=True restricts the calculation to
# numeric columns: pandas >= 2.0 no longer drops non-numeric columns silently
# and raises TypeError when any are present.
# NOTE(review): assumes the CSV contains non-numeric columns — confirm.
data.var(numeric_only=True)
Standard Deviation
# Print the standard deviation (pandas default: sample, ddof=1) of the cost
# column and then of the sales column.
for column_name in ('Final_Cost', 'Final_Sales'):
    std_dev = data[column_name].std()
    print("Standard Deviation of '{}' column: {}".format(column_name, std_dev))
Range
# Print the range (largest value minus smallest value) of the cost column and
# then of the sales column.
for column_name in ('Final_Cost', 'Final_Sales'):
    min_value, max_value = data[column_name].min(), data[column_name].max()
    data_range = max_value - min_value
    print("Range of '{}' column: {}".format(column_name, data_range))
Skewness
# Print the skewness (asymmetry of the distribution) of the cost column and
# then of the sales column.
for column_name in ('Final_Cost', 'Final_Sales'):
    skewness = data[column_name].skew()
    print("Skewness of '{}' column: {}".format(column_name, skewness))
Kurtosis
# Print the kurtosis (tail weight of the distribution) of the cost column and
# then of the sales column.
for column_name in ('Final_Cost', 'Final_Sales'):
    kurtosis_val = data[column_name].kurtosis()
    print("Kurtosis of '{}' column: {}".format(column_name, kurtosis_val))
Visualization
Scatter Plot
# Columns shown on the horizontal and vertical axis respectively.
x_column, y_column = 'Final_Cost', 'Final_Sales'

# Scatter the two columns against each other on a 10x6-inch figure;
# alpha=0.5 makes the markers semi-transparent so overlapping points show.
plt.figure(figsize=(10, 6))
plt.scatter(x=data[x_column], y=data[y_column], alpha=0.5)
plt.xlabel(x_column)
plt.ylabel(y_column)
plt.title('Scatter plot of {} vs {}'.format(y_column, x_column))
plt.grid(True)
plt.show()
Histogram
# Draw one 20-bin histogram per column of `data`, each in its own figure.
# The loop body must be indented: written flush-left, the `for` statement had
# no body and the script failed with an IndentationError.
# NOTE(review): every column is plotted, including any non-numeric ones, for
# which a 20-bin histogram may not be meaningful — consider iterating over
# data.select_dtypes(include='number') if that is not intended.
for column in data.columns:
    plt.figure(figsize=(8, 6))
    plt.hist(data[column], bins=20, color='skyblue', edgecolor='black')
    plt.title('Histogram of {}'.format(column))
    plt.xlabel(column)
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.show()
Boxplot
# Column whose distribution is summarised by the boxplot.
column_name = 'Final_Cost'

# Open a 10x6-inch figure and draw the boxplot on it, then label and show it.
plt.figure(figsize=(10, 6))
data.boxplot(column=[column_name])
plt.ylabel(column_name)
plt.title('Boxplot of {}'.format(column_name))
plt.grid(True)
plt.show()
Correlation Heatmap
# Pairwise Pearson correlation between the numeric columns.
# numeric_only=True is required on pandas >= 2.0, where corr() raises
# ValueError instead of silently skipping non-numeric columns.
# NOTE(review): assumes the CSV contains non-numeric columns — confirm.
corr_matrix = data.corr(numeric_only=True)

# Render the matrix as an annotated heatmap: each cell shows its coefficient
# to two decimals, on a diverging blue-to-red colour map.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()
Finding Missing Values
# Count the missing (NaN/None) entries in every column and print the tally.
missing_values = data.isna().sum()
print("Missing values:\n", missing_values)
Handling Missing Values
# Remove, in place, every row that has at least one missing value
# (axis=0 and how="any" are the pandas defaults, spelled out for clarity).
data.dropna(axis=0, how="any", inplace=True)