# SALES FORECASTING USING WALMART DATASET
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
import seaborn as sns
# Load the Walmart CSV into a DataFrame.
df = pd.read_csv("C:/Users/Bobby/Downloads/Walmart.csv")

# Quick-look diagnostics, left disabled:
# print(df.info())
# print(df.describe())
# print(df.isna().sum())

# First 300 rows (by position) of the three columns used by the scatter plot.
sales = df['Weekly_Sales'].iloc[:300]
Temp = df['Temperature'].iloc[:300]
fuel = df["Fuel_Price"].iloc[:300]
# Histogram of every 'Weekly_Sales' value in the frame, split into 50 bins.
fig, ax = plt.subplots()
# No `bottom=` argument: it sets the baseline the bars are drawn from, and the
# previous `bottom=True` was interpreted as 1, lifting every bar by one count.
ax.hist(
    df['Weekly_Sales'],
    bins=50,
    edgecolor='black',
    label='Weekly Sales',
    histtype='stepfilled',
    align='mid',
)
ax.legend(loc="upper right")
ax.set_title("Histogram of weekly sales:")
ax.set_xlabel("Sales")
ax.set_ylabel("Freq")
# Scatter of the pre-sliced Temp / sales series on a new figure, with points
# coloured by the matching fuel-price series.
plt.figure()
scatter_ax = plt.subplot()
sns.scatterplot(
    x=Temp,
    y=sales,
    hue=fuel,
    markers=['o', '*'],
    data=df,
    ax=scatter_ax,
)
scatter_ax.set_title('Scatter Plot: Temperature vs. Weekly Sales')
# Correlation over the numeric columns only. 'Date' has not been parsed yet at
# this point, and pandas >= 2.0 raises if DataFrame.corr() meets a non-numeric
# column, so the frame is restricted to numeric dtypes first.
# (Not used again in this section; kept so the name stays defined.)
corr_metrix = df.select_dtypes(include='number').corr()

# Parse 'Date' into datetimes.
# NOTE(review): the CSV's date layout is not visible here; if it is day-first
# (DD-MM-YYYY), pass dayfirst=True or an explicit format= -- confirm.
df['Date'] = pd.to_datetime(df['Date'])
# Set 'Date' as the index for time series analysis
df.set_index('Date', inplace=True)
# Numeric stand-in for the date: whole days elapsed since the earliest date
df['Numeric_Date'] = (df.index - df.index.min()).days

# Correlation matrix of all remaining columns (now including Numeric_Date)
correlation_matrix = df.corr()

# Draw the annotated heatmap on its own figure
plt.figure()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", cbar=True)
plt.title('Correlation Matrix')
# Bar chart counting each 'Holiday_Flag' value among the first 300 rows.
plt.figure()
holiday_flags = df['Holiday_Flag'].iloc[:300]
count_ax = sns.countplot(x=holiday_flags, data=df)
count_ax.set_title('Count of Holidays')
count_ax.set_xlabel('Holiday Flag')
count_ax.set_ylabel('Count')
# Line plot of the first 300 'Weekly_Sales' values against the series index.
plt.figure()
sales_head = df['Weekly_Sales'].iloc[:300]
line_ax = plt.gca()
line_ax.plot(sales_head, scalex=True, scaley=True)
line_ax.set_title('Weekly Sales Over Time')
line_ax.set_xlabel('Date')
line_ax.set_ylabel('Weekly Sales')
# Box plot of weekly sales totals per date, expressed in millions.
plt.figure()

# Collapse to one row per date: 'Weekly_Sales' summed over all rows sharing
# that date. The result stays a DataFrame indexed by 'Date'.
df = df.groupby('Date')[['Weekly_Sales']].sum()

# Convert to millions (divide by 1,000,000; the previous divisor of 100,000
# made the values ten times too large). Only the first 300 dates are filled;
# any later rows are left as NaN by index alignment.
df['Weekly_Sales_Millions'] = df['Weekly_Sales'].iloc[:300] / 1e6

# Vertical box: the values go on the y-axis so that the "Sales (Millions)"
# label and the dollar formatter below apply to the axis carrying the data.
sns.boxplot(y='Weekly_Sales_Millions', data=df)
plt.title('Box Plot: Weekly Sales (in Millions)')
plt.xlabel('Weekly Sales')
plt.ylabel('Sales (Millions)')

# Format y-axis tick labels as dollar amounts in millions, e.g. "$45M"
plt.gca().yaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}M'))

# Display every figure created so far
plt.show()