from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Script: draw year-wise box plots of each variable for every monsoon month
# (June to October) and save one PNG per (month, variable) pair.

# Load your data
df = pd.read_csv(r'D:\pythonProject1\venv\Rain_imd\IMD_DataSET\abu_box.csv')

# Remove extra spaces in column names (if any)
df.columns = df.columns.str.strip()

# Convert 'Date' to datetime format; entries that cannot be parsed become NaT.
# Change dayfirst=True if the format is DD/MM/YYYY.
df["Date"] = pd.to_datetime(df["Date"], dayfirst=False, errors="coerce")

# Check for NaT (Not a Time) values
if df["Date"].isna().any():
    print("Warning: Some dates could not be parsed!")

# Extract month and year from the 'Date' column
df["Month_Num"] = df["Date"].dt.month
df["Year"] = df["Date"].dt.year

# Filter the data for Monsoon months (June to October). Rows whose date is
# NaT have a NaN month and are excluded by the range test.
monsoon_df = df[df["Month_Num"].between(6, 10)].copy()

# A single NaT in 'Date' makes the extracted columns float, which would show
# up as "2001.0" on the x axis. The monsoon subset has no missing values left,
# so it is safe to cast back to integers here.
monsoon_df["Month_Num"] = monsoon_df["Month_Num"].astype(int)
monsoon_df["Year"] = monsoon_df["Year"].astype(int)

# Set plot style
sns.set(style="whitegrid", font_scale=1.2)

# Define correct column names (adjust as necessary based on column names)
variables = ["Prp (mm)", "tmin (celcius)", "tmax (celcius)", "Flow (cumecs)"]
titles = [
    "Precipitation (mm)",
    "Min Temperature (°C)",
    "Max Temperature (°C)",
    "Flow (cumecs)",
]

# Directory to save plots; it is created here if it does not exist yet.
output_directory = r"D:\pythonProject1\venv\Rain_imd\IMD_DataSET\Monsoon_Box_Plots"
output_path = Path(output_directory)
output_path.mkdir(parents=True, exist_ok=True)

# Turns a column name into a file-name fragment: spaces become underscores
# and parentheses are removed, e.g. "Prp (mm)" -> "Prp_mm".
filename_chars = str.maketrans({" ": "_", "(": None, ")": None})

# Create box plots for each month (June to October)
for month in range(6, 11):
    # Get the abbreviated month name (e.g., Jun, Jul, etc.)
    month_name = pd.to_datetime(f"2020-{month:02d}-01").strftime("%b")

    # The month subset is the same for every variable, so select it once.
    month_data = monsoon_df[monsoon_df["Month_Num"] == month]
    if month_data.empty:
        print(f"No data for {month_name}; skipping its plots.")
        continue

    for var, title in zip(variables, titles):
        # Create a new figure for each plot
        fig = plt.figure(figsize=(10, 6))

        # NOTE: seaborn >= 0.13 warns when `palette` is given without `hue`.
        # The suggested replacement (hue="Year", legend=False) is not accepted
        # by older seaborn releases, so the call is left as it was.
        sns.boxplot(x="Year", y=var, data=month_data, palette="coolwarm")
        plt.title(f"{title} - {month_name} (Year-wise)")
        plt.xlabel("Year")
        plt.ylabel(title)
        plt.xticks(rotation=90)  # Rotate x-axis labels for better readability

        # Save the plot as a PNG file
        plot_filename = f"{month_name}_{var.translate(filename_chars)}_box_plot.png"
        plt.tight_layout()
        plt.savefig(output_path / plot_filename)

        # Close the figure to free memory and avoid display, since it's saved
        plt.close(fig)