import pandas as pd
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Font
from collections import Counter
import os
# --- Load the file ---
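# Expected input columns (any letter case): TRANS DATE, DESCRIPTION, CREDITS, DEBITS, BALANCE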
df = pd.read_excel("sample.xlsx")
# Keep a clean copy for Full_Statement
df_cleaned_original = df.copy()
# --- Step 1: Preprocess for Monthly Summary ---
df.columns = df.columns.str.strip().str.upper()
df = df.rename(columns={"TRANS DATE": "DATE"})
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
df = df.dropna(subset=['DATE'])
# Normalize
df['DESCRIPTION'] = df['DESCRIPTION'].astype(str).str.upper()
df['CREDITS'] = pd.to_numeric(df.get('CREDITS', 0), errors='coerce').fillna(0)
df['DEBITS'] = pd.to_numeric(df.get('DEBITS', 0), errors='coerce').fillna(0)
df['BALANCE'] = pd.to_numeric(df.get('BALANCE', 0), errors='coerce')
df['DAY'] = df['DATE'].dt.day
df['MONTH'] = df['DATE'].dt.month
df['YEAR'] = df['DATE'].dt.year
df['IS_IWRETURN'] = df['DESCRIPTION'].str.contains('IWRETURN', na=False)
df['IS_OWRETURN'] = df['DESCRIPTION'].str.contains('OWRETURN', na=False)
df['IS_NON_BUSINESS'] = ~df['DESCRIPTION'].str.contains('SALARY|NEFT|RTGS|UPI|IMPS', na=False)
df['IS_EMI_BOUNCE'] = df['DESCRIPTION'].str.contains('EMI BOUNCE|BOUNCE', na=False)
df['IS_ODCC_INTEREST'] = df['DESCRIPTION'].str.contains('OD|CC INTEREST', na=False)
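# NOTE: 'OD|CC INTEREST' is a regex alternation, so any description containing 'OD' is also flagged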
# Daily last balance
daily_last_bal = df.sort_values('DATE').groupby(df['DATE'].dt.date)['BALANCE'].last().reset_index()
daily_last_bal.columns = ['Date', 'Closing Balance']
# Monthly summary
summary = df.groupby(['YEAR', 'MONTH']).agg(
    SumOfCredit=('CREDITS', 'sum'),
    SumOfDebit=('DEBITS', 'sum'),
    NoOfCredit=('CREDITS', lambda x: (x > 0).sum()),
    NoOfDebit=('DEBITS', lambda x: (x > 0).sum()),
    IWReturns=('IS_IWRETURN', 'sum'),
    OWReturns=('IS_OWRETURN', 'sum'),
    NonBusinessCreditsSUM=('CREDITS', lambda x: (df.loc[x.index, 'IS_NON_BUSINESS'] * x).sum()),
    NonBusinessCreditsInNo=('IS_NON_BUSINESS', 'sum'),
    NoOfEMIBounces=('IS_EMI_BOUNCE', 'sum'),
    MonthlyODCCInterest=('CREDITS', lambda x: (df.loc[x.index, 'IS_ODCC_INTEREST'] * x).sum())
).reset_index()
# Add daily last balances (columns 01–31)
for day in range(1, 32):
summary[f"{day:02}"] = summary.apply(
lambda row: df.loc[
(df['YEAR'] == row['YEAR']) & (df['MONTH'] == row['MONTH']) &
(df['DAY'] == day),
'BALANCE'
].iloc[-1] if not df.loc[
(df['YEAR'] == row['YEAR']) & (df['MONTH'] == row['MONTH']) &
(df['DAY'] == day)
].empty else 0,
axis=1
)
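# Each '01'..'31' column now holds the last recorded balance for that day of the month (0 if no transactions that day)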
# --- Step 2: Prepare Full_Statement ---
drop_cols = ['DAY', 'MONTH', 'YEAR', 'IS_IWRETURN', 'IS_OWRETURN',
'IS_NON_BUSINESS', 'IS_EMI_BOUNCE', 'IS_ODCC_INTEREST']
df_cleaned = df.drop(columns=drop_cols, errors='ignore')
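# (The Full_Statement sheet below is written from df_cleaned_original, the untouched copy kept at load time)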
# --- Step 3: Create Top Transactions Sheet ---
df_top = df_cleaned_original.copy()
df_top.columns = df_top.columns.str.strip().str.upper()
df_top['CREDITS'] = pd.to_numeric(df_top.get('CREDITS', 0), errors='coerce').fillna(0)
df_top['DEBITS'] = pd.to_numeric(df_top.get('DEBITS', 0), errors='coerce').fillna(0)
df_top['TRANS DATE'] = pd.to_datetime(df_top['TRANS DATE'], errors='coerce')
df_top = df_top.dropna(subset=['TRANS DATE'])
# Classification logic
def classify_transaction(desc):
desc = str(desc).upper()
if 'NEFT' in desc:
return 'NEFT'
elif 'RTGS' in desc:
return 'RTGS'
elif 'UPI' in desc:
return 'UPI'
elif 'IMPS' in desc:
return 'IMPS'
elif 'CLG' in desc or 'CHEQUE' in desc:
return 'Cheque Outward'
elif 'SELF' in desc or 'CASH' in desc:
return 'Cash Withdrawal'
else:
return 'Other'
# Overall Top 5
overall_credits = df_top[df_top['CREDITS'] > 0].nlargest(5, 'CREDITS').copy()
overall_debits = df_top[df_top['DEBITS'] > 0].nlargest(5, 'DEBITS').copy()
for df_txn, col in [(overall_credits, 'CREDITS'), (overall_debits, 'DEBITS')]:
    df_txn['AMOUNT'] = df_txn[col]
    df_txn['Classification Transaction'] = df_txn['DESCRIPTION'].apply(classify_transaction)
    df_txn['TRANS DATE'] = df_txn['TRANS DATE'].dt.strftime('%d-%m-%Y')
credit_table = overall_credits[['TRANS DATE', 'DESCRIPTION', 'AMOUNT', 'Classification Transaction']]
debit_table = overall_debits[['TRANS DATE', 'DESCRIPTION', 'AMOUNT', 'Classification Transaction']]
credit_table.columns = debit_table.columns = ['Extracted Date', 'Description', 'Amount', 'Classification Transaction']
# --- Monthly Top 5 ---
df_top['Month_Str'] = df_top['TRANS DATE'].dt.strftime('%B-%Y')
monthly_credits = df_top[df_top['CREDITS'] > 0].copy()
monthly_debits = df_top[df_top['DEBITS'] > 0].copy()
monthly_credit_dict = {}
monthly_debit_dict = {}
# Iterate months in chronological (not alphabetical) order
for month in sorted(df_top['Month_Str'].unique(), key=lambda m: pd.to_datetime(m, format='%B-%Y')):
    top_5_c = monthly_credits[monthly_credits['Month_Str'] == month].nlargest(5, 'CREDITS')
    top_5_d = monthly_debits[monthly_debits['Month_Str'] == month].nlargest(5, 'DEBITS')
credit_rows = []
for _, row in top_5_c.iterrows():
credit_rows.append([
row['TRANS DATE'].strftime('%d-%m-%Y'),
row['DESCRIPTION'],
row['CREDITS'],
classify_transaction(row['DESCRIPTION'])
])
monthly_credit_dict[month] = credit_rows
debit_rows = []
for _, row in top_5_d.iterrows():
debit_rows.append([
row['TRANS DATE'].strftime('%d-%m-%Y'),
row['DESCRIPTION'],
row['DEBITS'],
classify_transaction(row['DESCRIPTION'])
])
monthly_debit_dict[month] = debit_rows
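# monthly_credit_dict / monthly_debit_dict map each 'Month-Year' label to its top-5 rows of [date, description, amount, classification]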
# --- Step 4: Save to Excel ---
wb = Workbook()
# Monthly_Summary
ws1 = wb.active
ws1.title = "Monthly_Summary"
for r in dataframe_to_rows(summary, index=False, header=True):
ws1.append(r)
# Full_Statement
ws2 = wb.create_sheet("Full_Statement")
for r in dataframe_to_rows(df_cleaned_original, index=False, header=True):
ws2.append(r)
# Top_Transactions
ws3 = wb.create_sheet("Top_Transactions")
# Section 1: Overall Top 5
ws3.append(["Top 5 Transactions Credit", "", "", "", "", "Top 5 Transactions
Debit", "", "", ""])
ws3.append([])
ws3.append(["Extracted Date", "Description", "Amount", "Classification
Transaction", "",
"Extracted Date", "Description", "Amount", "Classification
Transaction"])
for row_c, row_d in zip(dataframe_to_rows(credit_table, index=False, header=False),
dataframe_to_rows(debit_table, index=False, header=False)):
ws3.append(row_c + [""] + row_d)
# Section 2: Monthly Top 5
ws3.append([])
ws3.append([])
ws3.append(["Top 5 Transactions Credits Monthly", "", "", "", "", "Top 5
Transactions Debits Monthly", "", "", ""])
for month in monthly_credit_dict.keys():
# Add month headers
ws3.append([month, "", "", "", "", month])
ws3.append(["Extracted Date", "Description", "Amount", "Classification
Transaction", "",
"Extracted Date", "Description", "Amount", "Classification
Transaction"])
credit_rows = monthly_credit_dict[month]
debit_rows = monthly_debit_dict.get(month, [])
max_len = max(len(credit_rows), len(debit_rows))
for i in range(max_len):
c_row = credit_rows[i] if i < len(credit_rows) else [""] * 4
d_row = debit_rows[i] if i < len(debit_rows) else [""] * 4
ws3.append(c_row + [""] + d_row)
ws3.append([]) # spacing
# --- Sheet 4: Daily_Balance ---
ws4 = wb.create_sheet("Daily_Balance")
# Prepare day-wise balance data
day_cols = [f"{day:02}" for day in range(1, 32)]
summary['Month_Label'] = pd.to_datetime(summary[['YEAR', 'MONTH']].assign(DAY=1)).dt.strftime('%B-%Y')
# Prepare daily balance summary table
daily_bal_summary = summary[['Month_Label'] + day_cols].copy()
# Add an average row at the end
avg_row = ['Average'] + [round(daily_bal_summary[day].mean(), 2) for day in day_cols]
daily_bal_summary.loc[len(daily_bal_summary)] = avg_row
# Write headers and rows to Sheet 4
ws4.append(['Month'] + day_cols)
for _, row in daily_bal_summary.iterrows():
ws4.append([row['Month_Label']] + list(row[day_cols]))
# --- Step 5: Fraud_Transactions Detection ---
df_fraud = df_cleaned_original.copy()
df_fraud.columns = df_fraud.columns.str.strip().str.upper()
# Ensure numeric types
df_fraud['CREDITS'] = pd.to_numeric(df_fraud.get('CREDITS', 0), errors='coerce').fillna(0)
df_fraud['DEBITS'] = pd.to_numeric(df_fraud.get('DEBITS', 0), errors='coerce').fillna(0)
df_fraud['BALANCE'] = pd.to_numeric(df_fraud.get('BALANCE', 0), errors='coerce')
# Parse transaction date and sort
df_fraud['TRANS DATE'] = pd.to_datetime(df_fraud['TRANS DATE'], errors='coerce')
df_fraud = df_fraud.sort_values('TRANS DATE').reset_index(drop=True)
# Load holiday list
holiday_dates = pd.to_datetime([
"2023-12-04", "2023-12-12", "2023-12-13", "2023-12-14", "2023-12-18",
"2023-12-19", "2023-12-25", "2023-12-26", "2023-12-30", "2024-01-26",
"2024-02-19", "2024-03-29", "2024-04-01", "2024-04-17", "2024-05-01",
"2024-06-06", "2024-06-07", "2024-06-11", "2024-06-15"
])
# --- STEP 1: Calculate expected balance from FIRST row ---
expected_balances = [df_fraud.loc[0, 'BALANCE']]
mismatch_flags = [False] # First row has no mismatch
for i in range(1, len(df_fraud)):
prev_balance = expected_balances[-1]
credit = df_fraud.loc[i, 'CREDITS']
debit = df_fraud.loc[i, 'DEBITS']
actual_balance = df_fraud.loc[i, 'BALANCE']
expected = prev_balance + credit - debit
expected_balances.append(expected)
mismatch = abs(expected - actual_balance) > 1.0
mismatch_flags.append(mismatch)
df_fraud['Expected_Balance'] = expected_balances
df_fraud['Mismatch'] = mismatch_flags
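# A row is flagged as a mismatch when the recomputed running balance differs from the stated balance by more than 1.0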
# --- STEP 2: Flag suspicious holiday debits (excluding UPI) ---
df_fraud['DESCRIPTION_LOWER'] = df_fraud['DESCRIPTION'].str.lower().fillna('')
df_fraud['IsUPI'] = df_fraud['DESCRIPTION_LOWER'].str.contains('upi|unified payment')
df_fraud['IsHolidayWithdrawal'] = (
df_fraud['TRANS DATE'].isin(holiday_dates) &
(df_fraud['DEBITS'] > 0) &
(~df_fraud['IsUPI']) &
df_fraud['Mismatch']
)
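# Holiday withdrawals are only flagged when they are non-UPI debits that also carry a balance mismatch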
# --- STEP 3: Filter fraud transactions ---
fraud_rows = df_fraud[df_fraud['Mismatch'] | df_fraud['IsHolidayWithdrawal']]
# --- STEP 4: Prepare output ---
fraud_output = fraud_rows[['TRANS DATE', 'DESCRIPTION', 'CREDITS', 'DEBITS', 'BALANCE']].copy()
fraud_output.rename(columns={
'TRANS DATE': 'Extracted Date',
'CREDITS': 'Credit',
'DEBITS': 'Debit'
}, inplace=True)
# --- STEP 5: Classify Transaction Type ---
def classify_transaction(description):
if not isinstance(description, str):
return "Unknown"
desc = description.lower()
if "upi" in desc:
return "UPI"
elif "credit" in desc or "cr" in desc:
return "Credit"
elif "debit" in desc or "withdrawal" in desc or "dr" in desc:
return "Debit"
return "Unknown"
fraud_output['Party Name'] = ""
fraud_output['Transaction Type'] = fraud_output['DESCRIPTION'].apply(classify_transaction)
# --- STEP 6: Write to Excel ---
ws5 = wb.create_sheet("Fraud_Transactions")
ws5.append(fraud_output.columns.tolist())
for row in fraud_output.itertuples(index=False):
    try:
        ws5.append(list(row))
    except Exception:
        # Fall back to a placeholder if a value cannot be written to the cell
        ws5.append(["Invalid data in row"])
# --- Step 6: Fraud_Indicators Sheet ---
fraud_indicator_rows = [
    ["Amount Balance Mismatch", "Transaction whose amount/balance do not match the previous transactions."],
    ["Irregular Interest Charges", "Interest charges which are not present in all months within a narrow date range."],
    ["Irregular Transfers to Parties", "Transactions categorised as Fund Transfers which are not present in all months within a narrow date range."],
    ["Irregular Salary Credits", "Salary credits which are not present in every month within a narrow date range."],
    ["Suspicious ATM Withdrawals", "ATM withdrawals whose amount is not a multiple of 100 or is outside the permissible range."],
    ["Transactions on Bank Holidays", "NEFT, RTGS and cheque deposit transactions cannot happen on bank holidays."],
    ["Suspicious RTGS Transactions", "RTGS transactions have a minimum amount limit as prescribed by RBI."],
    ["Suspicious Salary Credits", "Salary credit on bank holidays."],
    ["Salary Credit Amount Remains Unchanged over extended period", "Salary amount usually changes over time, particularly during the tail end of the financial year, due to changes in the TDS amount."],
    ["Round Figure Tax Payment", "Tax paid amounts are usually not round figures (multiples of 100)."],
    ["Negative EOD Balance", "EOD bank balance on any day is unlikely to be negative in a savings account."],
    ["Interest Credit Transactions", "Interest credit transactions should be periodic (monthly/quarterly/half-yearly)."],
    ["More and frequent Cash Deposit than Salary", "A higher number or larger amount of cash deposits than salary is highly unlikely."],
    ["Immediate big transactions after Salary Credits", "Withdrawal of a big amount immediately after a salary credit can indicate forged salary entries."],
    ["Equal Credit Debit", "The total amount or total number of credits and debits being exactly equal is extremely unlikely."],
]
# Create the Fraud_Indicators sheet (detailed format)
ws6 = wb.create_sheet("Fraud_Indicators")
# Updated Header
ws6.append(["Sr No", "Fraud Indicator", "Description", "Identified ?", "Transaction
Count"])
# Add rows with Sr No and placeholders
for i, row in enumerate(fraud_indicator_rows, start=1):
ws6.append([i, row[0], row[1], "No", 0])
# --- Step 7: Irregular_Transactions Sheet ---
# Step 1: Determine threshold for large transactions (95th percentile)
debit_threshold = df['DEBITS'].quantile(0.95)
credit_threshold = df['CREDITS'].quantile(0.95)
# Step 2: Count occurrence of each description
desc_counts = df['DESCRIPTION'].value_counts()
# Step 3: Mark irregular if:
# - High amount or
# - Rare description
df['IS_IRREGULAR'] = (
(df['DEBITS'] > debit_threshold) |
(df['CREDITS'] > credit_threshold) |
(df['DESCRIPTION'].apply(lambda x: desc_counts.get(x, 0)) <= 1)
)
# Step 4: Extract irregular transactions
irregular_df = df[df['IS_IRREGULAR']].copy()
irregular_df = irregular_df[['DATE', 'DESCRIPTION', 'DEBITS', 'CREDITS', 'BALANCE']]
irregular_df.columns = ['Date', 'Description', 'Debit', 'Credit', 'Balance']
irregular_df = irregular_df.sort_values('Date')
# Step 5: Write to new sheet
ws7 = wb.create_sheet("Irregular_Transactions")
ws7.append(irregular_df.columns.tolist())
for row in irregular_df.itertuples(index=False):
ws7.append(list(row))
#Ensure column names are consistent and datetime is parsed
df.columns = df.columns.str.strip().str.upper()
if 'DATE' in df.columns:
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
else:
raise KeyError("Expected column 'DATE' not found in DataFrame.")
# Create YearMonth column for grouping
df['YEARMONTH'] = df['DATE'].dt.to_period("M")
# Ensure DEBITS and CREDITS are numeric
for col in ['DEBITS', 'CREDITS']:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
else:
df[col] = 0.0
# Optional classifier (customize based on your keywords)
def classify_transaction(desc):
desc = str(desc).lower()
if any(word in desc for word in ['atm', 'withdraw', 'debit']):
return 'Use'
elif any(word in desc for word in ['salary', 'refund', 'credit', 'transfer']):
return 'Source'
return 'Other'
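# NOTE: classify_transaction is redefined once more; only this 'Use'/'Source'/'Other' version is visible from here on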
# Function to extract top uses and sources
def get_top_usage_source(df_sub):
uses_data, sources_data = [], []
grouped = df_sub.groupby("DESCRIPTION")
for desc, group in grouped:
total_debit = group["DEBITS"].sum()
total_credit = group["CREDITS"].sum()
combined_amounts = pd.concat([group["DEBITS"], group["CREDITS"]]).round(2)
common_amounts = Counter(combined_amounts.dropna()).most_common(1)
        most_common_amt, count_common = common_amounts[0] if common_amounts else (0, 0)
total_count = len(group)
classification = classify_transaction(desc)
row = {
"Description": desc,
"Total Sum of Transaction": round(total_debit + total_credit, 2),
"Total Count": total_count,
"Similar Transaction Amount": round(most_common_amt, 2),
"Count of Transaction": count_common,
"Classification Transaction": classification
}
if total_debit > total_credit:
uses_data.append(row)
elif total_credit > 0:
sources_data.append(row)
uses_df = pd.DataFrame(uses_data)
sources_df = pd.DataFrame(sources_data)
    if not uses_df.empty:
        uses_df = uses_df.sort_values("Total Sum of Transaction", ascending=False).head(10)
    if not sources_df.empty:
        sources_df = sources_df.sort_values("Total Sum of Transaction", ascending=False).head(10)
return uses_df, sources_df
# --- Step 8: Top_Uses_and_Sources Sheet ---
# Reuse the existing workbook if one already exists; otherwise create a new one
try:
wb
except NameError:
wb = Workbook()
ws8 = wb.create_sheet("Top_Uses_and_Sources")
ws8.append(["Top 10 Uses (Overall)"] + [""] * 5 + ["Top 10 Sources (Overall)"])
# Overall data
overall_uses_df, overall_sources_df = get_top_usage_source(df)
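# Write the uses and sources tables side by side, separated by one blank column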
ws8.append(list(overall_uses_df.columns) + [""] + list(overall_sources_df.columns))
for i in range(max(len(overall_uses_df), len(overall_sources_df))):
    use_row = list(overall_uses_df.iloc[i]) if i < len(overall_uses_df) else [""] * len(overall_uses_df.columns)
    source_row = list(overall_sources_df.iloc[i]) if i < len(overall_sources_df) else [""] * len(overall_sources_df.columns)
    ws8.append(use_row + [""] + source_row)
# Monthly breakdown
ws8.append([]); ws8.append(["Top 10 Uses and Sources Per Month"])
for month, group in df.groupby("YEARMONTH"):
ws8.append([]); ws8.append([f"Month: {month}"])
monthly_uses_df, monthly_sources_df = get_top_usage_source(group)
ws8.append(["Top 10 Uses"] + [""] * 5 + ["Top 10 Sources"])
ws8.append(list(monthly_uses_df.columns) + [""] +
list(monthly_sources_df.columns))
for i in range(max(len(monthly_uses_df), len(monthly_sources_df))):
use_row = list(monthly_uses_df.iloc[i]) if i < len(monthly_uses_df) else
[""] * len(monthly_uses_df.columns)
source_row = list(monthly_sources_df.iloc[i]) if i <
len(monthly_sources_df) else [""] * len(monthly_sources_df.columns)
ws8.append(use_row + [""] + source_row)
# --- Step 9: Account_Summary Sheet with Key Financial Metrics ---
from dateutil.relativedelta import relativedelta
# Get last available date in the dataset
latest_date = df['DATE'].max()
six_months_ago = latest_date - relativedelta(months=6)
twelve_months_ago = latest_date - relativedelta(months=12)
# Average Balance (overall)
avg_balance = round(df['BALANCE'].mean(), 2)
# Average Balance on 5th, 15th, and 25th
avg_balance_5_15_25 = round(df[df['DAY'].isin([5, 15, 25])]['BALANCE'].mean(), 2)
# Average Balance (last 6 months)
avg_balance_last_6 = round(df[df['DATE'] >= six_months_ago]['BALANCE'].mean(), 2)
# Average Receipt (last 6 months)
avg_receipt_6 = round(df[(df['DATE'] >= six_months_ago) & (df['CREDITS'] > 0)]['CREDITS'].mean(), 2)
# I/W Return
iw_return_count = int(df['IS_IWRETURN'].sum())
# O/W Return
ow_return_count = int(df['IS_OWRETURN'].sum())
# Average Balance (last 12 months)
avg_balance_last_12 = round(df[df['DATE'] >= twelve_months_ago]['BALANCE'].mean(), 2)
# Average Receipt (last 12 months)
avg_receipt_12 = round(df[(df['DATE'] >= twelve_months_ago) & (df['CREDITS'] > 0)]['CREDITS'].mean(), 2)
# Total Gross Credits
total_gross_credits = round(df['CREDITS'].sum(), 2)
# Total Net Credits (excluding IW returns)
total_net_credits = round(df[~df['IS_IWRETURN']]['CREDITS'].sum(), 2)
# Total Gross Debits
total_gross_debits = round(df['DEBITS'].sum(), 2)
# --- Create 9th Sheet ---
ws9 = wb.create_sheet("New_Analysis")
# Format header
ws9.append(["Metric", "Value"])
# Append rows
rows = [
["Average Balance", avg_balance],
["Average Balance(5,15,25)", avg_balance_5_15_25],
["Average Balance(Last 6 Month)", avg_balance_last_6],
["Average Receipt(6 months)", avg_receipt_6],
["I/W Return", iw_return_count],
["O/W Return", ow_return_count],
["Average Balance(Last 12 Month)", avg_balance_last_12],
["Average Receipt(12 Month)", avg_receipt_12],
["Total Gross Credits", total_gross_credits],
["Total Net Credits", total_net_credits],
["Total Gross Debits", total_gross_debits]
]
for r in rows:
ws9.append(r)
# Leave space and label
ws9.append([])
ws9.append(["Monthly Detailed Metrics"])
ws9.append([])
# Ensure 'MONTH_YEAR' and 'DAY' columns exist
df['MONTH_YEAR'] = df['DATE'].dt.strftime('%b %Y')
df['DAY'] = df['DATE'].dt.day
# Group by Month-Year
monthly_group = df.groupby('MONTH_YEAR')
monthly_data = []
# Safe helper
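# count_and_sum returns [count, total amount] for rows where the boolean flag column is set,
# or [0, 0.0] when that flag column does not exist (most IS_* flags referenced below are never
# created earlier in this script, so those metrics fall back to zeros)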
def count_and_sum(group, col_flag, value_col):
if col_flag in group.columns:
return [
int(group[col_flag].sum()),
round(group.loc[group[col_flag], value_col].sum(), 2)
]
return [0, 0.0]
# Process each month
for label, group in monthly_group:
row = [label] # Month-Year
# Balances on 5th, 15th, 25th and Monthly Avg
for d in [5, 15, 25]:
row.append(round(group[group['DAY'] == d]['BALANCE'].mean(), 2))
row.append(round(group['BALANCE'].mean(), 2))
# Credit/Debit stats
    iw_flag = group['IS_IWRETURN'] if 'IS_IWRETURN' in group else pd.Series([False] * len(group), index=group.index)
    ow_flag = group['IS_OWRETURN'] if 'IS_OWRETURN' in group else pd.Series([False] * len(group), index=group.index)
row.extend([
group[group['CREDITS'] > 0].shape[0],
round(group['CREDITS'].sum(), 2),
group[group['DEBITS'] > 0].shape[0],
round(group['DEBITS'].sum(), 2),
int(iw_flag.sum()),
int(ow_flag.sum()),
group[(group['CREDITS'] > 0) & (~iw_flag)].shape[0],
round(group[(group['CREDITS'] > 0) & (~iw_flag)]['CREDITS'].sum(), 2),
group[(group['DEBITS'] > 0) & (~ow_flag)].shape[0],
round(group[(group['DEBITS'] > 0) & (~ow_flag)]['DEBITS'].sum(), 2),
])
# Other flags
row += count_and_sum(group, 'IS_CASH_WITHDRAWAL', 'DEBITS')
row += count_and_sum(group, 'IS_ATM', 'DEBITS')
row += count_and_sum(group, 'IS_CASH_DEPOSIT', 'CREDITS')
row += count_and_sum(group, 'IS_CHEQUE_RETURN_CHARGE', 'DEBITS')
row += count_and_sum(group, 'IS_CHEQUE_INWARD_BOUNCE', 'DEBITS')
row += count_and_sum(group, 'IS_CHEQUE_OUTWARD_BOUNCE', 'DEBITS')
row += count_and_sum(group, 'IS_PAYMENT_INWARD_BOUNCE', 'DEBITS')
row += count_and_sum(group, 'IS_PAYMENT_OUTWARD_BOUNCE', 'DEBITS')
row += count_and_sum(group, 'IS_PAYMENT_BOUNCE_CHARGE', 'DEBITS')
row += count_and_sum(group, 'IS_CHEQUE_DEPOSIT', 'CREDITS')
row += count_and_sum(group, 'IS_CHEQUE_ISSUE', 'DEBITS')
row += count_and_sum(group, 'IS_CREDIT_INTERNAL_TRANSFER', 'CREDITS')
row += count_and_sum(group, 'IS_DEBIT_INTERNAL_TRANSFER', 'DEBITS')
row += count_and_sum(group, 'IS_LOAN_DISBURSAL', 'CREDITS')
row += count_and_sum(group, 'IS_INTEREST_RECEIVED', 'CREDITS')
row += count_and_sum(group, 'IS_INTEREST_PAID', 'DEBITS')
# Salary
if 'IS_SALARY' in group.columns:
row += [
round(group[group['IS_SALARY']]['CREDITS'].sum(), 2),
round(group[group['IS_SALARY']]['DEBITS'].sum(), 2)
]
else:
row += [0.0, 0.0]
# Holiday and charges
row += count_and_sum(group, 'IS_HOLIDAY_TRANSACTION', 'DEBITS')
row += count_and_sum(group, 'IS_MIN_BAL_CHARGE', 'DEBITS')
# Cash deposit ranges
    row.append(((group['CREDITS'] >= 900000) & (group['CREDITS'] <= 1000000)).sum())
    row.append(((group['CREDITS'] >= 40000) & (group['CREDITS'] <= 50000)).sum())
# ATM withdrawals > 2000
if 'IS_ATM' in group.columns:
row.append(((group['IS_ATM']) & (group['DEBITS'] > 2000)).sum())
else:
row.append(0)
# Min, Max, Avg balance
row += [
round(group['BALANCE'].min(), 2),
round(group['BALANCE'].max(), 2),
round(group['BALANCE'].mean(), 2)
]
monthly_data.append(row)
# Final column headers - one header per value appended above (59 columns per month row)
monthly_headers = [
    "Month-Year", "5th Balance", "15th Balance", "25th Balance", "Monthly Avg Balance",
    "No. of Credit Txns", "Total Credit Amt",
    "No. of Debit Txns", "Total Debit Amt",
    "No. of IW Returns", "No. of OW Returns",
    "Net Credit Txns", "Net Credit Amt",
    "Net Debit Txns", "Net Debit Amt",
    "Cash Withdrawal Txns", "Cash Withdrawal Amt",
    "ATM Withdrawal Txns", "ATM Withdrawal Amt",
    "Cash Deposit Txns", "Cash Deposit Amt",
    "Cheque Return Charge Txns", "Cheque Return Charge Amt",
    "Cheque Inward Bounce Txns", "Cheque Inward Bounce Amt",
    "Cheque Outward Bounce Txns", "Cheque Outward Bounce Amt",
    "Payment Inward Bounce Txns", "Payment Inward Bounce Amt",
    "Payment Outward Bounce Txns", "Payment Outward Bounce Amt",
    "Payment Bounce Charge Txns", "Payment Bounce Charge Amt",
    "Cheque Deposit Txns", "Cheque Deposit Amt",
    "Cheque Issue Txns", "Cheque Issue Amt",
    "Credit Internal Transfer Txns", "Credit Internal Transfer Amt",
    "Debit Internal Transfer Txns", "Debit Internal Transfer Amt",
    "Loan Disbursal Txns", "Loan Disbursal Amt",
    "Interest Received Txns", "Interest Received Amt",
    "Interest Paid Txns", "Interest Paid Amt",
    "Salary Credit", "Salary Debit",
    "Holiday Txns", "Holiday Txn Amt",
    "Minimum Balance Charge Txns", "Minimum Balance Charge Amt",
    "Cash Deposit ₹9–10L", "Cash Deposit ₹40–50K",
    "ATM Withdrawal > ₹2000",
    "Min Balance", "Max Balance", "Avg Balance"
]
# Safe Excel row handler
def safe_row(row):
    return [str(x) if isinstance(x, (list, dict)) or pd.isna(x) else x for x in row]
# Add to Excel
ws9.append(safe_row(monthly_headers))
for row in monthly_data:
ws9.append(safe_row(row))
# --- Save ---
wb.save("Analysis_Output28.xlsx")
print("✅ Final file 'Analysis_Output.xlsx' saved with Daily_Balance sheet added.")