import pandas as pd
import numpy as np
# Read the CSV file
def read_and_process_bank_data():
# Read the CSV file
df = pd.read_csv('/content/Book1 (1).csv')
print("Column names:", list([Link]))
# Clean column names by stripping whitespace **early**
[Link] = [Link]()
# Clean the data by removing rows where all key columns are NaN
df = [Link](subset=['Date', 'Description', 'Category'], how='all')
# Remove rows where Category is NaN (these are header rows or empty rows)
df = df[df['Category'].notna()]
# Convert Amount columns to numeric, handling commas and quotes
for col in ['Debit', 'Credit', 'Balance']:
if col in [Link]:
df[col] = df[col].astype(str).[Link](',', '').[Link]('"', '')
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
def group_by_category_and_save():
try:
# Read and process the data
df = read_and_process_bank_data()
print("Original data shape:", [Link])
print("Categories found:", df['Category'].unique())
# Group by category and calculate summary statistics
category_summary = [Link]('Category').agg({
'Debit': ['count', 'sum', 'mean'],
'Credit': ['count', 'sum', 'mean'],
'Description': 'count' # Total number of transactions per category
}).round(2)
# Flatten column names
category_summary.columns = ['_'.join(col).strip() for col in
category_summary.columns]
# Rename columns for clarity
category_summary = category_summary.rename(columns={
'Debit_count': 'Debit_Transaction_Count',
'Debit_sum': 'Total_Debit_Amount',
'Debit_mean': 'Average_Debit_Amount',
'Credit_count': 'Credit_Transaction_Count',
'Credit_sum': 'Total_Credit_Amount',
'Credit_mean': 'Average_Credit_Amount',
'Description_count': 'Total_Transactions'
})
# Calculate net amount (Credit - Debit) for each category
category_summary['Net_Amount'] = (
category_summary['Total_Credit_Amount'].fillna(0) -
category_summary['Total_Debit_Amount'].fillna(0)
).round(2)
# Reset index to make Category a column
category_summary = category_summary.reset_index()
# Sort by total transaction count (descending)
category_summary = category_summary.sort_values('Total_Transactions',
ascending=False)
# Save to CSV
output_filename = 'bank_transactions_grouped_by_category.csv'
#category_summary.to_csv(output_filename, index=False)
print(f"\nData successfully grouped by category and saved to
'{output_filename}'")
print(f"\nSummary of categories:")
print("=" * 80)
# Display summary
for _, row in category_summary.iterrows():
print(f"Category: {row['Category']}")
print(f" Total Transactions: {row['Total_Transactions']}")
print(f" Total Debit: ₹{row['Total_Debit_Amount']:,.2f}" if not
[Link](row['Total_Debit_Amount']) else " Total Debit: ₹0.00")
print(f" Total Credit: ₹{row['Total_Credit_Amount']:,.2f}" if not
[Link](row['Total_Credit_Amount']) else " Total Credit: ₹0.00")
print(f" Net Amount: ₹{row['Net_Amount']:,.2f}")
print("-" * 40)
# Create detailed transactions by category
detailed_by_category = [Link]('Category').apply(
lambda x: x[['Date', 'Description', 'Debit', 'Credit', 'Balance',
'Name', 'Bank Name']].copy()
).reset_index(drop=True)
# Save detailed transactions by category
detailed_filename = 'detailed_transactions_by_category.csv'
for category in df['Category'].dropna().unique():
# Filter transactions for this category
category_df = df[df['Category'] == category].copy()
# Clean the category name to make it a safe filename
safe_category = "".join(c if [Link]() or c in (' ', '-', '_') else '_'
for c in category).strip().replace(' ', '_')
filename = f'category_{safe_category}.csv'
# Save to CSV
#category_df.to_csv(filename, index=False)
print(f" Saved: {filename}")
print(f"\nDetailed transactions by category saved to
'{detailed_filename}'")
# Combine all category CSVs into a single text file
combined_txt_filename = 'all_categories.txt'
with open(combined_txt_filename, 'w', encoding='utf-8') as combined_file:
for category in df['Category'].dropna().unique():
# Filter transactions
category_df = df[df['Category'] == category].copy()
# Clean category name
safe_category = "".join(c if [Link]() or c in (' ', '-', '_')
else '_' for c in category).strip().replace(' ', '_')
# Write header
combined_file.write(f"{'=' * 80}\n")
combined_file.write(f"CATEGORY: {category}\n")
combined_file.write(f"{'=' * 80}\n\n")
# Write DataFrame as text
combined_file.write(category_df.to_string(index=False))
combined_file.write("\n\n")
print(f"\nCombined category transactions written to
'{combined_txt_filename}'")
return category_summary
except FileNotFoundError:
print("Error: '[Link]' file not found. Please make sure the file is in
the same directory as this script.")
return None
except Exception as e:
print(f"An error occurred: {str(e)}")
return None
# Execute the function
if __name__ == "__main__":
result = group_by_category_and_save()
if result is not None:
print("\n" + "="*80)
print("EXECUTION COMPLETED SUCCESSFULLY")
print("="*80)
print("Files created:")
print("1. bank_transactions_grouped_by_category.csv - Summary by category")
print("2. detailed_transactions_by_category.csv - All transactions grouped
by category")
# Create separate CSV for each category
print("\nCreating separate CSV files for each category...")