0% found this document useful (0 votes)
10 views3 pages

Code Python

Uploaded by

peopleslively
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views3 pages

Code Python

Uploaded by

peopleslively
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 3

import pandas as pd

import numpy as np

# Read the CSV file


def read_and_process_bank_data():
# Read the CSV file
df = pd.read_csv('/content/Book1 (1).csv')
print("Column names:", list([Link]))

# Clean column names by stripping whitespace **early**


[Link] = [Link]()

# Clean the data by removing rows where all key columns are NaN
df = [Link](subset=['Date', 'Description', 'Category'], how='all')

# Remove rows where Category is NaN (these are header rows or empty rows)
df = df[df['Category'].notna()]

# Convert Amount columns to numeric, handling commas and quotes


for col in ['Debit', 'Credit', 'Balance']:
if col in [Link]:
df[col] = df[col].astype(str).[Link](',', '').[Link]('"', '')
df[col] = pd.to_numeric(df[col], errors='coerce')

return df
def group_by_category_and_save():
try:
# Read and process the data
df = read_and_process_bank_data()

print("Original data shape:", [Link])


print("Categories found:", df['Category'].unique())

# Group by category and calculate summary statistics


category_summary = [Link]('Category').agg({
'Debit': ['count', 'sum', 'mean'],
'Credit': ['count', 'sum', 'mean'],
'Description': 'count' # Total number of transactions per category
}).round(2)

# Flatten column names


category_summary.columns = ['_'.join(col).strip() for col in
category_summary.columns]

# Rename columns for clarity


category_summary = category_summary.rename(columns={
'Debit_count': 'Debit_Transaction_Count',
'Debit_sum': 'Total_Debit_Amount',
'Debit_mean': 'Average_Debit_Amount',
'Credit_count': 'Credit_Transaction_Count',
'Credit_sum': 'Total_Credit_Amount',
'Credit_mean': 'Average_Credit_Amount',
'Description_count': 'Total_Transactions'
})

# Calculate net amount (Credit - Debit) for each category


category_summary['Net_Amount'] = (
category_summary['Total_Credit_Amount'].fillna(0) -
category_summary['Total_Debit_Amount'].fillna(0)
).round(2)

# Reset index to make Category a column


category_summary = category_summary.reset_index()

# Sort by total transaction count (descending)


category_summary = category_summary.sort_values('Total_Transactions',
ascending=False)

# Save to CSV
output_filename = 'bank_transactions_grouped_by_category.csv'
#category_summary.to_csv(output_filename, index=False)

print(f"\nData successfully grouped by category and saved to


'{output_filename}'")
print(f"\nSummary of categories:")
print("=" * 80)

# Display summary
for _, row in category_summary.iterrows():
print(f"Category: {row['Category']}")
print(f" Total Transactions: {row['Total_Transactions']}")
print(f" Total Debit: ₹{row['Total_Debit_Amount']:,.2f}" if not
[Link](row['Total_Debit_Amount']) else " Total Debit: ₹0.00")
print(f" Total Credit: ₹{row['Total_Credit_Amount']:,.2f}" if not
[Link](row['Total_Credit_Amount']) else " Total Credit: ₹0.00")
print(f" Net Amount: ₹{row['Net_Amount']:,.2f}")
print("-" * 40)

# Create detailed transactions by category


detailed_by_category = [Link]('Category').apply(
lambda x: x[['Date', 'Description', 'Debit', 'Credit', 'Balance',
'Name', 'Bank Name']].copy()
).reset_index(drop=True)

# Save detailed transactions by category


detailed_filename = 'detailed_transactions_by_category.csv'
for category in df['Category'].dropna().unique():
# Filter transactions for this category
category_df = df[df['Category'] == category].copy()

# Clean the category name to make it a safe filename


safe_category = "".join(c if [Link]() or c in (' ', '-', '_') else '_'
for c in category).strip().replace(' ', '_')
filename = f'category_{safe_category}.csv'

# Save to CSV
#category_df.to_csv(filename, index=False)
print(f" Saved: {filename}")

print(f"\nDetailed transactions by category saved to


'{detailed_filename}'")
# Combine all category CSVs into a single text file
combined_txt_filename = 'all_categories.txt'
with open(combined_txt_filename, 'w', encoding='utf-8') as combined_file:
for category in df['Category'].dropna().unique():
# Filter transactions
category_df = df[df['Category'] == category].copy()
# Clean category name
safe_category = "".join(c if [Link]() or c in (' ', '-', '_')
else '_' for c in category).strip().replace(' ', '_')

# Write header
combined_file.write(f"{'=' * 80}\n")
combined_file.write(f"CATEGORY: {category}\n")
combined_file.write(f"{'=' * 80}\n\n")

# Write DataFrame as text


combined_file.write(category_df.to_string(index=False))
combined_file.write("\n\n")

print(f"\nCombined category transactions written to


'{combined_txt_filename}'")

return category_summary

except FileNotFoundError:
print("Error: '[Link]' file not found. Please make sure the file is in
the same directory as this script.")
return None
except Exception as e:
print(f"An error occurred: {str(e)}")
return None

# Execute the function


if __name__ == "__main__":
result = group_by_category_and_save()

if result is not None:


print("\n" + "="*80)
print("EXECUTION COMPLETED SUCCESSFULLY")
print("="*80)
print("Files created:")
print("1. bank_transactions_grouped_by_category.csv - Summary by category")
print("2. detailed_transactions_by_category.csv - All transactions grouped
by category")
# Create separate CSV for each category
print("\nCreating separate CSV files for each category...")

You might also like