import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Read the financial dataset from the CSV file in the working directory.
df = pd.read_csv("financial_analysis.csv")
# Preview the first five rows as a quick sanity check of the load.
print(df.head())
# Collect the labels of the numeric columns; non-numeric columns
# (e.g., 'Company') are left out of the imputation below.
numeric_cols = df.select_dtypes(include="number").columns
# Impute missing numeric values with the mean of their own column.
numeric_frame = df[numeric_cols]
column_means = numeric_frame.mean()
df[numeric_cols] = numeric_frame.fillna(column_means)
# Standardize the numeric features: fit the scaler on them, then transform
# the same data to obtain the matrix that is clustered.
scaler = StandardScaler()
scaler.fit(df[numeric_cols])
scaled_data = scaler.transform(df[numeric_cols])
# K-Means with 3 clusters; random_state is fixed so repeated runs on the
# same data are reproducible, and n_init=10 is set explicitly.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(scaled_data)
# Store each row's cluster label alongside the original data.
df['Cluster'] = kmeans.labels_
# Scatter plot of the clusters on the first two numeric columns.
# Indexing numeric_cols[0] / numeric_cols[1] raises IndexError when the data
# has fewer than two numeric columns, so that case is reported and the plot is
# skipped, letting the rest of the script continue.
if len(numeric_cols) >= 2:
    x_col, y_col = numeric_cols[0], numeric_cols[1]
    plt.figure(figsize=(8, 6))
    # Points are colored by their 'Cluster' label.
    sns.scatterplot(x=df[x_col], y=df[y_col], hue=df['Cluster'],
                    palette='viridis')
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.title("Clustering Analysis")
    plt.show()
else:
    print("Skipping cluster scatter plot: fewer than two numeric columns.")
# Histogram of a financial metric (e.g., Revenue).
# The column name is hard-coded, so confirm it exists before plotting; a
# dataset without it would otherwise stop the script with a KeyError.
hist_col = 'Revenue'
if hist_col in df.columns:
    plt.figure(figsize=(8, 6))
    # 30 bins with a kernel density estimate drawn over the bars.
    sns.histplot(df[hist_col], bins=30, kde=True, color='blue')
    plt.title(f"{hist_col} Distribution")
    plt.show()
else:
    print(f"Skipping histogram: column '{hist_col}' not found in the data.")
# Annotated heatmap of the pairwise correlations between the columns listed
# in numeric_cols.
plt.figure(figsize=(10, 6))
corr_matrix = df[numeric_cols].corr()
sns.heatmap(data=corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()