import pandas as pd
import matplotlib.pyplot as plt
# Extracting data
data = pd.read_csv('Amazon Sales data.csv')
data.head()  # bare expression; its result is not stored or printed by this script
# Transforming data
# Parse the order dates once and derive every calendar column from that
# parsed series instead of re-reading the column for each derivation.
order_dates = pd.to_datetime(data['Order Date'])
data['Order Date'] = order_dates
data['Year'] = order_dates.dt.year
data['Month'] = order_dates.dt.month
# Monthly period ('M') of each order date, used as a year+month grouping key.
data['Year_Month'] = order_dates.dt.to_period('M')
# sales trends analysis
# Sum 'Total Revenue' once per time grouping column; the three results are
# unpacked in the same order as the grouping columns listed below.
monthly_sales, yearly_sales, yearly_monthly_sales = (
    data.groupby(period_column)['Total Revenue'].sum()
    for period_column in ('Month', 'Year', 'Year_Month')
)
# Plotting sales trends
# One figure with three stacked panels. Each entry describes a panel as
# (series to draw, keyword arguments for Series.plot, x-axis label).
plt.figure(figsize=(12, 6))
trend_panels = (
    (monthly_sales, {'kind': 'bar', 'title': 'Monthly Sales Trend'}, 'Month'),
    (yearly_sales, {'kind': 'bar', 'title': 'Yearly Sales Trend'}, 'Year'),
    (
        yearly_monthly_sales,
        {'kind': 'line', 'marker': 'o', 'title': 'Yearly-Monthly Sales Trend'},
        'Year-Month',
    ),
)
for panel_number, (trend, plot_options, x_label) in enumerate(trend_panels, start=1):
    # plt.subplot creates the panel and returns its axes; drawing and
    # labelling go through that handle rather than the implicit current axes.
    panel = plt.subplot(3, 1, panel_number)
    trend.plot(ax=panel, **plot_options)
    panel.set_xlabel(x_label)
    panel.set_ylabel('Total Revenue')
plt.tight_layout()
plt.show()
# Identify key metrics and factors
# Each revenue table below is a per-group sum of 'Total Revenue'; all but the
# sales-channel table are sorted from largest to smallest before printing.
sales_by_order_priority = (
    data.groupby('Order Priority')['Total Revenue']
    .sum()
    .sort_values(ascending=False)
)
print("\nSales by Order Priority:\n", sales_by_order_priority)

# The same 'Item Type' grouping feeds both the revenue total and the mean
# unit price, so it is built once and reused.
item_type_groups = data.groupby('Item Type')
sales_by_category = (
    item_type_groups['Total Revenue'].sum().sort_values(ascending=False)
)
print("\nSales by Product Category:\n", sales_by_category)
avg_unit_price_by_category = (
    item_type_groups['Unit Price'].mean().sort_values(ascending=False)
)
print("\nAverage Unit Price by Product Category:\n", avg_unit_price_by_category)

sales_by_country = (
    data.groupby('Country')['Total Revenue']
    .sum()
    .sort_values(ascending=False)
)
print("\nSales by Country:\n", sales_by_country)

sales_by_region = (
    data.groupby('Region')['Total Revenue']
    .sum()
    .sort_values(ascending=False)
)
print("\nSales by Region:\n", sales_by_region)

# Channel totals are left in group-key order (no sort is applied here).
sales_by_channel = data.groupby('Sales Channel')['Total Revenue'].sum()
print("\nSales by Sales Channel:\n", sales_by_channel)
# Plotting key metric and factor
# A 3x2 grid: one vertical bar chart, four horizontal bar charts, one pie.
fig, axs = plt.subplots(3, 2, figsize=(15, 15), constrained_layout=True)

# Plot sales by order priority (vertical bars)
priority_panel = axs[0, 0]
priority_panel.bar(sales_by_order_priority.index, sales_by_order_priority.values)
priority_panel.set_title('Sales by Order Priority')
priority_panel.set_xlabel('Order Priority')
priority_panel.set_ylabel('Total Revenue')

# Horizontal bar panels, described as
# (axes, series, title, x-axis label, y-axis label).
# Only the first 10 rows of the country table are drawn for better visualization.
top_countries = sales_by_country.head(10)
horizontal_panels = (
    (axs[0, 1], sales_by_category,
     'Sales by Product Category', 'Total Revenue', 'Product Category'),
    (axs[1, 0], avg_unit_price_by_category,
     'Average Unit Price by Product Category', 'Average Unit Price', 'Product Category'),
    (axs[1, 1], top_countries,
     'Sales by Country (Top 10)', 'Total Revenue', 'Country'),
    (axs[2, 0], sales_by_region,
     'Sales by Region', 'Total Revenue', 'Region'),
)
for panel, metric, panel_title, x_label, y_label in horizontal_panels:
    panel.barh(metric.index, metric.values)
    panel.set_title(panel_title)
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)

# Plot sales by sales channel (pie with one-decimal percentage labels)
channel_panel = axs[2, 1]
channel_panel.pie(sales_by_channel, labels=sales_by_channel.index, autopct='%1.1f%%')
channel_panel.set_title('Sales by Sales Channel')
plt.show()