7/25/25, 8:45 PM week3 AND MORE.ipynb - Colab
keyboard_arrow_down Step 1: Import Required Libraries
# Step 1: Import Required Libraries
import warnings

# Registered before the remaining imports, so the filter is already active
# while they load.
warnings.filterwarnings(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
keyboard_arrow_down Step 2: Load and Preprocess Data
# Load dataset
# NOTE(review): the CSV filename was replaced by the '[Link]' placeholder when
# this notebook was exported; restore the real path before running.
df = pd.read_csv('[Link]', parse_dates=['Order Date', 'Ship Date'], encoding='latin1')
# NOTE(review): reconstructed from a garbled export line; presumably this
# strips stray whitespace from the header names -- confirm against the notebook.
df.columns = df.columns.str.strip()
# Order rows chronologically and index by order date; the later resample()
# calls operate on this index.
df.sort_values('Order Date', inplace=True)
df.set_index('Order Date', inplace=True)
# Display first few rows
print("Sample data:")
print(df[['Sales', 'Ship Date']].head())
print("\nAll Columns:", df.columns.tolist())
Sample data:
Sales Ship Date
Order Date
2014-01-06 2573.820 2014-01-10
2014-01-07 76.728 2014-01-12
2014-01-10 51.940 2014-01-15
2014-01-11 9.940 2014-01-14
2014-01-13 545.940 2014-01-16
All Columns: ['Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State',
# Show the column Index of the loaded frame.
print(df.columns)
Index(['Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID',
'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code',
'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name',
'Sales', 'Quantity', 'Discount', 'Profit'],
dtype='object')
keyboard_arrow_down Step 3: Aggregate Monthly Sales
# Step 3: Aggregate Monthly Sales (Central Region)
def get_monthly_sales(region, data=None):
    """Return month-end totals of ``Sales`` for one region.

    Args:
        region: Value matched exactly against the ``Region`` column.
        data: Frame with ``Region`` and ``Sales`` columns and a datetime
            index (``resample`` requires one). Defaults to the module-level
            ``df`` when omitted, which is the original behaviour.

    Returns:
        A one-column DataFrame (``Sales``) indexed by month end.
    """
    source = df if data is None else data
    region_sales = source.loc[source['Region'] == region, 'Sales']
    # An explicit MonthEnd offset gives the same bins as the old 'M' alias
    # without depending on that alias, which newer pandas deprecates.
    return region_sales.resample(pd.offsets.MonthEnd()).sum().to_frame()
monthly_sales = get_monthly_sales('Central')
# Plot monthly sales
plt.figure(figsize=(10, 5))
plt.plot(monthly_sales, marker='o')
plt.title("Monthly Sales Trend - Central Region")
plt.xlabel("Date")
plt.ylabel("Sales")
plt.grid(True)
plt.tight_layout()
plt.show()
[Link] 1/5
7/25/25, 8:45 PM week3 AND [Link] - Colab
keyboard_arrow_down Step 4: Train-Test Split
# Step 4: Train-Test Split
# Positional 80/20 split: the first 80% of rows train the model, the rest is
# held out for evaluation.
train_size = int(len(monthly_sales) * 0.8)
train = monthly_sales.iloc[:train_size]
test = monthly_sales.iloc[train_size:]
print(f"\nTraining Range: {train.index.min()} to {train.index.max()}")
print(f"Testing Range: {test.index.min()} to {test.index.max()}")
Training Range: 2014-01-31 00:00:00 to 2017-02-28 00:00:00
Testing Range: 2017-03-31 00:00:00 to 2017-12-31 00:00:00
keyboard_arrow_down Step 5: ARIMA Model Fitting
# Step 5: ARIMA Model Function
def train_arima_model(train, test, order=(1, 1, 1)):
    """Fit an ARIMA model on ``train`` and score its forecast against ``test``.

    Args:
        train: Training data passed to ``ARIMA`` as the series to model.
        test: Hold-out data; its length sets the forecast horizon and its
            index labels the returned forecast.
        order: ``(p, d, q)`` order forwarded to ``ARIMA``.

    Returns:
        Tuple ``(forecast, rmse, mae)``: the forecast as a Series indexed like
        ``test``, followed by its root-mean-squared and mean-absolute errors
        against ``test``.
    """
    model_fit = ARIMA(train, order=order).fit()
    raw_forecast = model_fit.forecast(steps=len(test))
    # Attach the test index by position. Passing the forecast Series itself
    # would reindex by label and yield NaN wherever the forecast's own index
    # does not line up with ``test.index``.
    forecast = pd.Series(
        np.asarray(raw_forecast),
        index=test.index,
        name=getattr(raw_forecast, 'name', None),
    )
    rmse = np.sqrt(mean_squared_error(test, forecast))
    mae = mean_absolute_error(test, forecast)
    return forecast, rmse, mae
# Fit ARIMA on the training window, then report its hold-out error metrics
arima_results = train_arima_model(train, test)
arima_forecast, arima_rmse, arima_mae = arima_results
print(f"\n✅ ARIMA RMSE: {arima_rmse:.2f}")
print(f"✅ ARIMA MAE: {arima_mae:.2f}")
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/[Link]: UserWarning: Non-invertible starting MA parameter
warn('Non-invertible starting MA parameters found.'
✅ ARIMA RMSE: 2043.50
✅ ARIMA MAE: 1774.72
# Plot ARIMA Forecast
plt.figure(figsize=(12, 6))
plt.plot(train.index, train['Sales'], label='Train')
plt.plot(test.index, test['Sales'], label='Test', color='green')
plt.plot(test.index, arima_forecast, label='ARIMA Forecast', color='orange', linestyle='--')
plt.title("📈 ARIMA Forecast vs Actual Sales")
plt.xlabel("Date")
plt.ylabel("Sales")
[Link] 2/5
7/25/25, 8:45 PM week3 AND [Link] - Colab
# Finish the current figure: legend for the labelled lines, grid, then render.
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
/tmp/[Link]: UserWarning: Glyph 128200 (\N{CHART WITH UPWARDS TREND}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/usr/local/lib/python3.11/dist-packages/IPython/core/[Link]: UserWarning: Glyph 128200 (\N{CHART WITH UPWARDS TREND}) mis
[Link].print_figure(bytes_io, **kw)
# Plot ARIMA Residuals
residuals = test['Sales'] - arima_forecast
plt.figure(figsize=(12, 4))
plt.plot(residuals, marker='o')
plt.title("Residuals (Test - Forecast)")
# Zero reference line: points above it are under-forecasts, below are over-forecasts.
plt.axhline(y=0, color='r', linestyle='--')
plt.grid(True)
plt.tight_layout()
plt.show()
keyboard_arrow_down Step 6: Prophet Model for Monthly Profit (using Ship Date)
# Re-aggregate using Ship Date for Prophet
df['Ship Date'] = pd.to_datetime(df['Ship Date'])
# Sum Profit per month-start bucket keyed on Ship Date (not the Order Date index).
monthly_profit = df.resample('MS', on='Ship Date')['Profit'].sum().reset_index()
# Prophet expects the date column to be named 'ds' and the value column 'y'.
monthly_profit.columns = ['ds', 'y']
[Link] 3/5
7/25/25, 8:45 PM week3 AND [Link] - Colab
# Build a default Prophet model and train it on the monthly profit frame;
# fit() returns the fitted model, so construction and training are chained.
prophet_model = Prophet().fit(monthly_profit)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmp56x9uwqi/[Link]
DEBUG:cmdstanpy:input tempfile: /tmp/tmp56x9uwqi/[Link]
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=38163
[Link] - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
[Link] - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
<prophet.forecaster.Prophet at 0x7fc4a8344250>
# Forecast next 6 months
# The future frame covers the training dates plus 6 extra month-start periods.
future = prophet_model.make_future_dataframe(periods=6, freq='MS')
prophet_forecast = prophet_model.predict(future)
# Plot Prophet Forecast
fig = prophet_model.plot(prophet_forecast)
plt.title("🔮 Prophet Forecast - Monthly Profit (by Ship Date)")
plt.tight_layout()
plt.show()
/tmp/[Link]: UserWarning: Glyph 128302 (\N{CRYSTAL BALL}) missing from font(s) DejaVu Sans.
plt.tight_layout()
/usr/local/lib/python3.11/dist-packages/IPython/core/[Link]: UserWarning: Glyph 128302 (\N{CRYSTAL BALL}) missing from fo
[Link].print_figure(bytes_io, **kw)
# Combine actual and forecast into one DataFrame
# Each piece carries a 'type' tag so rows stay distinguishable after stacking.
arima_df = pd.concat([
    train.assign(type='Train'),
    test.assign(type='Test'),
    pd.DataFrame({'Sales': arima_forecast, 'type': 'Forecast'}, index=arima_forecast.index),
])
# Save to CSV
arima_df.to_csv("arima_forecast_vs_actual.csv")
# Month-end sales totals per region. An explicit MonthEnd offset gives the
# same bins as the old 'M' alias, which newer pandas deprecates.
monthly_sales_all = (
    df.groupby(['Region'])
    .resample(pd.offsets.MonthEnd())['Sales']
    .sum()
    .reset_index()
)
monthly_sales_all.to_csv("monthly_sales_by_region.csv", index=False)
[Link] 4/5
7/25/25, 8:45 PM week3 AND [Link] - Colab
# Only keep relevant columns: the date, the forecast, and its lower/upper columns
prophet_output = prophet_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
# Save it to CSV file (this is "B").
# The original ran this identical select-and-save pair twice; once is enough.
prophet_output.to_csv("prophet_forecast.csv", index=False)
[Link] 5/5