import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime, timedelta
# 1. Fetch historical data
print("Fetching Coal India historical data (2022 to present)...")
start_date = "2022-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
stock_data = yf.download('COALINDIA.NS', start=start_date, end=end_date)

# 2. Data preprocessing
if stock_data.empty:
    # Retry once with the alternative '.BO' ticker before giving up.
    print("Warning: No data retrieved. Trying alternative ticker format...")
    stock_data = yf.download('COALINDIA.BO', start=start_date, end=end_date)

# NOTE(review): recent yfinance releases appear to return (field, ticker)
# MultiIndex columns even for a single ticker. Keeping only the first level
# makes 'Close' select a plain Series on either layout -- confirm against the
# installed yfinance version.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Fail with a clear message instead of a KeyError when nothing usable came back.
if 'Close' not in stock_data.columns:
    raise ValueError("Downloaded data contains no 'Close' column")
df = stock_data[['Close']].dropna()

# 3. Check data sufficiency
if len(df) < 100:
    raise ValueError("Insufficient data: Need at least 100 trading days of history")

# 4. Train-test split (chronological: first 80% train, last 20% test)
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]

# 5. ARIMA Model
print("Fitting ARIMA model...")
seasonal_order = (1, 1, 1, 5)
order = (5, 1, 0)
try:
    # Try the original model first. AR order 5 includes lag 5, which is also
    # the seasonal AR lag for a period of 5; statsmodels is expected to reject
    # that overlap with a ValueError, in which case the fallback below is used.
    model = ARIMA(train['Close'], order=order, seasonal_order=seasonal_order)
    model_fit = model.fit()
except ValueError:
    print("Original model failed due to parameter conflict. Using alternative parameters...")
    # Alternative 1: Reduce non-seasonal AR order
    order = (4, 1, 0)
    model = ARIMA(train['Close'], order=order, seasonal_order=seasonal_order)
    model_fit = model.fit()
print(model_fit.summary())

# Out-of-sample check: forecast across the held-out window with the train-only
# fit so the 80/20 split is actually used to judge the model.
holdout_pred = np.asarray(model_fit.forecast(steps=len(test)), dtype=float).ravel()
holdout_true = test['Close'].to_numpy(dtype=float).ravel()
holdout_rmse = float(np.sqrt(np.mean((holdout_pred - holdout_true) ** 2)))
print(f"Holdout RMSE over {len(test)} test days: {holdout_rmse:.2f}")

# 6. Forecasting
# Refit the accepted specification on the full history. Forecasts from the
# train-only fit begin right after the training window, not after the last
# observed date, so reporting them as future prices would be misaligned by the
# length of the test set.
final_model = ARIMA(df['Close'], order=order, seasonal_order=seasonal_order)
final_fit = final_model.fit()
forecast_3m = final_fit.get_forecast(steps=63)  # 3 months
forecast_6m = final_fit.get_forecast(steps=126)  # 6 months
forecast_1y = final_fit.get_forecast(steps=252)  # 1 year
# 7. Extract forecast values
def get_forecast_values(forecast, alpha=0.05):
    """Summarise the final step of a forecast as a point estimate and interval.

    Args:
        forecast: Object exposing ``predicted_mean`` (positionally indexable
            via ``.iloc``) and ``conf_int(alpha=...)`` returning a table whose
            first column is read as the lower bound and whose second column is
            read as the upper bound.
        alpha: Significance level forwarded to ``forecast.conf_int``. The
            default of 0.05 is presumably the library default (a 95%
            interval) -- confirm against the statsmodels reference.

    Returns:
        A dict with keys ``'mean'``, ``'confidence_lower'`` and
        ``'confidence_upper'`` holding the values of the last forecast step.

    Raises:
        IndexError: If the forecast contains no steps.
    """
    conf_int = forecast.conf_int(alpha=alpha)
    return {
        'mean': forecast.predicted_mean.iloc[-1],
        'confidence_lower': conf_int.iloc[-1, 0],
        'confidence_upper': conf_int.iloc[-1, 1],
    }
forecasts = {
    '3_months': get_forecast_values(forecast_3m),
    '6_months': get_forecast_values(forecast_6m),
    '1_year': get_forecast_values(forecast_1y),
}

# 8. Display results
print("\n=== COALINDIA Forecast Results ===")
for period, values in forecasts.items():
    print(f"\n{period.replace('_', ' ').title()}:")
    print(f"Predicted Price: ₹{values['mean']:.2f}")
    print(
        f"Confidence Range: ₹{values['confidence_lower']:.2f} - "
        f"₹{values['confidence_upper']:.2f}"
    )

# 9. Visualization
plt.figure(figsize=(14, 7))
plt.plot(df.index, df['Close'], label='Historical', color='blue')

# Plot forecast
# NOTE(review): the x-axis below starts one business day after the last row of
# df, which assumes forecast_1y was produced by a model whose sample ends at
# df.index[-1] -- confirm where the model is fitted.
yearly_mean = np.asarray(forecast_1y.predicted_mean)
last_date = df.index[-1]
forecast_index = pd.date_range(
    start=last_date + pd.offsets.BDay(1),
    periods=len(yearly_mean),  # follow the forecast horizon instead of repeating 252
    freq='B',
)
plt.plot(forecast_index, yearly_mean, label='1-Year Forecast', color='red')

# Confidence intervals (column 0 is drawn as the lower edge, column 1 as the upper)
conf_int = forecast_1y.conf_int()
plt.fill_between(
    forecast_index,
    conf_int.iloc[:, 0],
    conf_int.iloc[:, 1],
    color='pink',
    alpha=0.3,
)
plt.title('Coal India Stock Forecast (2022-Present Data)')
plt.xlabel('Date')
plt.ylabel('Price (INR)')
plt.legend()
plt.grid(True)
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig('coal_india_forecast.png', dpi=300, bbox_inches='tight')
plt.show()

# 10. Validation
print("\n=== Data Validation ===")
print(f"Data Range: {df.index[0].date()} to {df.index[-1].date()}")
print(f"Total Trading Days: {len(df)}")
print(f"Training Data: {len(train)} days ({len(train)/len(df):.1%})")
print(f"Test Data: {len(test)} days ({len(test)/len(df):.1%})")