US48 Demand for Electricity Forecast

Load Libraries

import os
import pandas as pd
from statsforecast import StatsForecast
import datetime
import statsmodels.api as sm
import matplotlib.pyplot as plt
from utilsforecast.plotting import plot_series
from statistics import mean


from statsforecast.models import (
    HoltWinters,
    CrostonClassic as Croston, 
    HistoricAverage,
    DynamicOptimizedTheta,
    SeasonalNaive,
    AutoARIMA,
    AutoRegressive,
    AutoETS,
    AutoTBATS,
    MSTL,
    Holt
)



import plotly.graph_objects as go

Load the Data

Reformat the data

ts = pd.read_csv("data/data.csv")
ts["ds"] = pd.to_datetime(ts["ds"])
ts = ts.sort_values("ds")
ts.head()
ds y unique_id
0 2022-04-06 23:00:00 462894.0 1
1 2022-04-07 00:00:00 463663.0 1
2 2022-04-07 01:00:00 464916.0 1
3 2022-04-07 02:00:00 459376.0 1
4 2022-04-07 03:00:00 441989.0 1

Subset for the last 25 months:

end = ts["ds"].max()
start = end - datetime.timedelta(hours = 24 * 31 * 25)
ts = ts[ts["ds"] >= start]

Set the last 72 hours as testing partition and use the rest for training

test_length = 72

train_end = end  - datetime.timedelta(hours = test_length)


train = ts[ts["ds"] <= train_end]
test = ts[ts["ds"] > train_end]

plot_series(train, engine = "plotly")
plot_series(test, engine = "plotly")

Set the Models

Set the models:

auto_arima = AutoARIMA()
s_naive = SeasonalNaive(season_length=24)
theta =  DynamicOptimizedTheta(season_length= 24)
mstl = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=AutoARIMA(),
    alias="MSTL_ARIMA_trend"
)

stats_models = [
   auto_arima,
   s_naive,
   theta,
   mstl
]



sf = StatsForecast( 
    models=stats_models,
    freq="h", 
    fallback_model = AutoARIMA(),
    n_jobs= -1,
)
forecasts_stats = sf.forecast(df=train, h=72, level=[95])
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:

In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
forecasts_stats.head()
ds AutoARIMA AutoARIMA-lo-95 AutoARIMA-hi-95 SeasonalNaive SeasonalNaive-lo-95 SeasonalNaive-hi-95 DynamicOptimizedTheta DynamicOptimizedTheta-lo-95 DynamicOptimizedTheta-hi-95 MSTL_ARIMA_trend MSTL_ARIMA_trend-lo-95 MSTL_ARIMA_trend-hi-95
unique_id
1 2024-04-23 00:00:00 441782.00000 433129.18750 450434.81250 421082.59375 347799.03125 494366.15625 438476.87500 423830.15625 455920.84375 447210.62500 444114.84375 450306.43750
1 2024-04-23 01:00:00 444155.65625 421518.93750 466792.40625 429728.31250 356444.75000 503011.87500 435782.43750 411024.31250 458975.06250 448354.50000 443145.62500 453563.37500
1 2024-04-23 02:00:00 445304.81250 404940.46875 485669.15625 430690.96875 357407.40625 503974.53125 428324.37500 396393.78125 456607.96875 441677.78125 434246.87500 449108.65625
1 2024-04-23 03:00:00 445594.12500 385722.96875 505465.25000 420094.59375 346811.03125 493378.15625 414728.09375 382661.28125 446589.25000 424103.62500 414574.18750 433633.06250
1 2024-04-23 04:00:00 445355.68750 365746.09375 524965.31250 403292.37500 330008.81250 476575.93750 396965.03125 360224.62500 431502.50000 401796.06250 390396.56250 413195.53125
sf.plot(test, forecasts_stats, models = ["AutoARIMA"], level=[95])
sf.plot(test, forecasts_stats, engine = "plotly", level=[95])
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:

Passing the ids as the index is deprecated. Please provide them as a column instead.

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:

Passing the ids as the index is deprecated. Please provide them as a column instead.

Score the Models

def mape(y, yhat):
    mape = mean(abs(y - yhat)/ y) 
    return mape

def rmse(y, yhat):
    rmse = (mean((y - yhat) ** 2 )) ** 0.5
    return rmse

def coverage(y, lower, upper):
    coverage = sum((y <= upper) & (y >= lower)) / len(y)
    return coverage
fc = forecasts_stats.merge(test, how = "left", on = "ds")
fc_performance = None

for i in ["AutoARIMA", "SeasonalNaive", "DynamicOptimizedTheta", "MSTL_ARIMA_trend"]:
    m = mape(y = fc.y, yhat = fc[i])
    r = rmse(y = fc.y, yhat = fc[i])
    c = coverage(y = fc.y, lower = fc[i + "-lo-95"], upper = fc[i + "-hi-95"])

    perf = {"model": i,
            "mape": m,
            "rmse": r,
            "coverage": c}
    if fc_performance is None:
        fc_performance = pd.DataFrame([perf])
    else:
        fc_performance = pd.concat([fc_performance, pd.DataFrame([perf])])

fc_performance.sort_values("rmse")
model mape rmse coverage
0 MSTL_ARIMA_trend 0.029793 13690.780221 0.902778
0 SeasonalNaive 0.025622 14196.465062 1.000000
0 DynamicOptimizedTheta 0.048023 22472.707037 0.972222
0 AutoARIMA 0.080049 41152.048617 1.000000

Test additional versions of the MSTL model

mstl1 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=AutoARIMA(),
    alias="MSTL_ARIMA_trend_1"
)

mstl2 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster= HoltWinters(),
    alias="MSTL_HW_trend" 
)


mstl3 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster= DynamicOptimizedTheta(),
    alias="MSTL_DOT_trend" 
)


mstl4 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=  Holt(),
    alias="MSTL_Holt_trend" 
)


mstl5 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=  AutoRegressive(lags=[1,2, 3, 4,5,6,24 ], include_mean=True),
    alias="MSTL_AR_trend" 
)

mstl6 = MSTL(
    season_length=[24, 24 * 7, 24 * 365], 
    trend_forecaster=AutoARIMA(),
    alias="MSTL_ARIMA_trend_2"
)


mstl_models = [
   mstl1,
   mstl2,
   mstl3,
   mstl4,
   mstl5,
   mstl6
]



sf_mstl = StatsForecast( 
    models=mstl_models,
    freq="h", 
    fallback_model = AutoARIMA(),
    n_jobs= -1,
)
forecasts_mstl = sf_mstl.forecast(df=train, h=72, level=[95])
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/arima.py:829: UserWarning:

some AR parameters were fixed: setting transform_pars = False

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:

In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
forecasts_mstl.head()
ds MSTL_ARIMA_trend_1 MSTL_ARIMA_trend_1-lo-95 MSTL_ARIMA_trend_1-hi-95 MSTL_HW_trend MSTL_HW_trend-lo-95 MSTL_HW_trend-hi-95 MSTL_DOT_trend MSTL_DOT_trend-lo-95 MSTL_DOT_trend-hi-95 MSTL_Holt_trend MSTL_Holt_trend-lo-95 MSTL_Holt_trend-hi-95 MSTL_AR_trend MSTL_AR_trend-lo-95 MSTL_AR_trend-hi-95 MSTL_ARIMA_trend_2 MSTL_ARIMA_trend_2-lo-95 MSTL_ARIMA_trend_2-hi-95
unique_id
1 2024-04-23 00:00:00 447210.62500 444114.84375 450306.43750 441782.00000 433129.18750 450434.81250 445456.03125 442295.59375 449220.03125 445482.15625 441707.37500 449256.90625 447665.34375 444598.78125 450731.90625 454678.59375 452427.40625 456929.81250
1 2024-04-23 01:00:00 448354.50000 443145.62500 453563.37500 444155.65625 421518.93750 466792.40625 445100.40625 439725.15625 450135.78125 445124.21875 439785.90625 450462.56250 449133.59375 443669.03125 454598.15625 454491.00000 450708.18750 458273.81250
1 2024-04-23 02:00:00 441677.78125 434246.87500 449108.65625 445304.81250 404940.46875 485669.15625 437529.87500 430476.68750 443777.46875 437551.43750 431013.15625 444089.71875 442690.50000 434685.46875 450695.50000 447848.18750 442430.37500 453266.03125
1 2024-04-23 03:00:00 424103.62500 414574.18750 433633.06250 445594.12500 385722.96875 505465.25000 419549.75000 412234.25000 426818.28125 419569.09375 412019.03125 427119.15625 425196.56250 414771.75000 435621.37500 429126.68750 422157.71875 436095.68750
1 2024-04-23 04:00:00 401796.06250 390396.56250 413195.53125 445355.68750 365746.09375 524965.31250 397128.46875 388371.68750 405360.15625 397145.68750 388704.09375 405587.25000 402821.40625 390242.53125 415400.28125 406840.28125 398481.12500 415199.43750
fc = forecasts_mstl.merge(test, how = "left", on = "ds")
fc_performance = None

for i in ["MSTL_ARIMA_trend_1", "MSTL_ARIMA_trend_2", "MSTL_HW_trend", "MSTL_DOT_trend", "MSTL_AR_trend", "MSTL_Holt_trend"]:
    m = mape(y = fc.y, yhat = fc[i])
    r = rmse(y = fc.y, yhat = fc[i])
    c = coverage(y = fc.y, lower = fc[i + "-lo-95"], upper = fc[i + "-hi-95"])

    perf = {"model": i,
            "mape": m,
            "rmse": r,
            "coverage": c}
    if fc_performance is None:
        fc_performance = pd.DataFrame([perf])
    else:
        fc_performance = pd.concat([fc_performance, pd.DataFrame([perf])])

fc_performance.sort_values("rmse")
model mape rmse coverage
0 MSTL_AR_trend 0.022841 11133.594451 0.916667
0 MSTL_ARIMA_trend_2 0.029137 13636.440450 0.875000
0 MSTL_ARIMA_trend_1 0.029793 13690.780221 0.902778
0 MSTL_DOT_trend 0.038641 17377.932553 0.763889
0 MSTL_Holt_trend 0.038708 17405.987291 0.833333
0 MSTL_HW_trend 0.080049 41152.048617 1.000000