US48 Demand for Electricity Forecast

Load Libraries

import os
import pandas as pd
from statsforecast import StatsForecast
import datetime
import statsmodels.api as sm
import matplotlib.pyplot as plt
from utilsforecast.plotting import plot_series
from statistics import mean


from statsforecast.models import (
    HoltWinters,
    CrostonClassic as Croston, 
    HistoricAverage,
    DynamicOptimizedTheta,
    SeasonalNaive,
    AutoARIMA,
    AutoRegressive,
    AutoETS,
    AutoTBATS,
    MSTL,
    Holt
)



import plotly.graph_objects as go

Load the Data

Reformat the data

ts = pd.read_csv("data/data.csv")
ts["ds"] = pd.to_datetime(ts["ds"])
ts = ts.sort_values("ds")
ts.head()

	ds	y	unique_id
0	2022-04-06 23:00:00	462894.0	1
1	2022-04-07 00:00:00	463663.0	1
2	2022-04-07 01:00:00	464916.0	1
3	2022-04-07 02:00:00	459376.0	1
4	2022-04-07 03:00:00	441989.0	1

Subset for the last 25 months:

end = ts["ds"].max()
start = end - datetime.timedelta(hours = 24 * 31 * 25)
ts = ts[ts["ds"] >= start]

Set the last 72 hours as testing partition and use the rest for training

test_length = 72

train_end = end  - datetime.timedelta(hours = test_length)


train = ts[ts["ds"] <= train_end]
test = ts[ts["ds"] > train_end]

plot_series(train, engine = "plotly")
plot_series(test, engine = "plotly")

Set the Models

Set the models:

auto_arima = AutoARIMA()
s_naive = SeasonalNaive(season_length=24)
theta =  DynamicOptimizedTheta(season_length= 24)
mstl = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=AutoARIMA(),
    alias="MSTL_ARIMA_trend"
)

stats_models = [
   auto_arima,
   s_naive,
   theta,
   mstl
]



sf = StatsForecast( 
    models=stats_models,
    freq="h", 
    fallback_model = AutoARIMA(),
    n_jobs= -1,
)

forecasts_stats = sf.forecast(df=train, h=72, level=[95])

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:

In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.

forecasts_stats.head()

	ds	AutoARIMA	AutoARIMA-lo-95	AutoARIMA-hi-95	SeasonalNaive	SeasonalNaive-lo-95	SeasonalNaive-hi-95	DynamicOptimizedTheta	DynamicOptimizedTheta-lo-95	DynamicOptimizedTheta-hi-95	MSTL_ARIMA_trend	MSTL_ARIMA_trend-lo-95	MSTL_ARIMA_trend-hi-95
unique_id
1	2024-04-23 00:00:00	441782.00000	433129.18750	450434.81250	421082.59375	347799.03125	494366.15625	438476.87500	423830.15625	455920.84375	447210.62500	444114.84375	450306.43750
1	2024-04-23 01:00:00	444155.65625	421518.93750	466792.40625	429728.31250	356444.75000	503011.87500	435782.43750	411024.31250	458975.06250	448354.50000	443145.62500	453563.37500
1	2024-04-23 02:00:00	445304.81250	404940.46875	485669.15625	430690.96875	357407.40625	503974.53125	428324.37500	396393.78125	456607.96875	441677.78125	434246.87500	449108.65625
1	2024-04-23 03:00:00	445594.12500	385722.96875	505465.25000	420094.59375	346811.03125	493378.15625	414728.09375	382661.28125	446589.25000	424103.62500	414574.18750	433633.06250
1	2024-04-23 04:00:00	445355.68750	365746.09375	524965.31250	403292.37500	330008.81250	476575.93750	396965.03125	360224.62500	431502.50000	401796.06250	390396.56250	413195.53125

sf.plot(test, forecasts_stats, models = ["AutoARIMA"], level=[95])
sf.plot(test, forecasts_stats, engine = "plotly", level=[95])

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:

Passing the ids as the index is deprecated. Please provide them as a column instead.

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:

Passing the ids as the index is deprecated. Please provide them as a column instead.

Score the Models

def mape(y, yhat):
    mape = mean(abs(y - yhat)/ y) 
    return mape

def rmse(y, yhat):
    rmse = (mean((y - yhat) ** 2 )) ** 0.5
    return rmse

def coverage(y, lower, upper):
    coverage = sum((y <= upper) & (y >= lower)) / len(y)
    return coverage

fc = forecasts_stats.merge(test, how = "left", on = "ds")
fc_performance = None

for i in ["AutoARIMA", "SeasonalNaive", "DynamicOptimizedTheta", "MSTL_ARIMA_trend"]:
    m = mape(y = fc.y, yhat = fc[i])
    r = rmse(y = fc.y, yhat = fc[i])
    c = coverage(y = fc.y, lower = fc[i + "-lo-95"], upper = fc[i + "-hi-95"])

    perf = {"model": i,
            "mape": m,
            "rmse": r,
            "coverage": c}
    if fc_performance is None:
        fc_performance = pd.DataFrame([perf])
    else:
        fc_performance = pd.concat([fc_performance, pd.DataFrame([perf])])

fc_performance.sort_values("rmse")

model	mape	rmse	coverage
MSTL_ARIMA_trend	0.029793	13690.780221	0.902778
SeasonalNaive	0.025622	14196.465062	1.000000
DynamicOptimizedTheta	0.048023	22472.707037	0.972222
AutoARIMA	0.080049	41152.048617	1.000000

Test additional versions of the MSTL model

mstl1 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=AutoARIMA(),
    alias="MSTL_ARIMA_trend_1"
)

mstl2 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster= HoltWinters(),
    alias="MSTL_HW_trend" 
)


mstl3 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster= DynamicOptimizedTheta(),
    alias="MSTL_DOT_trend" 
)


mstl4 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=  Holt(),
    alias="MSTL_Holt_trend" 
)


mstl5 = MSTL(
    season_length=[24, 24 * 7], 
    trend_forecaster=  AutoRegressive(lags=[1,2, 3, 4,5,6,24 ], include_mean=True),
    alias="MSTL_AR_trend" 
)

mstl6 = MSTL(
    season_length=[24, 24 * 7, 24 * 365], 
    trend_forecaster=AutoARIMA(),
    alias="MSTL_ARIMA_trend_2"
)


mstl_models = [
   mstl1,
   mstl2,
   mstl3,
   mstl4,
   mstl5,
   mstl6
]



sf_mstl = StatsForecast( 
    models=mstl_models,
    freq="h", 
    fallback_model = AutoARIMA(),
    n_jobs= -1,
)

forecasts_mstl = sf_mstl.forecast(df=train, h=72, level=[95])

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/arima.py:829: UserWarning:

some AR parameters were fixed: setting transform_pars = False

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:

In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.

forecasts_mstl.head()

	ds	MSTL_ARIMA_trend_1	MSTL_ARIMA_trend_1-lo-95	MSTL_ARIMA_trend_1-hi-95	MSTL_HW_trend	MSTL_HW_trend-lo-95	MSTL_HW_trend-hi-95	MSTL_DOT_trend	MSTL_DOT_trend-lo-95	MSTL_DOT_trend-hi-95	MSTL_Holt_trend	MSTL_Holt_trend-lo-95	MSTL_Holt_trend-hi-95	MSTL_AR_trend	MSTL_AR_trend-lo-95	MSTL_AR_trend-hi-95	MSTL_ARIMA_trend_2	MSTL_ARIMA_trend_2-lo-95	MSTL_ARIMA_trend_2-hi-95
unique_id
1	2024-04-23 00:00:00	447210.62500	444114.84375	450306.43750	441782.00000	433129.18750	450434.81250	445456.03125	442295.59375	449220.03125	445482.15625	441707.37500	449256.90625	447665.34375	444598.78125	450731.90625	454678.59375	452427.40625	456929.81250
1	2024-04-23 01:00:00	448354.50000	443145.62500	453563.37500	444155.65625	421518.93750	466792.40625	445100.40625	439725.15625	450135.78125	445124.21875	439785.90625	450462.56250	449133.59375	443669.03125	454598.15625	454491.00000	450708.18750	458273.81250
1	2024-04-23 02:00:00	441677.78125	434246.87500	449108.65625	445304.81250	404940.46875	485669.15625	437529.87500	430476.68750	443777.46875	437551.43750	431013.15625	444089.71875	442690.50000	434685.46875	450695.50000	447848.18750	442430.37500	453266.03125
1	2024-04-23 03:00:00	424103.62500	414574.18750	433633.06250	445594.12500	385722.96875	505465.25000	419549.75000	412234.25000	426818.28125	419569.09375	412019.03125	427119.15625	425196.56250	414771.75000	435621.37500	429126.68750	422157.71875	436095.68750
1	2024-04-23 04:00:00	401796.06250	390396.56250	413195.53125	445355.68750	365746.09375	524965.31250	397128.46875	388371.68750	405360.15625	397145.68750	388704.09375	405587.25000	402821.40625	390242.53125	415400.28125	406840.28125	398481.12500	415199.43750

fc = forecasts_mstl.merge(test, how = "left", on = "ds")
fc_performance = None

for i in ["MSTL_ARIMA_trend_1", "MSTL_ARIMA_trend_2", "MSTL_HW_trend", "MSTL_DOT_trend", "MSTL_AR_trend", "MSTL_Holt_trend"]:
    m = mape(y = fc.y, yhat = fc[i])
    r = rmse(y = fc.y, yhat = fc[i])
    c = coverage(y = fc.y, lower = fc[i + "-lo-95"], upper = fc[i + "-hi-95"])

    perf = {"model": i,
            "mape": m,
            "rmse": r,
            "coverage": c}
    if fc_performance is None:
        fc_performance = pd.DataFrame([perf])
    else:
        fc_performance = pd.concat([fc_performance, pd.DataFrame([perf])])

fc_performance.sort_values("rmse")

model	mape	rmse	coverage
MSTL_AR_trend	0.022841	11133.594451	0.916667
MSTL_ARIMA_trend_2	0.029137	13636.440450	0.875000
MSTL_ARIMA_trend_1	0.029793	13690.780221	0.902778
MSTL_DOT_trend	0.038641	17377.932553	0.763889
MSTL_Holt_trend	0.038708	17405.987291	0.833333
MSTL_HW_trend	0.080049	41152.048617	1.000000