import pandas as pd
import numpy as np
import datetime
import statsforecast as sf
import statsforecast.models as sfm
from utilsforecast.plotting import plot_series

# Nixtla Demo
## Load Libraries
## Load Data
# Location of the raw CSV (served from the GitHub raw-content endpoint).
url = "https://raw.githubusercontent.com/LinkedInLearning/data-pipeline-automation-with-github-actions-4503382/main/csv/ciso_grid_py.csv"
# Download and parse the CSV into the raw frame `d`.
d = pd.read_csv(filepath_or_buffer=url)
d.head()

| | period | subba | subba-name | parent | parent-name | value | value-units |
|---|---|---|---|---|---|---|---|
| 0 | 2018-07-01 08:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 12522.0 | megawatthours |
| 1 | 2018-07-01 09:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11745.0 | megawatthours |
| 2 | 2018-07-01 10:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11200.0 | megawatthours |
| 3 | 2018-07-01 11:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10822.0 | megawatthours |
| 4 | 2018-07-01 12:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10644.0 | megawatthours |
# Parse the timestamp column and keep only the fields the forecast needs.
d["ds"] = pd.to_datetime(d["period"])
d = d[["ds", "subba", "value"]]

# Hourly window used for the demo; both endpoints are included.
start = datetime.datetime(2022, 8, 1, 0, 0, 0)
end = datetime.datetime(2024, 8, 20, 23, 0, 0)
subba = d["subba"].dropna().unique()

# Complete hourly grid shared by every series. Built once, outside the loop,
# with pandas' own range constructor instead of a NumPy object-array round trip.
hourly_grid = pd.date_range(
    start=start, end=end, freq=datetime.timedelta(hours=1)
)
# Row offsets averaged to fill a gap: previous hour, previous day, previous week.
impute_lags = (1, 24, 24 * 7)

frames = []
for series_id, s in enumerate(subba, start=1):
    ts_temp = pd.DataFrame({"ds": hourly_grid})
    ts_temp["unique_id"] = series_id
    ts_temp["subba"] = s
    # Left join: hours missing from the raw data become rows with a NaN value.
    ts_temp = ts_temp.merge(d, on=["ds", "subba"], how="left")
    ts_temp = ts_temp.sort_values("ds")

    # Fill gaps in index order so an earlier fill can feed a later one.
    # NOTE(review): index offsets equal hour offsets only if `d` holds at most
    # one row per (ds, subba) - confirm there are no duplicates upstream.
    missing = ts_temp.index[ts_temp["value"].isnull().to_numpy()]
    for n in missing:
        # Skip lags that fall before the first row (the original lookup raised
        # KeyError there) and lags that are themselves still missing.
        lagged = [
            ts_temp.at[n - lag, "value"]
            for lag in impute_lags
            if n - lag in ts_temp.index
        ]
        lagged = [v for v in lagged if pd.notna(v)]
        if lagged:
            ts_temp.at[n, "value"] = sum(lagged) / len(lagged)

    frames.append(ts_temp.rename(columns={"value": "y"}))

# Concatenate once instead of growing the frame inside the loop; fall back to
# an empty frame so the column selection below still works with no series.
if frames:
    ts = pd.concat(frames)
else:
    ts = pd.DataFrame(columns=["ds", "unique_id", "y"])
ts = ts[["ds", "unique_id", "y"]]
ts.head()

| | ds | unique_id | y |
|---|---|---|---|
| 0 | 2022-08-01 00:00:00 | 1 | 12375.0 |
| 1 | 2022-08-01 01:00:00 | 1 | 13233.0 |
| 2 | 2022-08-01 02:00:00 | 1 | 14115.0 |
| 3 | 2022-08-01 03:00:00 | 1 | 14813.0 |
| 4 | 2022-08-01 04:00:00 | 1 | 14737.0 |
plot_series(ts, engine = "plotly")

## Training Models
# Hold out the final `test_length` hours of every series for evaluation.
test_length = 72
train_end = end - datetime.timedelta(hours=test_length)
# Rows up to and including the cut-off train the models; later rows test them.
train = ts.loc[ts["ds"].le(train_end)]
test = ts.loc[ts["ds"].gt(train_end)]
plot_series(test, engine="plotly")

# Candidate models. The data is hourly, so 24 is the daily cycle and
# 24 * 7 the weekly cycle.
auto_arima = sfm.AutoARIMA()
s_naive = sfm.SeasonalNaive(season_length=24)
theta = sfm.DynamicOptimizedTheta(season_length=24)
mstl1 = sfm.MSTL(
    season_length=[24, 24 * 7],
    trend_forecaster=sfm.AutoARIMA(),
    alias="MSTL_ARIMA_trend",
)
# NOTE(review): the recorded run shows MSTL_HW_trend forecasts and intervals
# identical to AutoARIMA's, i.e. this model failed and the fallback was used.
# HoltWinters() defaults to season_length=1, which presumably cannot be fitted
# as a seasonal model; Holt is its non-seasonal counterpart and fits the
# deseasonalised trend MSTL hands to it. The alias is kept so downstream
# column names do not change - confirm with a fresh run.
mstl2 = sfm.MSTL(
    season_length=[24, 24 * 7],
    trend_forecaster=sfm.Holt(),
    alias="MSTL_HW_trend",
)
stats_models = [auto_arima, s_naive, theta, mstl1, mstl2]
md = sf.StatsForecast(
    models=stats_models,
    freq="h",
    # Any model that raises is silently replaced by this fallback, so columns
    # that exactly match AutoARIMA indicate a failed model.
    fallback_model=sfm.AutoARIMA(),
    n_jobs=-1,
)

# Forecast exactly the held-out horizon rather than repeating the literal 72.
forecast_stats = md.forecast(df=train, h=test_length, level=[95])
# Older statsforecast versions return the ids as the index, which is deprecated
# as plotting input; move them to a column only when they are not one already.
if "unique_id" not in forecast_stats.columns:
    forecast_stats = forecast_stats.reset_index()
print(forecast_stats.head())
md.plot(test, forecast_stats,engine = "plotly", level=[95])

ds AutoARIMA AutoARIMA-lo-95 AutoARIMA-hi-95 \
unique_id
1 2024-08-18 00:00:00 11573.708984 10918.641602 12228.776367
1 2024-08-18 01:00:00 11831.327148 10476.957031 13185.697266
1 2024-08-18 02:00:00 11963.833008 9930.119141 13997.546875
1 2024-08-18 03:00:00 12063.378906 9422.343750 14704.414062
1 2024-08-18 04:00:00 12076.913086 8876.990234 15276.834961
SeasonalNaive SeasonalNaive-lo-95 SeasonalNaive-hi-95 \
unique_id
1 13469.0 10469.772461 16468.228516
1 14589.0 11589.772461 17588.228516
1 15648.0 12648.772461 18647.228516
1 16037.0 13037.772461 19036.228516
1 16037.0 13037.772461 19036.228516
DynamicOptimizedTheta DynamicOptimizedTheta-lo-95 \
unique_id
1 11639.806641 11149.407227
1 12293.179688 11412.287109
1 13137.013672 11901.803711
1 13940.085938 12580.620117
1 14211.835938 12552.809570
DynamicOptimizedTheta-hi-95 MSTL_ARIMA_trend \
unique_id
1 12223.863281 11914.586914
1 13118.373047 12968.906250
1 14231.144531 13956.809570
1 15290.833984 14797.150391
1 15771.384766 15019.596680
MSTL_ARIMA_trend-lo-95 MSTL_ARIMA_trend-hi-95 MSTL_HW_trend \
unique_id
1 11611.195312 12217.978516 11573.708984
1 12488.618164 13449.193359 11831.327148
1 13334.845703 14578.774414 11963.833008
1 14056.582031 15537.718750 12063.378906
1 14175.956055 15863.237305 12076.913086
MSTL_HW_trend-lo-95 MSTL_HW_trend-hi-95
unique_id
1 10918.641602 12228.776367
1 10476.957031 13185.697266
1 9930.119141 13997.546875
1 9422.343750 14704.414062
1 8876.990234 15276.834961
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:
In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:
Passing the ids as the index is deprecated. Please provide them as a column instead.