import pandas as pd
import numpy as np
import datetime
import statsforecast as sf
import statsforecast.models as sfm
from utilsforecast.plotting import plot_series
# Nixtla Demo
## Load Libraries
## Load Data
# Remote CSV holding the CISO grid data used throughout this demo.
url = "https://raw.githubusercontent.com/LinkedInLearning/data-pipeline-automation-with-github-actions-4503382/main/csv/ciso_grid_py.csv"

# Read the whole file into a DataFrame (requires network access).
d = pd.read_csv(url)

# Preview the first rows (rendered as the cell output in a notebook).
d.head()
| | period | subba | subba-name | parent | parent-name | value | value-units |
---|---|---|---|---|---|---|---|
0 | 2018-07-01 08:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 12522.0 | megawatthours |
1 | 2018-07-01 09:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11745.0 | megawatthours |
2 | 2018-07-01 10:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11200.0 | megawatthours |
3 | 2018-07-01 11:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10822.0 | megawatthours |
4 | 2018-07-01 12:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10644.0 | megawatthours |
"ds"] = pd.to_datetime(d["period"])
d[= d[["ds", "subba", "value"]]
d = datetime.datetime(2022, 8, 1, 0, 0, 0)
start = datetime.datetime(2024, 8, 20, 23, 0, 0)
end
# Build one gap-free hourly series per sub-region between `start` and `end`,
# then stack them into the long (ds, unique_id, y) frame used for forecasting.
subba = d["subba"].dropna().unique()

# Complete hourly grid with both endpoints included; identical for every
# series, so it is built once outside the loop.
hourly_index = pd.date_range(start=start, end=end, freq="h")

series_frames = []
# unique_id is the 1-based position of the sub-region within `subba`.
for series_id, s in enumerate(subba, start=1):
    ts_temp = pd.DataFrame({"ds": hourly_index})
    ts_temp["unique_id"] = series_id
    ts_temp["subba"] = s
    # Left merge keeps every hour of the grid; hours absent from `d` get NaN.
    ts_temp = ts_temp.merge(d, on=["ds", "subba"], how="left")
    # Reset the index so that row labels equal row positions, which the
    # label arithmetic (n - 1, n - 24, ...) below relies on.
    ts_temp = ts_temp.sort_values("ds").reset_index(drop=True)
    if ts_temp["value"].isnull().any():
        missing = ts_temp[ts_temp["value"].isnull()]
        # Fill each gap with the mean of the values one hour, one day and one
        # week earlier. Rows are processed in order, so an earlier filled
        # value can feed a later one.
        # NOTE(review): a gap within the first 24 * 7 rows would look up a
        # label that does not exist -- confirm the data has no such gap.
        for n in missing.index:
            ts_temp.at[n, "value"] = (
                ts_temp.at[n - 1, "value"]
                + ts_temp.at[n - 24, "value"]
                + ts_temp.at[n - 24 * 7, "value"]
            ) / 3
    ts_temp = ts_temp.rename(columns={"value": "y"})
    series_frames.append(ts_temp)

# Concatenate once instead of growing the frame inside the loop.
ts = pd.concat(series_frames)

ts = ts[["ds", "unique_id", "y"]]
ts.head()
| | ds | unique_id | y |
---|---|---|---|
0 | 2022-08-01 00:00:00 | 1 | 12375.0 |
1 | 2022-08-01 01:00:00 | 1 | 13233.0 |
2 | 2022-08-01 02:00:00 | 1 | 14115.0 |
3 | 2022-08-01 03:00:00 | 1 | 14813.0 |
4 | 2022-08-01 04:00:00 | 1 | 14737.0 |
# Interactive plot of every series in `ts`.
plot_series(ts, engine="plotly")
## Training Models
# Number of trailing hours of each series held out for testing.
test_length = 72

# Last timestamp that belongs to the training partition.
train_end = end - datetime.timedelta(hours=test_length)

# Rows up to and including train_end are used for training; later rows for testing.
train = ts[ts["ds"] <= train_end]
test = ts[ts["ds"] > train_end]

# Interactive plot of the held-out part of every series.
plot_series(test, engine="plotly")
# Candidate models. The data is hourly, so a season length of 24 is one day
# and 24 * 7 is one week.
auto_arima = sfm.AutoARIMA()

s_naive = sfm.SeasonalNaive(season_length=24)

theta = sfm.DynamicOptimizedTheta(season_length=24)

# MSTL with daily and weekly seasonal lengths; the trend component is
# forecast with AutoARIMA.
mstl1 = sfm.MSTL(
    season_length=[24, 24 * 7],
    trend_forecaster=sfm.AutoARIMA(),
    alias="MSTL_ARIMA_trend",
)

# Same seasonal lengths, with the trend component forecast by HoltWinters.
mstl2 = sfm.MSTL(
    season_length=[24, 24 * 7],
    trend_forecaster=sfm.HoltWinters(),
    alias="MSTL_HW_trend",
)

stats_models = [auto_arima, s_naive, theta, mstl1, mstl2]
# Forecasting engine that applies every model in `stats_models` to each series.
md = sf.StatsForecast(
    models=stats_models,
    # Hourly frequency.
    freq="h",
    # NOTE(review): in the recorded output the MSTL_HW_trend columns are
    # identical to the AutoARIMA columns for the rows shown, which suggests
    # the HoltWinters-trend model failed and this fallback was used -- confirm.
    fallback_model=sfm.AutoARIMA(),
    n_jobs=-1,
)
# Forecast exactly the held-out horizon, with a 95% prediction interval, so
# the predictions line up with `test`.
forecast_stats = md.forecast(df=train, h=test_length, level=[95])

# The recorded run returned the series ids as the index and then warned that
# passing ids as the index is deprecated; move them into a column when needed.
if forecast_stats.index.name == "unique_id":
    forecast_stats = forecast_stats.reset_index()

print(forecast_stats.head())

# Overlay the forecasts and their 95% intervals on the held-out observations.
md.plot(test, forecast_stats, engine="plotly", level=[95])
ds AutoARIMA AutoARIMA-lo-95 AutoARIMA-hi-95 \
unique_id
1 2024-08-18 00:00:00 11573.708984 10918.641602 12228.776367
1 2024-08-18 01:00:00 11831.327148 10476.957031 13185.697266
1 2024-08-18 02:00:00 11963.833008 9930.119141 13997.546875
1 2024-08-18 03:00:00 12063.378906 9422.343750 14704.414062
1 2024-08-18 04:00:00 12076.913086 8876.990234 15276.834961
SeasonalNaive SeasonalNaive-lo-95 SeasonalNaive-hi-95 \
unique_id
1 13469.0 10469.772461 16468.228516
1 14589.0 11589.772461 17588.228516
1 15648.0 12648.772461 18647.228516
1 16037.0 13037.772461 19036.228516
1 16037.0 13037.772461 19036.228516
DynamicOptimizedTheta DynamicOptimizedTheta-lo-95 \
unique_id
1 11639.806641 11149.407227
1 12293.179688 11412.287109
1 13137.013672 11901.803711
1 13940.085938 12580.620117
1 14211.835938 12552.809570
DynamicOptimizedTheta-hi-95 MSTL_ARIMA_trend \
unique_id
1 12223.863281 11914.586914
1 13118.373047 12968.906250
1 14231.144531 13956.809570
1 15290.833984 14797.150391
1 15771.384766 15019.596680
MSTL_ARIMA_trend-lo-95 MSTL_ARIMA_trend-hi-95 MSTL_HW_trend \
unique_id
1 11611.195312 12217.978516 11573.708984
1 12488.618164 13449.193359 11831.327148
1 13334.845703 14578.774414 11963.833008
1 14056.582031 15537.718750 12063.378906
1 14175.956055 15863.237305 12076.913086
MSTL_HW_trend-lo-95 MSTL_HW_trend-hi-95
unique_id
1 10918.641602 12228.776367
1 10476.957031 13185.697266
1 9930.119141 13997.546875
1 9422.343750 14704.414062
1 8876.990234 15276.834961
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:
In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.
/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:
Passing the ids as the index is deprecated. Please provide them as a column instead.