Nixtla Demo

Load Libraries

import pandas as pd
import numpy as np
import datetime


import statsforecast as sf
import statsforecast.models as sfm
from utilsforecast.plotting import plot_series

Load Data

# Hourly demand per sub-balancing-authority, published as a CSV in the
# course repository. The file is fetched over HTTP on every run.
url = "https://raw.githubusercontent.com/LinkedInLearning/data-pipeline-automation-with-github-actions-4503382/main/csv/ciso_grid_py.csv"

d = pd.read_csv(filepath_or_buffer=url)

# Preview the first rows to confirm the expected columns are present.
d.head(n=5)

	period	subba	subba-name	parent	parent-name	value	value-units
0	2018-07-01 08:00:00	PGAE	Pacific Gas and Electric	CISO	California Independent System Operator	12522.0	megawatthours
1	2018-07-01 09:00:00	PGAE	Pacific Gas and Electric	CISO	California Independent System Operator	11745.0	megawatthours
2	2018-07-01 10:00:00	PGAE	Pacific Gas and Electric	CISO	California Independent System Operator	11200.0	megawatthours
3	2018-07-01 11:00:00	PGAE	Pacific Gas and Electric	CISO	California Independent System Operator	10822.0	megawatthours
4	2018-07-01 12:00:00	PGAE	Pacific Gas and Electric	CISO	California Independent System Operator	10644.0	megawatthours

# Parse the timestamp column and keep only what the forecasting panel needs.
d["ds"] = pd.to_datetime(d["period"])
d = d[["ds", "subba", "value"]]

# Inclusive bounds of the hourly window used for modelling.
start = datetime.datetime(2022, 8, 1, 0, 0, 0)
end = datetime.datetime(2024, 8, 20, 23, 0, 0)

# Lags (in hours) averaged to impute a missing observation:
# previous hour, same hour yesterday, same hour one week earlier.
IMPUTE_LAGS = (1, 24, 24 * 7)


def _impute_from_lags(values, lags=IMPUTE_LAGS):
    """Return a float copy of *values* with NaNs replaced by a lag average.

    Gaps are filled in chronological order, so a value imputed earlier can
    feed a later gap. Only lags that exist (position >= lag) and are not NaN
    take part in the average; when all lags are usable the result equals
    ``(v[-1] + v[-24] + v[-168]) / 3``. A gap with no usable lag stays NaN
    instead of raising.

    Args:
        values: 1-D array-like of observations ordered by time, one per hour.
        lags: positional offsets (in rows) to look back.

    Returns:
        numpy.ndarray of floats with the same length as *values*.
    """
    filled = np.array(values, dtype=float)  # copy; never mutate the input
    for pos in np.flatnonzero(np.isnan(filled)):
        candidates = [filled[pos - lag] for lag in lags if pos >= lag]
        candidates = [v for v in candidates if not np.isnan(v)]
        if candidates:
            filled[pos] = sum(candidates) / len(candidates)
    return filled


subba = d["subba"].dropna().unique()

# Complete hourly grid, both ends inclusive; built once and reused per series.
hourly_index = pd.date_range(
    start=start, end=end, freq=datetime.timedelta(hours=1)
)

frames = []
for unique_id, name in enumerate(subba, start=1):
    # Left-join the observations onto the full grid so missing hours show up
    # as NaN rows rather than being silently absent.
    grid = pd.DataFrame(
        {"ds": hourly_index, "unique_id": unique_id, "subba": name}
    )
    series = (
        grid.merge(d, on=["ds", "subba"], how="left")
        .sort_values("ds")
        .reset_index(drop=True)  # row label == position, as the imputer assumes
    )
    series["value"] = _impute_from_lags(series["value"].to_numpy())
    frames.append(series.rename(columns={"value": "y"}))

# Single concat instead of growing the frame inside the loop.
ts = pd.concat(frames)[["ds", "unique_id", "y"]]
ts.head()

	ds	unique_id	y
0	2022-08-01 00:00:00	1	12375.0
1	2022-08-01 01:00:00	1	13233.0
2	2022-08-01 02:00:00	1	14115.0
3	2022-08-01 03:00:00	1	14813.0
4	2022-08-01 04:00:00	1	14737.0

# Visual sanity check of every series in the panel.
plot_series(df=ts, engine="plotly")

Training Models

# Number of trailing hourly observations held out for evaluation.
test_length = 72

# Last timestamp that still belongs to the training partition.
holdout_span = datetime.timedelta(hours=test_length)
train_end = end - holdout_span

# Rows up to and including train_end train the models; later rows are the
# hold-out set.
train = ts.loc[ts["ds"].le(train_end)]
test = ts.loc[ts["ds"].gt(train_end)]

plot_series(df=test, engine="plotly")

# Candidate models for the hourly series. 24 is the daily cycle length and
# 24 * 7 the weekly cycle length, both expressed in hours.
auto_arima = sfm.AutoARIMA()

s_naive = sfm.SeasonalNaive(season_length=24)

theta = sfm.DynamicOptimizedTheta(season_length=24)

# MSTL removes the daily and weekly seasonal components and hands the
# remaining trend to `trend_forecaster`.
mstl1 = sfm.MSTL(
    season_length=[24, 24 * 7],
    trend_forecaster=sfm.AutoARIMA(),
    alias="MSTL_ARIMA_trend",
)

# The trend handed to the forecaster is already de-seasonalised, so the
# non-seasonal Holt model is used here. With `sfm.HoltWinters()` (default
# season_length) the recorded run shows MSTL_HW_trend values identical to
# AutoARIMA's, which suggests the model failed and the StatsForecast
# fallback model's output was reported under this alias instead.
# The alias is kept so downstream column names do not change.
mstl2 = sfm.MSTL(
    season_length=[24, 24 * 7],
    trend_forecaster=sfm.Holt(),
    alias="MSTL_HW_trend",
)

stats_models = [auto_arima, s_naive, theta, mstl1, mstl2]



# Fit all candidate models on every series; n_jobs=-1 is passed through to
# StatsForecast, and fallback_model is the model StatsForecast falls back to
# when one of the candidates fails.
md = sf.StatsForecast(
    models=stats_models,
    freq="h",
    fallback_model=sfm.AutoARIMA(),
    n_jobs=-1,
)

# Forecast exactly as many steps as were held out, so the horizon cannot
# drift away from the size of the test partition.
forecast_stats = md.forecast(df=train, h=test_length, level=[95])

print(forecast_stats.head())

# Passing ids as the index to `plot` is deprecated (see the FutureWarning
# emitted by statsforecast); move them to a column when that is the layout
# returned. `forecast_stats` itself is left untouched.
forecast_for_plot = (
    forecast_stats.reset_index()
    if forecast_stats.index.name == "unique_id"
    else forecast_stats
)

md.plot(test, forecast_for_plot, engine="plotly", level=[95])

                           ds     AutoARIMA  AutoARIMA-lo-95  AutoARIMA-hi-95  \
unique_id                                                                       
1         2024-08-18 00:00:00  11573.708984     10918.641602     12228.776367   
1         2024-08-18 01:00:00  11831.327148     10476.957031     13185.697266   
1         2024-08-18 02:00:00  11963.833008      9930.119141     13997.546875   
1         2024-08-18 03:00:00  12063.378906      9422.343750     14704.414062   
1         2024-08-18 04:00:00  12076.913086      8876.990234     15276.834961   

           SeasonalNaive  SeasonalNaive-lo-95  SeasonalNaive-hi-95  \
unique_id                                                            
1                13469.0         10469.772461         16468.228516   
1                14589.0         11589.772461         17588.228516   
1                15648.0         12648.772461         18647.228516   
1                16037.0         13037.772461         19036.228516   
1                16037.0         13037.772461         19036.228516   

           DynamicOptimizedTheta  DynamicOptimizedTheta-lo-95  \
unique_id                                                       
1                   11639.806641                 11149.407227   
1                   12293.179688                 11412.287109   
1                   13137.013672                 11901.803711   
1                   13940.085938                 12580.620117   
1                   14211.835938                 12552.809570   

           DynamicOptimizedTheta-hi-95  MSTL_ARIMA_trend  \
unique_id                                                  
1                         12223.863281      11914.586914   
1                         13118.373047      12968.906250   
1                         14231.144531      13956.809570   
1                         15290.833984      14797.150391   
1                         15771.384766      15019.596680   

           MSTL_ARIMA_trend-lo-95  MSTL_ARIMA_trend-hi-95  MSTL_HW_trend  \
unique_id                                                                  
1                    11611.195312            12217.978516   11573.708984   
1                    12488.618164            13449.193359   11831.327148   
1                    13334.845703            14578.774414   11963.833008   
1                    14056.582031            15537.718750   12063.378906   
1                    14175.956055            15863.237305   12076.913086   

           MSTL_HW_trend-lo-95  MSTL_HW_trend-hi-95  
unique_id                                            
1                 10918.641602         12228.776367  
1                 10476.957031         13185.697266  
1                  9930.119141         13997.546875  
1                  9422.343750         14704.414062  
1                  8876.990234         15276.834961

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:492: FutureWarning:

In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.

/opt/forecasting-poc/lib/python3.10/site-packages/statsforecast/core.py:1447: FutureWarning:

Passing the ids as the index is deprecated. Please provide them as a column instead.