# Training Models with Backtesting

## Load Libraries

```python
import pandas as pd
import numpy as np
import requests
import json
import os
import mlflow
import datetime
import plotly.graph_objects as go
import plotly.express as px
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from mlforecast.utils import PredictionIntervals
from window_ops.expanding import expanding_mean
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from utilsforecast.plotting import plot_series
from statistics import mean
```

## Load the Data

Reformat the data: parse the timestamps and sort the series by time.
::: {#cell-Loading data .cell execution_count=2}
```python
ts = pd.read_csv("data/data.csv")
ts["ds"] = pd.to_datetime(ts["ds"])
ts = ts.sort_values("ds")
ts.head()
```

|   | ds | y | unique_id |
|---|---|---|---|
| 0 | 2022-04-06 23:00:00 | 462894.0 | 1 |
| 1 | 2022-04-07 00:00:00 | 463663.0 | 1 |
| 2 | 2022-04-07 01:00:00 | 464916.0 | 1 |
| 3 | 2022-04-07 02:00:00 | 459376.0 | 1 |
| 4 | 2022-04-07 03:00:00 | 441989.0 | 1 |
:::
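The table follows the Nixtla schema expected by `mlforecast`: `ds` is the timestamp, `y` is the series value, and `unique_id` identifies the series (here a single hourly series).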
Subset for the last 25 months:
= ts["ds"].max()
end = end - datetime.timedelta(hours = 24 * 31 * 25)
start = ts[ts["ds"] >= start] ts
## Set Up the Backtesting

Define the forecasting models:

```python
ml_models = {
    "lightGBM": LGBMRegressor(),
    "xgboost": XGBRegressor(),
    "linear_regression": LinearRegression(),
    "lasso": Lasso(),
    "ridge": Ridge()
}
```

Create the `MLForecast` object with the models, the hourly frequency, lags 1 through 23, and calendar features:

```python
mlf = MLForecast(
    models=ml_models,
    freq="h",
    lags=list(range(1, 24)),
    date_features=["month", "day", "dayofweek", "week", "hour"]
)
```
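This feature mapping yields 28 regressors per model: 23 lagged values of the series plus the five calendar features, which matches the "number of used features: 28" reported in the LightGBM logs below.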
Define the backtesting and prediction-interval settings:

```python
h = 72                             # forecast horizon (hours)
step_size = 24                     # hours between consecutive cutoffs
partitions = 10                    # number of backtesting windows
n_windows = 5                      # windows used to calibrate the intervals
method = "conformal_distribution"
pi = PredictionIntervals(h=h, n_windows=n_windows, method=method)
levels = [95]                      # prediction interval levels (percent)
```
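Because the 72-hour horizon is longer than the 24-hour step size, consecutive test windows overlap; the ten cutoffs advance a day at a time, so the test windows jointly cover roughly the last 288 hours (12 days) of the series, and each window is scored independently.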
Run the backtesting with the `cross_validation` method:

```python
bkt_df = mlf.cross_validation(
    df=ts,
    h=h,
    step_size=step_size,
    n_windows=partitions,
    prediction_intervals=pi,
    level=levels
)
```
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5994
[LightGBM] [Info] Number of data points in the train set: 17330, number of used features: 28
[LightGBM] [Info] Start training from score 469246.384788
(similar LightGBM training logs repeat for each backtesting window; truncated)
/opt/forecasting-poc/lib/python3.10/site-packages/sklearn/linear_model/_coordinate_descent.py:697: ConvergenceWarning:
Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 9.005e+10, tolerance: 9.766e+09
(the same ConvergenceWarning is raised by the Lasso model on every backtesting window; truncated)
::: {#cell-Review the bkt .cell execution_count=7}
```python
bkt_df.head()
```

|   | unique_id | ds | cutoff | y | lightGBM | xgboost | linear_regression | lasso | ridge | lightGBM-lo-95 | lightGBM-hi-95 | xgboost-lo-95 | xgboost-hi-95 | linear_regression-lo-95 | linear_regression-hi-95 | lasso-lo-95 | lasso-hi-95 | ridge-lo-95 | ridge-hi-95 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2024-04-14 00:00:00 | 2024-04-13 23:00:00 | 421836.39 | 422716.385199 | 421999.12500 | 422352.303789 | 422303.387840 | 422352.318516 | 421022.905138 | 424409.865260 | 413150.163187 | 430848.086813 | 420572.672519 | 424131.935059 | 420193.904762 | 424412.870918 | 420572.698521 | 424131.938511 |
1 | 1 | 2024-04-14 01:00:00 | 2024-04-13 23:00:00 | 421300.67 | 422439.422659 | 422403.71875 | 420444.205975 | 420181.246550 | 420444.246554 | 417651.607031 | 427227.238288 | 410528.525875 | 434278.911625 | 413143.863192 | 427744.548759 | 412754.618246 | 427607.874853 | 413143.912384 | 427744.580725 |
2 | 1 | 2024-04-14 02:00:00 | 2024-04-13 23:00:00 | 415940.13 | 417209.926483 | 418226.87500 | 413555.865386 | 412950.784579 | 413555.936496 | 407277.565240 | 427142.287725 | 403594.666500 | 432859.083500 | 401713.539463 | 425398.191308 | 400649.670396 | 425251.898762 | 401713.623882 | 425398.249109 |
3 | 1 | 2024-04-14 03:00:00 | 2024-04-13 23:00:00 | 403788.77 | 405820.047603 | 405515.62500 | 401358.336479 | 400883.378063 | 401358.437580 | 392670.364652 | 418969.730553 | 388216.352469 | 422814.897531 | 388385.173887 | 414331.499070 | 387325.023471 | 414441.732654 | 388385.318921 | 414331.556240 |
4 | 1 | 2024-04-14 04:00:00 | 2024-04-13 23:00:00 | 388042.74 | 386520.594124 | 388142.90625 | 386263.522024 | 385896.776166 | 386263.652143 | 372017.701036 | 401023.487212 | 370163.194469 | 406122.618031 | 370245.276224 | 402281.767824 | 369233.631968 | 402559.920363 | 370245.519124 | 402281.785161 |
:::
Reshape the backtesting output into long format, with one row per model, window, and timestamp:

::: {#cell-Reformat the bkt obj .cell execution_count=8}
```python
model_label = list(ml_models.keys())
model_name = [type(s).__name__ for s in ml_models.values()]
lower = [s + "-lo-95" for s in model_label]
upper = [s + "-hi-95" for s in model_label]
models_mapping = pd.DataFrame({"model_label": model_label,
                               "model_name": model_name})

d1 = pd.melt(bkt_df, id_vars=["unique_id", "ds", "cutoff"],
             value_vars=model_label, var_name="model_label", value_name="forecast")
d2 = pd.melt(bkt_df, id_vars=["unique_id", "ds", "cutoff"],
             value_vars=lower, var_name="model_label", value_name="lower")
d2["model_label"] = d2["model_label"].str.replace("-lo-95", "")
d3 = pd.melt(bkt_df, id_vars=["unique_id", "ds", "cutoff"],
             value_vars=upper, var_name="model_label", value_name="upper")
d3["model_label"] = d3["model_label"].str.replace("-hi-95", "")

bkt_long = (
    d1
    .merge(right=d2, how="left", on=["unique_id", "ds", "cutoff", "model_label"])
    .merge(right=d3, how="left", on=["unique_id", "ds", "cutoff", "model_label"])
    .merge(right=models_mapping, how="left", on=["model_label"])
    .merge(right=ts, how="left", on=["unique_id", "ds"])
)
bkt_long.head()
```
|   | unique_id | ds | cutoff | model_label | forecast | lower | upper | model_name | y |
|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2024-04-14 00:00:00 | 2024-04-13 23:00:00 | lightGBM | 422716.385199 | 421022.905138 | 424409.865260 | LGBMRegressor | 421836.39 |
1 | 1 | 2024-04-14 01:00:00 | 2024-04-13 23:00:00 | lightGBM | 422439.422659 | 417651.607031 | 427227.238288 | LGBMRegressor | 421300.67 |
2 | 1 | 2024-04-14 02:00:00 | 2024-04-13 23:00:00 | lightGBM | 417209.926483 | 407277.565240 | 427142.287725 | LGBMRegressor | 415940.13 |
3 | 1 | 2024-04-14 03:00:00 | 2024-04-13 23:00:00 | lightGBM | 405820.047603 | 392670.364652 | 418969.730553 | LGBMRegressor | 403788.77 |
4 | 1 | 2024-04-14 04:00:00 | 2024-04-13 23:00:00 | lightGBM | 386520.594124 | 372017.701036 | 401023.487212 | LGBMRegressor | 388042.74 |
:::
Score each model on each backtesting window with MAPE, RMSE, and prediction interval coverage:

::: {#cell-Score the models .cell execution_count=9}
```python
def mape(y, yhat):
    # Mean absolute percentage error
    return mean(abs(y - yhat) / y)

def rmse(y, yhat):
    # Root mean squared error
    return (mean((y - yhat) ** 2)) ** 0.5

def coverage(y, lower, upper):
    # Share of actual values that fall inside the prediction interval
    return sum((y <= upper) & (y >= lower)) / len(y)

def score(df):
    mape_score = mape(y=df["y"], yhat=df["forecast"])
    rmse_score = rmse(y=df["y"], yhat=df["forecast"])
    coverage_score = coverage(y=df["y"], lower=df["lower"], upper=df["upper"])
    cols = ["mape", "rmse", "coverage"]
    return pd.Series([mape_score, rmse_score, coverage_score], index=cols)

score_df = (
    bkt_long
    .groupby(["unique_id", "model_label", "model_name", "cutoff"])
    .apply(score)
    .reset_index()
)
score_df.head()
```
/tmp/ipykernel_26232/1729970652.py:25: DeprecationWarning:
DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
|   | unique_id | model_label | model_name | cutoff | mape | rmse | coverage |
|---|---|---|---|---|---|---|---|
0 | 1 | lasso | Lasso | 2024-04-13 23:00:00 | 0.050913 | 29315.983715 | 0.486111 |
1 | 1 | lasso | Lasso | 2024-04-14 23:00:00 | 0.037723 | 19034.393950 | 0.763889 |
2 | 1 | lasso | Lasso | 2024-04-15 23:00:00 | 0.017668 | 9768.991810 | 0.986111 |
3 | 1 | lasso | Lasso | 2024-04-16 23:00:00 | 0.014224 | 7839.292592 | 1.000000 |
4 | 1 | lasso | Lasso | 2024-04-17 23:00:00 | 0.023679 | 13550.628885 | 0.847222 |
:::
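As a quick sanity check (not part of the original notebook), the metric helpers above can be exercised on toy numbers:

```python
import pandas as pd

y = pd.Series([100.0, 200.0, 300.0])
yhat = pd.Series([110.0, 190.0, 330.0])
lower = pd.Series([90.0, 180.0, 250.0])
upper = pd.Series([120.0, 210.0, 290.0])

print(mape(y, yhat))              # (0.10 + 0.05 + 0.10) / 3 ≈ 0.0833
print(rmse(y, yhat))              # sqrt((100 + 100 + 900) / 3) ≈ 19.15
print(coverage(y, lower, upper))  # 2 of 3 actuals inside the interval ≈ 0.667
```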
Average the window scores per model to create the leaderboard:

::: {#cell-Calculate the leaderboard .cell execution_count=10}
```python
leaderboard = (
    score_df
    .groupby(["unique_id", "model_label", "model_name"])
    .agg({"mape": "mean", "rmse": "mean", "coverage": "mean"})
    .reset_index()
)

leaderboard.sort_values(by=["mape"])
```

|   | unique_id | model_label | model_name | mape | rmse | coverage |
|---|---|---|---|---|---|---|
1 | 1 | lightGBM | LGBMRegressor | 0.022780 | 12462.716830 | 0.750000 |
4 | 1 | xgboost | XGBRegressor | 0.022854 | 12967.317366 | 0.781944 |
0 | 1 | lasso | Lasso | 0.031274 | 16877.080942 | 0.804167 |
2 | 1 | linear_regression | LinearRegression | 0.031534 | 17016.431327 | 0.806944 |
3 | 1 | ridge | Ridge | 0.031534 | 17016.462986 | 0.806944 |
:::
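The leaderboard can also be queried programmatically. A minimal sketch (not in the original notebook) that picks the winner by average MAPE:

```python
# Pick the model with the lowest average MAPE across all windows
best = leaderboard.sort_values(by=["mape"]).iloc[0]
print(f"Best model: {best['model_label']} "
      f"(MAPE = {best['mape']:.4f}, RMSE = {best['rmse']:.1f}, "
      f"coverage = {best['coverage']:.2f})")
```

On the averages above this selects lightGBM, though xgboost trails by less than a tenth of a percentage point of MAPE.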
::: {#cell-Plot the error rate .cell execution_count=11}
```python
fig = px.box(score_df, x="model_label", y="rmse", color="model_label")
fig.update_traces(boxpoints="all", jitter=0.3, pointpos=-1.8, showlegend=False)
fig.update_layout(
    title="Error Distribution",
    xaxis_title="Model",
    yaxis_title="RMSE",
    font=dict(family="Arial", size=14, color="black")
)
fig.show()
```
:::
## Logging the Results with MLflow
Map each cutoff to a partition number so the individual backtesting windows can be tagged in MLflow, then attach it to the scores:

::: {#cell-Add partitions .cell execution_count=12}
```python
cutoff = bkt_long["cutoff"].unique()
partitions_mapping = pd.DataFrame({"cutoff": cutoff,
                                   "partition": range(1, len(cutoff) + 1)})
partitions_mapping

score_df = score_df.merge(partitions_mapping, how="left", on=["cutoff"])
score_df
```
|   | unique_id | model_label | model_name | cutoff | mape | rmse | coverage | partition |
|---|---|---|---|---|---|---|---|---|
0 | 1 | lasso | Lasso | 2024-04-13 23:00:00 | 0.050913 | 29315.983715 | 0.486111 | 1 |
1 | 1 | lasso | Lasso | 2024-04-14 23:00:00 | 0.037723 | 19034.393950 | 0.763889 | 2 |
2 | 1 | lasso | Lasso | 2024-04-15 23:00:00 | 0.017668 | 9768.991810 | 0.986111 | 3 |
3 | 1 | lasso | Lasso | 2024-04-16 23:00:00 | 0.014224 | 7839.292592 | 1.000000 | 4 |
4 | 1 | lasso | Lasso | 2024-04-17 23:00:00 | 0.023679 | 13550.628885 | 0.847222 | 5 |
5 | 1 | lasso | Lasso | 2024-04-18 23:00:00 | 0.046461 | 25860.584388 | 0.611111 | 6 |
6 | 1 | lasso | Lasso | 2024-04-19 23:00:00 | 0.036826 | 18266.907443 | 0.819444 | 7 |
7 | 1 | lasso | Lasso | 2024-04-20 23:00:00 | 0.025101 | 13916.795765 | 0.916667 | 8 |
8 | 1 | lasso | Lasso | 2024-04-21 23:00:00 | 0.027231 | 15115.674787 | 0.722222 | 9 |
9 | 1 | lasso | Lasso | 2024-04-22 23:00:00 | 0.032912 | 16101.556090 | 0.888889 | 10 |
10 | 1 | lightGBM | LGBMRegressor | 2024-04-13 23:00:00 | 0.029535 | 17218.066669 | 0.652778 | 1 |
11 | 1 | lightGBM | LGBMRegressor | 2024-04-14 23:00:00 | 0.019967 | 10495.262534 | 0.847222 | 2 |
12 | 1 | lightGBM | LGBMRegressor | 2024-04-15 23:00:00 | 0.012077 | 7845.148808 | 0.958333 | 3 |
13 | 1 | lightGBM | LGBMRegressor | 2024-04-16 23:00:00 | 0.017118 | 9754.698420 | 1.000000 | 4 |
14 | 1 | lightGBM | LGBMRegressor | 2024-04-17 23:00:00 | 0.015552 | 8585.153709 | 0.750000 | 5 |
15 | 1 | lightGBM | LGBMRegressor | 2024-04-18 23:00:00 | 0.035272 | 19640.882716 | 0.569444 | 6 |
16 | 1 | lightGBM | LGBMRegressor | 2024-04-19 23:00:00 | 0.028595 | 15191.343242 | 0.750000 | 7 |
17 | 1 | lightGBM | LGBMRegressor | 2024-04-20 23:00:00 | 0.029471 | 15255.697823 | 0.500000 | 8 |
18 | 1 | lightGBM | LGBMRegressor | 2024-04-21 23:00:00 | 0.025501 | 12852.596611 | 0.666667 | 9 |
19 | 1 | lightGBM | LGBMRegressor | 2024-04-22 23:00:00 | 0.014709 | 7788.317766 | 0.805556 | 10 |
20 | 1 | linear_regression | LinearRegression | 2024-04-13 23:00:00 | 0.050049 | 29021.438982 | 0.486111 | 1 |
21 | 1 | linear_regression | LinearRegression | 2024-04-14 23:00:00 | 0.037321 | 18822.531200 | 0.777778 | 2 |
22 | 1 | linear_regression | LinearRegression | 2024-04-15 23:00:00 | 0.018896 | 10240.836002 | 0.986111 | 3 |
23 | 1 | linear_regression | LinearRegression | 2024-04-16 23:00:00 | 0.013719 | 7687.633764 | 1.000000 | 4 |
24 | 1 | linear_regression | LinearRegression | 2024-04-17 23:00:00 | 0.023580 | 13768.946301 | 0.861111 | 5 |
25 | 1 | linear_regression | LinearRegression | 2024-04-18 23:00:00 | 0.047667 | 26500.453913 | 0.583333 | 6 |
26 | 1 | linear_regression | LinearRegression | 2024-04-19 23:00:00 | 0.038432 | 18577.561146 | 0.819444 | 7 |
27 | 1 | linear_regression | LinearRegression | 2024-04-20 23:00:00 | 0.025731 | 14224.261457 | 0.902778 | 8 |
28 | 1 | linear_regression | LinearRegression | 2024-04-21 23:00:00 | 0.026638 | 14974.293718 | 0.763889 | 9 |
29 | 1 | linear_regression | LinearRegression | 2024-04-22 23:00:00 | 0.033311 | 16346.356785 | 0.888889 | 10 |
30 | 1 | ridge | Ridge | 2024-04-13 23:00:00 | 0.050049 | 29021.446288 | 0.486111 | 1 |
31 | 1 | ridge | Ridge | 2024-04-14 23:00:00 | 0.037321 | 18822.673480 | 0.777778 | 2 |
32 | 1 | ridge | Ridge | 2024-04-15 23:00:00 | 0.018896 | 10240.726491 | 0.986111 | 3 |
33 | 1 | ridge | Ridge | 2024-04-16 23:00:00 | 0.013719 | 7687.653094 | 1.000000 | 4 |
34 | 1 | ridge | Ridge | 2024-04-17 23:00:00 | 0.023580 | 13769.038991 | 0.861111 | 5 |
35 | 1 | ridge | Ridge | 2024-04-18 23:00:00 | 0.047667 | 26500.671411 | 0.583333 | 6 |
36 | 1 | ridge | Ridge | 2024-04-19 23:00:00 | 0.038432 | 18577.686928 | 0.819444 | 7 |
37 | 1 | ridge | Ridge | 2024-04-20 23:00:00 | 0.025731 | 14224.254051 | 0.902778 | 8 |
38 | 1 | ridge | Ridge | 2024-04-21 23:00:00 | 0.026637 | 14974.240917 | 0.763889 | 9 |
39 | 1 | ridge | Ridge | 2024-04-22 23:00:00 | 0.033310 | 16346.238205 | 0.888889 | 10 |
40 | 1 | xgboost | XGBRegressor | 2024-04-13 23:00:00 | 0.025656 | 15843.452463 | 0.708333 | 1 |
41 | 1 | xgboost | XGBRegressor | 2024-04-14 23:00:00 | 0.022303 | 12020.662135 | 0.861111 | 2 |
42 | 1 | xgboost | XGBRegressor | 2024-04-15 23:00:00 | 0.012085 | 8670.466746 | 0.958333 | 3 |
43 | 1 | xgboost | XGBRegressor | 2024-04-16 23:00:00 | 0.015215 | 9863.543049 | 0.958333 | 4 |
44 | 1 | xgboost | XGBRegressor | 2024-04-17 23:00:00 | 0.018744 | 9681.493492 | 0.722222 | 5 |
45 | 1 | xgboost | XGBRegressor | 2024-04-18 23:00:00 | 0.037328 | 22206.794887 | 0.666667 | 6 |
46 | 1 | xgboost | XGBRegressor | 2024-04-19 23:00:00 | 0.033654 | 18977.013015 | 0.791667 | 7 |
47 | 1 | xgboost | XGBRegressor | 2024-04-20 23:00:00 | 0.023604 | 12231.028438 | 0.611111 | 8 |
48 | 1 | xgboost | XGBRegressor | 2024-04-21 23:00:00 | 0.023305 | 11024.236889 | 0.708333 | 9 |
49 | 1 | xgboost | XGBRegressor | 2024-04-22 23:00:00 | 0.016647 | 9154.482548 | 0.833333 | 10 |
:::
Set up the MLflow experiment, creating it if it does not already exist:

```python
experiment_name = "ml_forecast"
mlflow_path = "file:///mlruns"

tags = {
    "h": h,
    "step_size": step_size,
    "partitions": partitions,
    "intervals_type": "ConformalIntervals",
    "intervals_h": h,
    "intervals_n_windows": n_windows,
    "intervals_method": "conformal_distribution",
    "levels": levels
}

try:
    # create_experiment raises an exception if the experiment already exists
    mlflow.create_experiment(name=experiment_name,
                             artifact_location=mlflow_path,
                             tags=tags)
    meta = mlflow.get_experiment_by_name(experiment_name)
    print(f"Set a new experiment {experiment_name}")
    print("Pulling the metadata")
except Exception:
    print(f"Experiment {experiment_name} exists, pulling the metadata")
    meta = mlflow.get_experiment_by_name(experiment_name)
```
Experiment ml_forecast exists, pulling the metadata
Log each model's scores as a separate MLflow run, one per model and backtesting partition:

```python
run_time = datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S")

for index, row in score_df.iterrows():
    run_name = row["model_label"] + "-" + run_time
    with mlflow.start_run(experiment_id=meta.experiment_id,
                          run_name=run_name,
                          tags={"type": "backtesting",
                                "partition": row["partition"],
                                "unique_id": row["unique_id"],
                                "model_label": row["model_label"],
                                "model_name": row["model_name"],
                                "run_name": run_name}) as run:
        # Log the model hyperparameters alongside the feature settings
        model_params = ml_models[row["model_label"]].get_params()
        model_params["model_name"] = row["model_name"]
        model_params["model_label"] = row["model_label"]
        model_params["partition"] = row["partition"]
        model_params["lags"] = list(range(1, 24))
        model_params["date_features"] = ["month", "day", "dayofweek", "week", "hour"]

        mlflow.log_params(model_params)
        mlflow.log_metric("mape", row["mape"])
        mlflow.log_metric("rmse", row["rmse"])
        mlflow.log_metric("coverage", row["coverage"])
```