Metrics
For the precise mathematical definitions of the metrics, see the AutoGluon documentation.
Note: Currently, multivariate metrics are computed by first computing the univariate metric on each target column
and then averaging the results, similar to the following:
metric_value = np.mean(
    [metric.compute_metric(test_data[col], predictions[col]) for col in task.target_columns]
)
For some metrics like WAPE, this leads to results that are different from first concatenating all target columns into a
single array and computing the metric on it.
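To make the difference concrete, here is a small self-contained sketch (the numbers and the wape helper are purely illustrative and not part of fev):

import numpy as np

def wape(y_true, y_pred):
    # WAPE as a ratio of sums: sum(|error|) / sum(|actual|)
    return np.abs(y_true - y_pred).sum() / np.abs(y_true).sum()

# Toy multivariate data: two target columns with very different scales
col_a_true, col_a_pred = np.array([100.0, 200.0]), np.array([110.0, 190.0])
col_b_true, col_b_pred = np.array([1.0, 2.0]), np.array([2.0, 1.0])

# 1) Average of per-column WAPE values (the current multivariate behavior described above)
per_column = np.mean([wape(col_a_true, col_a_pred), wape(col_b_true, col_b_pred)])

# 2) WAPE of the concatenated columns
concatenated = wape(
    np.concatenate([col_a_true, col_b_true]),
    np.concatenate([col_a_pred, col_b_pred]),
)

print(per_column)    # ~0.367: the small-scale column dominates the average
print(concatenated)  # ~0.073: the large-scale column dominates the ratio of sums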
metrics
Classes
MAE
Bases: Metric
Mean absolute error.
Source code in src/fev/metrics.py
class MAE(Metric):
    """Mean absolute error."""

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        return np.nanmean(np.abs(y_test - y_pred))
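A minimal usage sketch for compute (assumptions: the classes are importable from fev.metrics as the source path suggests, the PREDICTIONS constant names a "predictions" column, and arguments that MAE does not read can be passed as placeholders):

import datasets
import fev.metrics

# Toy data: two series with a forecast horizon of 3
test_data = datasets.Dataset.from_dict({"target": [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]})
predictions = datasets.Dataset.from_dict({"predictions": [[1.5, 2.0, 2.5], [4.0, 5.5, 6.0]]})

mae = fev.metrics.MAE().compute(
    test_data=test_data,
    predictions=predictions,
    past_data=None,       # not read by MAE, passed as a placeholder
    seasonality=1,        # not read by MAE
    quantile_levels=[],   # not read by MAE
    target_column="target",
)
print(mae)  # 0.25: mean of the absolute errors [0.5, 0.0, 0.5, 0.0, 0.5, 0.0]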
MAPE
Bases: Metric
Mean absolute percentage error.
Source code in src/fev/metrics.py
class MAPE(Metric):
    """Mean absolute percentage error."""

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        ratio = np.abs(y_test - y_pred) / np.abs(y_test)
        return self._safemean(ratio)
MASE
Bases: Metric
Mean absolute scaled error.
Source code in src/fev/metrics.py
class MASE(Metric):
    """Mean absolute scaled error."""

    def __init__(self, epsilon: float = 0.0) -> None:
        self.epsilon = epsilon

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        seasonal_error = _abs_seasonal_error_per_item(
            past_data=past_data, seasonality=seasonality, target_column=target_column
        )
        seasonal_error = np.clip(seasonal_error, self.epsilon, None)
        return self._safemean(np.abs(y_test - y_pred) / seasonal_error[:, None])
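MASE (and RMSSE below) scale the forecast error by a per-item seasonal-naive error computed from past_data; the [:, None] indexing broadcasts that per-item scale across the forecast horizon. The helpers _abs_seasonal_error_per_item and _squared_seasonal_error_per_item are not reproduced on this page; the sketch below shows what they plausibly compute, based on the standard MASE/RMSSE definitions rather than the actual fev implementation (which operates on datasets.Dataset objects rather than plain lists):

import numpy as np

def abs_seasonal_error_per_item_sketch(past_targets: list[list[float]], seasonality: int) -> np.ndarray:
    """Mean absolute error of the seasonal-naive forecast on each item's history."""
    errors = []
    for y in past_targets:
        y = np.asarray(y, dtype=float)
        # Seasonal-naive residuals: y[t] - y[t - seasonality]
        errors.append(np.nanmean(np.abs(y[seasonality:] - y[:-seasonality])))
    return np.array(errors)  # shape [num_items]

def squared_seasonal_error_per_item_sketch(past_targets: list[list[float]], seasonality: int) -> np.ndarray:
    """Mean squared error of the seasonal-naive forecast on each item's history."""
    errors = []
    for y in past_targets:
        y = np.asarray(y, dtype=float)
        errors.append(np.nanmean((y[seasonality:] - y[:-seasonality]) ** 2))
    return np.array(errors)  # shape [num_items]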
MQL
Bases: Metric
Mean quantile loss.
Source code in src/fev/metrics.py
class MQL(Metric):
    """Mean quantile loss."""

    needs_quantiles: bool = True

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        if quantile_levels is None or len(quantile_levels) == 0:
            raise ValueError(f"{self.__class__.__name__} cannot be computed if quantile_levels is None")
        ql = _quantile_loss(
            test_data=test_data,
            predictions=predictions,
            quantile_levels=quantile_levels,
            target_column=target_column,
        )
        return np.nanmean(ql)
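MQL, SQL, and WQL all rely on the _quantile_loss helper, which is not reproduced on this page. The sketch below shows the standard pinball (quantile) loss it presumably computes, returning an array of shape [num_items, horizon, num_quantiles]; that shape is consistent with the np.nanmean(ql, axis=2) call in SQL below, but the exact scaling and data layout in fev may differ:

import numpy as np

def quantile_loss_sketch(y_true: np.ndarray, y_quantiles: np.ndarray, quantile_levels: list[float]) -> np.ndarray:
    """Pinball loss per item, time step, and quantile level.

    y_true:      [num_items, horizon]
    y_quantiles: [num_items, horizon, num_quantiles]
    returns:     [num_items, horizon, num_quantiles]
    """
    q = np.asarray(quantile_levels)[None, None, :]           # broadcast over items and time steps
    diff = y_true[:, :, None] - y_quantiles                  # positive when the forecast quantile is too low
    return np.where(diff >= 0, q * diff, (q - 1.0) * diff)   # (q - 1) * diff is positive when diff < 0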
MSE
Bases: Metric
Mean squared error.
Source code in src/fev/metrics.py
class MSE(Metric):
    """Mean squared error."""

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        return np.nanmean((y_test - y_pred) ** 2)
RMSE
Bases: Metric
Root mean squared error.
Source code in src/fev/metrics.py
class RMSE(Metric):
    """Root mean squared error."""

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        return np.sqrt(np.nanmean((y_test - y_pred) ** 2))
RMSLE
Bases: Metric
Root mean squared logarithmic error.
Source code in src/fev/metrics.py
class RMSLE(Metric):
    """Root mean squared logarithmic error."""

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        return np.sqrt(np.nanmean((np.log1p(y_test) - np.log1p(y_pred)) ** 2))
RMSSE
Bases: Metric
Root mean squared scaled error.
Source code in src/fev/metrics.py
class RMSSE(Metric):
    """Root mean squared scaled error."""

    def __init__(self, epsilon: float = 0.0) -> None:
        self.epsilon = epsilon

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        seasonal_error = _squared_seasonal_error_per_item(
            past_data, seasonality=seasonality, target_column=target_column
        )
        seasonal_error = np.clip(seasonal_error, self.epsilon, None)
        return np.sqrt(self._safemean((y_test - y_pred) ** 2 / seasonal_error[:, None]))
SMAPE
Bases: Metric
Symmetric mean absolute percentage error.
Source code in src/fev/metrics.py
class SMAPE(Metric):
    """Symmetric mean absolute percentage error."""

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        return self._safemean(2 * np.abs(y_test - y_pred) / (np.abs(y_test) + np.abs(y_pred)))
SQL
Bases: Metric
Scaled quantile loss.
Source code in src/fev/metrics.py
class SQL(Metric):
    """Scaled quantile loss."""

    needs_quantiles: bool = True

    def __init__(self, epsilon: float = 0.0) -> None:
        self.epsilon = epsilon

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        ql = _quantile_loss(
            test_data=test_data,
            predictions=predictions,
            quantile_levels=quantile_levels,
            target_column=target_column,
        )
        ql_per_time_step = np.nanmean(ql, axis=2)  # [num_items, horizon]
        seasonal_error = _abs_seasonal_error_per_item(
            past_data=past_data, seasonality=seasonality, target_column=target_column
        )
        seasonal_error = np.clip(seasonal_error, self.epsilon, None)
        return self._safemean(ql_per_time_step / seasonal_error[:, None])
WAPE
Bases: Metric
Weighted absolute percentage error.
Source code in src/fev/metrics.py
class WAPE(Metric):
    """Weighted absolute percentage error."""

    def __init__(self, epsilon: float = 0.0) -> None:
        self.epsilon = epsilon

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        y_test = np.array(test_data[target_column])
        y_pred = np.array(predictions[PREDICTIONS])
        return np.nanmean(np.abs(y_test - y_pred)) / max(self.epsilon, np.nanmean(np.abs(y_test)))
WQL
Bases: Metric
Weighted quantile loss.
Source code in src/fev/metrics.py
class WQL(Metric):
    """Weighted quantile loss."""

    needs_quantiles: bool = True

    def __init__(self, epsilon: float = 0.0) -> None:
        self.epsilon = epsilon

    def compute(
        self,
        *,
        test_data: datasets.Dataset,
        predictions: datasets.Dataset,
        past_data: datasets.Dataset,
        seasonality: int,
        quantile_levels: list[float],
        target_column: str = "target",
    ):
        ql = _quantile_loss(
            test_data=test_data,
            predictions=predictions,
            quantile_levels=quantile_levels,
            target_column=target_column,
        )
        return np.nanmean(ql) / max(self.epsilon, np.nanmean(np.abs(np.array(test_data[target_column]))))
Functions
get_metric(metric: MetricConfig) -> Metric
Get a metric class by name or configuration.
Source code in src/fev/metrics.py
def get_metric(metric: MetricConfig) -> Metric:
    """Get a metric class by name or configuration."""
    metric_name = metric if isinstance(metric, str) else metric["name"]
    try:
        metric_type = AVAILABLE_METRICS[metric_name.upper()]
    except KeyError:
        raise ValueError(
            f"Evaluation metric '{metric_name}' is not available. Available metrics: {sorted(AVAILABLE_METRICS)}"
        )
    if isinstance(metric, str):
        return metric_type()
    elif isinstance(metric, dict):
        return metric_type(**{k: v for k, v in metric.items() if k != "name"})
    else:
        raise ValueError(f"Invalid metric configuration: {metric}")
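A brief usage sketch (assuming AVAILABLE_METRICS maps upper-case names such as "WQL" and "MASE" to the classes above, as the lookup in the source suggests):

# By name: returns the metric with its default settings
wql = get_metric("WQL")

# By configuration dict: keys other than "name" are forwarded to the metric's constructor
mase = get_metric({"name": "MASE", "epsilon": 1e-3})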