Skip to content

Utility methods

This page contains the utility methods for converting input data and predictions.

Functions

convert_input_data(window: EvaluationWindow, adapter: Literal['pandas', 'datasets', 'gluonts', 'nixtla', 'darts', 'autogluon'] = 'pandas', *, as_univariate: bool = False, univariate_target_column: str = 'target', **kwargs) -> Any

Convert the output of task.get_input_data() to a format compatible with popular forecasting frameworks.

Parameters:

Name Type Description Default
window EvaluationWindow

Evaluation window for which input data must be converted.

required
adapter ('pandas', 'datasets', 'gluonts', 'nixtla', 'darts', 'autogluon')

Format to which the dataset must be converted.

"pandas"
as_univariate bool

If True, a separate instance is created from each target column before the data is passed to the adapter. Covariate columns are not affected; only the targets are modified.

Setting as_univariate=True makes it easy to evaluate a univariate model on a multivariate task.

Use fev.combine_univariate_predictions_to_multivariate to combine univariate predictions back to the multivariate format.

False
univariate_target_column str

Target column name used when as_univariate=True. Only used by the "datasets" adapter.

'target'
**kwargs

Keyword arguments passed to EvaluationWindow.get_input_data().

{}
Source code in src/fev/adapters.py
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
def convert_input_data(
    window: EvaluationWindow,
    adapter: Literal["pandas", "datasets", "gluonts", "nixtla", "darts", "autogluon"] = "pandas",
    *,
    as_univariate: bool = False,
    univariate_target_column: str = "target",
    **kwargs,
) -> Any:
    """Convert the input data of an evaluation window into the format expected by a forecasting framework.

    Parameters
    ----------
    window
        Evaluation window whose input data should be converted.
    adapter : {"pandas", "datasets", "gluonts", "nixtla", "darts", "autogluon"}
        Name of the output format. Must be one of the keys of `DATASET_ADAPTERS`.
    as_univariate
        If `True`, a separate instance is created from each target column before the data is passed to the adapter.
        Covariate columns are not affected; only the targets are modified.

        Setting `as_univariate=True` makes it easy to evaluate a univariate model on a multivariate task.

        Use [`fev.combine_univariate_predictions_to_multivariate`][fev.combine_univariate_predictions_to_multivariate] to combine univariate predictions back to the
        multivariate format.
    univariate_target_column
        Name given to the single target column when `as_univariate=True`.
    **kwargs
        Keyword arguments passed to [`EvaluationWindow.get_input_data()`][fev.EvaluationWindow.get_input_data].

    Returns
    -------
    Any
        Whatever the `convert_input_data` method of the selected adapter returns.

    Raises
    ------
    ValueError
        If `as_univariate=True` and the past data already contains a column named `univariate_target_column`
        that is not the only target column of the window.
    KeyError
        If `adapter` is not a key of `DATASET_ADAPTERS`.
    """
    past, future = window.get_input_data(**kwargs)
    id_column = window.id_column

    if not as_univariate:
        target_columns = window.target_columns
    else:
        name_taken = univariate_target_column in past.column_names
        # An existing column with the requested name is acceptable only when it is the sole target already
        if name_taken and window.target_columns != [univariate_target_column]:
            raise ValueError(
                f"Column '{univariate_target_column}' already exists. Choose a different univariate_target_column."
            )
        source_targets = window.target_columns
        num_targets = len(source_targets)
        target_columns = [univariate_target_column]

        if num_targets > 1:
            # Multivariate case: every item is split into one instance per target column
            past = utils.generate_univariate_targets_from_multivariate(
                past,
                id_column=id_column,
                new_target_column=univariate_target_column,
                generate_univariate_targets_from=source_targets,
            )
            # `future` holds no target columns, so the helper above cannot be applied to it. Instead each row is
            # repeated once per target (row order 0, 0, ..., 1, 1, ..., N - 1, N - 1, ...) and the IDs are copied
            # over from the already expanded `past`.
            column_order = future.column_names
            repeated_rows = [row for row in range(len(future)) for _ in range(num_targets)]
            future = future.select(repeated_rows)
            without_ids = future.remove_columns(id_column)
            future = without_ids.add_column(name=id_column, column=past[id_column])
            # add_column appends at the end, so restore the original column layout
            future = future.select_columns(column_order)
        elif not name_taken:
            # Single target: only a rename is required, and only if the name differs
            past = past.rename_column(source_targets[0], univariate_target_column)

    if adapter not in DATASET_ADAPTERS:
        raise KeyError(f"`adapter` must be one of {list(DATASET_ADAPTERS)}")
    converter = DATASET_ADAPTERS[adapter]()

    return converter.convert_input_data(
        past=past,
        future=future,
        target_columns=target_columns,
        id_column=id_column,
        timestamp_column=window.timestamp_column,
        static_columns=window.static_columns,
    )

combine_univariate_predictions_to_multivariate(predictions: datasets.Dataset | list[dict] | datasets.DatasetDict | dict[str, list[dict]], target_columns: list[str]) -> datasets.DatasetDict

Combine univariate predictions back into multivariate format.

Assumes predictions are ordered by cycling through target columns. For example: if target_columns = ["X", "Y"], predictions should be ordered as [item1_X, item1_Y, item2_X, item2_Y, ...].

Parameters:

Name Type Description Default
predictions Dataset | list[dict] | DatasetDict | dict[str, list[dict]]

Univariate predictions for a single evaluation window.

For the list of accepted types, see Task.clean_and_validate_predictions.

required
target_columns list[str]

List of target columns in the original Task / EvaluationWindow.

required

Returns:

Type Description
DatasetDict

Predictions for the evaluation window converted to multivariate format.

Source code in src/fev/utils.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def combine_univariate_predictions_to_multivariate(
    predictions: datasets.Dataset | list[dict] | datasets.DatasetDict | dict[str, list[dict]],
    target_columns: list[str],
) -> datasets.DatasetDict:
    """Combine univariate predictions back into multivariate format.

    Assumes predictions are ordered by cycling through target columns. For example: if `target_columns = ["X", "Y"]`,
    predictions should be ordered as `[item1_X, item1_Y, item2_X, item2_Y, ...]`.

    Parameters
    ----------
    predictions
        Univariate predictions for a single evaluation window. A dict-like input must contain exactly one
        key/value pair; its value is used as the predictions.

        For the list of accepted types, see [`Task.clean_and_validate_predictions`][fev.Task.clean_and_validate_predictions].
    target_columns
        List of target columns in the original `Task` / `EvaluationWindow`. Must not be empty.

    Returns
    -------
    datasets.DatasetDict
        Predictions for the evaluation window converted to multivariate format, with one entry per target column.

    Raises
    ------
    ValueError
        If `target_columns` is empty, or if a list of predictions cannot be converted with
        `datasets.Dataset.from_list`.
    AssertionError
        If a dict-like input does not contain exactly one entry, if the predictions are not a `datasets.Dataset`
        after conversion, or if their number is not divisible by the number of target columns.
    """
    num_targets = len(target_columns)
    if num_targets == 0:
        # Without this guard the divisibility check below fails with an opaque ZeroDivisionError
        raise ValueError("`target_columns` must contain at least one column name")

    if isinstance(predictions, (dict, datasets.DatasetDict)):
        assert len(predictions) == 1, "Univariate predictions must contain a single key/value"
        predictions = next(iter(predictions.values()))
    if isinstance(predictions, list):
        try:
            predictions = datasets.Dataset.from_list(predictions)
        except Exception as err:
            # Chain the original error so the root cause of the failed conversion stays visible
            raise ValueError(
                "`datasets.Dataset.from_list(predictions)` failed. Please convert predictions to `datasets.Dataset` format."
            ) from err
    assert isinstance(predictions, datasets.Dataset), "predictions must be a datasets.Dataset object"

    num_predictions = len(predictions)
    assert num_predictions % num_targets == 0, (
        "Number of predictions must be divisible by the number of target columns"
    )
    # Predictions cycle through the targets, so column number `offset` owns every `num_targets`-th row
    return datasets.DatasetDict(
        {
            col: predictions.select(range(offset, num_predictions, num_targets))
            for offset, col in enumerate(target_columns)
        }
    )