Skip to content

Utility methods

This page contains the utility methods for converting input data and predictions.

Functions

convert_input_data(window: EvaluationWindow, adapter: Literal['pandas', 'datasets', 'gluonts', 'nixtla', 'darts', 'autogluon'] = 'pandas', *, as_univariate: bool = False, univariate_target_column: str = 'target', **kwargs) -> Any

Convert the output of task.get_input_data() to a format compatible with popular forecasting frameworks.

Parameters:

Name Type Description Default
window EvaluationWindow

Evaluation window for which input data must be converted.

required
adapter ('pandas', 'datasets', 'gluonts', 'nixtla', 'darts', 'autogluon')

Format to which the dataset must be converted.

"pandas"
as_univariate bool

If True, a separate instance is created from each target column before the data is passed to the adapter. Covariate columns are not affected; only the targets are modified.

Setting as_univariate=True makes it easy to evaluate a univariate model on a multivariate task.

Use fev.combine_univariate_predictions_to_multivariate to combine univariate predictions back to the multivariate format.

False
univariate_target_column str

Target column name used when as_univariate=True. Only used by the "datasets" adapter.

'target'
**kwargs

Keyword arguments passed to EvaluationWindow.get_input_data().

{}
Source code in src/fev/adapters.py
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
def convert_input_data(
    window: EvaluationWindow,
    adapter: Literal["pandas", "datasets", "gluonts", "nixtla", "darts", "autogluon"] = "pandas",
    *,
    as_univariate: bool = False,
    univariate_target_column: str = "target",
    **kwargs,
) -> Any:
    """Load the input data of an evaluation window and hand it to a framework-specific adapter.

    The past/future data returned by `window.get_input_data()` is optionally reshaped into univariate form and
    then converted by the adapter registered under `adapter`.

    Parameters
    ----------
    window
        Evaluation window whose input data is converted.
    adapter : {"pandas", "datasets", "gluonts", "nixtla", "darts", "autogluon"}
        Name of the target format. Must be a key of `DATASET_ADAPTERS`.
    as_univariate
        If `True`, a separate instance is created from each target column before the data is passed to the
        adapter. Covariate columns are not affected; only the targets are modified.

        Setting `as_univariate=True` makes it easy to evaluate a univariate model on a multivariate task.

        Use [`fev.combine_univariate_predictions_to_multivariate`][fev.combine_univariate_predictions_to_multivariate]
        to combine univariate predictions back to the multivariate format.
    univariate_target_column
        Target column name used when `as_univariate=True`. Only used by the `"datasets"` adapter.
    **kwargs
        Keyword arguments passed to [`EvaluationWindow.get_input_data()`][fev.EvaluationWindow.get_input_data].

    Returns
    -------
    Any
        Whatever the selected adapter's `convert_input_data` method returns.

    Raises
    ------
    ValueError
        If `as_univariate=True` and a column named `univariate_target_column` already exists in the past data
        without being the window's only target column.
    KeyError
        If `adapter` is not a key of `DATASET_ADAPTERS`.
    """
    past, future = window.get_input_data(**kwargs)

    if not as_univariate:
        target_columns = window.target_columns
    else:
        # The new target name may only collide with an existing column if that column is the sole target
        name_taken = univariate_target_column in past.column_names
        if name_taken and window.target_columns != [univariate_target_column]:
            raise ValueError(
                f"Column '{univariate_target_column}' already exists. Choose a different univariate_target_column."
            )
        target_columns = [univariate_target_column]
        num_targets = len(window.target_columns)
        if num_targets > 1:
            # Multiple targets: every item is split into one instance per target column
            past = utils.generate_univariate_targets_from_multivariate(
                past,
                id_column=window.id_column,
                new_target_column=univariate_target_column,
                generate_univariate_targets_from=window.target_columns,
            )
            # `future` holds no target columns, so the helper above cannot be applied to it. Instead each row is
            # repeated once per target (row order 0, 0, ..., 1, 1, ..., N - 1, N - 1, ...) and the ID column is
            # replaced with the one from the reshaped `past`.
            column_order = future.column_names
            repeated_rows = [row for row in range(len(future)) for _ in range(num_targets)]
            future = future.select(repeated_rows)
            future = future.remove_columns(window.id_column)
            future = future.add_column(name=window.id_column, column=past[window.id_column])
            # `add_column` appended the ID column at the end; restore the original column layout
            future = future.select_columns(column_order)
        elif not name_taken:
            # Single target: only a rename to `univariate_target_column` is needed
            past = past.rename_column(window.target_columns[0], univariate_target_column)

    if adapter not in DATASET_ADAPTERS:
        raise KeyError(f"`adapter` must be one of {list(DATASET_ADAPTERS)}")
    converter = DATASET_ADAPTERS[adapter]()

    return converter.convert_input_data(
        past=past,
        future=future,
        target_columns=target_columns,
        id_column=window.id_column,
        timestamp_column=window.timestamp_column,
        static_columns=window.static_columns,
    )

combine_univariate_predictions_to_multivariate(predictions: datasets.Dataset | list[dict] | datasets.DatasetDict | dict[str, list[dict]], target_columns: list[str]) -> datasets.DatasetDict

Combine univariate predictions back into multivariate format.

Assumes predictions are ordered by cycling through target columns. For example: if target_columns = ["X", "Y"], predictions should be ordered as [item1_X, item1_Y, item2_X, item2_Y, ...].

Parameters:

Name Type Description Default
predictions Dataset | list[dict] | DatasetDict | dict[str, list[dict]]

Univariate predictions for a single evaluation window.

For the list of accepted types, see Task.clean_and_validate_predictions.

required
target_columns list[str]

List of target columns in the original Task / EvaluationWindow.

required

Returns:

Type Description
DatasetDict

Predictions for the evaluation window converted to multivariate format.

Source code in src/fev/utils.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
def combine_univariate_predictions_to_multivariate(
    predictions: datasets.Dataset | list[dict] | datasets.DatasetDict | dict[str, list[dict]],
    target_columns: list[str],
) -> datasets.DatasetDict:
    """Combine univariate predictions back into multivariate format.

    Assumes predictions are ordered by cycling through target columns. For example: if `target_columns = ["X", "Y"]`,
    predictions should be ordered as `[item1_X, item1_Y, item2_X, item2_Y, ...]`.

    Parameters
    ----------
    predictions
        Univariate predictions for a single evaluation window. A dict-like input must contain exactly one
        key/value pair; its single value is used as the predictions.

        For the list of accepted types, see [`Task.clean_and_validate_predictions`][fev.Task.clean_and_validate_predictions].
    target_columns
        List of target columns in the original `Task` / `EvaluationWindow`. Must not be empty.

    Returns
    -------
    datasets.DatasetDict
        Predictions for the evaluation window converted to multivariate format, with one entry per target column.

    Raises
    ------
    ValueError
        If a list of predictions cannot be converted with `datasets.Dataset.from_list`, or if `target_columns`
        is empty.
    AssertionError
        If a dict-like input does not contain exactly one key/value pair, if the predictions are not a
        `datasets.Dataset` after conversion, or if the number of predictions is not divisible by the number of
        target columns.
    """
    # Validation is written as explicit raises instead of `assert` statements so that it is not stripped when
    # Python runs with `-O`. The `AssertionError` type and messages are kept for backward compatibility.
    if isinstance(predictions, (dict, datasets.DatasetDict)):
        if len(predictions) != 1:
            raise AssertionError("Univariate predictions must contain a single key/value")
        predictions = next(iter(predictions.values()))
    if isinstance(predictions, list):
        try:
            predictions = datasets.Dataset.from_list(predictions)
        except Exception as err:
            # Chain the original failure so the root cause stays visible in the traceback
            raise ValueError(
                "`datasets.Dataset.from_list(predictions)` failed. Please convert predictions to `datasets.Dataset` format."
            ) from err
    if not isinstance(predictions, datasets.Dataset):
        raise AssertionError("predictions must be a datasets.Dataset object")

    num_targets = len(target_columns)
    if num_targets == 0:
        # Without this guard the modulo below fails with an uninformative ZeroDivisionError
        raise ValueError("`target_columns` must contain at least one column name")
    num_predictions = len(predictions)
    if num_predictions % num_targets != 0:
        raise AssertionError("Number of predictions must be divisible by the number of target columns")

    # NOTE(review): `predictions.data.table` is read directly; presumably this assumes the dataset carries no
    # indices mapping (e.g. from a prior `sort`/`shuffle`) - confirm against callers.
    table = predictions.data.table
    prediction_dict = {}
    for offset, col in enumerate(target_columns):
        # Rows offset, offset + num_targets, ... belong to the target column at position `offset`
        indices = list(range(offset, num_predictions, num_targets))
        prediction_dict[col] = datasets.Dataset(table.take(indices))
    return datasets.DatasetDict(prediction_dict)