Grün, Felix; Saif-ur-Rehman, Muhammad; Glasmachers, Tobias; Iossifidis, Ioannis Invariance to Quantile Selection in Distributional Continuous Control Proceedings Article In: Nicosia, Giuseppe; Ojha, Varun; Giesselbach, Sven; Pardalos, Panos M.; Umeton, Renato; La Malfa, Emanuele; La Malfa, Gabriele (Hrsg.): Machine Learning, Optimization, and Data Science, S. 175–190, Springer Nature Switzerland, Cham, 2026, ISBN: 978-3-032-21477-5. Abstract | Links | BibTeX | Schlagwörter: Actor-critic, BCI, Continuous control, Distributional reinforcement learning, Machine Learning, Quantile regression, reinforcement learning (RL) 2026
% Conference paper published in the Springer proceedings volume
% "Machine Learning, Optimization, and Data Science" (2026), pp. 175-190.
% Names are stored in "Last, First" form so multi-word surnames parse correctly.
@inproceedings{grunInvarianceQuantileSelection2026,
  title = {Invariance to Quantile Selection in Distributional Continuous Control},
  author = {Grün, Felix and Saif-ur-Rehman, Muhammad and Glasmachers, Tobias and Iossifidis, Ioannis},
  editor = {Nicosia, Giuseppe and Ojha, Varun and Giesselbach, Sven and Pardalos, Panos M. and Umeton, Renato and La Malfa, Emanuele and La Malfa, Gabriele},
  doi = {10.1007/978-3-032-21477-5_12},
  isbn = {978-3-032-21477-5},
  year = {2026},
  date = {2026-06-01},
  urldate = {2026-06-01},
  booktitle = {Machine Learning, Optimization, and Data Science},
  pages = {175--190},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  abstract = {In recent years, distributional reinforcement learning has produced many state-of-the-art results in typical reinforcement learning benchmarks, such as the suite of Atari games. Increasingly sample-efficient distributional algorithms for the discrete action domain have been developed over time, which vary primarily in the way they parameterize their approximations of value distributions, and how they quantify the differences between those distributions. In this work, we transfer three of those algorithms - Quantile Regression Deep Q-Network (QR-DQN), Implicit Quantile Networks (IQN) and Fully Parameterized Quantile Function (FQF) - to the continuous action domain by extending two powerful actor-critic algorithms - Twin Delayed Deep Deterministic policy gradient (TD3) and Soft Actor-Critic (SAC) - with distributional critics. We investigate whether the relative performance of the methods for the discrete action space translates to the continuous case. To that end, we compare them empirically on a set of continuous control tasks (Ant, HalfCheetah, Hopper, Humanoid and Walker2D). Our results indicate qualitative invariance regarding the number and placement of distributional atoms in the deterministic, continuous action setting.},
  keywords = {Actor-critic, BCI, Continuous control, Distributional reinforcement learning, Machine Learning, Quantile regression, reinforcement learning (RL)},
  pubstate = {published},
  tppubtype = {inproceedings}
}
