Lehmler, Stephan Johann; Saif-ur-Rehman, Muhammad; Glasmachers, Tobias; Iossifidis, Ioannis: Distributional Properties of ReLU-Activations in Artificial Neural Networks That Learn by Memorization. Proceedings Article. In: Nicosia, Giuseppe; Ojha, Varun; Giesselbach, Sven; Pardalos, Panos M.; Umeton, Renato; La Malfa, Emanuele; La Malfa, Gabriele (Hrsg.): Machine Learning, Optimization, and Data Science, S. 410–423, Springer Nature Switzerland, Cham, 2026, ISBN: 978-3-032-21477-5. Abstract | Links | BibTeX | Schlagwörter: Artificial neural networks, BCI, Machine Learning, Memorization, Statistical Modeling. 2026

% Conference paper in the proceedings "Machine Learning, Optimization, and Data Science".
% Names are stored in "Last, First" form. The hyphenated surname Saif-ur-Rehman is
% wrapped in braces because BibTeX splits name tokens at hyphens and would otherwise
% read the lowercase "ur" as a von-particle. The acronym ReLU in the title is
% brace-protected against sentence-casing styles. Non-standard fields (date, urldate,
% pubstate, tppubtype) are kept as exported; classic styles ignore them.
@inproceedings{lehmlerDistributionalPropertiesReLUActivations2026,
  author     = {Lehmler, Stephan Johann and {Saif-ur-Rehman}, Muhammad and Glasmachers, Tobias and Iossifidis, Ioannis},
  title      = {Distributional Properties of {ReLU}-Activations in Artificial Neural Networks That Learn by Memorization},
  editor     = {Nicosia, Giuseppe and Ojha, Varun and Giesselbach, Sven and Pardalos, Panos M. and Umeton, Renato and La Malfa, Emanuele and La Malfa, Gabriele},
  booktitle  = {Machine Learning, Optimization, and Data Science},
  pages      = {410--423},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2026},
  date       = {2026-06-01},
  urldate    = {2026-06-01},
  isbn       = {978-3-032-21477-5},
  doi        = {10.1007/978-3-032-21477-5_27},
  keywords   = {Artificial neural networks, BCI, Machine Learning, Memorization, Statistical Modeling},
  abstract   = {We investigate the distributional properties of layers in Artificial Neural Network (ANN) that can be used to distinguish between networks learning by generalization and memorizing networks. Starting from the notion of memorization being essentially definable as learning `rare' features of the input data, we propose the activation probability of Rectified Linear Units (ReLU)-neurons as an important indicator of memorization. Building on this idea, we show how hypotheses about distributional properties over whole networks structures can be derived from the activation probability of singular neurons in memorizing ANNs. We such extend previous work on using Poisson process models of activations in ANN by considering the correlation between neurons. Using this approach, we further simulate the effect of memorizing neurons on distributional properties of weight matrices and activation magnitudes and find a connection between L1/L2-norm regularization of weight matrices. We empirically evaluate the distributions of activation rate, correlation structure and weight matrices in memorizing and generalizing ANNs on a simple MNIST-classification task. Our initial findings show how the activation frequency and intra-layer correlation structure can be used to distinguish generalizing from memorizing networks and for inferring distributional properties on affected parts of the networks. This work presents a building block to later derive online metrics for memorization in ANNs.},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}