Lehmler, Stephan Johann; Saif-ur-Rehman, Muhammad; Glasmachers, Tobias; Iossifidis, Ioannis: Distributional Properties of ReLU-Activations in Artificial Neural Networks That Learn by Memorization. Proceedings Article. In: Nicosia, Giuseppe; Ojha, Varun; Giesselbach, Sven; Pardalos, Panos M.; Umeton, Renato; La Malfa, Emanuele; La Malfa, Gabriele (Hrsg.): Machine Learning, Optimization, and Data Science, S. 410–423, Springer Nature Switzerland, Cham, 2026, ISBN: 978-3-032-21477-5. Abstract | Links | BibTeX | Schlagwörter: Artificial neural networks, BCI, Machine Learning, Memorization, Statistical Modeling
Lehmler, Stephan Johann; Saif-ur-Rehman, Muhammad; Glasmachers, Tobias; Iossifidis, Ioannis: Understanding Activation Patterns in Artificial Neural Networks by Exploring Stochastic Processes: Discriminating Generalization from Memorization. Journal Article. In: Neurocomputing, S. 128473, 2024, ISSN: 0925-2312. Abstract | Links | BibTeX | Schlagwörter: Artificial neural networks, Generalization, Machine Learning, Memorization, Poisson process, Stochastic modeling
2026

% Proceedings paper. Names use the unambiguous "Last, First" form so that the
% compound surname "La Malfa" is not split or swapped with the given name.
% tppubtype is not a standard field (presumably exporter metadata); standard
% styles ignore unknown fields.
@inproceedings{lehmlerDistributionalPropertiesReLUActivations2026,
  title = {Distributional Properties of {ReLU}-Activations in Artificial Neural Networks That Learn by Memorization},
  author = {Lehmler, Stephan Johann and Saif-ur-Rehman, Muhammad and Glasmachers, Tobias and Iossifidis, Ioannis},
  editor = {Nicosia, Giuseppe and Ojha, Varun and Giesselbach, Sven and Pardalos, Panos M. and Umeton, Renato and La Malfa, Emanuele and La Malfa, Gabriele},
  doi = {10.1007/978-3-032-21477-5_27},
  isbn = {978-3-032-21477-5},
  year = {2026},
  date = {2026-06-01},
  urldate = {2026-06-01},
  booktitle = {Machine Learning, Optimization, and Data Science},
  pages = {410--423},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  abstract = {We investigate the distributional properties of layers in Artificial Neural Network (ANN) that can be used to distinguish between networks learning by generalization and memorizing networks. Starting from the notion of memorization being essentially definable as learning ‘rare’ features of the input data, we propose the activation probability of Rectified Linear Units (ReLU)-neurons as an important indicator of memorization. Building on this idea, we show how hypotheses about distributional properties over whole networks structures can be derived from the activation probability of singular neurons in memorizing ANNs. We such extend previous work on using Poisson process models of activations in ANN by considering the correlation between neurons. Using this approach, we further simulate the effect of memorizing neurons on distributional properties of weight matrices and activation magnitudes and find a connection between L1/L2-norm regularization of weight matrices. We empirically evaluate the distributions of activation rate, correlation structure and weight matrices in memorizing and generalizing ANNs on a simple MNIST-classification task. Our initial findings show how the activation frequency and intra-layer correlation structure can be used to distinguish generalizing from memorizing networks and for inferring distributional properties on affected parts of the networks. This work presents a building block to later derive online metrics for memorization in ANNs.},
  keywords = {Artificial neural networks, BCI, Machine Learning, Memorization, Statistical Modeling},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2024
% Journal article. Elsevier is recorded as publisher rather than editor, since
% editor is a person-name field and would be parsed as a person called
% "Elsevier". pages holds a single number with no range (presumably the
% article number - confirm against the publisher record).
@article{lehmlerUnderstandingActivationPatterns2024,
  title = {Understanding Activation Patterns in Artificial Neural Networks by Exploring Stochastic Processes: Discriminating Generalization from Memorization},
  author = {Lehmler, Stephan Johann and Saif-ur-Rehman, Muhammad and Glasmachers, Tobias and Iossifidis, Ioannis},
  publisher = {Elsevier},
  url = {https://www.sciencedirect.com/science/article/pii/S092523122401244X},
  doi = {10.1016/j.neucom.2024.128473},
  issn = {0925-2312},
  year = {2024},
  date = {2024-09-19},
  urldate = {2024-09-19},
  journal = {Neurocomputing},
  pages = {128473},
  abstract = {To gain a deeper understanding of the behavior and learning dynamics of artificial neural networks, mathematical abstractions and models are valuable. They provide a simplified perspective and facilitate systematic investigations. In this paper, we propose to analyze dynamics of artificial neural activation using stochastic processes, which have not been utilized for this purpose thus far. Our approach involves modeling the activation patterns of nodes in artificial neural networks as stochastic processes. By focusing on the activation frequency, we can leverage techniques used in neuroscience to study neural spike trains. Specifically, we extract the activity of individual artificial neurons during a classification task and model their activation frequency. The underlying process model is an arrival process following a Poisson distribution. We examine the theoretical fit of the observed data generated by various artificial neural networks in image recognition tasks to the proposed model’s key assumptions. Through the stochastic process model, we derive measures describing activation patterns of each network. We analyze randomly initialized, generalizing, and memorizing networks, allowing us to identify consistent differences in learning methods across multiple architectures and training sets. We calculate features describing the distribution of Activation Rate and Fano Factor, which prove to be stable indicators of memorization during learning. These calculated features offer valuable insights into network behavior. The proposed model demonstrates promising results in describing activation patterns and could serve as a general framework for future investigations. It has potential applications in theoretical simulation studies as well as practical areas such as pruning or transfer learning.},
  keywords = {Artificial neural networks, Generalization, Machine Learning, Memorization, Poisson process, Stochastic modeling},
  pubstate = {published},
  tppubtype = {article}
}
