@techreport{34ae1c36f1a64c478b04c24130ef5127,
  title         = {Unlocking the Potential of Federated Learning for Deeper Models},
  abstract      = {Federated learning (FL) is a new paradigm for distributed machine learning that allows a global model to be trained across multiple clients without compromising their privacy. Although FL has demonstrated remarkable success in various scenarios, recent studies mainly utilize shallow and small neural networks. In our research, we discover a significant performance decline when applying the existing FL framework to deeper neural networks, even when client data are independently and identically distributed (i.i.d.). Our further investigation shows that the decline is due to the continuous accumulation of dissimilarities among client models during the layer-by-layer back-propagation process, which we refer to as ``divergence accumulation.'' As deeper models involve a longer chain of divergence accumulation, they tend to manifest greater divergence, subsequently leading to performance decline. Both theoretical derivations and empirical evidence are proposed to support the existence of divergence accumulation and its amplified effects in deeper models. To address this issue, we propose several technical guidelines based on reducing divergence, such as using wider models and reducing the receptive field. These approaches can greatly improve the accuracy of FL on deeper models. For example, the application of these guidelines can boost the ResNet101 model's performance by as much as 43\% on the Tiny-ImageNet dataset.},
  author        = {Wang, Haolin and Liu, Xuefeng and Niu, Jianwei and Tang, Shaojie and Shen, Jiaxing},
  note          = {DBLP License: DBLP's bibliographic metadata records provided through http://dblp.org/ are distributed under a Creative Commons CC0 1.0 Universal Public Domain Dedication. Although the bibliographic metadata records are provided consistent with CC0 1.0 Dedication, the content described by the metadata records is not. Content may be subject to copyright, rights of privacy, rights of publicity and other restrictions.},
  year          = {2023},
  month         = jun,
  day           = {5},
  doi           = {10.48550/arXiv.2306.02701},
  eprint        = {2306.02701},
  archiveprefix = {arXiv},
  language      = {English},
  volume        = {abs/2306.02701},
  series        = {Computing Research Repository (CoRR)},
  publisher     = {arXiv.org e-Print Archive},
  type          = {Working Paper},
  institution   = {arXiv.org e-Print Archive},
}