% Conference paper from the 2010 International Conference on Machine Learning
% and Cybernetics (ICMLC 2010, 11-14 July 2010) on discretizing continuous-valued
% attributes during decision tree generation.
% Author names are stored in the unambiguous "Last, First" form with normal
% capitalisation so that the bibliography style decides casing and abbreviation.
% The note field carries the funding acknowledgement and conference details.
@inproceedings{37eddba2cc17484182cdd1078e6ab520,
  author    = {Li, Wen-Liang and Yu, Rui-Hua and Wang, Xi-Zhao},
  title     = {Discretization of continuous-valued attributes in decision tree generation},
  booktitle = {Proceedings : 2010 International Conference on Machine Learning and Cybernetics, ICMLC 2010},
  series    = {International Conference on Machine Learning and Cybernetics (ICMLC)},
  publisher = {IEEE},
  year      = {2010},
  pages     = {194--198},
  doi       = {10.1109/ICMLC.2010.5581069},
  isbn      = {9781424465262},
  language  = {English},
  keywords  = {Continuous-valued, Decision tree, Discretization, Splitting criterion},
  abstract  = {Decision tree is one of the most popular and widely used classification models in machine learning. The discretization of continuous-valued attributes plays an important role in decision tree generation. In this paper, we improve Fayyad's discretization method which uses the average class entropy of candidate partitions to select boundaries for discretization. Our method can reduce the number of candidate boundaries further. Here we also propose a generalized splitting criterion for cut point selection and prove that the cut points are always on boundaries when using this criterion. Along with the formal proof, we present empirical results that the decision trees generated by using such criteria are similar on several datasets from the UCI Machine Learning Repository.},
  note      = {This research is supported by the national natural science foundation of China (60903088, 60903089), by the natural science foundation of Hebei Province (F2009000227, F2008000635), by the key project foundation of applied fundamental research of Hebei Province (08963522D). ; 2010 International Conference on Machine Learning and Cybernetics, ICMLC 2010 ; Conference date: 11-07-2010 Through 14-07-2010},
}