% Heaton's 2017 PhD dissertation (Nova Southeastern University).
% A field may appear only once per entry, so the single `url` holds the
% institutional repository record; the alternative locations are kept in
% the uniquely named, style-ignored `url-*` fields below.
% `notes` and `isbn13` are likewise non-standard field names that styles ignore.
@phdthesis{Heaton:thesis,
  author   = {Heaton, Jeff},
  title    = {Automated Feature Engineering for Deep Neural Networks
              with Genetic Programming},
  school   = {Computer Science, Nova Southeastern University},
  year     = {2017},
  address  = {Florida, USA},
  keywords = {genetic algorithms, genetic programming, Applied
              sciences, Deep neural network, Feature engineering,
              Artificial intelligence, Computer science},
  isbn13   = {9781369660012},
  language = {English},
  url      = {http://nsuworks.nova.edu/gscis_etd/994/},
  url-proquest          = {https://search.proquest.com/docview/1889190846?accountid=14511},
  url-proquest-gscholar = {https://search.proquest.com/docview/1889190846?pq-origsite=gscholar},
  url-researchgate      = {https://www.researchgate.net/publication/316285310_Automated_Feature_Engineering_for_Deep_Neural_Networks_with_Genetic_Programming},
  abstract = {Feature engineering is a process that augments the
              feature vector of a machine learning model with
              calculated values that are designed to enhance the
              accuracy of a model's predictions. Research has shown
              that the accuracy of models such as deep neural
              networks, support vector machines, and
              tree/forest-based algorithms sometimes benefit from
              feature engineering. Expressions that combine one or
              more of the original features usually create these
              engineered features. The choice of the exact structure
              of an engineered feature is dependent on the type of
              machine learning model in use. Previous research
              demonstrated that various model families benefit from
              different types of engineered feature. Random forests,
              gradient-boosting machines, or other tree-based models
              might not see the same accuracy gain that an engineered
              feature allowed neural networks, generalized linear
              models, or other dot-product based models to achieve on
              the same data set.
              This dissertation presents a genetic programming-based
              algorithm that automatically engineers features that
              increase the accuracy of deep neural networks for some
              data sets. For a genetic programming algorithm to be
              effective, it must prioritize the search space and
              efficiently evaluate what it finds. This dissertation
              algorithm faced a potential search space composed of
              all possible mathematical combinations of the original
              feature vector. Five experiments were designed to guide
              the search process to efficiently evolve good
              engineered features. The result of this dissertation is
              an automated feature engineering (AFE) algorithm that
              is computationally efficient, even though a neural
              network is used to evaluate each candidate feature.
              This approach gave the algorithm a greater opportunity
              to specifically target deep neural networks in its
              search for engineered features that improve accuracy.
              Finally, a sixth experiment empirically demonstrated
              the degree to which this algorithm improved the
              accuracy of neural networks on data sets augmented by
              the algorithm's engineered features.},
  notes    = {ProQuest Dissertations Publishing, 2017.
              10259604.
              Supervisor James D. Cannady School code 1191},
}