@InProceedings{Azad:2014:NaBIC,
  author    = "R. Muhammad Atif Azad and David Medernach and
               Conor Ryan",
  title     = "Efficient Approaches to {Interleaved Sampling} of
               Training Data for {Symbolic Regression}",
  booktitle = "Sixth World Congress on Nature and Biologically
               Inspired Computing",
  year      = "2014",
  editor    = "Ana Maria Madureira and Ajith Abraham and
               Emilio Corchado and Leonilde Varela and Azah Kamilah Muda and
               Choo, Yun Huoy",
  pages     = "176--183",
  address   = "Porto, Portugal",
  month     = "30 " # jul # "--" # "1 " # aug,
  publisher = "IEEE",
  keywords  = "genetic algorithms, genetic programming",
  isbn13    = "978-1-4799-5937-2",
  doi       = "10.1109/NaBIC.2014.6921874",
  abstract  = "The ability to generalise beyond the training set is
               paramount for any machine learning algorithm and
               Genetic Programming (GP) is no exception. This paper
               investigates a recently proposed technique to improve
               generalisation in GP, termed Interleaved Sampling where
               GP alternates between using the entire data set and
               only a single data point in alternate generations. This
               paper proposes two alternatives to using a single data
               point: the use of random search instead of a single
               data point, and simply minimising the tree size. Both
               the approaches are more efficient than the original
               Interleaved Sampling because they simply do not
               evaluate the fitness in half the number of generations.
               The results show that in terms of generalisation,
               random search and size minimisation are as effective as
               the original Interleaved Sampling; however, they are
               computationally more efficient in terms of data
               processing. Size minimisation is particularly
               interesting because it completely prevents bloat while
               still being competitive in terms of training results as
               well as generalisation. The tree sizes with size
               minimisation are substantially smaller reducing the
               computational expense substantially.",
  notes     = "NaBIC 2014 http://www.mirlabs.net/nabic14/",
}