% Interspeech 2013 paper describing the Simple4All Tundra corpus (version 1.0).
@inproceedings{stan_IS13b,
  author    = {Stan, Adriana and Watts, Oliver and Mamiya, Yoshitaka and Giurgiu, Mircea and Clark, Rob and Yamagishi, Junichi and King, Simon},
  title     = {{TUNDRA}: A Multilingual Corpus of Found Data for {TTS} Research Created with Light Supervision},
  booktitle = {Proc.\ Interspeech 2013},
  year      = {2013},
  date      = {2013-08-24},
  url       = {http://consortium.simple4all.org/files/2013/03/master1.pdf},
  abstract  = {Simple4All Tundra (version 1.0) is the first release of a standardised multilingual corpus designed for text-to-speech research with imperfect or found data. The corpus consists of approximately 60 hours of speech data from audiobooks in 14 languages, as well as utterance-level alignments obtained with a lightly-supervised process. Future versions of the corpus will include finer-grained alignment and prosodic annotation, all of which will be made freely available. This paper gives a general outline of the data collected so far, as well as a detailed description of how this has been done, emphasizing the minimal language-specific knowledge and manual intervention used to compile the corpus. To demonstrate its potential use, text-to-speech systems have been built for all languages using unsupervised or lightly supervised methods, also briefly presented in the paper.},
  keywords  = {audiobook data, found data, imperfect data, light supervision, multilingual corpus, text-to-speech},
}

Simple4All Tundra (version 1.0) is the first release of a standardised multilingual corpus designed for text-to-speech research with imperfect or found data. The corpus consists of approximately 60 hours of speech data from audiobooks in 14 languages, as well as utterance-level alignments obtained with a lightly-supervised process. Future versions of the corpus will include finer-grained alignment and prosodic annotation, all of which will be made freely available. This paper gives a general outline of the data collected so far, as well as a detailed description of how this has been done, emphasizing the minimal language-specific knowledge and manual intervention used to compile the corpus. To demonstrate its potential use, text-to-speech systems have been built for all languages using unsupervised or lightly supervised methods, also briefly presented in the paper.