Abstract

Text classification has become very serious problem for big organization to manage the large amount of online data and has been extensively applied in the tasks of Natural Language Processing (NLP). Text classification can support users to excellently manage and exploit meaningful information require to be classified into various categories for further use. In order to best classify texts, our research efforts to develop a deep learning approach which obtains superior performance in text classification than other RNNs approaches. However, the main problem in text classification is how to enhance the classification accuracy and the sparsity of the data semantics sensitivity to context often hinders the classification performance of texts. In order to overcome the weakness, in this paper we proposed unified structure to investigate the effects of word embedding and Gated Recurrent Unit (GRU) for text classification on two benchmark datasets included (Google snippets and TREC). GRU is a well-known type of recurrent neural network (RNN), which is ability of computing sequential data over its recurrent architecture. Experimentally, the semantically connected words are commonly near to each other in embedding spaces. First, words in posts are changed into vectors via word embedding technique. Then, the words sequential in sentences are fed to GRU to extract the contextual semantics between words. The experimental results showed that proposed GRU model can effectively learn the word usage in context of texts provided training data. The quantity and quality of training data significantly affected the performance. We evaluated the performance of proposed approach with traditional recurrent approaches, RNN, MV-RNN and LSTM, the proposed approach is obtained better results on two benchmark datasets in the term of accuracy and error rate.

@article{JOIV289,
author = {Muhammad Zulqarnain and Rozaida Ghazali and Muhammad Ghouse and Muhammad Mushtaq},
title = {Efficient processing of GRU based on word embedding for text classification},
journal = {JOIV : International Journal on Informatics Visualization},
volume = {3},
number = {4},
year = {2019},
keywords = {RNN; GRU; LSTM; Word embedding; Text classification; Natural language processing;},
abstract = {Text classification has become very serious problem for big organization to manage the large amount of online data and has been extensively applied in the tasks of Natural Language Processing (NLP). Text classification can support users to excellently manage and exploit meaningful information require to be classified into various categories for further use. In order to best classify texts, our research efforts to develop a deep learning approach which obtains superior performance in text classification than other RNNs approaches. However, the main problem in text classification is how to enhance the classification accuracy and the sparsity of the data semantics sensitivity to context often hinders the classification performance of texts. In order to overcome the weakness, in this paper we proposed unified structure to investigate the effects of word embedding and Gated Recurrent Unit (GRU) for text classification on two benchmark datasets included (Google snippets and TREC). GRU is a well-known type of recurrent neural network (RNN), which is ability of computing sequential data over its recurrent architecture. Experimentally, the semantically connected words are commonly near to each other in embedding spaces. First, words in posts are changed into vectors via word embedding technique. Then, the words sequential in sentences are fed to GRU to extract the contextual semantics between words. The experimental results showed that proposed GRU model can effectively learn the word usage in context of texts provided training data. The quantity and quality of training data significantly affected the performance. We evaluated the performance of proposed approach with traditional recurrent approaches, RNN, MV-RNN and LSTM, the proposed approach is obtained better results on two benchmark datasets in the term of accuracy and error rate.},
issn = {2549-9904}, pages = {377--383}, doi = {10.30630/joiv.3.4.289},
url = {http://joiv.org/index.php/joiv/article/view/289}
}