## Persist the trained model: either as a binary file, or as TSV so the
## embeddings can be inspected directly, e.g. with
## data.table::fread("wikipedia_embeddings.tsv")
starspace_save_model(model)
starspace_save_model(model, file = "wikipedia_embeddings.bin")
starspace_save_model(model, file = "wikipedia_embeddings.tsv", as_tsv = TRUE)
## Load a pre-trained model
## (the original line had the comment fused onto the assignment, which
## made the snippet unrunnable as shown)
model <- starspace_load_model("wikipedia_embeddings.bin")
## Get the document embedding of a sentence
starspace_embedding(model, "The apps to predict / get nearest neighbours are still under construction.")
           [,1]      [,2]        [,3]       [,4]       [,5]       [,6]       [,7]     [,8]      [,9]      [,10]
[1,] -0.4213823 0.4987145 -0.08066317 -0.6519815 -0.1743725 0.09401496 0.02670185 0.262726 0.1761705 0.04599866

The following functions do similar things: they find the word or sentence that is closest to a provided sentence.

## Find the closest terms from the dictionary
starspace_knn(model, "What does this bunch of text look like", k = 10)
## Find the closest sentence among a set of candidate sentences
predict(model, newdata = "what does this bunch of text look like",
        basedoc = c("what does this bunch of text look like",
                    "word abracadabra was not part of the dictionary",
                    "give me back my mojo",
                    "cosine distance is what i show"))
## Compute the cosine similarity between 2 sentence vectors
embedding_similarity(
  starspace_embedding(model, "what does this bunch of text look like"),
  starspace_embedding(model, "word abracadabra was not part of the dictionary"),
  type = "cosine")

Notes

Why did you call the package ruimtehol? Because that is the translation of StarSpace in West-Vlaams (West Flemish).

The R wrapper is distributed under the Mozilla Public License 2.0. The package contains a copy of the StarSpace C++ code (namely all code under src/Starspace) which has a BSD license (which is available in file LICENSE.notes) and also has an accompanying PATENTS file which you can inspect here.

The package has only been tested on Windows, Ubuntu and Debian at this stage.