# In this example the k-means clustering method is used to cluster a given toy
# data set. In k-means clustering one tries to partition n observations into k
# clusters in which each observation belongs to the cluster with the nearest mean.
# The algorithm class constructor takes the number of clusters and a distance to
# be used as input. The distance used in this example is Euclidean distance.
# After training one can fetch the result of clustering by obtaining the cluster
# centers and their radiuses.
library("sg")

# Load the toy data set.
# NOTE(review): unlike the distance examples in this file, the matrix is not
# transposed with t() here -- confirm the orientation 'set_features' expects.
train_table <- read.table("../data/fm_train_real.dat")
fm_train <- as.matrix(train_table)

# KMEANS
print("KMeans")

# k clusters; 'iter' is passed on to 'train_clustering' (presumably the
# iteration limit -- confirm against the shogun documentation).
k <- 3
iter <- 1000

# The distance and the training features have to be registered before the
# clustering object is created and trained.
dump <- sg("set_distance", "EUCLIDIAN", "REAL")
dump <- sg("set_features", "TRAIN", fm_train)
dump <- sg("new_clustering", "KMEANS")
dump <- sg("train_clustering", k, iter)

# Fetch the clustering result; this example stores its first element as the
# radii and its second element as the cluster centers.
result <- sg("get_clustering")
radi <- result[[1]]
centers <- result[[2]]

Distance

../examples/documented/r_static/distance_braycurtis.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'BRAYCURTIS'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two matrices is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
#
# Obviously, using the Bray Curtis distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# BrayCurtis Distance
print("BrayCurtisDistance")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "BRAYCURTIS", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_canberra.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'CANBERRA'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (dissimilarity ratio) matrix is
# computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (dissimilarity ratio)
# matrix between these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CCanberraMetric.html.
#
# Obviously, using the Canberra distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Canberra Metric
print("CanberraMetric")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "CANBERRA", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_canberraword.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored data sets in 'STRING' representation
# (feature type 'CHAR' with alphabet 'DNA') from different files and
# initializes the distance to 'CANBERRA' with feature type 'WORD'.
#
# Data points in this example are defined by the transformation function
# 'convert' and the preprocessing step applied afterwards (defined by
# 'add_preproc' and preprocessor 'SORTWORDSTRING').
#
# The target 'TRAIN' for 'set_features' controls the binding of the given
# data points. In order to compute a pairwise distance matrix by
# 'get_distance_matrix', we have to perform two preprocessing steps for
# input data 'TRAIN'. The method 'convert' transforms the input data to
# a string representation suitable for the selected distance. The individual
# strings are sorted in ascending order after the execution of 'attach_preproc'.
# A pairwise distance matrix is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the binding of the given
# data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
# matrix between these two data sets by 'get_distance_matrix', we have to
# perform two preprocessing steps for input data 'TEST'. The method 'convert'
# transforms the input data 'TEST' to a string representation suitable for
# the selected distance. The individual strings are sorted in ascending order
# after the execution of 'attach_preproc'. A pairwise distance matrix between
# the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see
# doc/classshogun_1_1CSortWordString.html,
# doc/classshogun_1_1CPreprocessor.html,
# doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
# doc/classshogun_1_1CCanberraWordDistance.html.
#
# Obviously, using the Canberra word distance is not limited to this showcase
# example.
library("sg")

# Read the DNA toy data sets.
train_table <- read.table("../data/fm_train_dna.dat")
fm_train_dna <- as.matrix(train_table)
test_table <- read.table("../data/fm_test_dna.dat")
fm_test_dna <- as.matrix(test_table)

# Parameters handed to 'convert' below.
order <- 3
gap <- 0
reverse <- "n"

# Canberra Word Distance
print("CanberraWordDistance")

# Choose the distance and register the preprocessor once; it is attached to
# each feature set separately further down.
dump <- sg("set_distance", "CANBERRA", "WORD")
dump <- sg("add_preproc", "SORTWORDSTRING")

# TRAIN: bind the data, convert it from CHAR strings to WORD strings, attach
# the preprocessor and request the pairwise TRAIN distance matrix.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
dm <- sg("get_distance_matrix", "TRAIN")

# TEST: same steps for the test data; 'dm' is overwritten with the
# TRAIN-vs-TEST distance matrix.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_chebyshew.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'CHEBYSHEW'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix (maximum of absolute feature
# dimension differences) is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix (maximum
# of absolute feature dimension differences) between these two data sets is
# computed.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CChebyshewMetric.html.
#
# Obviously, using the Chebyshew distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Chebyshew Metric
print("ChebyshewMetric")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "CHEBYSHEW", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_chisquare.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'CHISQUARE'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two matrices is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CChiSquareDistance.html.
#
# Obviously, using the ChiSquare distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# ChiSquare Distance
print("ChiSquareDistance")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "CHISQUARE", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_cosine.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'COSINE'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CCosineDistance.html.
#
# Obviously, using the Cosine distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Cosine Distance
print("CosineDistance")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "COSINE", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_euclidian.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'EUCLIDIAN'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CEuclidianDistance.html.
#
# Obviously, using the Euclidian distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Euclidian Distance
print("EuclidianDistance")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "EUCLIDIAN", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_geodesic.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'GEODESIC'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (shortest path on a sphere) matrix is
# computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (shortest path on
# a sphere) matrix between these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CGeodesicMetric.html.
#
# Obviously, using the Geodesic distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Geodesic Metric
print("GeodesicMetric")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "GEODESIC", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_hammingword.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored data sets in 'STRING' representation
# (feature type 'CHAR' with alphabet 'DNA') from different files and
# initializes the distance to 'HAMMING' with feature type 'WORD'.
#
# Data points in this example are defined by the transformation function
# 'convert' and the preprocessing step applied afterwards (defined by
# 'add_preproc' and preprocessor 'SORTWORDSTRING').
#
# The target 'TRAIN' for 'set_features' controls the binding of the given
# data points. In order to compute a pairwise distance matrix by
# 'get_distance_matrix', we have to perform two preprocessing steps for
# input data 'TRAIN'. The method 'convert' transforms the input data to
# a string representation suitable for the selected distance. The individual
# strings are sorted in ascending order after the execution of 'attach_preproc'.
# A pairwise distance matrix is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the binding of the given
# data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
# matrix between these two data sets by 'get_distance_matrix', we have to
# perform two preprocessing steps for input data 'TEST'. The method 'convert'
# transforms the input data 'TEST' to a string representation suitable for
# the selected distance. The individual strings are sorted in ascending order
# after the execution of 'attach_preproc'. A pairwise distance matrix between
# the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see
# doc/classshogun_1_1CSortWordString.html,
# doc/classshogun_1_1CPreprocessor.html,
# doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
# doc/classshogun_1_1CHammingWordDistance.html.
#
# Obviously, using the Hamming word distance is not limited to this showcase
# example.
library("sg")

# Read the DNA toy data sets.
train_table <- read.table("../data/fm_train_dna.dat")
fm_train_dna <- as.matrix(train_table)
test_table <- read.table("../data/fm_test_dna.dat")
fm_test_dna <- as.matrix(test_table)

# Parameters handed to 'convert' below.
order <- 3
gap <- 0
reverse <- "n"

# Hamming Word Distance
print("HammingWordDistance")

# Choose the distance and register the preprocessor once; it is attached to
# each feature set separately further down.
dump <- sg("set_distance", "HAMMING", "WORD")
dump <- sg("add_preproc", "SORTWORDSTRING")

# TRAIN: bind the data, convert it from CHAR strings to WORD strings, attach
# the preprocessor and request the pairwise TRAIN distance matrix.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
dm <- sg("get_distance_matrix", "TRAIN")

# TEST: same steps for the test data; 'dm' is overwritten with the
# TRAIN-vs-TEST distance matrix.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_jensen.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'JENSEN'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (divergence measure based on the
# Kullback-Leibler divergence) matrix is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (divergence measure
# based on the Kullback-Leibler divergence) matrix between these two data sets
# is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CJensenMetric.html.
#
# Obviously, using the Jensen-Shannon distance/divergence is not limited to
# this showcase example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Jensen Metric
print("JensenMetric")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "JENSEN", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_manhatten.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'MANHATTAN'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (sum of absolute feature
# dimension differences) matrix is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (sum of absolute
# feature dimension differences) matrix between these two data sets is
# computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CManhattanMetric.html.
#
# Obviously, using the Manhattan distance is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Manhattan Metric
print("ManhattanMetric")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "MANHATTAN", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_manhattenword.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored data sets in 'STRING' representation
# (feature type 'CHAR' with alphabet 'DNA') from different files and
# initializes the distance to 'MANHATTAN' with feature type 'WORD'.
#
# Data points in this example are defined by the transformation function
# 'convert' and the preprocessing step applied afterwards (defined by
# 'add_preproc' and preprocessor 'SORTWORDSTRING').
#
# The target 'TRAIN' for 'set_features' controls the binding of the given
# data points. In order to compute a pairwise distance matrix by
# 'get_distance_matrix', we have to perform two preprocessing steps for
# input data 'TRAIN'. The method 'convert' transforms the input data to
# a string representation suitable for the selected distance. The individual
# strings are sorted in ascending order after the execution of 'attach_preproc'.
# A pairwise distance matrix is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the binding of the given
# data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
# matrix between these two data sets by 'get_distance_matrix', we have to
# perform two preprocessing steps for input data 'TEST'. The method 'convert'
# transforms the input data 'TEST' to a string representation suitable for
# the selected distance. The individual strings are sorted in ascending order
# after the execution of 'attach_preproc'. A pairwise distance matrix between
# the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see
# doc/classshogun_1_1CSortWordString.html,
# doc/classshogun_1_1CPreprocessor.html,
# doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
# doc/classshogun_1_1CManhattanWordDistance.html.
#
# Obviously, using the Manhattan word distance is not limited to this showcase
# example.
library("sg")

# Read the DNA toy data sets.
train_table <- read.table("../data/fm_train_dna.dat")
fm_train_dna <- as.matrix(train_table)
test_table <- read.table("../data/fm_test_dna.dat")
fm_test_dna <- as.matrix(test_table)

# Parameters handed to 'convert' below.
order <- 3
gap <- 0
reverse <- "n"

# Manhattan Word Distance
print("ManhattanWordDistance")

# Choose the distance and register the preprocessor once; it is attached to
# each feature set separately further down.
dump <- sg("set_distance", "MANHATTAN", "WORD")
dump <- sg("add_preproc", "SORTWORDSTRING")

# TRAIN: bind the data, convert it from CHAR strings to WORD strings, attach
# the preprocessor and request the pairwise TRAIN distance matrix.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
dm <- sg("get_distance_matrix", "TRAIN")

# TEST: same steps for the test data; 'dm' is overwritten with the
# TRAIN-vs-TEST distance matrix.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_minkowski.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'MINKOWSKI' with
# norm 'k'. Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CMinkowskiMetric.html.
#
# Obviously, using the Minkowski metric is not limited to this showcase
# example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Minkowski Metric
print("MinkowskiMetric")

# 'k' is passed to 'set_distance' as the extra argument of the Minkowski
# distance.
k <- 3

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "MINKOWSKI", "REAL", k)
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

../examples/documented/r_static/distance_tanimoto.R

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'TANIMOTO'.
# Each column of the matrices corresponds to one data point.
#
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (extended Jaccard coefficient)
# matrix is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
#
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (extended
# Jaccard coefficient) matrix between these two data sets is computed by
# 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
#
# For more details see doc/classshogun_1_1CTanimotoDistance.html.
#
# Obviously, using the Tanimoto distance/coefficient is not limited to
# this showcase example.
library("sg")

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Tanimoto Distance
print("TanimotoDistance")

# Select the distance, register the TRAIN features and request the
# pairwise TRAIN distance matrix.
dump <- sg("set_distance", "TANIMOTO", "REAL")
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Register the TEST features and request the TRAIN-vs-TEST distance matrix;
# 'dm' is overwritten with the new result.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")

Preproc

../examples/documented/r_static/preproc_logplusone.R

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# LogPlusOne adds one to a dense real-valued vector and takes the logarithm of
# each component of it. It is most useful in situations where the inputs are
# counts: When one compares differences of small counts any difference may matter
# a lot, while small differences in large counts don't. This is what this log
# transformation controls for.
library("sg")

# Cache size handed to 'set_kernel' below.
size_cache <- 10

# Read both toy data sets and transpose them before handing them to shogun.
train_table <- read.table("../data/fm_train_real.dat")
fm_train_real <- t(as.matrix(train_table))
test_table <- read.table("../data/fm_test_real.dat")
fm_test_real <- t(as.matrix(test_table))

# Width handed to the CHI2 kernel below.
width <- 1.4

# LogPlusOne
print("LogPlusOne")

# Register the preprocessor and the kernel before any features are set.
dump <- sg("add_preproc", "LOGPLUSONE")
dump <- sg("set_kernel", "CHI2", "REAL", size_cache, width)

# TRAIN: bind the features, attach the preprocessor and request the
# kernel matrix.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("attach_preproc", "TRAIN")
km <- sg("get_kernel_matrix", "TRAIN")

# TEST: same steps for the test data; 'km' is overwritten with the
# kernel matrix requested for target 'TEST'.
dump <- sg("set_features", "TEST", fm_test_real)
dump <- sg("attach_preproc", "TEST")
km <- sg("get_kernel_matrix", "TEST")