def train(self, dataset):
    """
    Train the classifier with every record of the given dataset.

    This is a generator: records are sent to the server one at a time as
    the caller iterates, and nothing happens until iteration starts.

    :param dataset: iterable of ``(idx, (label, d))`` entries.
    :returns: generator yielding ``(idx, label)`` for each trained record.
    :raises RuntimeError: if a record has no label (``label is None``).
    :raises AssertionError: if the train call does not return ``1`` for the
        single record that was sent.
    """
    client = self._client()
    for row_index, (row_label, datum) in dataset:
        if row_label is None:
            raise RuntimeError('Dataset without label column cannot be used for training')

        # One labeled datum per request, so that progress can be reported
        # to the caller record by record.
        labeled = jubatus.classifier.types.LabeledDatum(unicode_t(row_label), datum)
        trained_count = client.train([labeled])
        assert trained_count == 1

        yield (row_index, row_label)

def classify(self, dataset, softmax=False):
    """
    Classify every record of the given dataset with this classifier.

    This is a generator: records are sent to the server one at a time as
    the caller iterates.

    :param dataset: iterable of ``(idx, (label, d))`` entries.
    :param softmax: when True, ``Utils.softmax`` is applied to the scores
        of each record before they are paired with their labels.
    :returns: generator yielding ``(idx, label, label_score_sorted)`` where
        ``label_score_sorted`` is a list of ``(label, score)`` tuples in
        descending order of the raw score.
    :raises AssertionError: if the classify call does not return exactly
        one result for the single record that was sent.
    """
    client = self._client()
    for row_index, (row_label, datum) in dataset:
        # Classify one record per request.
        responses = client.classify([datum])
        assert len(responses) == 1

        # Rank the estimates by score, best first, then split them into
        # parallel label / score lists.
        ranked = sorted(responses[0], key=lambda estimate: estimate.score, reverse=True)
        labels = [estimate.label for estimate in ranked]
        scores = [estimate.score for estimate in ranked]

        if softmax:
            scores = Utils.softmax(scores)

        # Note: the yielded label may be None.
        yield (row_index, row_label, list(zip(labels, scores)))

@classmethod
def train_and_classify(cls, config, train_dataset, test_dataset, metric):
    """
    Utility method to perform a bulk train-then-test run.

    Runs a classifier using the given config, trains it with
    ``train_dataset``, classifies ``test_dataset``, stops the classifier and
    returns the metric computed from the true and predicted labels.

    Test records for which the classifier returns no result are skipped,
    so they contribute to neither list passed to ``metric``.

    :param config: configuration passed to ``cls.run``.
    :param train_dataset: dataset consumed by ``train``.
    :param test_dataset: dataset consumed by ``classify``.
    :param metric: callable invoked as ``metric(y_true, y_pred)``.
    :returns: whatever ``metric`` returns.
    """
    classifier = cls.run(config)
    try:
        # ``train`` is a generator; it must be drained for training to happen.
        for _ in classifier.train(train_dataset):
            pass

        y_true = []
        y_pred = []
        for _idx, label, result in classifier.classify(test_dataset):
            if result:
                y_true.append(label)
                # Results are sorted by score, so the first entry is the
                # top-ranked (label, score) pair.
                y_pred.append(result[0][0])
    finally:
        # Always stop the classifier, even if training or classification
        # raised, so that a failed run does not leave it running.
        classifier.stop()

    return metric(y_true, y_pred)

@classmethod
def _default_method(cls):
    """Return the name of the default method, ``'AROW'``."""
    return 'AROW'

@classmethod
def _default_parameter(cls, method):
    """
    Return the default parameter for the given method name.

    :param method: method name, in either its short or its long form.
    :returns: ``None`` for methods that take no parameter, otherwise a newly
        created dict of default parameter values.
    :raises RuntimeError: if the method name is not recognized.
    """
    without_parameter = ('perceptron', 'PA', 'passive_aggressive')
    regularized = (
        'PA1', 'passive_aggressive_1',
        'PA2', 'passive_aggressive_2',
        'CW', 'confidence_weighted',
        'AROW',
        'NHERD', 'normal_herd',
    )
    distance_based = ('cosine', 'euclidean')
    nearest_neighbor = ('NN', 'nearest_neighbor')

    if method in without_parameter:
        return None

    if method in regularized:
        return {'regularization_weight': 1.0}

    if method in distance_based:
        return {
            'nearest_neighbor_num': 128,
            'local_sensitivity': 1.0,
        }

    if method in nearest_neighbor:
        return {
            'method': 'euclid_lsh',
            'parameter': {
                'threads': -1,  # use number of logical CPU cores
                'hash_num': 64,
            },
            'nearest_neighbor_num': 128,
            'local_sensitivity': 1.0
        }

    raise RuntimeError('unknown method: {0}'.format(method))