/*
    This file is part of Orange.

    Copyright 1996-2010 Faculty of Computer and Information Science, University of Ljubljana
    Contact: janez.demsar@fri.uni-lj.si

    Orange is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "examples.hpp"
#include "classify.hpp"
#include "table.hpp"

#include "logistic.ppp"
#include <math.h>


// Logistic function 1 / (1 + e^-x), evaluated so that exp() is only ever
// called with a non-positive argument. The textbook form e^x / (1 + e^x)
// yields inf/inf (NaN) once e^x overflows for a large positive x.
static double lrStableSigmoid(const double x)
{
  if (x >= 0.0)
    return 1.0 / (1.0 + exp(-x));

  const double e = exp(x);
  return e / (1.0 + e);
}


TLogRegLearner::TLogRegLearner()
{}


// TODO: find a sensible place for the following two functions

// Computes the Wald Z statistic for each coefficient as beta / beta_se.
//
// The result carries the same `attributes` as `beta`. Iteration stops at the
// end of the shorter of the two lists, so the result has as many elements as
// the shorter input. A zero standard error is not special-cased; the division
// is performed as is.
PAttributedFloatList TLogRegLearner::computeWaldZ(PAttributedFloatList &beta, PAttributedFloatList &beta_se)
{
  PAttributedFloatList waldZ = PAttributedFloatList(mlnew TAttributedFloatList(beta->attributes));

  TAttributedFloatList::const_iterator b(beta->begin()), be(beta->end());
  TAttributedFloatList::const_iterator s(beta_se->begin()), se(beta_se->end());
  for (; (b != be) && (s != se); ++b, ++s)
    waldZ->push_back((*b) / (*s));

  return waldZ;
}


// Computes a P value for each Wald Z statistic.
//
// For every z the code evaluates
//     p = exp(-z^2/2) * sqrt(2 z^2 / pi) * (1 + z^2/3 + z^4/(3*5) + ...)
// and stores 1 - p. This has the form of the series expansion of the
// chi-square CDF with one degree of freedom evaluated at z^2, so 1 - p would
// be the upper-tail probability. NOTE(review): confirm against the intended
// statistical definition.
//
// The result carries the same `attributes` as `waldZ`.
PAttributedFloatList TLogRegLearner::computeP(PAttributedFloatList &waldZ)
{
  // TODO: PI - find where this constant is defined. The truncated literal is
  // kept unchanged so that computed values stay the same.
  const double pi = 3.141592;

  // TODO: check where this tolerance (0.0000...1) comes from. The series is
  // summed until a term drops to this fraction of the running sum.
  const double relativeTolerance = 0.0000000001;

  PAttributedFloatList Pstat = PAttributedFloatList(mlnew TAttributedFloatList(waldZ->attributes));

  TAttributedFloatList::const_iterator z(waldZ->begin()), ze(waldZ->end());
  for (; z != ze; ++z) {
    const double zt = (*z) * (*z);

    // Very large statistics are reported as P = 0 without summing the series.
    if (zt > 1000) {
      Pstat->push_back(0.0);
      continue;
    }

    double p = exp(-0.5 * zt);
    p *= sqrt(2 * zt / pi);

    double t = p;
    for (int a = 3; t > relativeTolerance * p; a = a + 2) {
      t *= zt / a;
      p += t;
    }

    Pstat->push_back(1 - p);
  }

  return Pstat;
}


// Fits a logistic regression model on `gen` and returns the classifier.
//
// Delegates to fitModel() and raises an error naming the offending variable
// when the fitter reports a status of TLogRegFitter::Constant or above
// ("constant" for Constant itself, "singularity" for anything higher).
PClassifier TLogRegLearner::operator()(PExampleGenerator gen, const int &weight)
{
  int error;
  PVariable var;
  PClassifier cl = fitModel(gen, weight, error, var);

  if (error >= TLogRegFitter::Constant)
    raiseError("%s in %s", error == TLogRegFitter::Constant ? "constant" : "singularity", var->get_name().c_str());

  return cl;
}


// Builds the domain continuizer used when the learner has no
// `domainContinuizer` of its own: zero based, continuous attributes left as
// they are, multinomial attributes treated with FrequentIsBase, class ignored.
TDomainContinuizer *constructDefaultLRContinuizer()
{
  TDomainContinuizer *def = mlnew TDomainContinuizer();
  def->zeroBased = true;
  def->continuousTreatment = TDomainContinuizer::Leave;
  def->multinomialTreatment = TDomainContinuizer::FrequentIsBase;
  def->classTreatment = TDomainContinuizer::Ignore;
  return def;
}

// Default continuizer shared by all learners; constructed once during static
// initialization and never released in this file.
TDomainContinuizer *logisticRegressionDomainContinuizer = constructDefaultLRContinuizer();


// Fits the model and returns the classifier together with the fit status.
//
// gen      training examples
// weight   weight id passed on to the imputer, continuizer and fitter
// error    (out) status reported by the fitter; also stored in fit_status
// errorAt  (out) variable reported by the fitter alongside the status
//
// Unlike operator(), this function does not raise on a failed fit. It returns
// the classifier with beta and beta_se filled from whatever the fitter
// returned, and leaves wald_Z and P unset when
// error >= TLogRegFitter::Constant.
//
// Side effect: when no `fitter` is set, a TLogRegFitter_Cholesky is created
// and stored in the learner's `fitter` member.
PClassifier TLogRegLearner::fitModel(PExampleGenerator gen, const int &weight, int &error, PVariable &errorAt)
{
  PImputer imputer = imputerConstructor ? imputerConstructor->call(gen, weight) : PImputer();
  PExampleGenerator imputed = imputer ? imputer->call(gen, weight) : gen;

  // Construct the classifier. It is wrapped immediately so that it is not
  // leaked if any of the following steps throws.
  TLogRegClassifier *lrc = mlnew TLogRegClassifier(imputed->domain);
  PClassifier cl = lrc;

  // The data description is built from the original generator `gen`, not the
  // imputed one.
  lrc->dataDescription = mlnew TEFMDataDescription(gen->domain, mlnew TDomainDistributions(gen), 0, getMetaID());
  lrc->imputer = imputer;

  // The continuization below used to be conditional on
  //   imputed->domain->hasDiscreteAttributes(false)
  // and is now always performed.
  lrc->continuizedDomain = domainContinuizer ? domainContinuizer->call(imputed, weight) : (*logisticRegressionDomainContinuizer)(imputed, weight);
  imputed = mlnew TExampleTable(lrc->continuizedDomain, imputed);

  // Construct a LR fitter if none was given.
  fitter = fitter ? fitter : PLogRegFitter(mlnew TLogRegFitter_Cholesky());

  // Fit logistic regression.
  PAttributedFloatList temp_beta, temp_beta_se;
  temp_beta = fitter->call(imputed, weight, temp_beta_se, lrc->likelihood, error, errorAt);
  lrc->fit_status = error;

  // Labels for the coefficient lists: the class variable first (it labels the
  // first coefficient), followed by the attributes of the continuized domain.
  PVarList enum_attributes = mlnew TVarList();
  enum_attributes->push_back(imputed->domain->classVar);
  PITERATE(TVarList, vl, imputed->domain->attributes)
    enum_attributes->push_back(*vl);

  // Transform the fitter's beta and beta_se into attributed float lists.
  lrc->beta = mlnew TAttributedFloatList(enum_attributes);
  lrc->beta_se = mlnew TAttributedFloatList(enum_attributes);

  PITERATE(TAttributedFloatList, fi, temp_beta)
    lrc->beta->push_back(*fi);

  PITERATE(TAttributedFloatList, fi_se, temp_beta_se)
    lrc->beta_se->push_back(*fi_se);

  // On a failed fit the statistics below are skipped.
  if (error >= TLogRegFitter::Constant)
    return cl;

  lrc->wald_Z = computeWaldZ(lrc->beta, lrc->beta_se);
  lrc->P = computeP(lrc->wald_Z);

  // Return the classifier with domain, beta and standard errors of beta.
  return cl;
}


TLogRegClassifier::TLogRegClassifier()
{}


TLogRegClassifier::TLogRegClassifier(PDomain dom)
: TClassifierFD(dom, true)
{}


// Returns the predicted class distribution for `origexam`.
//
// The example is converted to the classifier's domain, then imputed (when an
// imputer is set) and converted to the continuized domain (when one is set).
// The first element of `beta` is used as the intercept and the remaining
// elements are multiplied with the example's values in order; the sum is
// passed through the logistic function to obtain prob1.
//
// Without an imputer, an example with a special (unknown) value in any
// position but the last (presumably the class value - verify) is answered by
// TClassifier::classDistribution using `dataDescription`, when available.
//
// For a class variable of type TValue::INTVAR the result is a discrete
// distribution with weight 1 - prob1 on value 0 and prob1 on value 1;
// otherwise it is a continuous distribution containing prob1 with weight 1.
//
// Raises an error when `beta` is empty or when a value that has to be
// multiplied with a coefficient is special.
PDistribution TLogRegClassifier::classDistribution(const TExample &origexam)
{
  checkProperty(domain);
  TExample cexample(domain, origexam);

  // example2 is heap-allocated (and owned here) only when it comes from the
  // imputer; otherwise it points to the local cexample.
  TExample *example2;
  if (imputer)
    example2 = imputer->call(cexample);
  else {
    if (dataDescription)
      for (TExample::const_iterator ei(cexample.begin()), ee(cexample.end() - 1); ei != ee; ++ei)
        if ((*ei).isSpecial())
          return TClassifier::classDistribution(cexample, dataDescription);
    example2 = &cexample;
  }

  // `example` differs from example2 only once the continuized copy has been
  // allocated; that inequality is what the clean-up below tests for.
  TExample *example = example2;
  float prob1;
  try {
    // Converting inside the try block guarantees that an imputer-allocated
    // example2 is released if the conversion throws.
    if (continuizedDomain)
      example = mlnew TExample(continuizedDomain, *example2);

    TAttributedFloatList::const_iterator b(beta->begin()), be(beta->end());
    if (b == be)
      raiseError("'beta' is empty");

    // beta 0 (intercept)
    prob1 = *b;
    ++b;

    // Multiply the remaining betas with the example's values. The loop ends
    // at whichever of the two sequences runs out first.
    TVarList::const_iterator vi(example->domain->attributes->begin());
    TExample::const_iterator ei(example->begin()), ee(example->end());
    for (; (b != be) && (ei != ee); ++ei, ++b, ++vi) {
      if ((*ei).isSpecial())
        raiseError("unknown value in attribute '%s'", (*vi)->get_name().c_str());
      prob1 += (*ei).floatV * (*b);
    }

    prob1 = static_cast<float>(lrStableSigmoid(prob1));
  }
  catch (...) {
    if (example != example2)
      mldelete example;
    if (imputer)
      mldelete example2;
    throw;
  }

  if (example != example2)
    mldelete example;
  if (imputer)
    mldelete example2;

  if (classVar->varType == TValue::INTVAR) {
    TDiscDistribution *dist = mlnew TDiscDistribution(classVar);
    PDistribution res = dist;
    dist->addint(0, 1 - prob1);
    dist->addint(1, prob1);
    return res;
  }
  else {
    TContDistribution *dist = mlnew TContDistribution(classVar);
    PDistribution res = dist;
    dist->addfloat(prob1, 1.0);
    return res;
  }
}