def initController(self):
    """Create the function approximator and the random source for this agent.

    Sets ``numberOfActions`` to 4, derives ``inputDimension`` from the first
    axis of ``self.world.getSensors()``, and builds an ``FFNetwork`` with
    layer sizes ``(inputDimension, 20, 20, numberOfActions)`` and a ``TanH``
    activation for each of the three weight layers. A fresh, unseeded
    ``numpy.random.RandomState`` is stored for action selection.
    """
    self.numberOfActions = 4

    sensorReadings = self.world.getSensors()
    self.inputDimension = sensorReadings.shape[0]

    layerSizes = (self.inputDimension, 20, 20, self.numberOfActions)
    # One TanH instance is shared by all three layers (same object, repeated).
    sharedActivation = TanH()
    # NOTE(review): self.alpha is presumably the learning rate and the
    # trailing 1 a further FFNetwork option -- confirm against FFNetwork.
    self.controller = FFNetwork(layerSizes, [sharedActivation] * 3, self.alpha, 1)

    self.randomState = np.random.RandomState()

def updateReward(self, state, reward, nextState, action, nextAction, episodeOver):
    """Apply one on-policy temporal-difference update for the action taken.

    The TD error is ``reward + gamma * Q(nextState, nextAction) - Q(state, action)``
    and is back-propagated through ``self.controller`` for the output node of
    ``action`` only (all other entries of the error vector are zero).

    Args:
        state: State in which ``action`` was taken (passed through
            ``self.scaleState`` before evaluation).
        reward: Raw reward; it is passed through ``self.scaleReward`` and the
            scaled value is appended to ``self.rewards``.
        nextState: State reached after ``action``.
        action: Index of the action that was executed in ``state``.
        nextAction: Index of the action chosen for ``nextState``; ignored
            when ``episodeOver`` is true.
        episodeOver: True when ``nextState`` ended the episode. In that case
            no future reward is bootstrapped, i.e. the target is the scaled
            reward alone.

    Side effects:
        Appends to ``self.rewards`` and ``self.errors`` and calls
        ``self.controller.propagateBack``.
    """
    reward = self.scaleReward(reward)
    self.rewards.append(reward)

    if episodeOver:
        # Nothing follows a terminal transition, so do not bootstrap from the
        # network's estimate for nextState.
        expectedFutureReward = 0.0
    else:
        expectedFutureReward = self.getQValues(self.scaleState(nextState))[nextAction]

    # Evaluate `state` last, directly before propagateBack, exactly as the
    # original ordering did.
    # NOTE(review): propagateBack presumably relies on the activations of the
    # most recent forward pass -- confirm before reordering these calls.
    QValues = self.getQValues(self.scaleState(state))
    QDiff = (reward + self.gamma * expectedFutureReward) - QValues[action]
    self.errors.append(QDiff)

    # Only the output node of the executed action receives an error signal.
    QDiffVec = np.zeros(QValues.shape[0])
    QDiffVec[action] = QDiff
    self.controller.propagateBack(QDiffVec)

def selectAction(self, state):
    """Pick an action for ``state`` using epsilon-greedy selection.

    With probability ``self.epsilon`` an action is drawn uniformly from all
    ``self.numberOfActions`` actions; otherwise one of the actions with the
    maximal Q-value is drawn uniformly (ties are broken at random).

    Args:
        state: State to select an action for. If ``None`` is passed, the
            agent's stored ``self.state`` is used instead, which is what the
            previous implementation always did regardless of the argument.

    Returns:
        The index of the selected action.
    """
    if state is None:
        state = self.state

    Q = self.getQValues(self.scaleState(state))
    possibleActions = np.where(Q == Q.max())[0]

    if self.randomState.uniform(0, 1) < self.epsilon:
        # Explore: choose among all output nodes of the last layer.
        # np.arange replaces the Python-2-only xrange.
        possibleActions = np.arange(self.numberOfActions)

    return self.randomState.choice(possibleActions)