Implementing the Q-learning update step. This is a work in progress.
qlearning.prg
Code: Select all
// AI: Reinforcement Learning example (Q-Learning)
// Harbour implementation by Antonio Linares
#include "FiveWin.ch"
/*
 * Main() --> nil
 *
 * Runs one Q-Learning episode over a six-state graph (states 0..5).
 * States are 0-based while Harbour arrays are 1-based, so every matrix
 * access uses "state + 1". An action is identified by the number of the
 * state it leads to, so the chosen action is also the next state.
 *
 * The episode starts in a random state and keeps taking random valid
 * actions until the goal state (the last element of aStates) is reached.
 * After each move the Q matrix is updated with:
 *
 *    Q( state, action ) := R( state, action ) + gamma * Max( Q( next state, all actions ) )
 */
function Main()

   local aStates := { 0, 1, 2, 3, 4, 5 }
   local aR := Array( Len( aStates ), Len( aStates ) ) // Rewards matrix
   local aQ := Array( Len( aStates ), Len( aStates ) ) // Q-Learning matrix
   local nGamma := 0.8 // discount factor applied to the best future Q value
   local nState := aStates[ hb_RandomInt( 1, Len( aStates ) ) ]
   local nGoal := ATail( aStates )
   local nAction

   ? "initial state:", nState

   AEval( aR, { | aRow, n | aR[ n ] := AFill( aRow, -1 ) } ) // -1 marks "no transition"
   AEval( aQ, { | aRow, n | aQ[ n ] := AFill( aRow, 0 ) } ) // nothing learned yet

   aR[ 1 ][ 5 ] := 0 // state 0 can only go to state 4

   aR[ 2 ][ 4 ] := 0 // state 1 can only go to state 3 or
   aR[ 2 ][ 6 ] := 100 // to state 5 (goal)

   aR[ 3 ][ 4 ] := 0 // state 2 can only go to state 3

   aR[ 4 ][ 2 ] := 0 // state 3 can only go to state 1 or
   aR[ 4 ][ 3 ] := 0 // to state 2 or
   aR[ 4 ][ 5 ] := 0 // to state 4

   aR[ 5 ][ 1 ] := 0 // state 4 can only go to state 0 or
   aR[ 5 ][ 4 ] := 0 // to state 3 or
   aR[ 5 ][ 6 ] := 100 // to state 5 (goal)

   aR[ 6 ][ 2 ] := 0 // state 5 can only go to state 1 or
   aR[ 6 ][ 5 ] := 0 // to state 4 or
   aR[ 6 ][ 6 ] := 100 // to itself (goal)

   XBROWSER aQ TITLE "Q-Learning matrix"

   while nState != nGoal
      nAction := NextAction( aR, nState )

      // The future term must come from the state we are moving INTO
      // (nAction), not the state we are leaving; otherwise the goal
      // reward is never propagated back along the path correctly.
      aQ[ nState + 1, nAction + 1 ] := aR[ nState + 1, nAction + 1 ] + ;
         nGamma * GetMaxQForActions( aQ, nAction )

      XBROWSER aQ TITLE "Q-Learning matrix"
      ? "next state", nAction
      nState := nAction
   end

return nil
/*
 * GetMaxQForActions( aQ, nState ) --> nMax
 *
 * Returns the largest value stored in the Q matrix row that belongs to
 * nState (0-based state number, hence the "+ 1" when indexing), never
 * less than 0 because the search starts from 0.
 * The result is also shown in a "Max Q" message box before returning.
 */
function GetMaxQForActions( aQ, nState )

   local nMax := 0
   local nLearning

   for each nLearning in aQ[ nState + 1 ]
      nMax := Max( nLearning, nMax )
   next

   MsgInfo( nMax, "Max Q" )

return nMax
/*
 * GetActions( aR, nState ) --> aActions
 *
 * Builds the list of actions available from nState (0-based state number).
 * Every column of the state's reward row whose value is greater than -1
 * is a valid move; the action is reported as the 0-based number of that
 * column, in ascending order.
 */
function GetActions( aR, nState )

   local aActions := {}
   local aRewards := aR[ nState + 1 ]
   local nColumn

   for nColumn := 1 to Len( aRewards )
      if aRewards[ nColumn ] > -1
         AAdd( aActions, nColumn - 1 ) // back to 0-based numbering
      endif
   next

return aActions
/*
 * NextAction( aR, nState ) --> nAction
 *
 * Picks, uniformly at random, one of the actions that GetActions()
 * reports for nState. The candidate list is displayed in a browse
 * window before the choice is made.
 */
function NextAction( aR, nState )

   local aActions := GetActions( aR, nState )
   local nChoice

   XBROWSER aActions TITLE "Possible Actions for state: " + Str( nState )

   nChoice := hb_RandomInt( 1, Len( aActions ) )

return aActions[ nChoice ]