From dbe9bdaeb357f4fbe11b001e1629ff79082a09ac Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis
Date: Thu, 30 Mar 2017 20:32:14 +0300
Subject: [PATCH 1/2] Added test_rl.py

---
 tests/test_rl.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 tests/test_rl.py

diff --git a/tests/test_rl.py b/tests/test_rl.py
new file mode 100644
index 000000000..4880eaf9b
--- /dev/null
+++ b/tests/test_rl.py
@@ -0,0 +1,56 @@
+import pytest
+
+from rl import *
+from mdp import sequential_decision_environment
+
+
+north = (0, 1)
+south = (0, -1)
+west = (-1, 0)
+east = (1, 0)
+
+policy = {
+    (0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None,
+    (0, 1): north, (2, 1): north, (3, 1): None,
+    (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,
+}
+
+
+
+def test_PassiveADPAgent():
+    agent = PassiveADPAgent(policy, sequential_decision_environment)
+    for i in range(75):
+        run_single_trial(agent, sequential_decision_environment)
+
+    assert agent.U == 0
+    # The agent does not always produce the same results.
+    # Check that the results are good enough.
+    assert agent.U[(0, 0)] > 0.15  # In reality around 0.3
+    assert agent.U[(0, 1)] > 0.15  # In reality around 0.4
+    assert agent.U[(1, 0)] > 0  # In reality around 0.2
+
+
+
+def test_PassiveTDAgent():
+    agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))
+    for i in range(200):
+        run_single_trial(agent, sequential_decision_environment)
+
+    # The agent does not always produce the same results.
+    # Check that the results are good enough.
+    assert agent.U[(0, 0)] > 0.15  # In reality around 0.3
+    assert agent.U[(0, 1)] > 0.15  # In reality around 0.35
+    assert agent.U[(1, 0)] > 0.15  # In reality around 0.25
+
+
+def test_QLearning():
+    q_agent = QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2,
+                             alpha=lambda n: 60./(59+n))
+
+    for i in range(200):
+        run_single_trial(q_agent, sequential_decision_environment)
+
+    # The agent does not always produce the same results.
+    # Check that the results are good enough.
+    assert q_agent.Q[((0, 1), (0, 1))] >= -0.5  # In reality around 0.1
+    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1
\ No newline at end of file

From 6961ae4562e1dc45563a60d68ee5c6ab23c10194 Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis
Date: Sat, 1 Apr 2017 11:40:38 +0300
Subject: [PATCH 2/2] Update test_rl.py

Accidentally left "assert agent.U == 0" in; it was a leftover from my own
local testing.
---
 tests/test_rl.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_rl.py b/tests/test_rl.py
index 4880eaf9b..05f071266 100644
--- a/tests/test_rl.py
+++ b/tests/test_rl.py
@@ -22,7 +22,6 @@ def test_PassiveADPAgent():
     for i in range(75):
         run_single_trial(agent, sequential_decision_environment)
 
-    assert agent.U == 0
     # The agent does not always produce the same results.
     # Check that the results are good enough.
     assert agent.U[(0, 0)] > 0.15  # In reality around 0.3
@@ -53,4 +52,4 @@ def test_QLearning():
    # The agent does not always produce the same results.
    # Check that the results are good enough.
    assert q_agent.Q[((0, 1), (0, 1))] >= -0.5  # In reality around 0.1
-    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1
\ No newline at end of file
+    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1
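
A short, hypothetical sketch for anyone reviewing this series: it re-runs one of
the agents by hand to show why the tests assert loose "good enough" bounds rather
than exact utilities. Every name below comes from the tests themselves (rl.py and
mdp.py in this repo); only the explicit imports and the final print loop are new.

# Hypothetical reviewer sketch; not part of the patch series.
from rl import PassiveTDAgent, run_single_trial
from mdp import sequential_decision_environment

north, south, west, east = (0, 1), (0, -1), (-1, 0), (1, 0)

# The fixed policy being evaluated; None marks the two terminal states.
policy = {
    (0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None,
    (0, 1): north, (2, 1): north, (3, 1): None,
    (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,
}

agent = PassiveTDAgent(policy, sequential_decision_environment,
                       alpha=lambda n: 60./(59 + n))
for _ in range(200):
    run_single_trial(agent, sequential_decision_environment)

# The learned utilities differ from run to run, which is why the tests
# compare against thresholds instead of asserting exact values.
for state in sorted(agent.U):
    print(state, round(agent.U[state], 3))

Swapping in QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2,
alpha=lambda n: 60./(59+n)) as in test_QLearning and printing q_agent.Q shows
the same run-to-run variation for the Q-values.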