From dbe9bdaeb357f4fbe11b001e1629ff79082a09ac Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis
Date: Thu, 30 Mar 2017 20:32:14 +0300
Subject: [PATCH 1/2] Added test_rl.py

---
 tests/test_rl.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 tests/test_rl.py

diff --git a/tests/test_rl.py b/tests/test_rl.py
new file mode 100644
index 000000000..4880eaf9b
--- /dev/null
+++ b/tests/test_rl.py
@@ -0,0 +1,56 @@
+import pytest
+
+from rl import *
+from mdp import sequential_decision_environment
+
+
+north = (0, 1)
+south = (0, -1)
+west = (-1, 0)
+east = (1, 0)
+
+policy = {
+    (0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None,
+    (0, 1): north, (2, 1): north, (3, 1): None,
+    (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,
+}
+
+
+
+def test_PassiveADPAgent():
+    agent = PassiveADPAgent(policy, sequential_decision_environment)
+    for i in range(75):
+        run_single_trial(agent, sequential_decision_environment)
+
+    assert agent.U == 0
+    # The agent does not always produce the same results.
+    # Check that the results are good enough.
+    assert agent.U[(0, 0)] > 0.15  # In reality around 0.3
+    assert agent.U[(0, 1)] > 0.15  # In reality around 0.4
+    assert agent.U[(1, 0)] > 0  # In reality around 0.2
+
+
+
+def test_PassiveTDAgent():
+    agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))
+    for i in range(200):
+        run_single_trial(agent, sequential_decision_environment)
+
+    # The agent does not always produce the same results.
+    # Check that the results are good enough.
+    assert agent.U[(0, 0)] > 0.15  # In reality around 0.3
+    assert agent.U[(0, 1)] > 0.15  # In reality around 0.35
+    assert agent.U[(1, 0)] > 0.15  # In reality around 0.25
+
+
+def test_QLearning():
+    q_agent = QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2,
+                             alpha=lambda n: 60./(59+n))
+
+    for i in range(200):
+        run_single_trial(q_agent, sequential_decision_environment)
+
+    # The agent does not always produce the same results.
+    # Check that the results are good enough.
+    assert q_agent.Q[((0, 1), (0, 1))] >= -0.5  # In reality around 0.1
+    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1
\ No newline at end of file

From 6961ae4562e1dc45563a60d68ee5c6ab23c10194 Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis
Date: Sat, 1 Apr 2017 11:40:38 +0300
Subject: [PATCH 2/2] Update test_rl.py

Accidentally left "assert agent.U == 0" in; it was a leftover from my own
local testing.
---
 tests/test_rl.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_rl.py b/tests/test_rl.py
index 4880eaf9b..05f071266 100644
--- a/tests/test_rl.py
+++ b/tests/test_rl.py
@@ -22,7 +22,6 @@ def test_PassiveADPAgent():
     for i in range(75):
         run_single_trial(agent, sequential_decision_environment)
 
-    assert agent.U == 0
     # The agent does not always produce the same results.
     # Check that the results are good enough.
     assert agent.U[(0, 0)] > 0.15  # In reality around 0.3
@@ -53,4 +52,4 @@ def test_QLearning():
    # The agent does not always produce the same results.
    # Check that the results are good enough.
    assert q_agent.Q[((0, 1), (0, 1))] >= -0.5  # In reality around 0.1
-    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1
\ No newline at end of file
+    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1
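
A short, hypothetical sketch for anyone reviewing this series: it re-runs one of
the agents by hand to show why the tests assert loose "good enough" bounds rather
than exact utilities. Every name below comes from the tests themselves (rl.py and
mdp.py in this repo); only the explicit imports and the final print loop are new.

# Hypothetical reviewer sketch; not part of the patch series.
from rl import PassiveTDAgent, run_single_trial
from mdp import sequential_decision_environment

north, south, west, east = (0, 1), (0, -1), (-1, 0), (1, 0)

# The fixed policy being evaluated; None marks the two terminal states.
policy = {
    (0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None,
    (0, 1): north, (2, 1): north, (3, 1): None,
    (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,
}

agent = PassiveTDAgent(policy, sequential_decision_environment,
                       alpha=lambda n: 60./(59 + n))
for _ in range(200):
    run_single_trial(agent, sequential_decision_environment)

# The learned utilities differ from run to run, which is why the tests
# compare against thresholds instead of asserting exact values.
for state in sorted(agent.U):
    print(state, round(agent.U[state], 3))

Swapping in QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2,
alpha=lambda n: 60./(59+n)) as in test_QLearning and printing q_agent.Q shows
the same run-to-run variation for the Q-values.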