From 18be53254ff4cf33e25502ecc4bc37ac49d35b5d Mon Sep 17 00:00:00 2001 From: Antonis Maronikolakis Date: Fri, 24 Mar 2017 17:36:49 +0200 Subject: [PATCH 1/4] Bugfixing --- learning.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/learning.py b/learning.py index 981a557c2..60b44b6f7 100644 --- a/learning.py +++ b/learning.py @@ -42,9 +42,9 @@ def hamming_distance(predictions, targets): class DataSet: - """A data set for a machine learning problem. It has the following fields: + """A data set for a machine learning problem. It has the following fields: - d.examples A list of examples. Each one is a list of attribute values. + d.examples A list of examples. Each one is a list of attribute values. d.attrs A list of integers to index into an example, so example[attr] gives a value. Normally the same as range(len(d.examples[0])). d.attrnames Optional list of mnemonic names for corresponding attrs. @@ -60,6 +60,8 @@ class DataSet: since that can handle any field types. d.name Name of the data set (for output display only). d.source URL or other source where the data came from. + d.exclude A list of attribute indexes to exclude from d.inputs. Elements + of this list can either be integers (attrs) or attrnames. Normally, you call the constructor and you're done; then you just access fields like d.examples and d.target and d.inputs.""" @@ -67,7 +69,7 @@ class DataSet: def __init__(self, examples=None, attrs=None, attrnames=None, target=-1, inputs=None, values=None, distance=mean_boolean_error, name='', source='', exclude=()): - """Accepts any of DataSet's fields. Examples can also be a + """Accepts any of DataSet's fields. Examples can also be a string or file from which to parse examples using parse_csv. Optional parameter: exclude, as documented in .setproblem(). >>> DataSet(examples='1, 2, 3') @@ -107,7 +109,7 @@ def setproblem(self, target, inputs=None, exclude=()): to not use in inputs. Attributes can be -n .. n, or an attrname. Also computes the list of possible values, if that wasn't done yet.""" self.target = self.attrnum(target) - exclude = map(self.attrnum, exclude) + exclude = list(map(self.attrnum, exclude)) if inputs: self.inputs = removeall(self.target, inputs) else: @@ -165,6 +167,7 @@ def classes_to_numbers(self,classes=None): def remove_examples(self,value=""): """Remove examples that contain given value.""" self.examples = [x for x in self.examples if value not in x] + self.values = list(map(unique, zip(*self.examples))) def __repr__(self): return ''.format( From 83707b9c088940d67e3e2d0ae35c5b54f3d3861b Mon Sep 17 00:00:00 2001 From: Antonis Maronikolakis Date: Fri, 24 Mar 2017 17:38:35 +0200 Subject: [PATCH 2/4] Test for "exclude" --- tests/test_learning.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_learning.py b/tests/test_learning.py index f216ad168..ac2d5a90c 100644 --- a/tests/test_learning.py +++ b/tests/test_learning.py @@ -4,6 +4,10 @@ from utils import DataFile +def test_exclude(): + iris = DataSet(name='iris', exclude=[3]) + assert iris.inputs == [0, 1, 2] + def test_parse_csv(): Iris = DataFile('iris.csv').read() @@ -38,6 +42,7 @@ def test_k_nearest_neighbors(): kNN = NearestNeighborLearner(iris,k=3) assert kNN([5,3,1,0.1]) == "setosa" + def test_decision_tree_learner(): iris = DataSet(name="iris") From a3e9b6c27b9002018169f751611dcd71041ac2e5 Mon Sep 17 00:00:00 2001 From: Antonis Maronikolakis Date: Fri, 24 Mar 2017 18:57:34 +0200 Subject: [PATCH 3/4] Update test_learning.py --- tests/test_learning.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_learning.py b/tests/test_learning.py index ac2d5a90c..4f618f7c1 100644 --- a/tests/test_learning.py +++ b/tests/test_learning.py @@ -52,21 +52,23 @@ def test_decision_tree_learner(): def test_neural_network_learner(): iris = DataSet(name="iris") + iris.remove_examples("virginica") + classes = ["setosa","versicolor","virginica"] - iris.classes_to_numbers() nNL = NeuralNetLearner(iris) - # NeuralNetLearner might be wrong. Just check if prediction is in range + # NeuralNetLearner might be wrong. Just check if prediction is in range. assert nNL([5,3,1,0.1]) in range(len(classes)) def test_perceptron(): iris = DataSet(name="iris") + iris.remove_examples("virginica") + classes = ["setosa","versicolor","virginica"] - iris.classes_to_numbers() perceptron = PerceptronLearner(iris) - # PerceptronLearner might be wrong. Just check if prediction is in range + # PerceptronLearner might be wrong. Just check if prediction is in range. assert perceptron([5,3,1,0.1]) in range(len(classes)) From deaf46cedd41ab965a142015d9b954197315f1cb Mon Sep 17 00:00:00 2001 From: Antonis Maronikolakis Date: Fri, 24 Mar 2017 20:02:04 +0200 Subject: [PATCH 4/4] update_values --- learning.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/learning.py b/learning.py index 60b44b6f7..c40c9a777 100644 --- a/learning.py +++ b/learning.py @@ -116,7 +116,7 @@ def setproblem(self, target, inputs=None, exclude=()): self.inputs = [a for a in self.attrs if a != self.target and a not in exclude] if not self.values: - self.values = list(map(unique, zip(*self.examples))) + self.update_values() self.check_me() def check_me(self): @@ -151,6 +151,9 @@ def attrnum(self, attr): else: return attr + def update_values(self): + self.values = list(map(unique, zip(*self.examples))) + def sanitize(self, example): """Return a copy of example, with non-input attributes replaced by None.""" return [attr_i if i in self.inputs else None @@ -167,7 +170,7 @@ def classes_to_numbers(self,classes=None): def remove_examples(self,value=""): """Remove examples that contain given value.""" self.examples = [x for x in self.examples if value not in x] - self.values = list(map(unique, zip(*self.examples))) + self.update_values() def __repr__(self): return ''.format(