@@ -43,9 +43,9 @@ def hamming_distance(predictions, targets):
43
43
44
44
45
45
class DataSet :
46
- """A data set for a machine learning problem. It has the following fields:
46
+ """A data set for a machine learning problem. It has the following fields:
47
47
48
- d.examples A list of examples. Each one is a list of attribute values.
48
+ d.examples A list of examples. Each one is a list of attribute values.
49
49
d.attrs A list of integers to index into an example, so example[attr]
50
50
gives a value. Normally the same as range(len(d.examples[0])).
51
51
d.attrnames Optional list of mnemonic names for corresponding attrs.
@@ -61,14 +61,16 @@ class DataSet:
61
61
since that can handle any field types.
62
62
d.name Name of the data set (for output display only).
63
63
d.source URL or other source where the data came from.
64
+ d.exclude A list of attribute indexes to exclude from d.inputs. Elements
65
+ of this list can either be integers (attrs) or attrnames.
64
66
65
67
Normally, you call the constructor and you're done; then you just
66
68
access fields like d.examples and d.target and d.inputs."""
67
69
68
70
def __init__ (self , examples = None , attrs = None , attrnames = None , target = - 1 ,
69
71
inputs = None , values = None , distance = mean_boolean_error ,
70
72
name = '' , source = '' , exclude = ()):
71
- """Accepts any of DataSet's fields. Examples can also be a
73
+ """Accepts any of DataSet's fields. Examples can also be a
72
74
string or file from which to parse examples using parse_csv.
73
75
Optional parameter: exclude, as documented in .setproblem().
74
76
>>> DataSet(examples='1, 2, 3')
@@ -108,14 +110,14 @@ def setproblem(self, target, inputs=None, exclude=()):
108
110
to not use in inputs. Attributes can be -n .. n, or an attrname.
109
111
Also computes the list of possible values, if that wasn't done yet."""
110
112
self .target = self .attrnum (target )
111
- exclude = map (self .attrnum , exclude )
113
+ exclude = list ( map (self .attrnum , exclude ) )
112
114
if inputs :
113
115
self .inputs = removeall (self .target , inputs )
114
116
else :
115
117
self .inputs = [a for a in self .attrs
116
118
if a != self .target and a not in exclude ]
117
119
if not self .values :
118
- self .values = list ( map ( unique , zip ( * self . examples )) )
120
+ self .update_values ( )
119
121
self .check_me ()
120
122
121
123
def check_me (self ):
@@ -150,6 +152,9 @@ def attrnum(self, attr):
150
152
else :
151
153
return attr
152
154
155
def update_values(self):
    """Recompute self.values from the current self.examples.

    The examples (one row per example) are transposed into per-attribute
    columns with zip(*...), and `unique` is applied to each column, so
    self.values ends up with one entry per attribute position.  With no
    examples the result is an empty list.
    """
    # zip(*rows) yields one tuple per attribute column.
    self.values = [unique(column) for column in zip(*self.examples)]
157
+
153
158
def sanitize (self , example ):
154
159
"""Return a copy of example, with non-input attributes replaced by None."""
155
160
return [attr_i if i in self .inputs else None
@@ -166,6 +171,7 @@ def classes_to_numbers(self,classes=None):
166
171
def remove_examples(self, value=""):
    """Remove examples that contain given value.

    Every example for which `value in example` is true is dropped from
    self.examples; the remaining examples keep their original order.
    Afterwards self.values is refreshed via update_values() so it does
    not keep values that only occurred in the removed examples.
    """
    self.examples = [example for example in self.examples
                     if value not in example]
    # Keep the cached per-attribute values in step with the filtered examples.
    self.update_values()
169
175
170
176
def __repr__ (self ):
171
177
return '<DataSet({}): {:d} examples, {:d} attributes>' .format (
0 commit comments