Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit de9f358

Browse files
authored
Merge pull request tensorflow#4084 from XinyueZ/optimized/cookbook/regression/make_dataset
Fixed tensorflow#4083 and made two optimizations
2 parents 579ef7d + 4cfb259 commit de9f358

File tree

5 files changed

+45
-80
lines changed

5 files changed

+45
-80
lines changed

samples/cookbook/regression/automobile_data.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -109,19 +109,19 @@ def load_data(y_name="price", train_fraction=0.7, seed=None):
109109

110110
return (x_train, y_train), (x_test, y_test)
111111

112-
def make_dataset(x, y=None):
113-
"""Create a slice Dataset from a pandas DataFrame and labels"""
114-
# TODO(markdaooust): simplify this after the 1.4 cut.
115-
# Convert the DataFrame to a dict
116-
x = dict(x)
117-
118-
# Convert the pd.Series to np.arrays
119-
for key in x:
120-
x[key] = np.array(x[key])
121112

122-
items = [x]
123-
if y is not None:
124-
items.append(np.array(y, dtype=np.float32))
113+
def make_dataset(batch_sz, x, y=None, shuffle=False, shuffle_buffer_size=1000):
114+
"""Create a slice Dataset from a pandas DataFrame and labels"""
125115

126-
# Create a Dataset of slices
127-
return tf.data.Dataset.from_tensor_slices(tuple(items))
116+
def input_fn():
117+
if y is not None:
118+
dataset = tf.data.Dataset.from_tensor_slices((dict(x), y))
119+
else:
120+
dataset = tf.data.Dataset.from_tensor_slices(dict(x))
121+
if shuffle:
122+
dataset = dataset.shuffle(shuffle_buffer_size).batch(batch_sz).repeat()
123+
else:
124+
dataset = dataset.batch(batch_sz)
125+
return dataset.make_one_shot_iterator().get_next()
126+
127+
return input_fn

samples/cookbook/regression/custom_regression.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@
3131
parser.add_argument('--price_norm_factor', default=1000., type=float,
3232
help='price normalization factor')
3333

34-
35-
def from_dataset(ds):
36-
return lambda: ds.make_one_shot_iterator().get_next()
37-
38-
3934
def my_dnn_regression_fn(features, labels, mode, params):
4035
"""A model function implementing DNN regression for a custom Estimator."""
4136

@@ -81,6 +76,10 @@ def my_dnn_regression_fn(features, labels, mode, params):
8176
# Calculate root mean squared error
8277
print(labels)
8378
print(predictions)
79+
80+
# Fixed for #4083
81+
predictions = tf.cast(predictions, tf.float64)
82+
8483
rmse = tf.metrics.root_mean_squared_error(labels, predictions)
8584

8685
# Add the rmse to the collection of evaluation metrics.
@@ -102,17 +101,11 @@ def main(argv):
102101
train_y /= args.price_norm_factor
103102
test_y /= args.price_norm_factor
104103

105-
# Build the training dataset.
106-
train = (
107-
automobile_data.make_dataset(train_x, train_y)
108-
# Shuffling with a buffer larger than the data set ensures
109-
# that the examples are well mixed.
110-
.shuffle(1000).batch(args.batch_size)
111-
# Repeat forever
112-
.repeat())
104+
# Provide the training input dataset.
105+
train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
113106

114107
# Build the validation dataset.
115-
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
108+
test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
116109

117110
# The first way assigns a unique weight to each category. To do this you must
118111
# specify the category's vocabulary (values outside this specification will
@@ -151,10 +144,10 @@ def main(argv):
151144
})
152145

153146
# Train the model.
154-
model.train(input_fn=from_dataset(train), steps=args.train_steps)
147+
model.train(input_fn=train_input_fn, steps=args.train_steps)
155148

156149
# Evaluate how the model performs on data it has not yet seen.
157-
eval_result = model.evaluate(input_fn=from_dataset(test))
150+
eval_result = model.evaluate(input_fn=test_input_fn)
158151

159152
# Print the Root Mean Square Error (RMSE).
160153
print("\n" + 80 * "*")

samples/cookbook/regression/dnn_regression.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@
3232
help='price normalization factor')
3333

3434

35-
def from_dataset(ds):
36-
return lambda: ds.make_one_shot_iterator().get_next()
37-
38-
3935
def main(argv):
4036
"""Builds, trains, and evaluates the model."""
4137
args = parser.parse_args(argv[1:])
@@ -45,17 +41,11 @@ def main(argv):
4541
train_y /= args.price_norm_factor
4642
test_y /= args.price_norm_factor
4743

48-
# Build the training dataset.
49-
train = (
50-
automobile_data.make_dataset(train_x, train_y)
51-
# Shuffling with a buffer larger than the data set ensures
52-
# that the examples are well mixed.
53-
.shuffle(1000).batch(args.batch_size)
54-
# Repeat forever
55-
.repeat())
44+
# Provide the training input dataset.
45+
train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
5646

57-
# Build the validation dataset.
58-
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
47+
# Provide the validation input dataset.
48+
test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
5949

6050
# Use the same categorical columns as in `linear_regression_categorical`
6151
body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
@@ -84,10 +74,10 @@ def main(argv):
8474

8575
# Train the model.
8676
# By default, the Estimators log output every 100 steps.
87-
model.train(input_fn=from_dataset(train), steps=args.train_steps)
77+
model.train(input_fn=train_input_fn, steps=args.train_steps)
8878

8979
# Evaluate how the model performs on data it has not yet seen.
90-
eval_result = model.evaluate(input_fn=from_dataset(test))
80+
eval_result = model.evaluate(input_fn=test_input_fn)
9181

9282
# The evaluation returns a Python dictionary. The "average_loss" key holds the
9383
# Mean Squared Error (MSE).

samples/cookbook/regression/linear_regression.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@
3333
help='price normalization factor')
3434

3535

36-
def from_dataset(ds):
37-
return lambda: ds.make_one_shot_iterator().get_next()
38-
39-
4036
def main(argv):
4137
"""Builds, trains, and evaluates the model."""
4238
args = parser.parse_args(argv[1:])
@@ -46,17 +42,11 @@ def main(argv):
4642
train_y /= args.price_norm_factor
4743
test_y /= args.price_norm_factor
4844

49-
# Build the training dataset.
50-
train = (
51-
automobile_data.make_dataset(train_x, train_y)
52-
# Shuffling with a buffer larger than the data set ensures
53-
# that the examples are well mixed.
54-
.shuffle(1000).batch(args.batch_size)
55-
# Repeat forever
56-
.repeat())
45+
# Provide the training input dataset.
46+
train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
5747

58-
# Build the validation dataset.
59-
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
48+
# Provide the validation input dataset.
49+
test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
6050

6151
feature_columns = [
6252
# "curb-weight" and "highway-mpg" are numeric columns.
@@ -69,10 +59,10 @@ def main(argv):
6959

7060
# Train the model.
7161
# By default, the Estimators log output every 100 steps.
72-
model.train(input_fn=from_dataset(train), steps=args.train_steps)
62+
model.train(input_fn=train_input_fn, steps=args.train_steps)
7363

7464
# Evaluate how the model performs on data it has not yet seen.
75-
eval_result = model.evaluate(input_fn=from_dataset(test))
65+
eval_result = model.evaluate(input_fn=test_input_fn)
7666

7767
# The evaluation returns a Python dictionary. The "average_loss" key holds the
7868
# Mean Squared Error (MSE).
@@ -88,8 +78,10 @@ def main(argv):
8878
"curb-weight": np.array([2000, 3000]),
8979
"highway-mpg": np.array([30, 40])
9080
}
91-
predict = automobile_data.make_dataset(input_dict).batch(1)
92-
predict_results = model.predict(input_fn=from_dataset(predict))
81+
82+
# Provide the predict input dataset.
83+
predict_input_fn = automobile_data.make_dataset(1, input_dict)
84+
predict_results = model.predict(input_fn=predict_input_fn)
9385

9486
# Print the prediction results.
9587
print("\nPrediction results:")

samples/cookbook/regression/linear_regression_categorical.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@
3232
help='price normalization factor')
3333

3434

35-
def from_dataset(ds):
36-
return lambda: ds.make_one_shot_iterator().get_next()
37-
38-
3935
def main(argv):
4036
"""Builds, trains, and evaluates the model."""
4137
args = parser.parse_args(argv[1:])
@@ -45,17 +41,11 @@ def main(argv):
4541
train_y /= args.price_norm_factor
4642
test_y /= args.price_norm_factor
4743

48-
# Build the training dataset.
49-
train = (
50-
automobile_data.make_dataset(train_x, train_y)
51-
# Shuffling with a buffer larger than the data set ensures
52-
# that the examples are well mixed.
53-
.shuffle(1000).batch(args.batch_size)
54-
# Repeat forever
55-
.repeat())
44+
# Provide the training input dataset.
45+
train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
5646

57-
# Build the validation dataset.
58-
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
47+
# Provide the validation input dataset.
48+
test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
5949

6050
# The following code demonstrates two of the ways that `feature_columns` can
6151
# be used to build a model with categorical inputs.
@@ -93,10 +83,10 @@ def main(argv):
9383

9484
# Train the model.
9585
# By default, the Estimators log output every 100 steps.
96-
model.train(input_fn=from_dataset(train), steps=args.train_steps)
86+
model.train(input_fn=train_input_fn, steps=args.train_steps)
9787

9888
# Evaluate how the model performs on data it has not yet seen.
99-
eval_result = model.evaluate(input_fn=from_dataset(test))
89+
eval_result = model.evaluate(input_fn=test_input_fn)
10090

10191
# The evaluation returns a Python dictionary. The "average_loss" key holds the
10292
# Mean Squared Error (MSE).

0 commit comments

Comments
 (0)