Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7babedc

Browse files
reedwm authored and Taylor Robie committed
Fix spurious "did not start correctly" error. (tensorflow#5252)
* Fix spurious "did not start correctly" error.

  The error "Generation subprocess did not start correctly" would occur if the async process started up after the main process checked for the subproc_alive file.

* Add error message
1 parent 5856878 commit 7babedc

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

official/recommendation/data_preprocessing.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,14 @@ def instantiate_pipeline(dataset, data_dir, batch_size, eval_batch_size,
452452
atexit.register(tf.gfile.DeleteRecursively,
453453
ncf_dataset.cache_paths.cache_root)
454454

455+
for _ in range(15):
456+
if tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive):
457+
break
458+
time.sleep(1) # allow `alive` file to be written
459+
if not tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive):
460+
raise ValueError("Generation subprocess did not start correctly. Data will "
461+
"not be available; exiting to avoid waiting forever.")
462+
455463
return ncf_dataset
456464

457465

@@ -495,8 +503,10 @@ def make_train_input_fn(ncf_dataset):
495503
"""Construct training input_fn for the current epoch."""
496504

497505
if not tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive):
498-
raise ValueError("Generation subprocess did not start correctly. Data will "
499-
"not be available; exiting to avoid waiting forever.")
506+
# The generation subprocess must have been alive at some point, because we
507+
# earlier checked that the subproc_alive file existed.
508+
raise ValueError("Generation subprocess unexpectedly died. Data will not "
509+
"be available; exiting to avoid waiting forever.")
500510

501511
train_epoch_dir = ncf_dataset.cache_paths.train_epoch_dir
502512
while not tf.gfile.Exists(train_epoch_dir):

official/recommendation/data_test.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,6 @@ def test_end_to_end(self):
115115
batch_size=BATCH_SIZE, eval_batch_size=BATCH_SIZE, num_data_readers=2,
116116
num_neg=NUM_NEG)
117117

118-
for _ in range(30):
119-
if tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive):
120-
break
121-
time.sleep(1) # allow `alive` file to be written
122-
123118
g = tf.Graph()
124119
with g.as_default():
125120
input_fn, record_dir, batch_count = \

0 commit comments

Comments
 (0)