@@ -143,10 +143,10 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress,
143
143
char to_path_temp [MAXPGPATH ];
144
144
int errno_temp ;
145
145
/* partial handling */
146
- int partial_timeout = 0 ;
147
- int partial_size = 0 ;
148
146
struct stat st ;
149
- bool partial_exists = false;
147
+ int partial_file_timeout = 0 ;
148
+ int partial_file_size = 0 ;
149
+ bool partial_file_exists = false;
150
150
151
151
#ifdef HAVE_LIBZ
152
152
char gz_to_path [MAXPGPATH ];
@@ -186,7 +186,7 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress,
186
186
gz_out = fio_gzopen (to_path_temp , PG_BINARY_W , instance_config .compress_level , FIO_BACKUP_HOST );
187
187
if (gz_out == NULL )
188
188
{
189
- partial_exists = true;
189
+ partial_file_exists = true;
190
190
elog (WARNING , "Cannot open destination temporary WAL file \"%s\": %s" ,
191
191
to_path_temp , strerror (errno ));
192
192
}
@@ -199,63 +199,65 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress,
199
199
out = fio_open (to_path_temp , O_RDWR | O_CREAT | O_EXCL | PG_BINARY , FIO_BACKUP_HOST );
200
200
if (out < 0 )
201
201
{
202
- partial_exists = true;
202
+ partial_file_exists = true;
203
203
elog (WARNING , "Cannot open destination temporary WAL file \"%s\": %s" ,
204
204
to_path_temp , strerror (errno ));
205
205
}
206
206
}
207
207
208
- /* sleep a second, check if .partial file size is changing, if not, then goto p1
209
- * Algorihtm is not pretty however we do not expect conflict for '.partial' file
210
- * to be frequent occurrence.
211
- * The main goal is to protect against failed archive-push which left behind
212
- * orphan '.partial' file.
208
+ /* Partial file already exists. It could have happened due to a failed archive-push,
209
+ * in this case partial file can be discarded, or due to concurrent archiving.
210
+ *
211
+ * Our main goal here is to try to handle partial file to prevent stalling of
212
+ * continuous archiving.
213
+ * To ensure that the encountered partial file is actually a stale "orphaned" file,
214
+ * check its size every second.
215
+ * If the size has not changed in PARTIAL_WAL_TIMER seconds, we can consider
216
+ * the file stale and reuse it.
217
+ * If the file size is changing, it means that another archiver is working in the same
218
+ * directory with the same files. Such partial files cannot be reused.
213
219
*/
214
- if (partial_exists )
220
+ if (partial_file_exists )
215
221
{
216
- while (1 )
222
+ while (partial_file_timeout < PARTIAL_WAL_TIMER )
217
223
{
218
- /* exit from loop */
219
- if (partial_timeout > 10 )
220
- {
221
- /* For 10 second the file didn`t changed its size, so consider it stale and reuse it */
222
- elog (WARNING , "Reusing stale destination temporary WAL file \"%s\"" , to_path_temp );
223
- fio_unlink (to_path_temp , FIO_BACKUP_HOST );
224
-
225
- #ifdef HAVE_LIBZ
226
- if (is_compress )
227
- {
228
- gz_out = fio_gzopen (to_path_temp , PG_BINARY_W , instance_config .compress_level , FIO_BACKUP_HOST );
229
- if (gz_out == NULL )
230
- elog (ERROR , "Cannot open destination temporary WAL file \"%s\": %s" ,
231
- to_path_temp , strerror (errno ));
232
- }
233
- else
234
- #endif
235
- {
236
- out = fio_open (to_path_temp , O_RDWR | O_CREAT | O_EXCL | PG_BINARY , FIO_BACKUP_HOST );
237
- if (out < 0 )
238
- elog (ERROR , "Cannot open destination temporary WAL file \"%s\": %s" ,
239
- to_path_temp , strerror (errno ));
240
- }
241
- break ;
242
- }
243
224
244
225
if (fio_stat (to_path_temp , & st , false, FIO_BACKUP_HOST ) < 0 )
245
226
/* It is ok if partial is gone, we can safely error out */
246
227
elog (ERROR , "Cannot stat destination temporary WAL file \"%s\": %s" , to_path_temp ,
247
228
strerror (errno ));
248
229
249
230
/* first round */
250
- if (!partial_timeout )
251
- partial_size = st .st_size ;
231
+ if (!partial_file_timeout )
232
+ partial_file_size = st .st_size ;
252
233
253
234
/* file size is changing */
254
- if (st .st_size > partial_size )
235
+ if (st .st_size > partial_file_size )
255
236
elog (ERROR , "Destination temporary WAL file \"%s\" is not stale" , to_path_temp );
256
237
257
238
sleep (1 );
258
- partial_timeout ++ ;
239
+ partial_file_timeout ++ ;
240
+ }
241
+
242
+ /* Partial segment is considered stale, so reuse it */
243
+ elog (WARNING , "Reusing stale destination temporary WAL file \"%s\"" , to_path_temp );
244
+ fio_unlink (to_path_temp , FIO_BACKUP_HOST );
245
+
246
+ #ifdef HAVE_LIBZ
247
+ if (is_compress )
248
+ {
249
+ gz_out = fio_gzopen (to_path_temp , PG_BINARY_W , instance_config .compress_level , FIO_BACKUP_HOST );
250
+ if (gz_out == NULL )
251
+ elog (ERROR , "Cannot open destination temporary WAL file \"%s\": %s" ,
252
+ to_path_temp , strerror (errno ));
253
+ }
254
+ else
255
+ #endif
256
+ {
257
+ out = fio_open (to_path_temp , O_RDWR | O_CREAT | O_EXCL | PG_BINARY , FIO_BACKUP_HOST );
258
+ if (out < 0 )
259
+ elog (ERROR , "Cannot open destination temporary WAL file \"%s\": %s" ,
260
+ to_path_temp , strerror (errno ));
259
261
}
260
262
}
261
263
0 commit comments