@@ -443,11 +443,12 @@ free_cps_slot(int code, Datum arg)
443
443
void
444
444
bgw_main_concurrent_part (Datum main_arg )
445
445
{
446
- int rows ;
446
+ ConcurrentPartSlot * part_slot ;
447
+ char * sql = NULL ;
448
+ int64 rows ;
447
449
bool failed ;
448
450
int failures_count = 0 ;
449
- char * sql = NULL ;
450
- ConcurrentPartSlot * part_slot ;
451
+ LOCKMODE lockmode = RowExclusiveLock ;
451
452
452
453
/* Update concurrent part slot */
453
454
part_slot = & concurrent_part_slots [DatumGetInt32 (main_arg )];
@@ -479,12 +480,14 @@ bgw_main_concurrent_part(Datum main_arg)
479
480
/* Do the job */
480
481
do
481
482
{
482
- MemoryContext old_mcxt ;
483
+ MemoryContext old_mcxt ;
483
484
484
485
Oid types [2 ] = { OIDOID , INT4OID };
485
486
Datum vals [2 ] = { part_slot -> relid , part_slot -> batch_size };
486
487
bool nulls [2 ] = { false, false };
487
488
489
+ bool rel_locked = false;
490
+
488
491
/* Reset loop variables */
489
492
failed = false;
490
493
rows = 0 ;
@@ -520,66 +523,89 @@ bgw_main_concurrent_part(Datum main_arg)
520
523
/* Exec ret = _partition_data_concurrent() */
521
524
PG_TRY ();
522
525
{
523
- /* Make sure that relation exists and has partitions */
524
- if (SearchSysCacheExists1 (RELOID , ObjectIdGetDatum (part_slot -> relid )) &&
525
- get_pathman_relation_info (part_slot -> relid ) != NULL )
526
- {
527
- int ret ;
528
- bool isnull ;
526
+ int ret ;
527
+ bool isnull ;
529
528
530
- ret = SPI_execute_with_args ( sql , 2 , types , vals , nulls , false, 0 );
531
- if (ret == SPI_OK_SELECT )
532
- {
533
- TupleDesc tupdesc = SPI_tuptable -> tupdesc ;
534
- HeapTuple tuple = SPI_tuptable -> vals [ 0 ];
529
+ /* Lock relation for DELETE and INSERT */
530
+ if (! ConditionalLockRelationOid ( part_slot -> relid , lockmode ) )
531
+ {
532
+ elog ( ERROR , "could not take lock on relation %u" , part_slot -> relid ) ;
533
+ }
535
534
536
- Assert (SPI_processed == 1 ); /* there should be 1 result at most */
535
+ /* Great, now relation is locked */
536
+ rel_locked = true;
537
537
538
- rows = DatumGetInt32 (SPI_getbinval (tuple , tupdesc , 1 , & isnull ));
538
+ /* Make sure that relation exists */
539
+ if (!SearchSysCacheExists1 (RELOID , ObjectIdGetDatum (part_slot -> relid )))
540
+ {
541
+ /* Exit after we raise ERROR */
542
+ failures_count = PART_WORKER_MAX_ATTEMPTS ;
539
543
540
- Assert (!isnull ); /* ... and ofc it must not be NULL */
541
- }
544
+ elog (ERROR , "relation %u does not exist" , part_slot -> relid );
542
545
}
543
- /* Otherwise it's time to exit */
544
- else
546
+
547
+ /* Make sure that relation has partitions */
548
+ if (get_pathman_relation_info (part_slot -> relid ) == NULL )
545
549
{
550
+ /* Exit after we raise ERROR */
546
551
failures_count = PART_WORKER_MAX_ATTEMPTS ;
547
552
548
- elog (LOG , "relation \"%u\" is not partitioned (or does not exist)" ,
549
- part_slot -> relid );
553
+ elog (ERROR , "relation \"%s\" is not partitioned" ,
554
+ get_rel_name (part_slot -> relid ));
555
+ }
556
+
557
+ /* Call concurrent partitioning function */
558
+ ret = SPI_execute_with_args (sql , 2 , types , vals , nulls , false, 0 );
559
+ if (ret == SPI_OK_SELECT )
560
+ {
561
+ TupleDesc tupdesc = SPI_tuptable -> tupdesc ;
562
+ HeapTuple tuple = SPI_tuptable -> vals [0 ];
563
+
564
+ /* There should be exactly 1 result */
565
+ Assert (SPI_processed == 1 );
566
+
567
+ /* Extract number of processed rows */
568
+ rows = DatumGetInt64 (SPI_getbinval (tuple , tupdesc , 1 , & isnull ));
569
+ Assert (!isnull ); /* ... and ofc it must not be NULL */
550
570
}
571
+ /* Else raise generic error */
572
+ else elog (ERROR , "partitioning function returned %u" , ret );
573
+
574
+ /* Finally, unlock our partitioned table */
575
+ UnlockRelationOid (part_slot -> relid , lockmode );
551
576
}
552
577
PG_CATCH ();
553
578
{
554
579
/*
555
580
* The most common exception we can catch here is a deadlock with
556
581
* concurrent user queries. Check that attempts count doesn't exceed
557
- * some reasonable value
582
+ * some reasonable value.
558
583
*/
559
- ErrorData * error ;
560
- char * sleep_time_str ;
584
+ ErrorData * error ;
585
+
586
+ /* Unlock relation if we caught ERROR too early */
587
+ if (rel_locked )
588
+ UnlockRelationOid (part_slot -> relid , lockmode );
589
+
590
+ /* Increase number of failures and set 'failed' status */
591
+ failures_count ++ ;
592
+ failed = true;
561
593
562
594
/* Switch to the original context & copy edata */
563
595
MemoryContextSwitchTo (old_mcxt );
564
596
error = CopyErrorData ();
565
597
FlushErrorState ();
566
598
567
599
/* Print message for this BGWorker to server log */
568
- sleep_time_str = datum_to_cstring (Float8GetDatum (part_slot -> sleep_time ),
569
- FLOAT8OID );
570
- failures_count ++ ;
571
600
ereport (LOG ,
572
601
(errmsg ("%s: %s" , concurrent_part_bgw , error -> message ),
573
- errdetail ("attempt: %d/%d, sleep time: %s " ,
602
+ errdetail ("attempt: %d/%d, sleep time: %.2f " ,
574
603
failures_count ,
575
604
PART_WORKER_MAX_ATTEMPTS ,
576
- sleep_time_str )));
577
- pfree (sleep_time_str ); /* free the time string */
605
+ (float ) part_slot -> sleep_time )));
578
606
607
+ /* Finally, free error data */
579
608
FreeErrorData (error );
580
-
581
- /* Set 'failed' flag */
582
- failed = true;
583
609
}
584
610
PG_END_TRY ();
585
611
@@ -606,9 +632,10 @@ bgw_main_concurrent_part(Datum main_arg)
606
632
/* Failed this time, wait */
607
633
else if (failed )
608
634
{
609
- /* Abort transaction and sleep for a second */
635
+ /* Abort transaction */
610
636
AbortCurrentTransaction ();
611
637
638
+ /* Sleep for a specified amount of time (default 1s) */
612
639
DirectFunctionCall1 (pg_sleep , Float8GetDatum (part_slot -> sleep_time ));
613
640
}
614
641
@@ -626,8 +653,10 @@ bgw_main_concurrent_part(Datum main_arg)
626
653
627
654
#ifdef USE_ASSERT_CHECKING
628
655
/* Report debug message */
629
- elog (DEBUG1 , "%s: relocated %d rows, total: " UINT64_FORMAT " [%u]" ,
630
- concurrent_part_bgw , rows , part_slot -> total_rows , MyProcPid );
656
+ elog (DEBUG1 , "%s: "
657
+ "relocated" INT64_FORMAT "rows, "
658
+ "total: " INT64_FORMAT ,
659
+ concurrent_part_bgw , rows , part_slot -> total_rows );
631
660
#endif
632
661
}
633
662
@@ -636,9 +665,6 @@ bgw_main_concurrent_part(Datum main_arg)
636
665
break ;
637
666
}
638
667
while (rows > 0 || failed ); /* do while there's still rows to be relocated */
639
-
640
- /* Reclaim the resources */
641
- pfree (sql );
642
668
}
643
669
644
670
@@ -824,26 +850,33 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS)
824
850
/* Iterate through worker slots */
825
851
for (i = userctx -> cur_idx ; i < PART_WORKER_SLOTS ; i ++ )
826
852
{
827
- ConcurrentPartSlot * cur_slot = & concurrent_part_slots [i ];
853
+ ConcurrentPartSlot * cur_slot = & concurrent_part_slots [i ],
854
+ slot_copy ;
828
855
HeapTuple htup = NULL ;
829
856
830
- HOLD_INTERRUPTS ();
857
+ /* Copy slot to process local memory */
831
858
SpinLockAcquire (& cur_slot -> mutex );
859
+ memcpy (& slot_copy , cur_slot , sizeof (ConcurrentPartSlot ));
860
+ SpinLockRelease (& cur_slot -> mutex );
832
861
833
- if (cur_slot -> worker_status != CPS_FREE )
862
+ if (slot_copy . worker_status != CPS_FREE )
834
863
{
835
864
Datum values [Natts_pathman_cp_tasks ];
836
865
bool isnull [Natts_pathman_cp_tasks ] = { 0 };
837
866
838
- values [Anum_pathman_cp_tasks_userid - 1 ] = cur_slot -> userid ;
839
- values [Anum_pathman_cp_tasks_pid - 1 ] = cur_slot -> pid ;
840
- values [Anum_pathman_cp_tasks_dbid - 1 ] = cur_slot -> dbid ;
841
- values [Anum_pathman_cp_tasks_relid - 1 ] = cur_slot -> relid ;
842
- values [Anum_pathman_cp_tasks_processed - 1 ] = cur_slot -> total_rows ;
867
+ values [Anum_pathman_cp_tasks_userid - 1 ] = slot_copy .userid ;
868
+ values [Anum_pathman_cp_tasks_pid - 1 ] = slot_copy .pid ;
869
+ values [Anum_pathman_cp_tasks_dbid - 1 ] = slot_copy .dbid ;
870
+ values [Anum_pathman_cp_tasks_relid - 1 ] = slot_copy .relid ;
871
+
872
+ /* Record processed rows */
873
+ values [Anum_pathman_cp_tasks_processed - 1 ] =
874
+ /* FIXME: use Int64GetDatum() in release 1.5 */
875
+ Int32GetDatum ((int32 ) slot_copy .total_rows );
843
876
844
877
/* Now build a status string */
845
878
values [Anum_pathman_cp_tasks_status - 1 ] =
846
- CStringGetTextDatum (cps_print_status (cur_slot -> worker_status ));
879
+ CStringGetTextDatum (cps_print_status (slot_copy . worker_status ));
847
880
848
881
/* Form output tuple */
849
882
htup = heap_form_tuple (funcctx -> tuple_desc , values , isnull );
@@ -852,9 +885,6 @@ show_concurrent_part_tasks_internal(PG_FUNCTION_ARGS)
852
885
userctx -> cur_idx = i + 1 ;
853
886
}
854
887
855
- SpinLockRelease (& cur_slot -> mutex );
856
- RESUME_INTERRUPTS ();
857
-
858
888
/* Return tuple if needed */
859
889
if (htup )
860
890
SRF_RETURN_NEXT (funcctx , HeapTupleGetDatum (htup ));
0 commit comments