Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Hybrid suspend deadlock taking the marshal mutex #9407

@lambdageek

Description

@lambdageek

Seen on the Hybrid suspend CI: https://jenkins.mono-project.com/job/test-mono-pull-request-hybrid-suspend/2170/parsed_console/log.html

  Id   Target Id         Frame 
* 1    Thread 0x7f88e70fd740 (LWP 59080) "mono" 0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
  2    Thread 0x7f88e5bff700 (LWP 59127) "SGen worker" 0x00007f88e65cbb3a in waitpid () from /lib/x86_64-linux-gnu/libpthread.so.0
  3    Thread 0x7f88e33e1700 (LWP 59136) "Finalizer" 0x00007f88e65ca536 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0
  4    Thread 0x7f88e1d4b700 (LWP 59384) "mono" 0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
  5    Thread 0x7f88e1b4a700 (LWP 59385) "Thread Pool Wor" 0x00007f88e65caf5c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0
  6    Thread 0x7f88e1949700 (LWP 59386) "Thread Pool Wor" 0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
  7    Thread 0x7f88e1744700 (LWP 62887) "Timer-Scheduler" 0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
  8    Thread 0x7f88cbd6d700 (LWP 62888) "Domain unloader" 0x00007f88e65ca536 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0

Thread 8 (Thread 0x7f88cbd6d700 (LWP 62888)):
#0  0x00007f88e65ca536 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0
#1  0x00007f88e65ca5e4 in __new_sem_wait_slow.constprop.0 () from /lib/x86_64-linux-gnu/libpthread.so.0
#2  0x000056521fa5dd31 in mono_os_sem_wait (flags=<optimized out>, sem=<optimized out>) at ../../mono/utils/mono-os-semaphore.h:209
#3  mono_os_sem_timedwait (sem=0x56521fdc1180 <suspend_semaphore>, flags=MONO_SEM_FLAGS_NONE, timeout_ms=4294967295) at ../../mono/utils/mono-os-semaphore.h:242
#4  mono_threads_wait_pending_operations () at mono-threads.c:305
#5  0x000056521fa117c7 in sgen_unified_suspend_stop_world () at sgen-stw.c:275
#6  sgen_client_stop_world (generation=generation@entry=0, serial_collection=serial_collection@entry=0) at sgen-stw.c:124
#7  0x000056521fa215fc in sgen_stop_world (generation=generation@entry=0, serial_collection=serial_collection@entry=0) at sgen-gc.c:3821
#8  0x000056521f9f34f1 in mono_gc_clear_domain (domain=domain@entry=0x56522092d800) at sgen-mono.c:829
#9  0x000056521f902170 in mono_domain_free (domain=domain@entry=0x56522092d800, force=force@entry=0) at domain.c:1124
#10 0x000056521f8fb4a8 in unload_thread_main (arg=arg@entry=0x56522308e160) at appdomain.c:2806
#11 0x000056521f99ce92 in start_wrapper_internal (stack_ptr=<optimized out>, start_info=0x0) at threads.c:1132
#12 start_wrapper (data=0x56522308dfc0) at threads.c:1192
#13 0x00007f88e65c2494 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
#14 0x00007f88e60edacf in clone () from /lib/x86_64-linux-gnu/libc.so.6

Thread 7 (Thread 0x7f88e1744700 (LWP 62887)):
#0  0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x000056521fa615f3 in suspend_signal_handler (_dummy=<optimized out>, info=<optimized out>, context=0x7f88e1742bc0) at mono-threads-posix-signals.c:199
#2  <signal handler called>
#3  0x00007f88e65c815d in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
#4  0x000056521fa562c5 in mono_os_cond_wait (mutex=0x5652203ec748, cond=0x5652203ec770) at mono-os-mutex.h:173
#5  mono_os_cond_timedwait (cond=cond@entry=0x5652203ec770, mutex=mutex@entry=0x5652203ec748, timeout_ms=timeout_ms@entry=4294967295) at mono-os-mutex.c:32
#6  0x000056521f9b3655 in mono_coop_cond_timedwait (timeout_ms=4294967295, mutex=0x5652203ec748, cond=0x5652203ec770) at ../../mono/utils/mono-coop-mutex.h:102
#7  mono_w32handle_timedwait_signal_naked (alerted=0x7f88e1743220, poll=0, timeout=4294967295, mutex=0x5652203ec748, cond=0x5652203ec770) at w32handle.c:641
#8  mono_w32handle_timedwait_signal_handle (handle_data=0x5652203ec738, timeout=timeout@entry=4294967295, alerted=alerted@entry=0x7f88e1743220, poll=0) at w32handle.c:756
#9  0x000056521f9b495f in mono_w32handle_wait_one (handle=<optimized out>, timeout=timeout@entry=4294967295, alertable=alertable@entry=1) at w32handle.c:864
#10 0x000056521f9b50a7 in mono_w32handle_wait_multiple (handles=handles@entry=0x7f88e17439c0, nhandles=nhandles@entry=1, waitall=waitall@entry=0, timeout=timeout@entry=4294967295, alertable=alertable@entry=1) at w32handle.c:912
#11 0x000056521f99aa73 in ves_icall_System_Threading_WaitHandle_Wait_internal (handles=0x7f88e17439c0, numhandles=<optimized out>, waitall=<optimized out>, timeout=-1, error=0x7f88e17438e0) at threads.c:2104
#12 0x000000004125f6b5 in ?? ()
#13 0x00007f88e5e9fe10 in ?? ()
#14 0x00007f88e1743ab0 in ?? ()
#15 0xffffffffffffffff in ?? ()
#16 0x0000000000000000 in ?? ()

Thread 6 (Thread 0x7f88e1949700 (LWP 59386)):
#0  0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x000056521fa615f3 in suspend_signal_handler (_dummy=<optimized out>, info=<optimized out>, context=0x7f88e1948740) at mono-threads-posix-signals.c:199
#2  <signal handler called>
#3  0x00007f88e65ca700 in do_futex_wait () from /lib/x86_64-linux-gnu/libpthread.so.0
#4  0x00007f88e65ca7cf in __new_sem_wait_slow () from /lib/x86_64-linux-gnu/libpthread.so.0
#5  0x00007f88e65ca882 in sem_timedwait () from /lib/x86_64-linux-gnu/libpthread.so.0
#6  0x000056521f9fa412 in mono_os_sem_timedwait (flags=MONO_SEM_FLAGS_ALERTABLE, timeout_ms=<optimized out>, sem=0x56521fdb2108 <worker+72>) at ../../mono/utils/mono-os-semaphore.h:258
#7  mono_coop_sem_timedwait (sem=0x56521fdb2108 <worker+72>, flags=MONO_SEM_FLAGS_ALERTABLE, timeout_ms=<optimized out>) at ../../mono/utils/mono-coop-semaphore.h:57
#8  worker_park () at threadpool-worker-default.c:391
#9  worker_thread (unused=unused@entry=0x0) at threadpool-worker-default.c:490
#10 0x000056521f99ce92 in start_wrapper_internal (stack_ptr=<optimized out>, start_info=0x0) at threads.c:1132
#11 start_wrapper (data=0x7f88d8004000) at threads.c:1192
#12 0x00007f88e65c2494 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
#13 0x00007f88e60edacf in clone () from /lib/x86_64-linux-gnu/libc.so.6

Thread 5 (Thread 0x7f88e1b4a700 (LWP 59385)):
#0  0x00007f88e65caf5c in __lll_lock_wait () from /lib/x86_64-linux-gnu/libpthread.so.0
#1  0x00007f88e65c4c06 in pthread_mutex_lock () from /lib/x86_64-linux-gnu/libpthread.so.0
#2  0x000056521f94b45d in mono_os_mutex_lock (mutex=0x56521fdae800 <marshal_mutex>) at ../../mono/utils/mono-os-mutex.h:99
#3  mono_marshal_lock_internal () at marshal.c:302
#4  0x000056521f98f232 in mono_marshal_remoting_find_in_cache (wrapper_type=9, method=0x56522307a910) at remoting.c:317
#5  mono_marshal_get_remoting_invoke_with_check (method=method@entry=0x56522307a910, error=error@entry=0x7f88e1b494e0) at remoting.c:1363
#6  0x000056521f819356 in decode_method_ref_with_target (module=module@entry=0x5652204025c0, ref=ref@entry=0x7f88e1b494c0, target=target@entry=0x0, buf=<optimized out>, endbuf=endbuf@entry=0x7f88e1b494b8, error=error@entry=0x7f88e1b494e0) at aot-runtime.c:948
#7  0x000056521f81df3f in decode_method_ref (error=0x7f88e1b494e0, endbuf=0x7f88e1b494b8, buf=<optimized out>, ref=0x7f88e1b494c0, module=0x5652204025c0) at aot-runtime.c:1399
#8  decode_patch (aot_module=aot_module@entry=0x5652204025c0, mp=mp@entry=0x7f88d80cb890, ji=ji@entry=0x7f88e1b49610, buf=<optimized out>, endbuf=endbuf@entry=0x7f88e1b49608) at aot-runtime.c:3631
#9  0x000056521f822126 in mono_aot_plt_resolve (aot_module=aot_module@entry=0x5652204025c0, plt_info_offset=<optimized out>, code=code@entry=0x7f88e381e8c3 <System_Threading_Tasks_Task_DelayPromise_Complete+227> "M\205\377\017\204\027", error=error@entry=0x7f88e1b49670) at aot-runtime.c:4970
#10 0x000056521f82f0e3 in mono_aot_plt_trampoline (regs=0x7f88e1b49728, code=0x7f88e381e8c3 <System_Threading_Tasks_Task_DelayPromise_Complete+227> "M\205\377\017\204\027", aot_module=0x5652204025c0 "@\020@ RV", tramp=<optimized out>) at mini-trampolines.c:1101
#11 0x0000000040b9ed93 in ?? ()
#12 0x00007f88d8000ff5 in ?? ()
#13 0x0000000000000000 in ?? ()

Thread 4 (Thread 0x7f88e1d4b700 (LWP 59384)):
#0  0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x000056521fa615f3 in suspend_signal_handler (_dummy=<optimized out>, info=<optimized out>, context=0x7f88e1d4a700) at mono-threads-posix-signals.c:199
#2  <signal handler called>
#3  0x00007f88e65c8508 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
#4  0x000056521fa5627b in mono_os_cond_timedwait (cond=cond@entry=0x56521fdc10c0 <sleep_cond>, mutex=mutex@entry=0x56521fdc1100 <sleep_mutex>, timeout_ms=timeout_ms@entry=500) at mono-os-mutex.c:75
#5  0x000056521fa5f9f2 in mono_coop_cond_timedwait (cond=0x56521fdc10c0 <sleep_cond>, mutex=0x56521fdc1100 <sleep_mutex>, timeout_ms=500) at ../../mono/utils/mono-coop-mutex.h:102
#6  sleep_interruptable (alerted=0x7f88e1d4adfc, ms=500) at mono-threads.c:1394
#7  mono_thread_info_sleep (ms=ms@entry=500, alerted=alerted@entry=0x7f88e1d4adfc) at mono-threads.c:1426
#8  0x000056521f9faea2 in monitor_thread (unused=unused@entry=0x0) at threadpool-worker-default.c:708
#9  0x000056521f99ce92 in start_wrapper_internal (stack_ptr=<optimized out>, start_info=0x0) at threads.c:1132
#10 start_wrapper (data=0x565220e084d0) at threads.c:1192
#11 0x00007f88e65c2494 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
#12 0x00007f88e60edacf in clone () from /lib/x86_64-linux-gnu/libc.so.6

Thread 3 (Thread 0x7f88e33e1700 (LWP 59136)):
#0  0x00007f88e65ca536 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0
#1  0x00007f88e65ca5e4 in __new_sem_wait_slow.constprop.0 () from /lib/x86_64-linux-gnu/libpthread.so.0
#2  0x000056521fa5d262 in mono_os_sem_wait (flags=MONO_SEM_FLAGS_NONE, sem=0x7f88dc000920) at ../../mono/utils/mono-os-semaphore.h:209
#3  mono_thread_info_wait_for_resume (info=info@entry=0x7f88dc0008c0) at mono-threads.c:220
#4  0x000056521fa61e20 in mono_threads_state_poll_with_info (info=0x7f88dc0008c0) at mono-threads-coop.c:145
#5  0x000056521fa621ea in mono_threads_enter_gc_unsafe_region_unbalanced_with_info (info=<optimized out>, stackdata=stackdata@entry=0x7f88e33e0be0) at mono-threads-coop.c:416
#6  0x000056521fa62206 in mono_threads_enter_gc_unsafe_region_with_info (info=<optimized out>, stackdata=stackdata@entry=0x7f88e33e0be0) at mono-threads-coop.c:373
#7  0x000056521fa62242 in mono_threads_enter_gc_unsafe_region_internal (stackdata=stackdata@entry=0x7f88e33e0be0) at mono-threads-coop.c:354
#8  0x000056521f94670c in mono_method_signature (m=m@entry=0x565222f9d0b0) at loader.c:2580
#9  0x000056521f9549a8 in mono_marshal_free_dynamic_wrappers (method=method@entry=0x565222f9d0b0) at marshal.c:6206
#10 0x000056521f946260 in mono_free_method (method=0x565222f9d0b0) at loader.c:1966
#11 0x000056521f9762ca in mono_runtime_free_method (domain=<optimized out>, method=<optimized out>) at object.c:780
#12 0x000056521f9bfaed in free_dynamic_method (dynamic_method=0x565222f9d090) at sre.c:3971
#13 0x000056521f9e5254 in reference_queue_proccess (queue=queue@entry=0x565220c367e0) at gc.c:1131
#14 0x000056521f9e5758 in reference_queue_proccess_all () at gc.c:1145
#15 0x000056521f9e5ce9 in mono_runtime_do_background_work () at gc.c:874
#16 finalizer_thread (unused=unused@entry=0x0) at gc.c:911
#17 0x000056521f99ce92 in start_wrapper_internal (stack_ptr=<optimized out>, start_info=0x0) at threads.c:1132
#18 start_wrapper (data=0x565220457f50) at threads.c:1192
#19 0x00007f88e65c2494 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
#20 0x00007f88e60edacf in clone () from /lib/x86_64-linux-gnu/libc.so.6

Thread 2 (Thread 0x7f88e5bff700 (LWP 59127)):
#0  0x00007f88e65cbb3a in waitpid () from /lib/x86_64-linux-gnu/libpthread.so.0
#1  0x000056521f82c421 in mono_handle_native_crash (signal=0x56521fa8d266 "SIGABRT", ctx=<optimized out>, info=<optimized out>) at mini-exceptions.c:3179
#2  <signal handler called>
#3  0x00007f88e65c815f in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
#4  0x000056521fa49d5b in mono_os_cond_wait (mutex=0x56521fdc0680 <lock>, cond=0x56521fdc0640 <work_cond>) at ../../mono/utils/mono-os-mutex.h:173
#5  get_work (job=<synthetic pointer>, do_idle=<synthetic pointer>, work_context=<synthetic pointer>, worker_index=0) at sgen-thread-pool.c:165
#6  thread_func (data=<optimized out>) at sgen-thread-pool.c:196
#7  0x00007f88e65c2494 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
#8  0x00007f88e60edacf in clone () from /lib/x86_64-linux-gnu/libc.so.6

Thread 1 (Thread 0x7f88e70fd740 (LWP 59080)):
#0  0x00007f88e60383d6 in sigsuspend () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x000056521fa615f3 in suspend_signal_handler (_dummy=<optimized out>, info=<optimized out>, context=0x7ffec5ebc100) at mono-threads-posix-signals.c:199
#2  <signal handler called>
#3  0x00007f88e65c815d in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0
#4  0x000056521fa4cbff in mono_os_cond_wait (mutex=0x56521fdc0840 <signal_mutex>, cond=0x7ffec5ebc720) at mono-os-mutex.h:173
#5  mono_os_event_wait_multiple (events=events@entry=0x7ffec5ebc798, nevents=nevents@entry=1, waitall=waitall@entry=1, timeout=timeout@entry=4294967295, alertable=alertable@entry=1) at os-event-unix.c:190
#6  0x000056521fa4ce82 in mono_os_event_wait_one (event=<optimized out>, event@entry=0x7f88c4000f60, timeout=timeout@entry=4294967295, alertable=alertable@entry=1) at os-event-unix.c:94
#7  0x000056521fa6002d in mono_thread_info_wait_one_handle (thread_handle=thread_handle@entry=0x7f88c4000f50, timeout=timeout@entry=4294967295, alertable=alertable@entry=1) at mono-threads.c:1756
#8  0x000056521f8ffa98 in guarded_wait (timeout=4294967295, alertable=1, thread_handle=0x7f88c4000f50) at appdomain.c:2840
#9  mono_domain_try_unload (domain=domain@entry=0x56522092d800, exc=exc@entry=0x7ffec5ebc898) at appdomain.c:2943
#10 0x000056521f8ffc16 in ves_icall_System_AppDomain_InternalUnload (domain_id=<optimized out>, error=0x7ffec5ebc900) at appdomain.c:2468
#11 0x000000004155f1b4 in ?? ()
#12 0x00007f88e5d469a0 in ?? ()
#13 0x00007f88e5cf2520 in ?? ()
#14 0x00007f88e5cf0618 in ?? ()
#15 0x00007f88e5eb5140 in ?? ()
#16 0x00007f88e14458c8 in ?? ()
#17 0x0000565220409f10 in ?? ()
#18 0x00007ffec5ebc9f0 in ?? ()
#19 0x00007ffec5ebc8c0 in ?? ()
#20 0x0000000000000000 in ?? ()

Thread 3 is in mono_marshal_free_dynamic_wrappers, where it is holding the marshal_mutex, and Thread 5 is trying to acquire that same mutex. Thread 3 has self-suspended, and the thread stopping the world (Thread 8, in sgen_unified_suspend_stop_world) is waiting for Thread 5 to self-suspend as well, but Thread 5 can't, since it is blocked on the mutex that Thread 3 holds.

I'm pretty sure marshal_mutex should be a coop mutex.

Part of #6921

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions