Skip to content

Commit 3e86e39

Browse files
committed
core: add core_mutex_debug to aid debugging deadlocks
Adding `USEMODULE += core_mutex_debug` to your `Makefile` results in on log messages such as [mutex] waiting for thread 1 (pc = 0x800024d) being added whenever `mutex_lock()` blocks. This makes tracing down deadlocks easier.
1 parent 8b58e55 commit 3e86e39

File tree

3 files changed

+79
-8
lines changed

3 files changed

+79
-8
lines changed

core/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ config MODULE_CORE_MSG_BUS
3939
help
4040
Messaging Bus API for inter process message broadcast.
4141

42+
config MODULE_CORE_MUTEX_DEBUG
43+
bool "Aid debugging deadlocks by printing on whom mutex_lock() is waiting"
44+
4245
config MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
4346
bool "Use priority inheritance to mitigate priority inversion for mutexes"
4447

core/include/mutex.h

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,25 @@
9797
* `MUTEX_LOCK`.
9898
* - The scheduler is run, so that if the unblocked waiting thread can
9999
* run now, in case it has a higher priority than the running thread.
100+
*
101+
* Debugging deadlocks
102+
* -------------------
103+
*
104+
* The module `core_mutex_debug` can be used to print on whom `mutex_lock()`
105+
* is waiting. This information includes the thread ID of the owner and the
106+
* program counter (PC) from where `mutex_lock()` was called. Note that the
107+
* information is only valid if:
108+
*
109+
* - The mutex was locked by a thread, and not e.g. by `MUTEX_INIT_LOCKED`
110+
* - The function `cpu_get_caller_pc()` is implemented for the target
111+
* architecture. (The thread ID will remain valid, though.)
112+
* - The caller PC is briefly 0 when the current owner passes over ownership
113+
* to the next thread, but that thread didn't get CPU time yet to write its
114+
* PC into the data structure. Even worse, on architectures where an aligned
115+
* function-pointer-sized write is not atomic, the value may briefly be
116+
* bogus. Chances are close to zero this ever hits and since this only
117+
* effects debug output, the ostrich algorithm was chosen here.
118+
*
100119
* @{
101120
*
102121
* @file
@@ -112,6 +131,7 @@
112131
#include <stdint.h>
113132
#include <stdbool.h>
114133

134+
#include "architecture.h"
115135
#include "kernel_defines.h"
116136
#include "list.h"
117137
#include "thread.h"
@@ -130,7 +150,8 @@ typedef struct {
130150
* @internal
131151
*/
132152
list_node_t queue;
133-
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
153+
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \
154+
|| defined(MODULE_CORE_MUTEX_DEBUG)
134155
/**
135156
* @brief The current owner of the mutex or `NULL`
136157
* @note Only available if module core_mutex_priority_inheritance
@@ -141,6 +162,18 @@ typedef struct {
141162
* this will have the value of `NULL`.
142163
*/
143164
kernel_pid_t owner;
165+
#endif
166+
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_DEBUG)
167+
/**
168+
* @brief Program counter of the call to @ref mutex_lock that most
169+
* recently acquired this mutex
170+
*
171+
* This is used when the module `core_mutex_debug` is used to debug
172+
* deadlocks and is non-existing otherwise
173+
*/
174+
uinttxtptr_t owner_calling_pc;
175+
#endif
176+
#if defined(DOXYGEN) || defined(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
144177
/**
145178
* @brief Original priority of the owner
146179
* @note Only available if module core_mutex_priority_inheritance

core/mutex.c

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,18 @@
4747
* function into both @ref mutex_lock and @ref mutex_lock_cancelable is,
4848
* therefore, beneficial for the majority of applications.
4949
*/
50-
static inline __attribute__((always_inline)) void _block(mutex_t *mutex,
51-
unsigned irq_state)
50+
static inline __attribute__((always_inline))
51+
void _block(mutex_t *mutex,
52+
unsigned irq_state,
53+
uinttxtptr_t pc)
5254
{
55+
/* pc is only used when MODULE_CORE_MUTEX_DEBUG */
56+
(void)pc;
57+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
58+
printf("[mutex] waiting for thread %" PRIkernel_pid " (pc = 0x%" PRIxTXTPTR
59+
")\n",
60+
mutex->owner, mutex->owner_calling_pc);
61+
#endif
5362
thread_t *me = thread_get_active();
5463

5564
/* Fail visibly even if a blocking action is called from somewhere where
@@ -80,10 +89,17 @@ static inline __attribute__((always_inline)) void _block(mutex_t *mutex,
8089
irq_restore(irq_state);
8190
thread_yield_higher();
8291
/* We were woken up by scheduler. Waker removed us from queue. */
92+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
93+
mutex->owner_calling_pc = pc;
94+
#endif
8395
}
8496

8597
bool mutex_lock_internal(mutex_t *mutex, bool block)
8698
{
99+
uinttxtptr_t pc = 0;
100+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
101+
pc = cpu_get_caller_pc();
102+
#endif
87103
unsigned irq_state = irq_disable();
88104

89105
DEBUG("PID[%" PRIkernel_pid "] mutex_lock_internal(block=%u).\n",
@@ -92,9 +108,15 @@ bool mutex_lock_internal(mutex_t *mutex, bool block)
92108
if (mutex->queue.next == NULL) {
93109
/* mutex is unlocked. */
94110
mutex->queue.next = MUTEX_LOCKED;
95-
#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
111+
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \
112+
|| IS_USED(MODULE_CORE_MUTEX_DEBUG)
96113
thread_t *me = thread_get_active();
97114
mutex->owner = me->pid;
115+
#endif
116+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
117+
mutex->owner_calling_pc = pc;
118+
#endif
119+
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
98120
mutex->owner_original_priority = me->priority;
99121
#endif
100122
DEBUG("PID[%" PRIkernel_pid "] mutex_lock(): early out.\n",
@@ -106,14 +128,18 @@ bool mutex_lock_internal(mutex_t *mutex, bool block)
106128
irq_restore(irq_state);
107129
return false;
108130
}
109-
_block(mutex, irq_state);
131+
_block(mutex, irq_state, pc);
110132
}
111133

112134
return true;
113135
}
114136

115137
int mutex_lock_cancelable(mutex_cancel_t *mc)
116138
{
139+
uinttxtptr_t pc = 0;
140+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
141+
pc = cpu_get_caller_pc();
142+
#endif
117143
unsigned irq_state = irq_disable();
118144

119145
DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable()\n",
@@ -131,9 +157,15 @@ int mutex_lock_cancelable(mutex_cancel_t *mc)
131157
if (mutex->queue.next == NULL) {
132158
/* mutex is unlocked. */
133159
mutex->queue.next = MUTEX_LOCKED;
134-
#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
160+
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE) \
161+
|| IS_USED(MODULE_CORE_MUTEX_DEBUG)
135162
thread_t *me = thread_get_active();
136163
mutex->owner = me->pid;
164+
#endif
165+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
166+
mutex->owner_calling_pc = pc;
167+
#endif
168+
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
137169
mutex->owner_original_priority = me->priority;
138170
#endif
139171
DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable() early out.\n",
@@ -142,7 +174,7 @@ int mutex_lock_cancelable(mutex_cancel_t *mc)
142174
return 0;
143175
}
144176
else {
145-
_block(mutex, irq_state);
177+
_block(mutex, irq_state, pc);
146178
if (mc->cancelled) {
147179
DEBUG("PID[%" PRIkernel_pid "] mutex_lock_cancelable() "
148180
"cancelled.\n", thread_getpid());
@@ -185,7 +217,7 @@ void mutex_unlock(mutex_t *mutex)
185217

186218
uint16_t process_priority = process->priority;
187219

188-
#ifdef MODULE_CORE_MUTEX_PRIORITY_INHERITANCE
220+
#if IS_USED(MODULE_CORE_MUTEX_PRIORITY_INHERITANCE)
189221
thread_t *owner = thread_get(mutex->owner);
190222
if ((owner) && (owner->priority != mutex->owner_original_priority)) {
191223
DEBUG("PID[%" PRIkernel_pid "] prio %u --> %u\n",
@@ -194,6 +226,9 @@ void mutex_unlock(mutex_t *mutex)
194226
sched_change_priority(owner, mutex->owner_original_priority);
195227
}
196228
#endif
229+
#if IS_USED(MODULE_CORE_MUTEX_DEBUG)
230+
mutex->owner_calling_pc = 0;
231+
#endif
197232

198233
irq_restore(irqstate);
199234
sched_switch(process_priority);

0 commit comments

Comments
 (0)