diff --git a/include/bitstring.h b/include/bitstring.h index 68d3a2743f..c0b1bacafd 100644 --- a/include/bitstring.h +++ b/include/bitstring.h @@ -51,6 +51,9 @@ * works." * /s/ Perry E. Metzger, 2 Feb 98 */ +#include +#include + typedef unsigned char bitstr_t; /* internal macros */ diff --git a/include/sys/context.h b/include/sys/context.h index e6b70b49ad..8bbe383c91 100644 --- a/include/sys/context.h +++ b/include/sys/context.h @@ -5,6 +5,7 @@ #include #include +typedef struct mtx mtx_t; typedef struct thread thread_t; typedef struct __ucontext ucontext_t; typedef struct ctx ctx_t; @@ -47,7 +48,7 @@ void mcontext_restart_syscall(mcontext_t *ctx); * * \note must be called with interrupts disabled! */ -void ctx_switch(thread_t *from, thread_t *to); +void ctx_switch(thread_t *from, thread_t *to, mtx_t *mtx); /* Implementation of setcontext syscall. */ int do_setcontext(thread_t *td, ucontext_t *uc); diff --git a/include/sys/mutex.h b/include/sys/mutex.h index 885b33041c..d791a47718 100644 --- a/include/sys/mutex.h +++ b/include/sys/mutex.h @@ -60,6 +60,8 @@ typedef struct mtx { #define MTX_DEFINE(mutexname, type) \ mtx_t mutexname = MTX_INITIALIZER(mutexname, type) +void init_mtx(void); + /*! \brief Initializes mutex. * * \note Every mutex has to be initialized before it is used. */ @@ -120,4 +122,6 @@ DEFINE_CLEANUP_FUNCTION(mtx_t *, mtx_unlock); #define WITH_MTX_LOCK(mtx_p) \ WITH_STMT(mtx_t, mtx_lock, CLEANUP_FUNCTION(mtx_unlock), mtx_p) +extern mtx_t blocked_lock; + #endif /* !_SYS_MUTEX_H_ */ diff --git a/include/sys/pcpu.h b/include/sys/pcpu.h index 8b4e660ba3..313b79e85e 100644 --- a/include/sys/pcpu.h +++ b/include/sys/pcpu.h @@ -1,9 +1,9 @@ #ifndef _SYS_PCPU_H_ #define _SYS_PCPU_H_ +#include #include #include -#include typedef struct thread thread_t; typedef struct pmap pmap_t; diff --git a/include/sys/runq.h b/include/sys/runq.h index 4f835c2088..39b4d615e7 100644 --- a/include/sys/runq.h +++ b/include/sys/runq.h @@ -3,19 +3,19 @@ #ifdef _KERNEL +#include #include #include typedef struct thread thread_t; +typedef TAILQ_HEAD(, thread) thread_list_t; -/* TODO How to prevent tests from using following values? */ #define RQ_NQS 64 /* Number of run queues. */ #define RQ_PPQ 4 /* Priorities per queue. */ -TAILQ_HEAD(rq_head, thread); - -typedef struct { - struct rq_head rq_queues[RQ_NQS]; +typedef struct runq { + bitstr_t bit_decl(rq_status, RQ_NQS); + thread_list_t rq_queues[RQ_NQS]; } runq_t; /* Initialize a run structure. */ diff --git a/include/sys/sched.h b/include/sys/sched.h index fc33aa4215..d08f68e14a 100644 --- a/include/sys/sched.h +++ b/include/sys/sched.h @@ -43,6 +43,8 @@ static inline void __preempt_enable(void *data) { #define WITH_NO_PREEMPTION \ WITH_STMT(void, __preempt_disable, __preempt_enable, NULL) +void sched_init(thread_t *td); + /*! \brief Add new thread to the scheduler. * * The thread will be set runnable. */ diff --git a/include/sys/sleepq.h b/include/sys/sleepq.h index c23b05c99e..806a17a4a8 100644 --- a/include/sys/sleepq.h +++ b/include/sys/sleepq.h @@ -13,6 +13,10 @@ typedef struct sleepq sleepq_t; /*! \brief Called during kernel initialization. */ void init_sleepq(void); +void sleepq_lock(void *wchan); + +void sleepq_unlock(void *wchan); + /*! \brief Allocates sleep queue entry. */ sleepq_t *sleepq_alloc(void); @@ -54,7 +58,7 @@ bool sleepq_broadcast(void *wchan); /*! \brief Break thread's sleep. 
* * \returns true on success - * \returns false if the thread has not been asleep + * \returns false if the thread has not been aborted */ bool sleepq_abort(thread_t *td); diff --git a/include/sys/thread.h b/include/sys/thread.h index 387644eb5b..74af09997d 100644 --- a/include/sys/thread.h +++ b/include/sys/thread.h @@ -214,6 +214,14 @@ void thread_reap(void); * Must be called with acquired td_lock. */ void thread_continue(thread_t *td); +static inline bool thread_lock_eq(thread_t *td, mtx_t *mtx) { + return td->td_lock == mtx; +} + +void thread_lock_set(thread_t *td, mtx_t *mtx); + +mtx_t *thread_lock_block(thread_t *td); + /* Please use following functions to read state of a thread! */ static inline bool td_is_ready(thread_t *td) { return td->td_state == TDS_READY; diff --git a/include/sys/turnstile.h b/include/sys/turnstile.h index 86a5a6ca68..edd2b7cf9e 100644 --- a/include/sys/turnstile.h +++ b/include/sys/turnstile.h @@ -4,11 +4,16 @@ #include #include +typedef struct mtx mtx_t; typedef struct turnstile turnstile_t; /*! \brief Called during kernel initialization. */ void init_turnstile(void); +void turnstile_lock(mtx_t *mtx); + +void turnstile_unlock(mtx_t *mtx); + /*! \brief Allocates turnstile entry. */ turnstile_t *turnstile_alloc(void); @@ -28,18 +33,12 @@ void turnstile_destroy(turnstile_t *ts); * \note Requires td_spin acquired. */ void turnstile_adjust(thread_t *td, prio_t oldprio); -/* Provide turnstile that we're going to block on. */ -turnstile_t *turnstile_take(void *wchan); - -/* Release turnstile in case we decided not to block on it. */ -void turnstile_give(turnstile_t *ts); - /* Block the current thread on given turnstile. This function will perform * context switch and release turnstile when woken up. */ -void turnstile_wait(turnstile_t *ts, thread_t *owner, const void *waitpt); +void turnstile_wait(mtx_t *mtx, const void *waitpt); -/* Wakeup all threads waiting on given channel and adjust the priority of the +/* Wakeup all threads waiting on given mutex and adjust the priority of the * current thread appropriately. 
*/ -void turnstile_broadcast(void *wchan); +void turnstile_broadcast(mtx_t *mtx); #endif /* !_SYS_TURNSTILE_H_ */ diff --git a/sys/drv/rtc.c b/sys/drv/rtc.c index 9da861c5c2..bffcb20a41 100644 --- a/sys/drv/rtc.c +++ b/sys/drv/rtc.c @@ -69,8 +69,11 @@ static intr_filter_t rtc_intr(void *data) { rtc_state_t *rtc = data; uint8_t regc = rtc_read(rtc->regs, MC_REGC); if (regc & MC_REGC_PF) { - if (rtc->counter++ & 1) + if (rtc->counter++ & 1) { + sleepq_lock(rtc); sleepq_signal(rtc); + sleepq_unlock(rtc); + } return IF_FILTERED; } return IF_STRAY; @@ -80,6 +83,7 @@ static int rtc_time_read(devnode_t *dev, uio_t *uio) { rtc_state_t *rtc = dev->data; tm_t t; + sleepq_lock(rtc); sleepq_wait(rtc, NULL); rtc_gettime(rtc->regs, &t); int count = snprintf(rtc->asctime, RTC_ASCTIME_SIZE, "%d %d %d %d %d %d", diff --git a/sys/kern/callout.c b/sys/kern/callout.c index 62d9259908..4e803b0ed2 100644 --- a/sys/kern/callout.c +++ b/sys/kern/callout.c @@ -7,7 +7,6 @@ #include #include #include -#include #include /* Note: If the difference in time between ticks is greater than the number of @@ -53,14 +52,16 @@ static void callout_thread(void *arg) { while (true) { callout_t *elem; - WITH_INTR_DISABLED { - while (TAILQ_EMPTY(&delegated)) { - sleepq_wait(&delegated, NULL); - } + sleepq_lock(&delegated); - elem = TAILQ_FIRST(&delegated); - TAILQ_REMOVE(&delegated, elem, c_link); + while (TAILQ_EMPTY(&delegated)) { + sleepq_wait(&delegated, NULL); + sleepq_lock(&delegated); } + elem = TAILQ_FIRST(&delegated); + TAILQ_REMOVE(&delegated, elem, c_link); + + sleepq_unlock(&delegated); assert(callout_is_active(elem)); assert(!callout_is_pending(elem)); @@ -163,6 +164,8 @@ bool callout_stop(callout_t *handle) { * current position and delegate them to callout thread. */ void callout_process(systime_t time) { + SCOPED_MTX_LOCK(&ci.lock); + unsigned int last_bucket; unsigned int current_bucket = ci.last % CALLOUT_BUCKETS; @@ -174,8 +177,7 @@ void callout_process(systime_t time) { last_bucket = time % CALLOUT_BUCKETS; } - /* We are in kernel's bottom half. */ - assert(intr_disabled()); + sleepq_lock(&delegated); while (true) { callout_list_t *head = ci_list(current_bucket); @@ -204,12 +206,18 @@ void callout_process(systime_t time) { if (!TAILQ_EMPTY(&delegated)) { sleepq_signal(&delegated); } + + sleepq_unlock(&delegated); } bool callout_drain(callout_t *handle) { - SCOPED_INTR_DISABLED(); - if (!callout_is_pending(handle) && !callout_is_active(handle)) + sleepq_lock(handle); + + if (!callout_is_pending(handle) && !callout_is_active(handle)) { + sleepq_unlock(handle); return false; + } + while (callout_is_pending(handle) || callout_is_active(handle)) sleepq_wait(handle, NULL); return true; diff --git a/sys/kern/condvar.c b/sys/kern/condvar.c index a991b62f29..5d7ea91f32 100644 --- a/sys/kern/condvar.c +++ b/sys/kern/condvar.c @@ -1,7 +1,6 @@ #include +#include #include -#include -#include void cv_init(condvar_t *cv, const char *name) { cv->name = name; @@ -9,40 +8,38 @@ void cv_init(condvar_t *cv, const char *name) { } void cv_wait(condvar_t *cv, mtx_t *m) { - WITH_INTR_DISABLED { - cv->waiters++; - mtx_unlock(m); - /* If we got interrupted here and an interrupt filter called - * cv_signal, we would have a lost wakeup, so we need interrupts - * to be disabled. Same goes for cv_wait_timed. 
 */ - sleepq_wait(cv, __caller(0)); - } - _mtx_lock(m, __caller(0)); + sleepq_lock(cv); + cv->waiters++; + mtx_unlock(m); + sleepq_wait(cv, __caller(0)); + mtx_lock(m); } int cv_wait_timed(condvar_t *cv, mtx_t *m, systime_t timeout) { - int status; - WITH_INTR_DISABLED { - cv->waiters++; - mtx_unlock(m); - status = sleepq_wait_timed(cv, __caller(0), timeout); - } - _mtx_lock(m, __caller(0)); + sleepq_lock(cv); + cv->waiters++; + mtx_unlock(m); + int status = sleepq_wait_timed(cv, __caller(0), timeout); + mtx_lock(m); return status; } void cv_signal(condvar_t *cv) { - SCOPED_NO_PREEMPTION(); - if (cv->waiters > 0) { - cv->waiters--; - sleepq_signal(cv); - } + sleepq_lock(cv); + if (!cv->waiters) + goto end; + cv->waiters--; + sleepq_signal(cv); +end: + sleepq_unlock(cv); } void cv_broadcast(condvar_t *cv) { - SCOPED_NO_PREEMPTION(); - if (cv->waiters > 0) { - cv->waiters = 0; - sleepq_broadcast(cv); - } + sleepq_lock(cv); + if (!cv->waiters) + goto end; + cv->waiters = 0; + sleepq_broadcast(cv); +end: + sleepq_unlock(cv); } diff --git a/sys/kern/interrupt.c b/sys/kern/interrupt.c index d237c38465..f18eeea204 100644 --- a/sys/kern/interrupt.c +++ b/sys/kern/interrupt.c @@ -209,7 +209,9 @@ void intr_event_run_handlers(intr_event_t *ie) { if (ie_status & IF_DELEGATE) { ie_disable(ie); + sleepq_lock(ie); sleepq_signal(ie); + sleepq_unlock(ie); } if (ie_status == IF_STRAY) @@ -269,13 +271,10 @@ static void intr_thread(void *arg) { } /* If there are still handlers assigned to the interrupt event, enable - * interrupts and wait for a wakeup. We do it with interrupts disabled - * to prevent the wakeup from being lost. */ - WITH_INTR_DISABLED { - if (!TAILQ_EMPTY(&ie->ie_handlers)) - ie_enable(ie); - - sleepq_wait(ie, NULL); - } + * interrupts and wait for a wakeup. */ + sleepq_lock(ie); + if (!TAILQ_EMPTY(&ie->ie_handlers)) + ie_enable(ie); + sleepq_wait(ie, NULL); } } diff --git a/sys/kern/main.c b/sys/kern/main.c index 4fd54ed375..9b1926a2af 100644 --- a/sys/kern/main.c +++ b/sys/kern/main.c @@ -92,6 +92,7 @@ __noreturn void kernel_init(void) { /* Make dispatcher & scheduler structures ready for use.
 */ init_sleepq(); init_turnstile(); + init_mtx(); lockdep_init(); init_thread0(); init_sched(); diff --git a/sys/kern/mutex.c b/sys/kern/mutex.c index 2cff000be5..b0263e7c94 100644 --- a/sys/kern/mutex.c +++ b/sys/kern/mutex.c @@ -5,10 +5,17 @@ #include #include +MTX_DEFINE(blocked_lock, MTX_SPIN); + bool mtx_owned(mtx_t *m) { return (mtx_owner(m) == thread_self()); } +void init_mtx(void) { + mtx_init(&blocked_lock, MTX_SPIN); + blocked_lock.m_owner = 0xdeadc0de; +} + void _mtx_init(mtx_t *m, intptr_t flags, const char *name, lock_class_key_t *key) { assert((flags & ~(MTX_SPIN | MTX_NODEBUG)) == 0); @@ -23,15 +30,13 @@ void _mtx_lock(mtx_t *m, const void *waitpt) { intptr_t flags = m->m_owner & (MTX_SPIN | MTX_NODEBUG); - if (flags & MTX_SPIN) { + if (flags & MTX_SPIN) intr_disable(); - } else { - if (__unlikely(intr_disabled())) - panic("Cannot acquire sleep mutex in interrupt context!"); - } + else if (__unlikely(intr_disabled())) + panic("Cannot acquire sleep mutex in interrupt context!"); if (__unlikely(mtx_owned(m))) - panic("Attempt was made to re-acquire non-recursive mutex!"); + panic("Attempt was made to re-acquire a mutex!"); #if LOCKDEP if (!(flags & MTX_NODEBUG)) @@ -41,33 +46,36 @@ void _mtx_lock(mtx_t *m, const void *waitpt) { thread_t *td = thread_self(); for (;;) { - intptr_t expected = flags; - intptr_t value = (intptr_t)td | flags; + intptr_t cur = flags; + intptr_t new = (intptr_t)td | flags; /* Fast path: if lock has no owner then take ownership. */ - if (atomic_compare_exchange_strong(&m->m_owner, &expected, value)) + if (atomic_compare_exchange_strong(&m->m_owner, &cur, new)) break; if (flags & MTX_SPIN) continue; - WITH_NO_PREEMPTION { - /* TODO(cahir) turnstile_take / turnstile_give doesn't make much sense - * until tc_lock is thrown into the equation. */ - turnstile_t *ts = turnstile_take(m); - - /* Between atomic cas and turnstile_take there's a small window when - * preemption can take place. This can result in mutex being released. */ - if (m->m_owner) { - /* we're the first thread to block, so lock is now being contested */ - if (ts == td->td_turnstile) - m->m_owner |= MTX_CONTESTED; - - turnstile_wait(ts, mtx_owner(m), waitpt); - } else { - turnstile_give(ts); - } + turnstile_lock(m); + cur = m->m_owner; + + retry_turnstile: + + if (cur == flags) { + turnstile_unlock(m); + continue; } + + if (cur & MTX_CONTESTED) + goto block; + + new = cur | MTX_CONTESTED; + if (!atomic_compare_exchange_strong(&m->m_owner, &cur, new)) + goto retry_turnstile; + + block: + assert(mtx_owner(m)); + turnstile_wait(m, waitpt); } } @@ -82,12 +90,14 @@ void mtx_unlock(mtx_t *m) { #endif /* Fast path: if lock is not contested then drop ownership. */ - intptr_t expected = (intptr_t)thread_self() | flags; - intptr_t value = flags; + intptr_t cur = (intptr_t)thread_self() | flags; + intptr_t new = flags; - if (atomic_compare_exchange_strong(&m->m_owner, &expected, value)) + if (atomic_compare_exchange_strong(&m->m_owner, &cur, new)) goto done; + assert(!(flags & MTX_SPIN)); + /* Using broadcast instead of signal is faster according to * "The Design and Implementation of the FreeBSD Operating System", * 2nd edition, 4.3 Context Switching, page 138. * * The reasoning is that the awakened threads will often be scheduled * sequentially and only act on empty mutex on which operations are * cheaper.
*/ - WITH_NO_PREEMPTION { - uintptr_t owner = atomic_exchange(&m->m_owner, 0); - if (owner & MTX_CONTESTED) - turnstile_broadcast(m); - } + turnstile_lock(m); + uintptr_t owner = atomic_exchange(&m->m_owner, flags & MTX_NODEBUG); + if (owner & MTX_CONTESTED) + turnstile_broadcast(m); + turnstile_unlock(m); done: if (flags & MTX_SPIN) diff --git a/sys/kern/runq.c b/sys/kern/runq.c index 10bdf27e6f..df35ffd90f 100644 --- a/sys/kern/runq.c +++ b/sys/kern/runq.c @@ -13,21 +13,22 @@ void runq_init(runq_t *rq) { void runq_add(runq_t *rq, thread_t *td) { unsigned prio = td->td_prio / RQ_PPQ; TAILQ_INSERT_TAIL(&rq->rq_queues[prio], td, td_runq); + bit_set(rq->rq_status, prio); } thread_t *runq_choose(runq_t *rq) { - for (int i = 0; i < RQ_NQS; i++) { - struct rq_head *head = &rq->rq_queues[i]; - thread_t *td = TAILQ_FIRST(head); - - if (td) - return td; - } - - return NULL; + int prio; + bit_ffs(rq->rq_status, RQ_NQS, &prio); + if (prio == -1) + return NULL; + thread_list_t *head = &rq->rq_queues[prio]; + return TAILQ_FIRST(head); } void runq_remove(runq_t *rq, thread_t *td) { unsigned prio = td->td_prio / RQ_PPQ; - TAILQ_REMOVE(&rq->rq_queues[prio], td, td_runq); + thread_list_t *head = &rq->rq_queues[prio]; + TAILQ_REMOVE(head, td, td_runq); + if (TAILQ_EMPTY(head)) + bit_clear(rq->rq_status, prio); } diff --git a/sys/kern/sched.c b/sys/kern/sched.c index f76b20940c..3beedb7b16 100644 --- a/sys/kern/sched.c +++ b/sys/kern/sched.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -22,34 +21,51 @@ void init_sched(void) { runq_init(&runq); } -void sched_add(thread_t *td) { - klog("Add thread %ld {%p} to scheduler", td->td_tid, td); - - WITH_MTX_LOCK (td->td_lock) - sched_wakeup(td); +void sched_init(thread_t *td) { + td->td_lock = &sched_lock; } -void sched_wakeup(thread_t *td) { +static void _sched_add(thread_t *td) { assert(mtx_owned(td->td_lock)); assert(td != thread_self()); - assert(!td_is_running(td)); - /* Update sleep time. */ - bintime_t now = binuptime(); - bintime_sub(&now, &td->td_last_slptime); - bintime_add(&td->td_slptime, &now); + klog("Add thread %ld {%p} to scheduler", td->td_tid, td); td->td_state = TDS_READY; td->td_slice = SLICE; + if (!thread_lock_eq(td, &sched_lock)) { + mtx_lock(&sched_lock); + td->td_lock = &sched_lock; + } + runq_add(&runq, td); /* Check if we need to reschedule threads. */ thread_t *oldtd = thread_self(); + assert(mtx_owned(oldtd->td_lock)); if (prio_gt(td->td_prio, oldtd->td_prio)) oldtd->td_flags |= TDF_NEEDSWITCH; } +void sched_add(thread_t *td) { + SCOPED_MTX_LOCK(td->td_lock); + _sched_add(td); +} + +void sched_wakeup(thread_t *td) { + assert(mtx_owned(td->td_lock)); + assert(td != thread_self()); + assert(!td_is_running(td)); + + /* Update sleep time. */ + bintime_t now = binuptime(); + bintime_sub(&now, &td->td_last_slptime); + bintime_add(&td->td_slptime, &now); + + _sched_add(td); +} + /*! \brief Set thread's active priority \a td_prio to \a prio. * * \note Must be called with \a td_lock acquired! @@ -62,9 +78,10 @@ static void sched_set_active_prio(thread_t *td, prio_t prio) { if (td_is_ready(td)) { /* Thread is on a run queue. */ + assert(thread_lock_eq(td, &sched_lock)); runq_remove(&runq, td); td->td_prio = prio; - runq_add(&runq, td); + _sched_add(td); } else { td->td_prio = prio; } @@ -111,6 +128,7 @@ void sched_unlend_prio(thread_t *td, prio_t prio) { * \note Returned thread is marked as running! 
*/ static thread_t *sched_choose(void) { + assert(mtx_owned(&sched_lock)); thread_t *td = runq_choose(&runq); if (td == NULL) return PCPU_GET(idle_thread); @@ -123,34 +141,47 @@ static thread_t *sched_choose(void) { void sched_switch(void) { thread_t *td = thread_self(); - if (!sched_active) - goto noswitch; + if (!sched_active) { + mtx_unlock(td->td_lock); + return; + } assert(mtx_owned(td->td_lock)); assert(!td_is_running(td)); td->td_flags &= ~(TDF_SLICEEND | TDF_NEEDSWITCH); - /* Update running time, */ + /* Update running time. */ bintime_t now = binuptime(); bintime_sub(&now, &td->td_last_rtime); bintime_add(&td->td_rtime, &now); + mtx_t *mtx = thread_lock_block(td); + intr_disable(); + if (td_is_ready(td)) { + assert(mtx == &sched_lock); /* Idle threads need not to be inserted into the run queue. */ if (td != PCPU_GET(idle_thread)) runq_add(&runq, td); - } else if (td_is_sleeping(td)) { - /* Record when the thread fell asleep. */ - td->td_last_slptime = now; - } else if (td_is_dead(td) || td_is_stopped(td)) { - /* Don't add dead or stopped threads to run queue. */ + } else { + if (td_is_sleeping(td)) { + /* Record when the thread fell asleep. */ + td->td_last_slptime = now; + } + if (mtx != &sched_lock) { + mtx_unlock(mtx); + mtx_lock(&sched_lock); + } } thread_t *newtd = sched_choose(); + mtx_unlock(&sched_lock); - if (td == newtd) - goto noswitch; + if (td == newtd) { + td->td_lock = mtx; + goto end; + } /* If we got here then a context switch is required. */ td->td_nctxsw++; @@ -158,15 +189,10 @@ void sched_switch(void) { if (PCPU_GET(no_switch)) panic("Switching context while interrupts are disabled is forbidden!"); - WITH_INTR_DISABLED { - mtx_unlock(td->td_lock); - ctx_switch(td, newtd); - return; - /* XXX Right now all local variables belong to thread we switched to! */ - } + ctx_switch(td, newtd, mtx); -noswitch: - mtx_unlock(td->td_lock); +end: + intr_enable(); } void sched_clock(void) { diff --git a/sys/kern/signal.c b/sys/kern/signal.c index 00b2c0aed5..09988d7b7a 100644 --- a/sys/kern/signal.c +++ b/sys/kern/signal.c @@ -194,8 +194,8 @@ int do_sigsuspend(proc_t *p, const sigset_t *mask) { do_sigprocmask(SIG_SETMASK, mask, NULL); } - int error; - error = sleepq_wait_intr(&td->td_sigmask, "sigsuspend()"); + sleepq_lock(&td->td_sigmask); + int error = sleepq_wait_intr(&td->td_sigmask, "sigsuspend()"); assert(error == EINTR); return ERESTARTNOHAND; @@ -396,12 +396,8 @@ void sig_kill(proc_t *p, ksiginfo_t *ksi) { td->td_flags |= TDF_NEEDSIGCHK; /* If the thread is sleeping interruptibly (!), wake it up, so that it * continues execution and the signal gets delivered soon. */ - if (td_is_interruptible(td)) { - /* XXX Maybe TDF_NEEDSIGCHK should be protected by a different lock? 
*/ - mtx_unlock(td->td_lock); - sleepq_abort(td); /* Locks & unlocks td_lock */ - mtx_lock(td->td_lock); - } + if (td_is_interruptible(td)) + sleepq_abort(td); } } diff --git a/sys/kern/sleepq.c b/sys/kern/sleepq.c index 8351b652ea..ddf2a778c5 100644 --- a/sys/kern/sleepq.c +++ b/sys/kern/sleepq.c @@ -2,13 +2,11 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include @@ -27,41 +25,22 @@ typedef struct sleepq_chain { static sleepq_chain_t sleepq_chains[SC_TABLESIZE]; -static sleepq_chain_t *sc_acquire(void *wchan) { - sleepq_chain_t *sc = SC_LOOKUP(wchan); - mtx_lock(&sc->sc_lock); - return sc; -} - static bool sc_owned(sleepq_chain_t *sc) { return mtx_owned(&sc->sc_lock); } -static void sc_release(sleepq_chain_t *sc) { - mtx_unlock(&sc->sc_lock); -} - /*! \brief stores all threads sleeping on the same resource. * * All fields below are protected by corresponding sc_lock. */ typedef struct sleepq { - TAILQ_ENTRY(sleepq) sq_entry; /*!< link on sleepq_chain */ + TAILQ_ENTRY(sleepq) sq_entry; /*!< link on sleepq_chain or sq_free */ TAILQ_HEAD(, sleepq) sq_free; /*!< unused sleep queue records */ TAILQ_HEAD(, thread) sq_blocked; /*!< blocked threads */ unsigned sq_nblocked; /*!< number of blocked threads */ void *sq_wchan; /*!< associated waiting channel */ } sleepq_t; -static void sq_ctor(sleepq_t *sq) { - TAILQ_INIT(&sq->sq_blocked); - TAILQ_INIT(&sq->sq_free); - sq->sq_nblocked = 0; - sq->sq_wchan = NULL; -} - void init_sleepq(void) { - memset(sleepq_chains, 0, sizeof(sleepq_chains)); - for (int i = 0; i < SC_TABLESIZE; i++) { sleepq_chain_t *sc = &sleepq_chains[i]; mtx_init(&sc->sc_lock, MTX_SPIN | MTX_NODEBUG); @@ -69,23 +48,37 @@ void init_sleepq(void) { } } +void sleepq_lock(void *wchan) { + sleepq_chain_t *sc = SC_LOOKUP(wchan); + mtx_lock(&sc->sc_lock); +} + +void sleepq_unlock(void *wchan) { + sleepq_chain_t *sc = SC_LOOKUP(wchan); + assert(sc_owned(sc)); + mtx_unlock(&sc->sc_lock); +} + static POOL_DEFINE(P_SLEEPQ, "sleepq", sizeof(sleepq_t)); sleepq_t *sleepq_alloc(void) { - sleepq_t *sq = pool_alloc(P_SLEEPQ, M_ZERO); - sq_ctor(sq); + sleepq_t *sq = pool_alloc(P_SLEEPQ, M_WAITOK | M_ZERO); + TAILQ_INIT(&sq->sq_blocked); + TAILQ_INIT(&sq->sq_free); return sq; } void sleepq_destroy(sleepq_t *sq) { + assert(TAILQ_EMPTY(&sq->sq_blocked)); + assert(TAILQ_EMPTY(&sq->sq_free)); + assert(sq->sq_nblocked == 0); + assert(sq->sq_wchan == NULL); pool_free(P_SLEEPQ, sq); } /*! \brief Lookup the sleep queue associated with \a wchan. * * \return NULL if no queue is found - * - * \warning returned sleep queue is locked! 
*/ static sleepq_t *sq_lookup(sleepq_chain_t *sc, void *wchan) { assert(sc_owned(sc)); @@ -113,6 +106,8 @@ static __used sleepq_t *sleepq_lookup(void *wchan) { static void sq_enter(thread_t *td, sleepq_chain_t *sc, void *wchan, const void *waitpt) { + assert(wchan != NULL); + assert(waitpt); assert(sc_owned(sc)); assert(mtx_owned(td->td_lock)); @@ -127,6 +122,7 @@ static void sq_enter(thread_t *td, sleepq_chain_t *sc, void *wchan, assert(TAILQ_EMPTY(&td_sq->sq_blocked)); assert(TAILQ_EMPTY(&td_sq->sq_free)); assert(td_sq->sq_nblocked == 0); + assert(td_sq->sq_wchan == NULL); sleepq_t *sq = sq_lookup(sc, wchan); @@ -144,7 +140,7 @@ static void sq_enter(thread_t *td, sleepq_chain_t *sc, void *wchan, TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_sleepq); sq->sq_nblocked++; - sc_release(sc); + thread_lock_set(td, &sc->sc_lock); td->td_wchan = wchan; td->td_waitpt = waitpt; @@ -157,9 +153,6 @@ static void sq_leave(thread_t *td, sleepq_chain_t *sc, sleepq_t *sq) { klog("Wakeup thread %u from %p at pc=%p", td->td_tid, td->td_wchan, td->td_waitpt); - assert(sc_owned(sc)); - assert(mtx_owned(td->td_lock)); - assert(td->td_wchan != NULL); assert(td->td_sleepqueue == NULL); assert(sq->sq_nblocked >= 1); @@ -193,30 +186,32 @@ static bool sq_wakeup(thread_t *td, sleepq_chain_t *sc, sleepq_t *sq, int wakeup) { assert(sc_owned(sc)); - WITH_MTX_LOCK (td->td_lock) { - if ((wakeup == EINTR) && (td->td_flags & TDF_SLPINTR)) { - td->td_flags &= ~TDF_SLPTIMED; /* Not woken up by timeout. */ - } else if ((wakeup == ETIMEDOUT) && (td->td_flags & TDF_SLPTIMED)) { - td->td_flags &= ~TDF_SLPINTR; /* Not woken up by signal. */ - } else if (wakeup == 0) { - td->td_flags &= ~(TDF_SLPINTR | TDF_SLPTIMED); /* Regular wakeup. */ - } else { - return false; - } - sq_leave(td, sc, sq); + /* TODO(SMP): wait for `td_lock` to become `sc_lock`. */ + assert(mtx_owned(td->td_lock)); + assert(thread_lock_eq(td, &sc->sc_lock)); + + if ((wakeup == EINTR) && (td->td_flags & TDF_SLPINTR)) { + td->td_flags &= ~TDF_SLPTIMED; /* Not woken up by timeout. */ + } else if ((wakeup == ETIMEDOUT) && (td->td_flags & TDF_SLPTIMED)) { + td->td_flags &= ~TDF_SLPINTR; /* Not woken up by signal. */ + } else if (wakeup == 0) { + td->td_flags &= ~(TDF_SLPINTR | TDF_SLPTIMED); /* Regular wakeup. 
*/ + } else { + return false; } + sq_leave(td, sc, sq); return true; } bool sleepq_signal(void *wchan) { - sleepq_chain_t *sc = sc_acquire(wchan); + assert(wchan != NULL); + sleepq_chain_t *sc = SC_LOOKUP(wchan); + assert(sc_owned(sc)); sleepq_t *sq = sq_lookup(sc, wchan); - if (sq == NULL) { - sc_release(sc); + if (sq == NULL) return false; - } thread_t *td, *best_td = TAILQ_FIRST(&sq->sq_blocked); TAILQ_FOREACH (td, &sq->sq_blocked, td_sleepq) { @@ -224,50 +219,40 @@ bool sleepq_signal(void *wchan) { if (prio_gt(td->td_prio, best_td->td_prio)) best_td = td; } - sq_wakeup(best_td, sc, sq, 0); - sc_release(sc); - sched_maybe_preempt(); return true; } bool sleepq_broadcast(void *wchan) { - sleepq_chain_t *sc = sc_acquire(wchan); + assert(wchan != NULL); + sleepq_chain_t *sc = SC_LOOKUP(wchan); + assert(sc_owned(sc)); sleepq_t *sq = sq_lookup(sc, wchan); - if (sq == NULL) { - sc_release(sc); + if (sq == NULL) return false; - } thread_t *td; TAILQ_FOREACH (td, &sq->sq_blocked, td_sleepq) sq_wakeup(td, sc, sq, 0); - sc_release(sc); - sched_maybe_preempt(); return true; } static bool _sleepq_abort(thread_t *td, int wakeup) { - sleepq_chain_t *sc = sc_acquire(td->td_wchan); - sleepq_t *sq = sq_lookup(sc, td->td_wchan); + assert(mtx_owned(td->td_lock)); - if (sq == NULL) { - sc_release(sc); + if (!td_is_sleeping(td)) return false; - } - /* Remove a thread from the sleep queue and resume it. */ - bool aborted = sq_wakeup(td, sc, sq, wakeup); - sc_release(sc); + void *wchan = td->td_wchan; + sleepq_chain_t *sc = SC_LOOKUP(wchan); + assert(thread_lock_eq(td, &sc->sc_lock)); + sleepq_t *sq = sq_lookup(sc, wchan); + assert(sq != NULL); - /* If we woke up higher priority thread, we should switch to it immediately. - * This is useful if `_sleepq_abort` gets called in thread context and - * preemption is enabled. */ - sched_maybe_preempt(); - return aborted; + return sq_wakeup(td, sc, sq, wakeup); } bool sleepq_abort(thread_t *td) { @@ -280,16 +265,14 @@ void sleepq_wait(void *wchan, const void *waitpt) { if (waitpt == NULL) waitpt = __caller(0); - sleepq_chain_t *sc = sc_acquire(wchan); + sleepq_chain_t *sc = SC_LOOKUP(wchan); + assert(sc_owned(sc)); mtx_lock(td->td_lock); - td->td_state = TDS_SLEEPING; sq_enter(td, sc, wchan, waitpt); - - /* Panic if we were interrupted by timeout or signal. */ - assert((td->td_flags & (TDF_SLPINTR | TDF_SLPTIMED)) == 0); } static void sq_timeout(thread_t *td) { + SCOPED_MTX_LOCK(td->td_lock); _sleepq_abort(td, ETIMEDOUT); } @@ -299,13 +282,13 @@ int sleepq_wait_timed(void *wchan, const void *waitpt, systime_t timeout) { if (waitpt == NULL) waitpt = __caller(0); - sleepq_chain_t *sc = sc_acquire(wchan); + sleepq_chain_t *sc = SC_LOOKUP(wchan); + assert(sc_owned(sc)); mtx_lock(td->td_lock); /* If there are pending signals, interrupt the sleep immediately. */ - if ((td->td_flags & TDF_NEEDSIGCHK) && (timeout == 0)) { + if (td->td_flags & TDF_NEEDSIGCHK) { mtx_unlock(td->td_lock); - sc_release(sc); return EINTR; } @@ -314,7 +297,8 @@ int sleepq_wait_timed(void *wchan, const void *waitpt, systime_t timeout) { callout_schedule(&td->td_slpcallout, timeout); } - td->td_flags |= (timeout > 0) ? TDF_SLPTIMED : TDF_SLPINTR; + td->td_flags |= TDF_SLPINTR; + td->td_flags |= (timeout > 0) ? 
TDF_SLPTIMED : 0; sq_enter(td, sc, wchan, waitpt); /* After wakeup, only one of the following flags may be set: diff --git a/sys/kern/thread.c b/sys/kern/thread.c index 492b228e72..eca4e9703c 100644 --- a/sys/kern/thread.c +++ b/sys/kern/thread.c @@ -84,8 +84,7 @@ thread_t *thread_create(const char *name, void (*fn)(void *), void *arg, td->td_prio = prio; td->td_base_prio = prio; - td->td_lock = kmalloc(M_TEMP, sizeof(mtx_t), M_ZERO); - mtx_init(td->td_lock, MTX_SPIN | MTX_NODEBUG); + sched_init(td); cv_init(&td->td_waitcv, "thread waiters"); LIST_INIT(&td->td_contested); @@ -221,3 +220,18 @@ void thread_continue(thread_t *td) { sched_wakeup(td); } } + +void thread_lock_set(thread_t *td, mtx_t *mtx) { + assert(mtx_owned(mtx)); + mtx_t *old = td->td_lock; + assert(mtx_owned(old)); + td->td_lock = mtx; + mtx_unlock(old); +} + +mtx_t *thread_lock_block(thread_t *td) { + mtx_t *mtx = td->td_lock; + assert(mtx_owned(mtx)); + td->td_lock = &blocked_lock; + return mtx; +} diff --git a/sys/kern/time.c b/sys/kern/time.c index 7cb8a06fd8..2695639619 100644 --- a/sys/kern/time.c +++ b/sys/kern/time.c @@ -89,7 +89,8 @@ int do_clock_nanosleep(clockid_t clk, int flags, timespec_t *rqtp, } do { - error = sleepq_wait_timed((void *)(&rmt_start), __caller(0), timo); + sleepq_lock(&rmt_start); + error = sleepq_wait_timed(&rmt_start, __caller(0), timo); if (error == ETIMEDOUT) goto timedout; diff --git a/sys/kern/turnstile.c b/sys/kern/turnstile.c index f8b0bd46c4..93e34a0fa6 100644 --- a/sys/kern/turnstile.c +++ b/sys/kern/turnstile.c @@ -2,36 +2,45 @@ #include #include #include -#include #include #define TC_TABLESIZE 256 /* Must be power of 2. */ #define TC_MASK (TC_TABLESIZE - 1) #define TC_SHIFT 8 -#define TC_HASH(wc) \ - ((((uintptr_t)(wc) >> TC_SHIFT) ^ (uintptr_t)(wc)) & TC_MASK) -#define TC_LOOKUP(wc) &turnstile_chains[TC_HASH(wc)] +#define TC_HASH(m) ((((uintptr_t)(m) >> TC_SHIFT) ^ (uintptr_t)(m)) & TC_MASK) +#define TC_LOOKUP(m) &turnstile_chains[TC_HASH(m)] typedef TAILQ_HEAD(td_queue, thread) td_queue_t; typedef LIST_HEAD(ts_list, turnstile) ts_list_t; +typedef struct turnstile_chain { + mtx_t tc_lock; + ts_list_t tc_turnstiles; +} turnstile_chain_t; + +static turnstile_chain_t turnstile_chains[TC_TABLESIZE]; + +static bool tc_owned(turnstile_chain_t *tc) { + return mtx_owned(&tc->tc_lock); +} + /* Possible turnstile ts states: * - FREE_UNBLOCKED: * > ts is owned by some unblocked thread td * > td->td_turnstile is equal to ts - * > ts->ts_wchan is equal to NULL + * > ts->ts_mtx is equal to NULL * * - FREE_BLOCKED: * > ts formerly owned by a thread td * > td is now blocked on mutex mtx * > td was not the first one to block on mtx - * > ts->ts_wchan is equal to NULL + * > ts->ts_mtx is equal to NULL * * - USED_BLOCKED: * > ts formerly owned by a thread td * > td is now blocked on mutex mtx * > td was the first one to block on mtx - * > ts->ts_wchan is equal to &mtx + * > ts->ts_mtx is equal to &mtx * > other threads blocked on mtx are appended to ts->ts_blocked */ typedef enum { FREE_UNBLOCKED, FREE_BLOCKED, USED_BLOCKED } ts_state_t; @@ -46,26 +55,11 @@ typedef struct turnstile { ts_list_t ts_free; /* blocked threads sorted by decreasing active priority */ td_queue_t ts_blocked; - void *ts_wchan; /* waiting channel */ + mtx_t *ts_mtx; /* contested mutex */ thread_t *ts_owner; /* who owns the lock */ ts_state_t ts_state; /* state of turnstile */ } turnstile_t; -typedef struct turnstile_chain { - mtx_t tc_lock; - ts_list_t tc_turnstiles; -} turnstile_chain_t; - -static turnstile_chain_t 
turnstile_chains[TC_TABLESIZE]; - -static void turnstile_ctor(turnstile_t *ts) { - LIST_INIT(&ts->ts_free); - TAILQ_INIT(&ts->ts_blocked); - ts->ts_wchan = NULL; - ts->ts_owner = NULL; - ts->ts_state = FREE_UNBLOCKED; -} - void init_turnstile(void) { for (int i = 0; i < TC_TABLESIZE; i++) { turnstile_chain_t *tc = &turnstile_chains[i]; @@ -74,20 +68,36 @@ void init_turnstile(void) { } } +void turnstile_lock(mtx_t *mtx) { + turnstile_chain_t *tc = TC_LOOKUP(mtx); + mtx_lock(&tc->tc_lock); +} + +void turnstile_unlock(mtx_t *mtx) { + turnstile_chain_t *tc = TC_LOOKUP(mtx); + mtx_unlock(&tc->tc_lock); +} + static POOL_DEFINE(P_TURNSTILE, "turnstile", sizeof(turnstile_t)); turnstile_t *turnstile_alloc(void) { - turnstile_t *ts = pool_alloc(P_TURNSTILE, M_ZERO); - turnstile_ctor(ts); + turnstile_t *ts = pool_alloc(P_TURNSTILE, M_WAITOK | M_ZERO); + LIST_INIT(&ts->ts_free); + TAILQ_INIT(&ts->ts_blocked); + ts->ts_state = FREE_UNBLOCKED; return ts; } void turnstile_destroy(turnstile_t *ts) { + assert(LIST_EMPTY(&ts->ts_free)); + assert(TAILQ_EMPTY(&ts->ts_blocked)); + assert(ts->ts_mtx == NULL); + assert(ts->ts_owner == NULL); + assert(ts->ts_state == FREE_UNBLOCKED); pool_free(P_TURNSTILE, ts); } static void adjust_thread_forward(turnstile_t *ts, thread_t *td) { - assert(ts->ts_state == USED_BLOCKED); thread_t *next = td; while (TAILQ_NEXT(next, td_blockedq) != NULL && @@ -101,7 +111,6 @@ static void adjust_thread_forward(turnstile_t *ts, thread_t *td) { } static void adjust_thread_backward(turnstile_t *ts, thread_t *td) { - assert(ts->ts_state == USED_BLOCKED); thread_t *prev = td; while (TAILQ_PREV(prev, td_queue, td_blockedq) != NULL && @@ -117,9 +126,6 @@ static void adjust_thread_backward(turnstile_t *ts, thread_t *td) { /* Adjusts thread's position on ts_blocked queue after its priority * has been changed. */ static void adjust_thread(turnstile_t *ts, thread_t *td, prio_t oldprio) { - assert(ts->ts_state == USED_BLOCKED); - assert(td_is_blocked(td)); - if (prio_gt(td->td_prio, oldprio)) adjust_thread_backward(ts, td); else if (prio_lt(td->td_prio, oldprio)) @@ -127,11 +133,13 @@ static void adjust_thread(turnstile_t *ts, thread_t *td, prio_t oldprio) { } /* \note Acquires td_lock! */ -static thread_t *acquire_owner(turnstile_t *ts) { +static thread_t *acquire_owner(turnstile_chain_t *tc, turnstile_t *ts, + bool *dropp) { assert(ts->ts_state == USED_BLOCKED); thread_t *td = ts->ts_owner; assert(td != NULL); /* Turnstile must have an owner. */ - mtx_lock(td->td_lock); + if ((*dropp = !thread_lock_eq(td, &tc->tc_lock))) + mtx_lock(td->td_lock); assert(!td_is_sleeping(td)); /* You must not sleep while holding a mutex. */ return td; } @@ -141,9 +149,11 @@ static thread_t *acquire_owner(turnstile_t *ts) { * td can run again. */ static void propagate_priority(thread_t *td) { turnstile_t *ts = td->td_blocked; + turnstile_chain_t *tc = TC_LOOKUP(ts->ts_mtx); prio_t prio = td->td_prio; + bool drop = false; - td = acquire_owner(ts); + td = acquire_owner(tc, ts, &drop); /* Walk through blocked threads. */ while (prio_lt(td->td_prio, prio) && !td_is_ready(td) && !td_is_running(td)) { @@ -154,14 +164,15 @@ static void propagate_priority(thread_t *td) { sched_lend_prio(td, prio); ts = td->td_blocked; - assert(ts != NULL); - assert(ts->ts_state == USED_BLOCKED); + tc = TC_LOOKUP(ts->ts_mtx); + assert(thread_lock_eq(td, &tc->tc_lock)); /* Resort td on the blocked list if needed. 
*/ adjust_thread(ts, td, oldprio); - mtx_unlock(td->td_lock); + if (drop) + mtx_unlock(td->td_lock); - td = acquire_owner(ts); + td = acquire_owner(tc, ts, &drop); } /* Possibly finish at a running/runnable thread. */ @@ -170,7 +181,8 @@ static void propagate_priority(thread_t *td) { assert(td->td_blocked == NULL); } - mtx_unlock(td->td_lock); + if (drop) + mtx_unlock(td->td_lock); } void turnstile_adjust(thread_t *td, prio_t oldprio) { @@ -178,7 +190,8 @@ void turnstile_adjust(thread_t *td, prio_t oldprio) { assert(td_is_blocked(td)); turnstile_t *ts = td->td_blocked; - assert(ts != NULL); + turnstile_chain_t *tc = TC_LOOKUP(ts->ts_mtx); + assert(thread_lock_eq(td, &tc->tc_lock)); assert(ts->ts_state == USED_BLOCKED); adjust_thread(ts, td, oldprio); @@ -189,46 +202,34 @@ void turnstile_adjust(thread_t *td, prio_t oldprio) { propagate_priority(td); } -static void switch_away(turnstile_t *ts, const void *waitpt) { - assert(ts->ts_state == USED_BLOCKED); - thread_t *td = thread_self(); - - mtx_lock(td->td_lock); - td->td_turnstile = NULL; - td->td_blocked = ts; - td->td_wchan = ts->ts_wchan; - td->td_waitpt = waitpt; - td->td_state = TDS_BLOCKED; - propagate_priority(td); - sched_switch(); -} - /* Give back turnstiles from ts_free to threads blocked on ts_blocked. * * As there are more threads on ts_blocked than turnstiles on ts_free (by one), * one thread instead of getting some turnstile from ts_free will get ts. */ static void give_back_turnstiles(turnstile_t *ts) { - assert(ts != NULL); - assert(ts->ts_state == USED_BLOCKED); - assert(ts->ts_owner == thread_self()); + turnstile_chain_t *tc = TC_LOOKUP(ts->ts_mtx); thread_t *td; TAILQ_FOREACH (td, &ts->ts_blocked, td_blockedq) { + assert(mtx_owned(td->td_lock)); + assert(thread_lock_eq(td, &tc->tc_lock)); + assert(td_is_blocked(td)); + turnstile_t *ts_for_td; if (LIST_EMPTY(&ts->ts_free)) { assert(TAILQ_NEXT(td, td_blockedq) == NULL); ts_for_td = ts; assert(ts_for_td->ts_state == USED_BLOCKED); - assert(ts_for_td->ts_wchan != NULL); + assert(ts_for_td->ts_mtx != NULL); - ts_for_td->ts_wchan = NULL; + ts_for_td->ts_mtx = NULL; LIST_REMOVE(ts_for_td, ts_chain_link); } else { ts_for_td = LIST_FIRST(&ts->ts_free); assert(ts_for_td->ts_state == FREE_BLOCKED); - assert(ts_for_td->ts_wchan == NULL); + assert(ts_for_td->ts_mtx == NULL); LIST_REMOVE(ts_for_td, ts_free_link); } @@ -241,133 +242,127 @@ static void give_back_turnstiles(turnstile_t *ts) { /* Walks td_contested list of thread_self(), counts maximum priority of * threads locked on us, and calls sched_unlend_prio. */ static void unlend_self(turnstile_t *ts) { - assert(ts != NULL); - thread_t *td = thread_self(); - assert(ts->ts_owner == td); - prio_t prio = prio_uthread(255); + SCOPED_MTX_LOCK(td->td_lock); + ts->ts_owner = NULL; LIST_REMOVE(ts, ts_contested_link); turnstile_t *ts_owned; LIST_FOREACH (ts_owned, &td->td_contested, ts_contested_link) { + /* XXX: how to protect the contested turnstiles? 
 */ assert(ts_owned->ts_owner == td); prio_t p = TAILQ_FIRST(&ts_owned->ts_blocked)->td_prio; if (prio_gt(p, prio)) prio = p; } - WITH_MTX_LOCK (td->td_lock) - sched_unlend_prio(td, prio); + sched_unlend_prio(td, prio); } static void wakeup_blocked(td_queue_t *blocked_threads) { while (!TAILQ_EMPTY(blocked_threads)) { thread_t *td = TAILQ_FIRST(blocked_threads); TAILQ_REMOVE(blocked_threads, td, td_blockedq); - - WITH_MTX_LOCK (td->td_lock) { - assert(td_is_blocked(td)); - td->td_blocked = NULL; - td->td_wchan = NULL; - td->td_waitpt = NULL; - sched_wakeup(td); - } + td->td_blocked = NULL; + td->td_wchan = NULL; + td->td_waitpt = NULL; + sched_wakeup(td); } } /* Looks for turnstile associated with wchan in turnstile chains and returns * it or NULL if no turnstile is found in chains. */ -static __used turnstile_t *turnstile_lookup(void *wchan) { - turnstile_chain_t *tc = TC_LOOKUP(wchan); +static turnstile_t *ts_lookup(mtx_t *mtx) { + turnstile_chain_t *tc = TC_LOOKUP(mtx); + assert(tc_owned(tc)); turnstile_t *ts; LIST_FOREACH (ts, &tc->tc_turnstiles, ts_chain_link) { assert(ts->ts_state == USED_BLOCKED); - if (ts->ts_wchan == wchan) + if (ts->ts_mtx == mtx) return ts; } return NULL; } -turnstile_t *turnstile_take(void *wchan) { - assert(preempt_disabled()); +void turnstile_wait(mtx_t *mtx, const void *waitpt) { + turnstile_chain_t *tc = TC_LOOKUP(mtx); + assert(tc_owned(tc)); - turnstile_t *ts = turnstile_lookup(wchan); - - if (ts != NULL) - return ts; - - thread_t *td = thread_self(); - ts = td->td_turnstile; - assert(ts != NULL); - ts->ts_wchan = wchan; - return ts; -} - -void turnstile_give(turnstile_t *ts) { - assert(preempt_disabled()); + if (waitpt == NULL) + waitpt = __caller(0); thread_t *td = thread_self(); - if (ts == td->td_turnstile) - ts->ts_wchan = NULL; -} + turnstile_t *td_ts = td->td_turnstile; -void turnstile_wait(turnstile_t *ts, thread_t *owner, const void *waitpt) { - assert(preempt_disabled()); - assert(ts != NULL); + assert(td_ts != NULL); + assert(LIST_EMPTY(&td_ts->ts_free)); + assert(TAILQ_EMPTY(&td_ts->ts_blocked)); + assert(td_ts->ts_mtx == NULL); + assert(td_ts->ts_owner == NULL); + assert(td_ts->ts_state == FREE_UNBLOCKED); - thread_t *td = thread_self(); + thread_t *owner = mtx_owner(mtx); + turnstile_t *ts = ts_lookup(mtx); - if (ts != td->td_turnstile) { + if (ts != NULL) { /* Hang off thread's turnstile from ts_free list as we're not the first - * thread to block on waiting channel. */ + * thread to block on the mutex. */ + assert(ts->ts_mtx == mtx); + assert(ts->ts_owner == owner); assert(ts->ts_state == USED_BLOCKED); TAILQ_INSERT_HEAD(&ts->ts_blocked, td, td_blockedq); adjust_thread_forward(ts, td); - turnstile_t *nts = td->td_turnstile; - td->td_turnstile = NULL; - - assert(owner == ts->ts_owner); - assert(nts != NULL); - assert(nts->ts_state == FREE_UNBLOCKED); - - nts->ts_state = FREE_BLOCKED; - LIST_INSERT_HEAD(&ts->ts_free, nts, ts_free_link); + td_ts->ts_state = FREE_BLOCKED; + LIST_INSERT_HEAD(&ts->ts_free, td_ts, ts_free_link); } else { /* Provide thread's own turnstile to be used as head of list of threads - * blocked on given waiting channel. */ - assert(TAILQ_EMPTY(&ts->ts_blocked)); - assert(LIST_EMPTY(&ts->ts_free)); - assert(ts->ts_owner == NULL); - assert(ts->ts_wchan != NULL); - assert(ts->ts_state == FREE_UNBLOCKED); - + * blocked on given mutex.
*/ + ts = td_ts; + ts->ts_mtx = mtx; ts->ts_owner = owner; - - turnstile_chain_t *tc = TC_LOOKUP(ts->ts_wchan); - LIST_INSERT_HEAD(&owner->td_contested, ts, ts_contested_link); - LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_chain_link); - TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_blockedq); - ts->ts_state = USED_BLOCKED; + TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_blockedq); + LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_chain_link); + LIST_INSERT_HEAD(&owner->td_contested, ts, ts_contested_link); } - switch_away(ts, waitpt); + mtx_lock(td->td_lock); + thread_lock_set(td, &tc->tc_lock); + + klog("Thread %u blocks on %p at pc=%p", td->td_tid, mtx, waitpt); + + assert(td->td_turnstile != NULL); + assert(td->td_blocked == NULL); + assert(td->td_wchan == NULL); + assert(td->td_waitpt == NULL); + + td->td_turnstile = NULL; + td->td_blocked = ts; + td->td_wchan = mtx; + td->td_waitpt = waitpt; + td->td_state = TDS_BLOCKED; + + propagate_priority(td); + sched_switch(); } -void turnstile_broadcast(void *wchan) { - assert(preempt_disabled()); +void turnstile_broadcast(mtx_t *mtx) { + turnstile_chain_t *tc = TC_LOOKUP(mtx); + assert(tc_owned(tc)); - turnstile_t *ts = turnstile_lookup(wchan); + turnstile_t *ts = ts_lookup(mtx); assert(ts != NULL); - assert(ts->ts_state == USED_BLOCKED); - assert(ts->ts_owner == thread_self()); + assert(!LIST_EMPTY(&ts->ts_free)); assert(!TAILQ_EMPTY(&ts->ts_blocked)); + assert(ts->ts_mtx == mtx); + assert(ts->ts_owner == thread_self()); + assert(ts->ts_state == USED_BLOCKED); give_back_turnstiles(ts); unlend_self(ts); diff --git a/sys/riscv/genassym.cf b/sys/riscv/genassym.cf index 22094945ee..5f383779d6 100644 --- a/sys/riscv/genassym.cf +++ b/sys/riscv/genassym.cf @@ -4,6 +4,7 @@ include include include +define TD_LOCK offsetof(thread_t, td_lock) define TD_KCTX offsetof(thread_t, td_kctx) define TD_UCTX offsetof(thread_t, td_uctx) define TD_ONFAULT offsetof(thread_t, td_onfault) diff --git a/sys/riscv/switch.S b/sys/riscv/switch.S index 7d3e23f9b8..5d508f4085 100644 --- a/sys/riscv/switch.S +++ b/sys/riscv/switch.S @@ -93,7 +93,7 @@ .endm /* - * long ctx_switch(thread_t *from, thread_t *to) + * void ctx_switch(thread_t *from, thread_t *to, mtx_t *mtx) */ ENTRY(ctx_switch) /* `ctx_switch` must be called with interrupts disabled. */ @@ -146,18 +146,23 @@ set_ctxsaved: skip_fpu_save: #endif /* FPU */ - /* Switch stack pointer to `to` thread. */ - PTR_L sp, TD_KCTX(a1) - /* Update `curthread` pointer to reference `to` thread. */ PTR_S a1, PCPU_CURTHREAD(tp) /* Switch address space if necessary. */ + mv s0, a0 + mv s1, a1 + mv s2, a2 mv a0, a1 call vm_map_switch + PTR_S s2, TD_LOCK(s0) + + /* Switch stack pointer to `to` thread. */ + PTR_L sp, TD_KCTX(s1) + /* Restore `to` thread context. 
*/ - load_ctx t0 + load_ctx t0 ret halt: diff --git a/sys/tests/sleepq.c b/sys/tests/sleepq.c index 331a619f48..4dec7f8dbd 100644 --- a/sys/tests/sleepq.c +++ b/sys/tests/sleepq.c @@ -9,15 +9,16 @@ static volatile int wakeups; static void test_thread(void *expected) { while (wakeups < (intptr_t)expected) { - WITH_NO_PREEMPTION { - wakeups++; - } + sleepq_lock(&test_thread); + wakeups++; sleepq_wait(&test_thread, NULL); } } static void wake_threads_up(void *arg) { + sleepq_lock(&test_thread); sleepq_broadcast(&test_thread); + sleepq_unlock(&test_thread); } static int test_sleepq_sync(void) { diff --git a/sys/tests/sleepq_abort.c b/sys/tests/sleepq_abort.c index b2f63e9259..20a282ea25 100644 --- a/sys/tests/sleepq_abort.c +++ b/sys/tests/sleepq_abort.c @@ -35,6 +35,7 @@ static volatile int interrupted; * when waiters can't. * Therefore there should be only one waiter active at once */ static void waiter_routine(void *_arg) { + sleepq_lock(&some_val); int rsn = sleepq_wait_intr(&some_val, __caller(0)); if (rsn == EINTR) @@ -58,14 +59,19 @@ static void waker_routine(void *_arg) { rand_next++; if (wake) { + sleepq_lock(&some_val); bool succ = sleepq_signal(&some_val); assert(succ); + sleepq_unlock(&some_val); wakened++; } else { bool succ = false; while (!succ) { + thread_t *td = waiters[waiters_ord[next_abort]]; assert(next_abort < T && waiters_ord[next_abort] < T); - succ = sleepq_abort(waiters[waiters_ord[next_abort]]); + WITH_MTX_LOCK (td->td_lock) { + succ = sleepq_abort(td); + } next_abort++; } aborted++; @@ -105,7 +111,10 @@ static int test_sleepq_abort_mult(void) { } static void simple_waker_routine(void *_arg) { - sleepq_abort(waiters[0]); + thread_t *td = waiters[0]; + WITH_MTX_LOCK (td->td_lock) { + sleepq_abort(waiters[0]); + } } /* waiter routine is shared with test_mult */ diff --git a/sys/tests/sleepq_timed.c b/sys/tests/sleepq_timed.c index fc5816748b..71ed41be46 100644 --- a/sys/tests/sleepq_timed.c +++ b/sys/tests/sleepq_timed.c @@ -33,6 +33,7 @@ static thread_t *waiters[THREADS]; static thread_t *waker; static void waiter_routine(void *_arg) { + sleepq_lock(&wchan); systime_t before_sleep = getsystime(); int status = sleepq_wait_timed(&wchan, __caller(0), SLEEP_TICKS); systime_t after_sleep = getsystime(); @@ -49,12 +50,14 @@ static void waiter_routine(void *_arg) { } static void waker_routine(void *_arg) { + sleepq_lock(&wchan); /* try to wake up half of the threads before timeout */ for (int i = 0; i < THREADS / 2; i++) { bool status = sleepq_signal(&wchan); if (status) signaled_sent++; } + sleepq_unlock(&wchan); } static int test_sleepq_timed(void) { diff --git a/sys/tests/thread_stats.c b/sys/tests/thread_stats.c index c7872ab643..b9575d22ca 100644 --- a/sys/tests/thread_stats.c +++ b/sys/tests/thread_stats.c @@ -46,10 +46,12 @@ static void thread_wake_function(void *arg) { bintime_add_frac(&end, test_time_frac); bintime_add(&end, &BINTIME(0.1)); bintime_t now = binuptime(); + sleepq_lock(arg); while (bintime_cmp(&now, &end, <)) { sleepq_broadcast(arg); now = binuptime(); } + sleepq_unlock(arg); } static void thread_sleep_function(void *arg) { @@ -58,6 +60,7 @@ static void thread_sleep_function(void *arg) { bintime_add_frac(&end, test_time_frac); bintime_t now = binuptime(); while (bintime_cmp(&now, &end, <)) { + sleepq_lock(arg); sleepq_wait(arg, "Thread stats test sleepq"); now = binuptime(); } diff --git a/sys/tests/turnstile_adjust.c b/sys/tests/turnstile_adjust.c index 48e627e322..30ee88952d 100644 --- a/sys/tests/turnstile_adjust.c +++ b/sys/tests/turnstile_adjust.c 
@@ -30,8 +30,12 @@ static bool td_is_blocked_on_mtx(thread_t *td, mtx_t *m) { } static void routine(void *_arg) { + thread_t *td = thread_self(); + WITH_NO_PREEMPTION { - sleepq_signal(thread_self()); + sleepq_lock(td); + sleepq_signal(td); + sleepq_unlock(td); mtx_lock(&ts_adj_mtx); } @@ -81,10 +85,9 @@ static int test_turnstile_adjust(void) { mtx_lock(&ts_adj_mtx); for (int i = 0; i < T; i++) { - WITH_NO_PREEMPTION { - sched_add(threads[i]); - sleepq_wait(threads[i], "thread start"); - } + sleepq_lock(threads[i]); + sched_add(threads[i]); + sleepq_wait(threads[i], "thread start"); } for (int i = 0; i < T; i++)
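The hunks above (rtc.c, callout.c, interrupt.c and the sleepq tests) all follow the same protocol: the per-chain sleepq lock now provides the atomicity that WITH_INTR_DISABLED used to provide around the predicate check and the wakeup. Below is a minimal sketch of that protocol, assuming the semantics established by this patch: sleepq_lock() acquires the chain lock for a wait channel, sleepq_wait() releases it while the caller sleeps (so it must be re-taken before re-checking the predicate), and the waker holds it across sleepq_signal(). The wait channel example_done and both functions are hypothetical illustrations, not part of the patch.

#include <sys/sleepq.h>

/* Hypothetical wait channel and predicate; accesses to example_done are
 * serialized by the sleepq chain lock taken with sleepq_lock(). */
static int example_done;

/* Waker side (e.g. an interrupt filter or bottom half): publish the event
 * and signal while holding the chain lock, so a wakeup issued between the
 * sleeper's predicate check and its sleepq_wait() cannot be lost. */
static void example_notify(void) {
  sleepq_lock(&example_done);
  example_done = 1;
  sleepq_signal(&example_done);
  sleepq_unlock(&example_done);
}

/* Sleeper side: re-take the chain lock after every wakeup before
 * re-checking the predicate, mirroring callout_thread() above. */
static void example_wait(void) {
  sleepq_lock(&example_done);
  while (!example_done) {
    sleepq_wait(&example_done, NULL); /* drops the chain lock while asleep */
    sleepq_lock(&example_done);
  }
  example_done = 0;
  sleepq_unlock(&example_done);
}

This mirrors FreeBSD's sleepqueue(9) interface, where the sleep queue chain lock, rather than disabled interrupts, is what closes the lost-wakeup window.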