Had an idea for implementing thread synchronisation without any system calls (bar two: one for the thread id and one for yielding). Also posted on reddit:

Reddit - Dive into anything

Code:
/* This is a train of thought, hence the unprefixed names. I *might*
 * switch to an actual implementation of this if I can fully flesh out
 * the thought safely. DO NOT INCLUDE THIS! I will add the include if I AND
 * many others find it to be without fault, obviously with the names edited
 * to match the paw namespace. */

/* Both object types are always accessed through volatile-qualified typedefs.
 * NOTE(review): volatile forces each access to be performed but does not by
 * itself make accesses atomic or ordered between threads - confirm whether
 * the design needs C11 atomics instead. */
typedef volatile struct _exesyn_ exesyn;
typedef volatile struct _trysyn_ trysyn;

/* Per-thread handle onto a shared trysyn object. */
struct _exesyn_
{
	/* Value of pawtid() at the time exesyn_new() created the handle */
	intmax_t tid;
	/* The shared object this handle belongs to */
	trysyn	*syn;
	/* Set by trysyn_lock() while the handle is asking for the lock, cleared
	 * once the lock has been observed as granted */
	bool	lock;
	/* Recursion count: bumped by trysyn_lock() when the handle already owns
	 * the lock, reduced by trysyn_free() */
	uint	locks;
	/* Next handle in the sibling chain; a handle whose nxtsib points at
	 * itself is treated as detached */
	exesyn	*nxtsib;
	/* Previous handle in the sibling chain. trysyn_lock() and trysyn_term()
	 * compare it against the handle itself to detect a detached handle.
	 * NOTE(review): nothing visible here ever writes this member - confirm
	 * where it is meant to be maintained. */
	exesyn	*prvsib;
	/* Set by exesyn_del(); trysyn_linknxt() unlinks a next sibling that has
	 * this flag set */
	bool	drop;
};

/* The shared object: user data plus the bookkeeping for who owns it. */
struct _trysyn_
{
	/* User data handed back by trysyn_lock() on success */
	void*	ud;
	/* Class descriptor; its cb member is called with ud by trysyn_term().
	 * Cleared to NULL when termination starts, which the waiting loops use
	 * as their stop signal */
	PAWCLS 	cls;
	/* lock: handle that currently owns the lock
	 * link: handle asking to be attached to the sibling chain
	 * head: handle returned by trysyn_init() (may move in trysyn_linknxt()) */
	exesyn *lock, *link, *head;
};

/* Allocate a handle for the calling thread onto syn.
 *
 * The new handle starts detached (its nxtsib points at itself), records the
 * caller's thread id and holds no lock request and no recursive locks.
 *
 * Returns the handle, or NULL if pawnew() failed. */
PAW_QCK exesyn* exesyn_new( trysyn *syn )
{
	exesyn *exe = pawnew(sizeof(exesyn));
	if ( !exe )
		return exe;
	exe->tid = pawtid();
	exe->syn = syn;
	/* Whether pawnew() zeroes the memory is not visible from here, and
	 * trysyn_linknxt() reads both of these, so set them explicitly.
	 * NOTE(review): any other members that are compared against the handle
	 * itself or tested as flags elsewhere need the same treatment. */
	exe->lock = 0;
	exe->locks = 0;
	/* Self-reference marks the handle as not yet part of any chain */
	exe->nxtsib = exe;
	return exe;
}

/* Housekeeping step for the sibling chain, run by the handle exe.
 *
 * 1. If the next sibling has flagged itself for dropping, unlink it.
 * 2. If exe does not own the lock, just yield and return.
 * 3. If a handle is waiting in syn->link and exe has no siblings, attach it
 *    (only while syn->cls is still set, i.e. termination has not begun).
 * 4. If exe still holds recursive locks, yield and return.
 * 5. Otherwise walk the chain and pass ownership to the first handle that is
 *    requesting the lock.
 *
 * Every path yields at least once before returning. */
PAW_API void	trysyn_linknxt( exesyn *exe )
{
	trysyn *syn = exe->syn;
	exesyn *nxt = exe->nxtsib;
	if ( nxt != exe && nxt->drop )
	{
		/* Start with the members with non-critical side effects */
		if ( syn->head == nxt )
			syn->head = exe;
		exe->nxtsib = nxt->nxtsib;
		/* NOW change the member with critical side effects: exesyn_del()
		 * polls for the self-reference before releasing memory */
		nxt->nxtsib = nxt;
	}
	if ( syn->lock != exe )
	{
		/* This function is expected to be called outside any time critical
		 * actions so we'll use it as an opportunity to ensure the system has a
		 * chance to switch to other threads if sharing the CPU core with them */
		pawyield();
		return;
	}
	nxt = syn->link;
	if ( nxt && exe->nxtsib == exe && syn->cls )
	{
		exe->nxtsib = nxt;
		syn->link = NULL;
	}
	if ( exe->locks )
	{
		pawyield();
		return;
	}
	for ( nxt = exe->nxtsib; nxt != exe; nxt = nxt->nxtsib )
	{
		/* Make sure we don't try to loop from a detached thread.
		 * NOTE(review): syn->head is dereferenced below without a NULL
		 * check - confirm it cannot be NULL while this loop runs. */
		if ( nxt->nxtsib == nxt )
			nxt = syn->head;
		pawyield();
		if ( nxt->lock )
		{
			syn->lock = nxt;
			return;
		}
	}
	/* Make sure we yielded at least once */
	pawyield();
}
/* Release one level of the lock held through exe.
 *
 * Does nothing unless exe is the current owner. Otherwise the recursion
 * count is reduced (never below zero) and trysyn_linknxt() is run, which
 * passes the lock on once the count has reached zero. */
PAW_API void	trysyn_free( exesyn *exe )
{
	trysyn *syn = exe->syn;
	if ( syn->lock != exe )
		return;
	exe->locks -= (exe->locks > 0);
	trysyn_linknxt(exe);
}

/* Attempt to lock the data */
/* Attempt to lock the data.
 *
 * If exe already owns the lock the recursion count is bumped and the user
 * data is returned straight away. Otherwise exe raises its request flag,
 * yields to give the owner a chance to pass the lock over, and reports
 * whether that happened.
 *
 * Returns syn->ud when exe owns the lock on return, NULL otherwise. On a
 * NULL return the request flag stays raised, so a later call can still
 * pick up the lock. */
PAW_API void*	trysyn_lock( exesyn *exe )
{
	trysyn *syn = exe->syn;
	if ( syn->lock == exe )
	{
		pawu locks = exe->locks + 1;
		if ( !locks )
		{
			/* Deliberate segfault to identify poorly designed threads.
			 * The read goes through a volatile pointer so the compiler
			 * cannot discard it as a dead load. */
			locks = *((volatile pawu*)0);
			return NULL;
		}
		exe->locks++;
		return syn->ud;
	}
	exe->lock = 1;
	if ( exe->prvsib == exe )
	{
		/* Looks detached: ask the owner to attach us to the chain */
		syn->link = exe;
		pawyield();
		if ( exe->prvsib == exe )
			return NULL;
		if ( syn->lock == exe )
		{
			exe->lock = 0;
			return syn->ud;
		}
	}
	/* Skip the yield only when exe is the head and has no siblings at all */
	if ( syn->head != exe || exe->prvsib != exe || exe->nxtsib != exe )
		pawyield();
	if ( syn->lock == exe )
	{
		exe->lock = 0;
		return syn->ud;
	}
	return NULL;
}

/* Wait indefinitely until locked the data */
/* Keep calling trysyn_lock() until it succeeds.
 *
 * Returns the user data once locked, or NULL if exe->syn->cls is found to
 * be NULL after a failed attempt. At least one attempt is always made. */
PAW_API void* trysyn_wait( exesyn *exe )
{
	for ( ;; )
	{
		void *got = trysyn_lock(exe);
		if ( got || !exe->syn->cls )
			return got;
	}
}

/* Try upto X times to get the lock */
/* Try up to max_tries times to get the lock.
 *
 * Stops early as soon as trysyn_lock() succeeds or exe->syn->cls is found
 * to be NULL.
 *
 * Returns the user data if the lock was obtained, NULL otherwise (which
 * includes max_tries being 0). */
PAW_API void* trysyn_spin( exesyn *exe, pawu max_tries )
{
	void *ud = NULL;
	while ( !ud && exe->syn->cls && max_tries-- )
		ud = trysyn_lock(exe);
	/* Hand back what was obtained; NULL here would hide a held lock */
	return ud;
}

/* Limit by seconds, milliseconds, nanoseconds etc */
/* Keep trying for the lock until timeoutlen has elapsed.
 *
 * The deadline is the pawage_qry_mono() reading on entry plus timeoutlen.
 * At least one attempt is always made; after each failed attempt the loop
 * gives up if exe->syn->cls is NULL or the clock has reached the deadline.
 *
 * Returns the user data if the lock was obtained, NULL otherwise. */
PAW_API void* trysyn_time( exesyn *exe, pawage timeoutlen )
{
	pawage deadline = pawage_add(pawage_qry_mono(),timeoutlen);
	for ( ;; )
	{
		void *got = trysyn_lock(exe);
		/* Sample the clock after every attempt, successful or not */
		pawage stamp = pawage_qry_mono();
		if ( got )
			return got;
		if ( !exe->syn->cls )
			return got;
		if ( !(pawage_cmp( stamp, deadline ) < 0) )
			return got;
	}
}

/* Retire the handle exe and release its memory.
 *
 * The handle flags itself for dropping, forgets any recursive locks, then
 * keeps running trysyn_linknxt() until the loop condition below clears.
 * Only then is the memory handed to pawdel(). */
PAW_API void exesyn_del( exesyn *exe )
{
	trysyn *syn = exe->syn;
	exesyn *nxt = exe->nxtsib;
	/* Ensure any threads waiting to link and take the lock get linked in 1st
	 * so that the data is not accidentally deleted before it's supposed to be */
	exe->drop = 1;
	exe->locks = 0;
	/* NOTE(review): nxt is sampled once, before the loop, so the first test
	 * watches whichever handle followed exe at entry rather than exe itself.
	 * Confirm this is intended and that the loop can finish when exe is the
	 * only handle and still owns the lock. */
	while ( nxt->nxtsib != nxt || syn->lock == exe )
		trysyn_linknxt(exe);
	pawdel( exe, sizeof(exesyn) );
}

/* Tear down the caller's handle and, if it is the head handle, the shared
 * object as well.
 *
 * A handle that does not belong to syn is ignored. A non-head handle is just
 * deleted. The head handle first clears syn->cls (the stop signal the
 * waiting loops poll), waits until it has no siblings and owns the lock,
 * invokes the class callback on the user data, wipes syn and finally
 * deletes itself. */
PAW_API void trysyn_term( trysyn *syn, exesyn *exe )
{
	/* Taken before anything else since syn->cls is cleared further down */
	PAWCLS closer = syn->cls;
	if ( exe->syn != syn )
		return;
	if ( syn->head == exe )
	{
		exe->locks = 0;
		syn->cls = NULL;
		for ( ;; )
		{
			/* Done once exe stands alone and owns the lock */
			if ( exe->prvsib == exe && exe->nxtsib == exe && syn->lock == exe )
				break;
			trysyn_linknxt(exe);
		}
		closer->cb( syn->ud );
		syn->ud = NULL;
		syn->head = NULL;
		syn->lock = NULL;
		syn->link = NULL;
	}
	exesyn_del(exe);
}

/* Set up syn to guard ud and create the head handle for the calling thread.
 *
 * Fails (returns NULL, syn untouched) when syn already has ud, cls, head or
 * lock set, when ud or cls is NULL, or when the handle cannot be allocated.
 *
 * On success the new handle is both the head and the current lock owner,
 * and is returned to the caller. */
PAW_API exesyn* trysyn_init( trysyn *syn, void *ud, PAWCLS cls )
{
	exesyn *owner;
	pawb inUse = (syn->ud || syn->cls || syn->head || syn->lock);
	if ( inUse )
		return NULL;
	if ( !ud || !cls )
		return NULL;
	owner = exesyn_new(syn);
	if ( !owner )
		return NULL;
	syn->head = owner;
	syn->lock = owner;
	syn->cls = cls;
	syn->ud = ud;
	return owner;
}