[urcu.git] / urcu-call-rcu.c

/*
 * urcu-call-rcu.c
 *
 * Userspace RCU library - batch memory reclamation with kernel API
 *
 * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <sys/time.h>
#include <syscall.h>
#include <unistd.h>

#include "config.h"
#include "urcu/wfqueue.h"
#include "urcu-call-rcu.h"
#include "urcu-pointer.h"
#include "urcu/list.h"

/*
 * Data structure that identifies a call_rcu thread.  One instance is
 * allocated and initialized by call_rcu_data_init() for every call_rcu
 * thread that is created.
 */

struct call_rcu_data {
	/* Queue of pending callbacks; call_rcu() enqueues, the thread drains. */
	struct cds_wfq_queue cbs;
	/* URCU_CALL_RCU_RT and URCU_CALL_RCU_RUNNING bits. */
	unsigned long flags;
	/* Held around URCU_CALL_RCU_RUNNING updates and the cond wait/signal. */
	pthread_mutex_t mtx;
	/* Signalled by call_rcu() when the thread is not marked running. */
	pthread_cond_t cond;
	/* Incremented per call_rcu(); decreased by the number of callbacks run. */
	unsigned long qlen;
	/* Thread identifier filled in by pthread_create(). */
	pthread_t tid;
	/* Linkage into call_rcu_data_list. */
	struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Protected by call_rcu_mutex.  Entries are added by
 * call_rcu_data_init(); nothing in this file removes them.
 */

CDS_LIST_HEAD(call_rcu_data_list);

/*
 * Link a thread using call_rcu() to its call_rcu thread.  Per-thread:
 * written by set_thread_call_rcu_data() and by each call_rcu thread for
 * itself, and consulted first by get_call_rcu_data().
 */

static __thread struct call_rcu_data *thread_call_rcu_data;

/*
 * Guard call_rcu thread creation.  Also held around calls to
 * alloc_cpu_call_rcu_data().
 */

static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * If a given thread does not have its own call_rcu thread, this is default.
 * Created on first use by get_default_call_rcu_data().
 */

static struct call_rcu_data *default_call_rcu_data;

/*
 * Defined outside this file; call_rcu_thread() calls it before invoking
 * a batch of callbacks.
 */
extern void synchronize_rcu(void);

/*
 * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 * available, then we can have call_rcu threads assigned to individual
 * CPUs rather than only to specific threads.
 */

#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)

/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;

/*
 * Allocate the per-CPU pointer array if it has not already been
 * allocated.  Callers in this file invoke it with call_rcu_mutex held.
 *
 * The first call records sysconf(_SC_NPROCESSORS_CONF) in maxcpus; every
 * later call returns immediately because maxcpus is then non-zero,
 * whether or not the allocation succeeded.  If sysconf() reports no
 * CPUs or the allocation fails, per_cpu_call_rcu_data stays NULL, which
 * callers must check.
 */

static void alloc_cpu_call_rcu_data(void)
{
	struct call_rcu_data **p;
	static int warned = 0;

	if (maxcpus != 0)
		return;
	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	if (maxcpus <= 0)
		return;
	/* calloc() zero-fills the array and rejects a size that would overflow. */
	p = calloc(maxcpus, sizeof(*per_cpu_call_rcu_data));
	if (p == NULL) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
		}
		warned = 1;
		return;
	}
	per_cpu_call_rcu_data = p;
}

#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/*
 * Fallback when per-CPU call_rcu threads are unavailable: the array
 * never exists and maxcpus is a negative constant.
 */
static const struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;

/* Nothing to allocate in the fallback configuration. */
static void alloc_cpu_call_rcu_data(void)
{
}

/* Stand-in for sched_getcpu(): always reports -1. */
static int sched_getcpu(void)
{
	return -1;
}

#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */

/*
 * Acquire the specified pthread mutex.
 *
 * On failure, print the reason on stderr and exit the process.
 * pthread_mutex_lock() reports errors through its return value and does
 * not set errno, so the message is built from that value rather than
 * with perror().
 */

static void call_rcu_lock(pthread_mutex_t *pmp)
{
	int ret = pthread_mutex_lock(pmp);

	if (ret != 0) {
		fprintf(stderr, "pthread_mutex_lock: %s\n", strerror(ret));
		exit(-1);
	}
}

/*
 * Release the specified pthread mutex.
 *
 * On failure, print the reason on stderr and exit the process.
 * pthread_mutex_unlock() reports errors through its return value and
 * does not set errno, so the message is built from that value rather
 * than with perror().
 */

static void call_rcu_unlock(pthread_mutex_t *pmp)
{
	int ret = pthread_mutex_unlock(pmp);

	if (ret != 0) {
		fprintf(stderr, "pthread_mutex_unlock: %s\n", strerror(ret));
		exit(-1);
	}
}

/*
 * This is the code run by each call_rcu thread.
 *
 * arg is the thread's own struct call_rcu_data.  The thread loops
 * forever: whenever the callback queue is non-empty it detaches the
 * whole queue, calls synchronize_rcu(), invokes every detached callback
 * and subtracts the number invoked from qlen.  Between batches a
 * URCU_CALL_RCU_RT thread just sleeps 10 ms; any other thread clears
 * URCU_CALL_RCU_RUNNING and, if the queue is still empty, blocks on
 * cond until call_rcu() signals it, then sleeps 10 ms before the next
 * pass.  Never returns.
 */

static void *call_rcu_thread(void *arg)
{
	unsigned long cbcount;
	struct cds_wfq_node *cbs;
	struct cds_wfq_node **cbs_tail;
	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
	struct rcu_head *rhp;
	int ret;

	thread_call_rcu_data = crdp;
	for (;;) {
		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
			/*
			 * tail no longer points at head, so the queue is not
			 * empty, yet head may still read NULL for a moment --
			 * presumably while an enqueuer has advanced tail but
			 * not yet linked its node (TODO confirm against
			 * cds_wfq_enqueue()).
			 */
			while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
				poll(NULL, 0, 1);
			/* Detach the whole list and reset the queue to empty. */
			_CMM_STORE_SHARED(crdp->cbs.head, NULL);
			cbs_tail = (struct cds_wfq_node **)
				uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
			synchronize_rcu();
			cbcount = 0;
			do {
				/*
				 * A NULL next is final only on the node that
				 * cbs_tail points into; elsewhere wait for the
				 * link to appear.
				 */
				while (cbs->next == NULL &&
				       &cbs->next != cbs_tail)
					poll(NULL, 0, 1);
				/* The queue's own dummy node carries no callback. */
				if (cbs == &crdp->cbs.dummy) {
					cbs = cbs->next;
					continue;
				}
				/*
				 * call_rcu() enqueues &head->next, so the cast
				 * assumes next is the first member of struct
				 * rcu_head -- TODO confirm in urcu-call-rcu.h.
				 */
				rhp = (struct rcu_head *)cbs;
				/* Fetch next first: func may free the rcu_head. */
				cbs = cbs->next;
				rhp->func(rhp);
				cbcount++;
			} while (cbs != NULL);
			uatomic_sub(&crdp->qlen, cbcount);
		}
		if (crdp->flags & URCU_CALL_RCU_RT)
			poll(NULL, 0, 10);
		else {
			call_rcu_lock(&crdp->mtx);
			_CMM_STORE_SHARED(crdp->flags,
				     crdp->flags & ~URCU_CALL_RCU_RUNNING);
			/*
			 * Re-check emptiness under mtx: call_rcu() enqueues
			 * before taking mtx, so a callback queued since the
			 * test at the top of the loop is seen here and the
			 * wait is skipped.
			 */
			if (&crdp->cbs.head ==
			    _CMM_LOAD_SHARED(crdp->cbs.tail)) {
				/* Errors come back as the return value, not errno. */
				ret = pthread_cond_wait(&crdp->cond, &crdp->mtx);
				if (ret != 0) {
					fprintf(stderr, "pthread_cond_wait: %s\n",
						strerror(ret));
					exit(-1);
				}
			}
			_CMM_STORE_SHARED(crdp->flags,
				     crdp->flags | URCU_CALL_RCU_RUNNING);
			call_rcu_unlock(&crdp->mtx);
			/*
			 * Sleep only after dropping mtx, so that call_rcu()
			 * callers are not blocked on the mutex for the whole
			 * 10 ms.  URCU_CALL_RCU_RUNNING is already set, so
			 * they will not signal; anything they enqueue is
			 * picked up on the next pass.
			 */
			poll(NULL, 0, 10);
		}
	}
	return NULL;  /* NOTREACHED */
}

/*
 * Create both a call_rcu thread and the corresponding call_rcu_data
 * structure, linking the structure in as specified.  Caller must hold
 * call_rcu_mutex.
 *
 * crdpp: where to store the pointer to the new structure.  It is
 *        written before the thread is created, so tid is filled in
 *        only after the pointer has been stored.
 * flags: URCU_CALL_RCU_* bits for the new thread;
 *        URCU_CALL_RCU_RUNNING is always OR-ed in.
 *
 * Any failure (allocation, mutex or condition-variable initialization,
 * thread creation) prints a message on stderr and exits the process, so
 * this function returns only on success.
 */

static void call_rcu_data_init(struct call_rcu_data **crdpp,
			       unsigned long flags)
{
	struct call_rcu_data *crdp;
	int ret;

	/*
	 * NOTE(review): struct call_rcu_data is declared with a cache-line
	 * alignment attribute, which plain malloc() may not honour --
	 * confirm whether an aligned allocation is wanted here.
	 */
	crdp = malloc(sizeof(*crdp));
	if (crdp == NULL) {
		fprintf(stderr, "Out of memory.\n");
		exit(-1);
	}
	memset(crdp, '\0', sizeof(*crdp));
	cds_wfq_init(&crdp->cbs);
	crdp->qlen = 0;
	/*
	 * The pthread_*() calls below report failure through their return
	 * value and do not set errno, so perror() would print an unrelated
	 * message; format the returned code instead.
	 */
	ret = pthread_mutex_init(&crdp->mtx, NULL);
	if (ret != 0) {
		fprintf(stderr, "pthread_mutex_init: %s\n", strerror(ret));
		exit(-1);
	}
	ret = pthread_cond_init(&crdp->cond, NULL);
	if (ret != 0) {
		fprintf(stderr, "pthread_cond_init: %s\n", strerror(ret));
		exit(-1);
	}
	crdp->flags = flags | URCU_CALL_RCU_RUNNING;
	cds_list_add(&crdp->list, &call_rcu_data_list);
	cmm_smp_mb();  /* Structure initialized before pointer is planted. */
	*crdpp = crdp;
	ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
	if (ret != 0) {
		fprintf(stderr, "pthread_create: %s\n", strerror(ret));
		exit(-1);
	}
}

/*
 * Look up the call_rcu_data structure assigned to the specified CPU.
 *
 * Returns NULL when the per-CPU array does not exist, when cpu lies
 * outside [0, maxcpus), or when that CPU's slot holds NULL.  An
 * out-of-range cpu is reported on stderr once, and only while maxcpus
 * is positive.  The structure is never created automatically, because
 * the platform we are running on might not define sched_getcpu().
 */

struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
{
	static int warned = 0;

	if (per_cpu_call_rcu_data == NULL)
		return NULL;
	if (cpu >= 0 && cpu < maxcpus)
		return per_cpu_call_rcu_data[cpu];
	/* Out of range from here on: complain the first time only. */
	if (maxcpus > 0 && !warned) {
		fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
		warned = 1;
	}
	return NULL;
}

/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.
 *
 * crdp is dereferenced without a NULL check, so it must be valid.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
	return crdp->tid;
}

/*
 * Build a new call_rcu_data structure together with its thread and hand
 * back a pointer to it.  Thin wrapper around call_rcu_data_init(), which
 * requires call_rcu_mutex to be held by the caller.
 */

static struct call_rcu_data *__create_call_rcu_data(unsigned long flags)
{
	struct call_rcu_data *fresh;

	/* call_rcu_data_init() stores the new structure through &fresh. */
	call_rcu_data_init(&fresh, flags);

	return fresh;
}

/*
 * Public entry point for creating a call_rcu_data structure and its
 * thread: takes call_rcu_mutex around __create_call_rcu_data() and
 * returns the new structure.  flags is passed through unchanged.
 */
struct call_rcu_data *create_call_rcu_data(unsigned long flags)
{
	struct call_rcu_data *created;

	call_rcu_lock(&call_rcu_mutex);
	created = __create_call_rcu_data(flags);
	call_rcu_unlock(&call_rcu_mutex);

	return created;
}

/*
 * Set the specified CPU to use the specified call_rcu_data structure.
 *
 * cpu:  index into the per-CPU array; must lie in [0, maxcpus).
 * crdp: structure to install for that CPU; the previous slot value is
 *       overwritten.
 *
 * Returns 0 on success.  On failure returns a negative errno value and
 * also sets errno: -EINVAL if cpu is out of range (reported on stderr
 * once), -ENOMEM if the per-CPU array could not be allocated.
 *
 * The array is set up before the range check, so that the very first
 * call sees the real maxcpus instead of its initial value of zero.  The
 * whole update runs under call_rcu_mutex.
 */

int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
{
	/* static, so the warning really is printed only once. */
	static int warned = 0;

	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	if (cpu < 0 || maxcpus <= cpu) {
		if (!warned) {
			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
			warned = 1;
		}
		call_rcu_unlock(&call_rcu_mutex);
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		call_rcu_unlock(&call_rcu_mutex);
		errno = ENOMEM;
		return -ENOMEM;
	}
	per_cpu_call_rcu_data[cpu] = crdp;
	call_rcu_unlock(&call_rcu_mutex);
	return 0;
}

/*
 * Hand back the default call_rcu_data structure, creating it (and its
 * thread) on first use.  The pointer is tested once without the lock
 * and, if still NULL, tested again with call_rcu_mutex held so that
 * only one caller performs the creation.  Because we never free
 * call_rcu_data structures, we don't need to be in an RCU read-side
 * critical section.
 */

struct call_rcu_data *get_default_call_rcu_data(void)
{
	/* Fast path: already created. */
	if (default_call_rcu_data != NULL)
		return rcu_dereference(default_call_rcu_data);

	call_rcu_lock(&call_rcu_mutex);
	if (default_call_rcu_data == NULL)
		call_rcu_data_init(&default_call_rcu_data, 0);
	call_rcu_unlock(&call_rcu_mutex);

	return default_call_rcu_data;
}

/*
 * Pick the call_rcu_data structure the calling thread should use.
 * Order of preference:
 *   1. the structure assigned to this thread, if any;
 *   2. the structure assigned to the CPU the thread is running on
 *      (considered only while maxcpus is positive);
 *   3. the default structure, which is created if it does not exist yet.
 * A CPU number outside [0, maxcpus) is reported on stderr once.
 */
struct call_rcu_data *get_call_rcu_data(void)
{
	static int warned = 0;
	int cpu_now;
	int cpu_valid;

	if (thread_call_rcu_data != NULL)
		return thread_call_rcu_data;
	if (maxcpus <= 0)
		return get_default_call_rcu_data();

	cpu_now = sched_getcpu();
	cpu_valid = (cpu_now >= 0 && cpu_now < maxcpus);
	if (!cpu_valid && !warned) {
		fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
		warned = 1;
	}
	/* No usable per-CPU entry: fall back to the default structure. */
	if (!cpu_valid || per_cpu_call_rcu_data == NULL ||
	    per_cpu_call_rcu_data[cpu_now] == NULL)
		return get_default_call_rcu_data();
	return per_cpu_call_rcu_data[cpu_now];
}

/*
 * Return a pointer to this task's call_rcu_data if there is one, or
 * NULL if none has been assigned.  Unlike get_call_rcu_data(), this
 * never falls back to a per-CPU or default structure.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
	return thread_call_rcu_data;
}

/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)  Passing NULL
 * clears the assignment, so get_call_rcu_data() falls back to the
 * per-CPU or default structure.
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
	thread_call_rcu_data = crdp;
}

/*
 * Give every CPU its own call_rcu thread.  A CPU that already has a
 * call_rcu_data structure is left alone -- use set_cpu_call_rcu_data()
 * to replace one.
 *
 * flags is passed to each newly created structure.
 *
 * Returns 0 on success.  On failure returns a negative errno value:
 * -EINVAL when maxcpus is not positive, -ENOMEM when the per-CPU array
 * is missing or a structure could not be created, or whatever
 * set_cpu_call_rcu_data() returned.  The first two paths also set errno
 * here.
 */

int create_all_cpu_call_rcu_data(unsigned long flags)
{
	struct call_rcu_data *crdp;
	int cpu;
	int ret;

	/* Make sure maxcpus and the per-CPU array have been set up. */
	call_rcu_lock(&call_rcu_mutex);
	alloc_cpu_call_rcu_data();
	call_rcu_unlock(&call_rcu_mutex);
	if (maxcpus <= 0) {
		errno = EINVAL;
		return -EINVAL;
	}
	if (per_cpu_call_rcu_data == NULL) {
		errno = ENOMEM;
		return -ENOMEM;
	}

	for (cpu = 0; cpu < maxcpus; cpu++) {
		int occupied;

		crdp = NULL;
		/* Check the slot and create the thread under the same lock. */
		call_rcu_lock(&call_rcu_mutex);
		occupied = (get_cpu_call_rcu_data(cpu) != NULL);
		if (!occupied)
			crdp = __create_call_rcu_data(flags);
		call_rcu_unlock(&call_rcu_mutex);

		if (occupied)
			continue;
		if (crdp == NULL) {
			errno = ENOMEM;
			return -ENOMEM;
		}
		ret = set_cpu_call_rcu_data(cpu, crdp);
		if (ret != 0) {
			/* FIXME: Leaks crdp for now. */
			return ret; /* Can happen on race. */
		}
	}
	return 0;
}

/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * head: caller-provided rcu_head that is linked into the callback
 *       queue; it must remain valid until func has been invoked on it.
 * func: callback, later invoked by a call_rcu thread with head as its
 *       argument.
 *
 * Note that unless a call_rcu thread has already been created, the
 * first invocation of call_rcu() will create one.  So, if you need the
 * first invocation of call_rcu() to be fast, make sure to create a
 * call_rcu thread first.  One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 *
 * A failing pthread_cond_signal() prints a message on stderr and exits
 * the process.
 */

void call_rcu(struct rcu_head *head,
	      void (*func)(struct rcu_head *head))
{
	struct call_rcu_data *crdp;
	int ret;

	cds_wfq_node_init(&head->next);
	head->func = func;
	crdp = get_call_rcu_data();
	cds_wfq_enqueue(&crdp->cbs, &head->next);
	uatomic_inc(&crdp->qlen);
	/* URCU_CALL_RCU_RT threads poll on a timer and are never signalled. */
	if (_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT)
		return;
	call_rcu_lock(&crdp->mtx);
	/* Wake the call_rcu thread only if it has marked itself not running. */
	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RUNNING)) {
		/* Errors come back as the return value, not through errno. */
		ret = pthread_cond_signal(&crdp->cond);
		if (ret != 0) {
			fprintf(stderr, "pthread_cond_signal: %s\n",
				strerror(ret));
			exit(-1);
		}
	}
	call_rcu_unlock(&crdp->mtx);
}
Commit	Line	Data
	1	/*
	2	* urcu-call-rcu.c
	3	*
	4	* Userspace RCU library - batch memory reclamation with kernel API
	5	*
	6	* Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	7	*
	8	* This library is free software; you can redistribute it and/or
	9	* modify it under the terms of the GNU Lesser General Public
	10	* License as published by the Free Software Foundation; either
	11	* version 2.1 of the License, or (at your option) any later version.
	12	*
	13	* This library is distributed in the hope that it will be useful,
	14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	* Lesser General Public License for more details.
	17	*
	18	* You should have received a copy of the GNU Lesser General Public
	19	* License along with this library; if not, write to the Free Software
	20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	21	*/
	22
	23	#include <stdio.h>
	24	#include <pthread.h>
	25	#include <signal.h>
	26	#include <assert.h>
	27	#include <stdlib.h>
	28	#include <string.h>
	29	#include <errno.h>
	30	#include <poll.h>
	31	#include <sys/time.h>
	32	#include <syscall.h>
	33	#include <unistd.h>
	34
	35	#include "config.h"
	36	#include "urcu/wfqueue.h"
	37	#include "urcu-call-rcu.h"
	38	#include "urcu-pointer.h"
	39	#include "urcu/list.h"
	40
	41	/* Data structure that identifies a call_rcu thread. */
	42
	43	struct call_rcu_data {
	44	struct cds_wfq_queue cbs;
	45	unsigned long flags;
	46	pthread_mutex_t mtx;
	47	pthread_cond_t cond;
	48	unsigned long qlen;
	49	pthread_t tid;
	50	struct cds_list_head list;
	51	} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
	52
	53	/*
	54	* List of all call_rcu_data structures to keep valgrind happy.
	55	* Protected by call_rcu_mutex.
	56	*/
	57
	58	CDS_LIST_HEAD(call_rcu_data_list);
	59
	60	/* Link a thread using call_rcu() to its call_rcu thread. */
	61
	62	static __thread struct call_rcu_data *thread_call_rcu_data;
	63
	64	/* Guard call_rcu thread creation. */
	65
	66	static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
	67
	68	/* If a given thread does not have its own call_rcu thread, this is default. */
	69
	70	static struct call_rcu_data *default_call_rcu_data;
	71
	72	extern void synchronize_rcu(void);
	73
	74	/*
	75	* If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
	76	* available, then we can have call_rcu threads assigned to individual
	77	* CPUs rather than only to specific threads.
	78	*/
	79
	80	#if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)
	81
	82	/*
	83	* Pointer to array of pointers to per-CPU call_rcu_data structures
	84	* and # CPUs.
	85	*/
	86
	87	static struct call_rcu_data **per_cpu_call_rcu_data;
	88	static long maxcpus;
	89
	90	/* Allocate the array if it has not already been allocated. */
	91
	92	static void alloc_cpu_call_rcu_data(void)
	93	{
	94	struct call_rcu_data **p;
	95	static int warned = 0;
	96
	97	if (maxcpus != 0)
	98	return;
	99	maxcpus = sysconf(_SC_NPROCESSORS_CONF);
	100	if (maxcpus <= 0) {
	101	return;
	102	}
	103	p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
	104	if (p != NULL) {
	105	memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
	106	per_cpu_call_rcu_data = p;
	107	} else {
	108	if (!warned) {
	109	fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
	110	}
	111	warned = 1;
	112	}
	113	}
	114
	115	#else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
	116
	117	static const struct call_rcu_data **per_cpu_call_rcu_data = NULL;
	118	static const long maxcpus = -1;
	119
	120	static void alloc_cpu_call_rcu_data(void)
	121	{
	122	}
	123
	124	static int sched_getcpu(void)
	125	{
	126	return -1;
	127	}
	128
	129	#endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
	130
	131	/* Acquire the specified pthread mutex. */
	132
	133	static void call_rcu_lock(pthread_mutex_t *pmp)
	134	{
	135	if (pthread_mutex_lock(pmp) != 0) {
	136	perror("pthread_mutex_lock");
	137	exit(-1);
	138	}
	139	}
	140
	141	/* Release the specified pthread mutex. */
	142
	143	static void call_rcu_unlock(pthread_mutex_t *pmp)
	144	{
	145	if (pthread_mutex_unlock(pmp) != 0) {
	146	perror("pthread_mutex_unlock");
	147	exit(-1);
	148	}
	149	}
	150
	151	/* This is the code run by each call_rcu thread. */
	152
	153	static void call_rcu_thread(void arg)
	154	{
	155	unsigned long cbcount;
	156	struct cds_wfq_node *cbs;
	157	struct cds_wfq_node **cbs_tail;
	158	struct call_rcu_data crdp = (struct call_rcu_data )arg;
	159	struct rcu_head *rhp;
	160
	161	thread_call_rcu_data = crdp;
	162	for (;;) {
	163	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
	164	while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
	165	poll(NULL, 0, 1);
	166	_CMM_STORE_SHARED(crdp->cbs.head, NULL);
	167	cbs_tail = (struct cds_wfq_node **)
	168	uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
	169	synchronize_rcu();
	170	cbcount = 0;
	171	do {
	172	while (cbs->next == NULL &&
	173	&cbs->next != cbs_tail)
	174	poll(NULL, 0, 1);
	175	if (cbs == &crdp->cbs.dummy) {
	176	cbs = cbs->next;
	177	continue;
	178	}
	179	rhp = (struct rcu_head *)cbs;
	180	cbs = cbs->next;
	181	rhp->func(rhp);
	182	cbcount++;
	183	} while (cbs != NULL);
	184	uatomic_sub(&crdp->qlen, cbcount);
	185	}
	186	if (crdp->flags & URCU_CALL_RCU_RT)
	187	poll(NULL, 0, 10);
	188	else {
	189	call_rcu_lock(&crdp->mtx);
	190	_CMM_STORE_SHARED(crdp->flags,
	191	crdp->flags & ~URCU_CALL_RCU_RUNNING);
	192	if (&crdp->cbs.head ==
	193	_CMM_LOAD_SHARED(crdp->cbs.tail) &&
	194	pthread_cond_wait(&crdp->cond, &crdp->mtx) != 0) {
	195	perror("pthread_cond_wait");
	196	exit(-1);
	197	}
	198	_CMM_STORE_SHARED(crdp->flags,
	199	crdp->flags \| URCU_CALL_RCU_RUNNING);
	200	poll(NULL, 0, 10);
	201	call_rcu_unlock(&crdp->mtx);
	202	}
	203	}
	204	return NULL; /* NOTREACHED */
	205	}
	206
	207	/*
	208	* Create both a call_rcu thread and the corresponding call_rcu_data
	209	* structure, linking the structure in as specified. Caller must hold
	210	* call_rcu_mutex.
	211	*/
	212
	213	static void call_rcu_data_init(struct call_rcu_data **crdpp,
	214	unsigned long flags)
	215	{
	216	struct call_rcu_data *crdp;
	217
	218	crdp = malloc(sizeof(*crdp));
	219	if (crdp == NULL) {
	220	fprintf(stderr, "Out of memory.\n");
	221	exit(-1);
	222	}
	223	memset(crdp, '\0', sizeof(*crdp));
	224	cds_wfq_init(&crdp->cbs);
	225	crdp->qlen = 0;
	226	if (pthread_mutex_init(&crdp->mtx, NULL) != 0) {
	227	perror("pthread_mutex_init");
	228	exit(-1);
	229	}
	230	if (pthread_cond_init(&crdp->cond, NULL) != 0) {
	231	perror("pthread_cond_init");
	232	exit(-1);
	233	}
	234	crdp->flags = flags \| URCU_CALL_RCU_RUNNING;
	235	cds_list_add(&crdp->list, &call_rcu_data_list);
	236	cmm_smp_mb(); /* Structure initialized before pointer is planted. */
	237	*crdpp = crdp;
	238	if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
	239	perror("pthread_create");
	240	exit(-1);
	241	}
	242	}
	243
	244	/*
	245	* Return a pointer to the call_rcu_data structure for the specified
	246	* CPU, returning NULL if there is none. We cannot automatically
	247	* created it because the platform we are running on might not define
	248	* sched_getcpu().
	249	*/
	250
	251	struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
	252	{
	253	static int warned = 0;
	254
	255	if (per_cpu_call_rcu_data == NULL)
	256	return NULL;
	257	if (!warned && maxcpus > 0 && (cpu < 0 \|\| maxcpus <= cpu)) {
	258	fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
	259	warned = 1;
	260	}
	261	if (cpu < 0 \|\| maxcpus <= cpu)
	262	return NULL;
	263	return per_cpu_call_rcu_data[cpu];
	264	}
	265
	266	/*
	267	* Return the tid corresponding to the call_rcu thread whose
	268	* call_rcu_data structure is specified.
	269	*/
	270
	271	pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
	272	{
	273	return crdp->tid;
	274	}
	275
	276	/*
	277	* Create a call_rcu_data structure (with thread) and return a pointer.
	278	*/
	279
	280	static struct call_rcu_data *__create_call_rcu_data(unsigned long flags)
	281	{
	282	struct call_rcu_data *crdp;
	283
	284	call_rcu_data_init(&crdp, flags);
	285	return crdp;
	286	}
	287
	288	struct call_rcu_data *create_call_rcu_data(unsigned long flags)
	289	{
	290	struct call_rcu_data *crdp;
	291
	292	call_rcu_lock(&call_rcu_mutex);
	293	crdp = __create_call_rcu_data(flags);
	294	call_rcu_unlock(&call_rcu_mutex);
	295	return crdp;
	296	}
	297
	298	/*
	299	* Set the specified CPU to use the specified call_rcu_data structure.
	300	*/
	301
	302	int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
	303	{
	304	int warned = 0;
	305
	306	call_rcu_lock(&call_rcu_mutex);
	307	if (cpu < 0 \|\| maxcpus <= cpu) {
	308	if (!warned) {
	309	fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
	310	warned = 1;
	311	}
	312	call_rcu_unlock(&call_rcu_mutex);
	313	errno = EINVAL;
	314	return -EINVAL;
	315	}
	316	alloc_cpu_call_rcu_data();
	317	call_rcu_unlock(&call_rcu_mutex);
	318	if (per_cpu_call_rcu_data == NULL) {
	319	errno = ENOMEM;
	320	return -ENOMEM;
	321	}
	322	per_cpu_call_rcu_data[cpu] = crdp;
	323	return 0;
	324	}
	325
	326	/*
	327	* Return a pointer to the default call_rcu_data structure, creating
	328	* one if need be. Because we never free call_rcu_data structures,
	329	* we don't need to be in an RCU read-side critical section.
	330	*/
	331
	332	struct call_rcu_data *get_default_call_rcu_data(void)
	333	{
	334	if (default_call_rcu_data != NULL)
	335	return rcu_dereference(default_call_rcu_data);
	336	call_rcu_lock(&call_rcu_mutex);
	337	if (default_call_rcu_data != NULL) {
	338	call_rcu_unlock(&call_rcu_mutex);
	339	return default_call_rcu_data;
	340	}
	341	call_rcu_data_init(&default_call_rcu_data, 0);
	342	call_rcu_unlock(&call_rcu_mutex);
	343	return default_call_rcu_data;
	344	}
	345
	346	/*
	347	* Return the call_rcu_data structure that applies to the currently
	348	* running thread. Any call_rcu_data structure assigned specifically
	349	* to this thread has first priority, followed by any call_rcu_data
	350	* structure assigned to the CPU on which the thread is running,
	351	* followed by the default call_rcu_data structure. If there is not
	352	* yet a default call_rcu_data structure, one will be created.
	353	*/
	354	struct call_rcu_data *get_call_rcu_data(void)
	355	{
	356	int curcpu;
	357	static int warned = 0;
	358
	359	if (thread_call_rcu_data != NULL)
	360	return thread_call_rcu_data;
	361	if (maxcpus <= 0)
	362	return get_default_call_rcu_data();
	363	curcpu = sched_getcpu();
	364	if (!warned && (curcpu < 0 \|\| maxcpus <= curcpu)) {
	365	fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
	366	warned = 1;
	367	}
	368	if (curcpu >= 0 && maxcpus > curcpu &&
	369	per_cpu_call_rcu_data != NULL &&
	370	per_cpu_call_rcu_data[curcpu] != NULL)
	371	return per_cpu_call_rcu_data[curcpu];
	372	return get_default_call_rcu_data();
	373	}
	374
	375	/*
	376	* Return a pointer to this task's call_rcu_data if there is one.
	377	*/
	378
	379	struct call_rcu_data *get_thread_call_rcu_data(void)
	380	{
	381	return thread_call_rcu_data;
	382	}
	383
	384	/*
	385	* Set this task's call_rcu_data structure as specified, regardless
	386	* of whether or not this task already had one. (This allows switching
	387	* to and from real-time call_rcu threads, for example.)
	388	*/
	389
	390	void set_thread_call_rcu_data(struct call_rcu_data *crdp)
	391	{
	392	thread_call_rcu_data = crdp;
	393	}
	394
	395	/*
	396	* Create a separate call_rcu thread for each CPU. This does not
	397	* replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
	398	* function if you want that behavior.
	399	*/
	400
	401	int create_all_cpu_call_rcu_data(unsigned long flags)
	402	{
	403	int i;
	404	struct call_rcu_data *crdp;
	405	int ret;
	406
	407	call_rcu_lock(&call_rcu_mutex);
	408	alloc_cpu_call_rcu_data();
	409	call_rcu_unlock(&call_rcu_mutex);
	410	if (maxcpus <= 0) {
	411	errno = EINVAL;
	412	return -EINVAL;
	413	}
	414	if (per_cpu_call_rcu_data == NULL) {
	415	errno = ENOMEM;
	416	return -ENOMEM;
	417	}
	418	for (i = 0; i < maxcpus; i++) {
	419	call_rcu_lock(&call_rcu_mutex);
	420	if (get_cpu_call_rcu_data(i)) {
	421	call_rcu_unlock(&call_rcu_mutex);
	422	continue;
	423	}
	424	crdp = __create_call_rcu_data(flags);
	425	if (crdp == NULL) {
	426	call_rcu_unlock(&call_rcu_mutex);
	427	errno = ENOMEM;
	428	return -ENOMEM;
	429	}
	430	call_rcu_unlock(&call_rcu_mutex);
	431	if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
	432	/* FIXME: Leaks crdp for now. */
	433	return ret; /* Can happen on race. */
	434	}
	435	}
	436	return 0;
	437	}
	438
	439	/*
	440	* Schedule a function to be invoked after a following grace period.
	441	* This is the only function that must be called -- the others are
	442	* only present to allow applications to tune their use of RCU for
	443	* maximum performance.
	444	*
	445	* Note that unless a call_rcu thread has not already been created,
	446	* the first invocation of call_rcu() will create one. So, if you
	447	* need the first invocation of call_rcu() to be fast, make sure
	448	* to create a call_rcu thread first. One way to accomplish this is
	449	* "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
	450	*/
	451
	452	void call_rcu(struct rcu_head *head,
	453	void (func)(struct rcu_head head))
	454	{
	455	struct call_rcu_data *crdp;
	456
	457	cds_wfq_node_init(&head->next);
	458	head->func = func;
	459	crdp = get_call_rcu_data();
	460	cds_wfq_enqueue(&crdp->cbs, &head->next);
	461	uatomic_inc(&crdp->qlen);
	462	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT)) {
	463	call_rcu_lock(&crdp->mtx);
	464	if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RUNNING)) {
	465	if (pthread_cond_signal(&crdp->cond) != 0) {
	466	perror("pthread_cond_signal");
	467	exit(-1);
	468	}
	469	}
	470	call_rcu_unlock(&crdp->mtx);
	471	}
	472	}