X-Git-Url: https://git.lttng.org/?p=urcu.git;a=blobdiff_plain;f=urcu-call-rcu-impl.h;h=5dfdb9277b5a351f62d09ee7b534f94c0a4ca3b5;hp=13b24ff20b7c71c265c1fc5b0b3d17f1c28ce55c;hb=f89147e91cccf7bd625260104c87acc0bae2449e;hpb=4a6d73787de534cbd7d5fc7a6b60af64de66e7ea

diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h
index 13b24ff..5dfdb92 100644
--- a/urcu-call-rcu-impl.h
+++ b/urcu-call-rcu-impl.h
@@ -21,6 +21,7 @@
  */
 
 #define _GNU_SOURCE
+#define _LGPL_SOURCE
 #include 
 #include 
 #include 
@@ -35,7 +36,7 @@
 #include 
 #include 
 #include "config.h"
-#include "urcu/wfqueue.h"
+#include "urcu/wfcqueue.h"
 #include "urcu-call-rcu.h"
 #include "urcu-pointer.h"
 #include "urcu/list.h"
@@ -46,7 +47,15 @@
 /* Data structure that identifies a call_rcu thread. */
 
 struct call_rcu_data {
-	struct cds_wfq_queue cbs;
+	/*
+	 * We do not align head on a different cache-line than tail
+	 * mainly because call_rcu callback-invocation threads use
+	 * batching ("splice") to get an entire list of callbacks, which
+	 * effectively empties the queue, and requires to touch the tail
+	 * anyway.
+	 */
+	struct cds_wfcq_tail cbs_tail;
+	struct cds_wfcq_head cbs_head;
 	unsigned long flags;
 	int32_t futex;
 	unsigned long qlen; /* maintained for debugging. */
@@ -60,7 +69,7 @@ struct call_rcu_data {
  * Protected by call_rcu_mutex.
  */
 
-CDS_LIST_HEAD(call_rcu_data_list);
+static CDS_LIST_HEAD(call_rcu_data_list);
 
 /* Link a thread using call_rcu() to its call_rcu thread. */
 
@@ -220,10 +229,7 @@ static void call_rcu_wake_up(struct call_rcu_data *crdp)
 static void *call_rcu_thread(void *arg)
 {
 	unsigned long cbcount;
-	struct cds_wfq_node *cbs;
-	struct cds_wfq_node **cbs_tail;
-	struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
-	struct rcu_head *rhp;
+	struct call_rcu_data *crdp = (struct call_rcu_data *) arg;
 	int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
 	int ret;
 
@@ -243,35 +249,33 @@ static void *call_rcu_thread(void *arg)
 		cmm_smp_mb();
 	}
 	for (;;) {
-		if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
-			while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
-				poll(NULL, 0, 1);
-			_CMM_STORE_SHARED(crdp->cbs.head, NULL);
-			cbs_tail = (struct cds_wfq_node **)
-				uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
+		struct cds_wfcq_head cbs_tmp_head;
+		struct cds_wfcq_tail cbs_tmp_tail;
+		struct cds_wfcq_node *cbs, *cbs_tmp_n;
+
+		cds_wfcq_init(&cbs_tmp_head, &cbs_tmp_tail);
+		__cds_wfcq_splice_blocking(&cbs_tmp_head, &cbs_tmp_tail,
+			&crdp->cbs_head, &crdp->cbs_tail);
+		if (!cds_wfcq_empty(&cbs_tmp_head, &cbs_tmp_tail)) {
 			synchronize_rcu();
 			cbcount = 0;
-			do {
-				while (cbs->next == NULL &&
-				       &cbs->next != cbs_tail)
-					poll(NULL, 0, 1);
-				if (cbs == &crdp->cbs.dummy) {
-					cbs = cbs->next;
-					continue;
-				}
-				rhp = (struct rcu_head *)cbs;
-				cbs = cbs->next;
+			__cds_wfcq_for_each_blocking_safe(&cbs_tmp_head,
+					&cbs_tmp_tail, cbs, cbs_tmp_n) {
+				struct rcu_head *rhp;
+
+				rhp = caa_container_of(cbs,
+					struct rcu_head, next);
 				rhp->func(rhp);
 				cbcount++;
-			} while (cbs != NULL);
+			}
 			uatomic_sub(&crdp->qlen, cbcount);
 		}
 		if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
 			break;
 		rcu_thread_offline();
 		if (!rt) {
-			if (&crdp->cbs.head
-			    == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
+			if (cds_wfcq_empty(&crdp->cbs_head,
+					   &crdp->cbs_tail)) {
 				call_rcu_wait(crdp);
 				poll(NULL, 0, 10);
 				uatomic_dec(&crdp->futex);
@@ -317,7 +321,7 @@ static void call_rcu_data_init(struct call_rcu_data **crdpp,
 	if (crdp == NULL)
 		urcu_die(errno);
 	memset(crdp, '\0', sizeof(*crdp));
-	cds_wfq_init(&crdp->cbs);
+	cds_wfcq_init(&crdp->cbs_head, &crdp->cbs_tail);
 	crdp->qlen = 0;
 	crdp->futex = 0;
 	crdp->flags = flags;
@@ -590,12 +594,12 @@ void call_rcu(struct rcu_head *head,
 {
 	struct call_rcu_data *crdp;
 
-	cds_wfq_node_init(&head->next);
+	cds_wfcq_node_init(&head->next);
 	head->func = func;
 	/* Holding rcu read-side lock across use of per-cpu crdp */
 	rcu_read_lock();
 	crdp = get_call_rcu_data();
-	cds_wfq_enqueue(&crdp->cbs, &head->next);
+	cds_wfcq_enqueue(&crdp->cbs_head, &crdp->cbs_tail, &head->next);
 	uatomic_inc(&crdp->qlen);
 	wake_call_rcu_thread(crdp);
 	rcu_read_unlock();
@@ -622,13 +626,13 @@ void call_rcu(struct rcu_head *head,
  * The caller must wait for a grace-period to pass between return from
  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
  * previous call rcu data as argument.
+ *
+ * Note: introducing __cds_wfcq_splice_blocking() in this function fixed
+ * a list corruption bug in the 0.7.x series. The equivalent fix
+ * appeared in 0.6.8 for the stable-0.6 branch.
  */
 void call_rcu_data_free(struct call_rcu_data *crdp)
 {
-	struct cds_wfq_node *cbs;
-	struct cds_wfq_node **cbs_tail;
-	struct cds_wfq_node **cbs_endprev;
-
 	if (crdp == NULL || crdp == default_call_rcu_data) {
 		return;
 	}
@@ -638,17 +642,12 @@ void call_rcu_data_free(struct call_rcu_data *crdp)
 		while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 			poll(NULL, 0, 1);
 	}
-	if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
-		while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
-			poll(NULL, 0, 1);
-		_CMM_STORE_SHARED(crdp->cbs.head, NULL);
-		cbs_tail = (struct cds_wfq_node **)
-			uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
+	if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
 		/* Create default call rcu data if need be */
 		(void) get_default_call_rcu_data();
-		cbs_endprev = (struct cds_wfq_node **)
-			uatomic_xchg(&default_call_rcu_data, cbs_tail);
-		*cbs_endprev = cbs;
+		__cds_wfcq_splice_blocking(&default_call_rcu_data->cbs_head,
+			&default_call_rcu_data->cbs_tail,
+			&crdp->cbs_head, &crdp->cbs_tail);
 		uatomic_add(&default_call_rcu_data->qlen,
			    uatomic_read(&crdp->qlen));
 		wake_call_rcu_thread(default_call_rcu_data);
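
For readers unfamiliar with the wfcqueue API this patch switches to, the sketch below shows the same producer/consumer pattern in isolation: producers enqueue embedded cds_wfcq_node entries, and a single consumer splices the whole queue into a local (head, tail) pair and walks it with the blocking-safe iterator, as call_rcu() and call_rcu_thread() do in the hunks above. This is a minimal illustration, not code from the patch: the struct job, submit_job() and process_jobs() names are invented for the example, while the cds_wfcq_* and caa_container_of() calls are the ones the diff itself uses. A single dequeuer is assumed, which is what makes the lock-free __cds_wfcq_* variants safe here, just as each call_rcu worker thread is the sole dequeuer of its own queue.

/*
 * Illustrative sketch only (not part of the patch): wfcqueue
 * producer/consumer pattern mirroring call_rcu()/call_rcu_thread().
 */
#define _LGPL_SOURCE	/* use the inline wfcqueue implementation, as the patch does */
#include <stdio.h>
#include <stdlib.h>
#include <urcu/compiler.h>	/* caa_container_of() */
#include <urcu/wfcqueue.h>

struct job {
	int id;				/* example payload */
	struct cds_wfcq_node node;	/* embedded queue node */
};

static struct cds_wfcq_head jobs_head;
static struct cds_wfcq_tail jobs_tail;

/* Producer side: same steps as call_rcu() performs on rcu_head.next. */
static void submit_job(int id)
{
	struct job *j = malloc(sizeof(*j));

	if (!j)
		abort();
	j->id = id;
	cds_wfcq_node_init(&j->node);
	cds_wfcq_enqueue(&jobs_head, &jobs_tail, &j->node);
}

/* Consumer side: same splice-then-iterate loop as call_rcu_thread(). */
static void process_jobs(void)
{
	struct cds_wfcq_head tmp_head;
	struct cds_wfcq_tail tmp_tail;
	struct cds_wfcq_node *n, *next;

	cds_wfcq_init(&tmp_head, &tmp_tail);
	/* Move every queued node in one shot; this empties the shared queue. */
	__cds_wfcq_splice_blocking(&tmp_head, &tmp_tail,
				   &jobs_head, &jobs_tail);
	if (cds_wfcq_empty(&tmp_head, &tmp_tail))
		return;
	__cds_wfcq_for_each_blocking_safe(&tmp_head, &tmp_tail, n, next) {
		struct job *j = caa_container_of(n, struct job, node);

		printf("job %d\n", j->id);
		free(j);	/* safe: the iterator already fetched "next" */
	}
}

int main(void)
{
	cds_wfcq_init(&jobs_head, &jobs_tail);
	submit_job(1);
	submit_job(2);
	process_jobs();
	return 0;
}

Because the splice drains the shared queue in one operation that touches both head and tail anyway, keeping cbs_head and cbs_tail on the same cache line (as the new comment in struct call_rcu_data explains) costs the dequeuer nothing.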