Add kmalloc failover to vmalloc

author Michael Jeanson <mjeanson@efficios.com>

Mon, 25 Sep 2017 14:56:20 +0000 (10:56 -0400)

committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

Tue, 26 Sep 2017 14:59:41 +0000 (10:59 -0400)
author Michael Jeanson <mjeanson@efficios.com>
Mon, 25 Sep 2017 14:56:20 +0000 (10:56 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 26 Sep 2017 14:59:41 +0000 (10:59 -0400)
diff --git a/lib/prio_heap/lttng_prio_heap.c b/lib/prio_heap/lttng_prio_heap.c

index 6db7f524008c332dfa426f5dbf7c8a5d4b9361b7..01ed69f2cac1a52b62a794e22ca5a43f20a798b9 100644 (file)
--- a/lib/prio_heap/lttng_prio_heap.c
+++ b/lib/prio_heap/lttng_prio_heap.c
@@ -26,6 +26,7 @@
  
  #include <linux/slab.h>
  #include <lib/prio_heap/lttng_prio_heap.h>
+#include <wrapper/vmalloc.h>
  
  #ifdef DEBUG_HEAP
  void lttng_check_heap(const struct lttng_ptr_heap *heap)
@@ -70,12 +71,12 @@ int heap_grow(struct lttng_ptr_heap *heap, size_t new_len)
                 return 0;
  
         heap->alloc_len = max_t(size_t, new_len, heap->alloc_len << 1);
-       new_ptrs = kmalloc(heap->alloc_len * sizeof(void *), heap->gfpmask);
+       new_ptrs = lttng_kvmalloc(heap->alloc_len * sizeof(void *), heap->gfpmask);
         if (!new_ptrs)
                 return -ENOMEM;
         if (heap->ptrs)
                 memcpy(new_ptrs, heap->ptrs, heap->len * sizeof(void *));
-       kfree(heap->ptrs);
+       lttng_kvfree(heap->ptrs);
         heap->ptrs = new_ptrs;
         return 0;
  }
@@ -109,7 +110,7 @@ int lttng_heap_init(struct lttng_ptr_heap *heap, size_t alloc_len,
  
  void lttng_heap_free(struct lttng_ptr_heap *heap)
  {
-       kfree(heap->ptrs);
+       lttng_kvfree(heap->ptrs);
  }
  
  static void heapify(struct lttng_ptr_heap *heap, size_t i)
diff --git a/lib/ringbuffer/ring_buffer_backend.c b/lib/ringbuffer/ring_buffer_backend.c

index f760836cfe50032e18c9d3774a0e1cd7c6338ed5..3efa1d12ecb6bc6cffc12ffadd321934a55d95de 100644 (file)
--- a/lib/ringbuffer/ring_buffer_backend.c
+++ b/lib/ringbuffer/ring_buffer_backend.c
@@ -71,7 +71,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
         if (unlikely(!pages))
                 goto pages_error;
  
-       bufb->array = kmalloc_node(ALIGN(sizeof(*bufb->array)
+       bufb->array = lttng_kvmalloc_node(ALIGN(sizeof(*bufb->array)
                                          * num_subbuf_alloc,
                                   1 << INTERNODE_CACHE_SHIFT),
                         GFP_KERNEL | __GFP_NOWARN,
@@ -90,7 +90,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
         /* Allocate backend pages array elements */
         for (i = 0; i < num_subbuf_alloc; i++) {
                 bufb->array[i] =
-                       kzalloc_node(ALIGN(
+                       lttng_kvzalloc_node(ALIGN(
                                 sizeof(struct lib_ring_buffer_backend_pages) +
                                 sizeof(struct lib_ring_buffer_backend_page)
                                 * num_pages_per_subbuf,
@@ -102,7 +102,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
         }
  
         /* Allocate write-side subbuffer table */
-       bufb->buf_wsb = kzalloc_node(ALIGN(
+       bufb->buf_wsb = lttng_kvzalloc_node(ALIGN(
                                 sizeof(struct lib_ring_buffer_backend_subbuffer)
                                 * num_subbuf,
                                 1 << INTERNODE_CACHE_SHIFT),
@@ -122,7 +122,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
                 bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0);
  
         /* Allocate subbuffer packet counter table */
-       bufb->buf_cnt = kzalloc_node(ALIGN(
+       bufb->buf_cnt = lttng_kvzalloc_node(ALIGN(
                                 sizeof(struct lib_ring_buffer_backend_counts)
                                 * num_subbuf,
                                 1 << INTERNODE_CACHE_SHIFT),
@@ -154,15 +154,15 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
         return 0;
  
  free_wsb:
-       kfree(bufb->buf_wsb);
+       lttng_kvfree(bufb->buf_wsb);
  free_array:
         for (i = 0; (i < num_subbuf_alloc && bufb->array[i]); i++)
-               kfree(bufb->array[i]);
+               lttng_kvfree(bufb->array[i]);
  depopulate:
         /* Free all allocated pages */
         for (i = 0; (i < num_pages && pages[i]); i++)
                 __free_page(pages[i]);
-       kfree(bufb->array);
+       lttng_kvfree(bufb->array);
  array_error:
         vfree(pages);
  pages_error:
@@ -191,14 +191,14 @@ void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb)
         if (chanb->extra_reader_sb)
                 num_subbuf_alloc++;
  
-       kfree(bufb->buf_wsb);
-       kfree(bufb->buf_cnt);
+       lttng_kvfree(bufb->buf_wsb);
+       lttng_kvfree(bufb->buf_cnt);
         for (i = 0; i < num_subbuf_alloc; i++) {
                 for (j = 0; j < bufb->num_pages_per_subbuf; j++)
                         __free_page(pfn_to_page(bufb->array[i]->p[j].pfn));
-               kfree(bufb->array[i]);
+               lttng_kvfree(bufb->array[i]);
         }
-       kfree(bufb->array);
+       lttng_kvfree(bufb->array);
         bufb->allocated = 0;
  }
  
diff --git a/lib/ringbuffer/ring_buffer_frontend.c b/lib/ringbuffer/ring_buffer_frontend.c

index 0cd57581bb011168a351d044fe7fb739b536832c..d75e48cb8f231e6e775386d16d831f9cf066990b 100644 (file)
--- a/lib/ringbuffer/ring_buffer_frontend.c
+++ b/lib/ringbuffer/ring_buffer_frontend.c
@@ -65,6 +65,7 @@
  #include <wrapper/kref.h>
  #include <wrapper/percpu-defs.h>
  #include <wrapper/timer.h>
+#include <wrapper/vmalloc.h>
  
  /*
   * Internal structure representing offsets to use at a sub-buffer switch.
@@ -147,8 +148,8 @@ void lib_ring_buffer_free(struct lib_ring_buffer *buf)
         struct channel *chan = buf->backend.chan;
  
         lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu);
-       kfree(buf->commit_hot);
-       kfree(buf->commit_cold);
+       lttng_kvfree(buf->commit_hot);
+       lttng_kvfree(buf->commit_cold);
  
         lib_ring_buffer_backend_free(&buf->backend);
  }
@@ -245,7 +246,7 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
                 return ret;
  
         buf->commit_hot =
-               kzalloc_node(ALIGN(sizeof(*buf->commit_hot)
+               lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_hot)
                                    * chan->backend.num_subbuf,
                                    1 << INTERNODE_CACHE_SHIFT),
                         GFP_KERNEL | __GFP_NOWARN,
@@ -256,7 +257,7 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
         }
  
         buf->commit_cold =
-               kzalloc_node(ALIGN(sizeof(*buf->commit_cold)
+               lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_cold)
                                    * chan->backend.num_subbuf,
                                    1 << INTERNODE_CACHE_SHIFT),
                         GFP_KERNEL | __GFP_NOWARN,
@@ -305,9 +306,9 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
  
         /* Error handling */
  free_init:
-       kfree(buf->commit_cold);
+       lttng_kvfree(buf->commit_cold);
  free_commit:
-       kfree(buf->commit_hot);
+       lttng_kvfree(buf->commit_hot);
  free_chanbuf:
         lib_ring_buffer_backend_free(&buf->backend);
         return ret;
diff --git a/lttng-context-perf-counters.c b/lttng-context-perf-counters.c

index 8afc11f89019a9aea251c8b68cb2f39aa47b00da..260e5d0d9a5873cef635b93bbc93548d70104a46 100644 (file)
--- a/lttng-context-perf-counters.c
+++ b/lttng-context-perf-counters.c
@@ -119,7 +119,7 @@ void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
  #endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
         kfree(field->event_field.name);
         kfree(field->u.perf_counter->attr);
-       kfree(events);
+       lttng_kvfree(events);
         kfree(field->u.perf_counter);
  }
  
@@ -237,7 +237,7 @@ int lttng_add_perf_counter_to_ctx(uint32_t type,
         int ret;
         char *name_alloc;
  
-       events = kzalloc(num_possible_cpus() * sizeof(*events), GFP_KERNEL);
+       events = lttng_kvzalloc(num_possible_cpus() * sizeof(*events), GFP_KERNEL);
         if (!events)
                 return -ENOMEM;
  
@@ -372,6 +372,6 @@ name_alloc_error:
  error_alloc_perf_field:
         kfree(attr);
  error_attr:
-       kfree(events);
+       lttng_kvfree(events);
         return ret;
  }
diff --git a/lttng-context.c b/lttng-context.c

index 406f479de92994217b2c1e5c480086752b173e27..544e95f836f6cf3ab90119adae77f32608d200b5 100644 (file)
--- a/lttng-context.c
+++ b/lttng-context.c
@@ -95,12 +95,12 @@ struct lttng_ctx_field *lttng_append_context(struct lttng_ctx **ctx_p)
                 struct lttng_ctx_field *new_fields;
  
                 ctx->allocated_fields = max_t(size_t, 1, 2 * ctx->allocated_fields);
-               new_fields = kzalloc(ctx->allocated_fields * sizeof(struct lttng_ctx_field), GFP_KERNEL);
+               new_fields = lttng_kvzalloc(ctx->allocated_fields * sizeof(struct lttng_ctx_field), GFP_KERNEL);
                 if (!new_fields)
                         return NULL;
                 if (ctx->fields)
                         memcpy(new_fields, ctx->fields, sizeof(*ctx->fields) * ctx->nr_fields);
-               kfree(ctx->fields);
+               lttng_kvfree(ctx->fields);
                 ctx->fields = new_fields;
         }
         field = &ctx->fields[ctx->nr_fields];
@@ -240,7 +240,7 @@ void lttng_destroy_context(struct lttng_ctx *ctx)
                 if (ctx->fields[i].destroy)
                         ctx->fields[i].destroy(&ctx->fields[i]);
         }
-       kfree(ctx->fields);
+       lttng_kvfree(ctx->fields);
         kfree(ctx);
  }
  
diff --git a/lttng-events.c b/lttng-events.c

index 6aa994ca0925f38fd56d04c43c4937f0cf93a783..21c41133b859623e6a5ea63c65927021cb646171 100644 (file)
--- a/lttng-events.c
+++ b/lttng-events.c
@@ -132,7 +132,7 @@ struct lttng_session *lttng_session_create(void)
         int i;
  
         mutex_lock(&sessions_mutex);
-       session = kzalloc(sizeof(struct lttng_session), GFP_KERNEL);
+       session = lttng_kvzalloc(sizeof(struct lttng_session), GFP_KERNEL);
         if (!session)
                 goto err;
         INIT_LIST_HEAD(&session->chan);
@@ -163,7 +163,7 @@ struct lttng_session *lttng_session_create(void)
  err_free_cache:
         kfree(metadata_cache);
  err_free_session:
-       kfree(session);
+       lttng_kvfree(session);
  err:
         mutex_unlock(&sessions_mutex);
         return NULL;
@@ -212,7 +212,7 @@ void lttng_session_destroy(struct lttng_session *session)
         kref_put(&session->metadata_cache->refcount, metadata_cache_destroy);
         list_del(&session->list);
         mutex_unlock(&sessions_mutex);
-       kfree(session);
+       lttng_kvfree(session);
  }
  
  int lttng_session_statedump(struct lttng_session *session)
diff --git a/wrapper/vmalloc.h b/wrapper/vmalloc.h

index 2332439fe34c56515cefcaa4eda787a65ecaf835..2dd06cbf53e0fe565538b99fd633be504f7b7f9f 100644 (file)
--- a/wrapper/vmalloc.h
+++ b/wrapper/vmalloc.h
@@ -25,6 +25,9 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+
  #ifdef CONFIG_KALLSYMS
  
  #include <linux/kallsyms.h>
@@ -51,8 +54,6 @@ void wrapper_vmalloc_sync_all(void)
  }
  #else
  
-#include <linux/vmalloc.h>
-
  static inline
  void wrapper_vmalloc_sync_all(void)
  {
@@ -60,4 +61,168 @@ void wrapper_vmalloc_sync_all(void)
  }
  #endif
  
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0))
+static inline
+void *lttng_kvmalloc_node(unsigned long size, gfp_t flags, int node)
+{
+       void *ret;
+
+       ret = kvmalloc_node(size, flags, node);
+       if (is_vmalloc_addr(ret)) {
+               /*
+                * Make sure we don't trigger recursive page faults in the
+                * tracing fast path.
+                */
+               wrapper_vmalloc_sync_all();
+       }
+       return ret;
+}
+
+static inline
+void *lttng_kvzalloc_node(unsigned long size, gfp_t flags, int node)
+{
+       return lttng_kvmalloc_node(size, flags | __GFP_ZERO, node);
+}
+
+static inline
+void *lttng_kvmalloc(unsigned long size, gfp_t flags)
+{
+       return lttng_kvmalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void *lttng_kvzalloc(unsigned long size, gfp_t flags)
+{
+       return lttng_kvzalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void lttng_kvfree(const void *addr)
+{
+       kvfree(addr);
+}
+
+#else
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+/*
+ * Allocate from @node through __vmalloc_node, which is not exported and is
+ * therefore looked up via kallsyms. Without CONFIG_KALLSYMS, fall back to
+ * kmalloc_node with the same size, gfp mask and node.
+ * @gfp_mask is passed unchanged to whichever allocator is used.
+ *
+ * Returns the allocated memory, or NULL on failure.
+ */
+static inline
+void *__lttng_vmalloc_node_fallback(unsigned long size, unsigned long align,
+                         gfp_t gfp_mask, pgprot_t prot, int node, void *caller)
+{
+#ifdef CONFIG_KALLSYMS
+       /* __vmalloc_node is not exported: resolve it through kallsyms. */
+       void *(*lttng__vmalloc_node)(unsigned long size, unsigned long align,
+                       gfp_t gfp_mask, pgprot_t prot, int node, void *caller);
+
+       lttng__vmalloc_node = (void *) kallsyms_lookup_funcptr("__vmalloc_node");
+       /* Never call through a NULL pointer if the lookup found nothing. */
+       if (!lttng__vmalloc_node)
+               return NULL;
+       return lttng__vmalloc_node(size, align, gfp_mask, prot, node, caller);
+#else
+       /* No KALLSYMS: use kmalloc_node; align, prot and caller are unused. */
+       return kmalloc_node(size, gfp_mask, node);
+#endif
+}
+
+/**
+ * lttng_kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @flags: gfp mask for the allocation - must be compatible with GFP_KERNEL.
+ * @node: NUMA node to allocate from, or NUMA_NO_NODE for no node preference.
+ *
+ * Uses kmalloc to get the memory but if the allocation fails then falls back
+ * to the vmalloc allocator. Use lttng_kvfree to free the memory.
+ * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not supported
+ */
+static inline
+void *lttng_kvmalloc_node(unsigned long size, gfp_t flags, int node)
+{
+       void *ret;
+
+       /*
+        * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
+        * so the given set of flags has to be compatible.
+        */
+       WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
+
+       /*
+        * If the allocation fits in a single page, do not fallback.
+        */
+       if (size <= PAGE_SIZE) {
+               return kmalloc_node(size, flags, node);
+       }
+
+       /*
+        * Make sure that larger requests are not too disruptive - no OOM
+        * killer and no allocation failure warnings as we have a fallback
+        */
+       ret = kmalloc_node(size, flags | __GFP_NOWARN | __GFP_NORETRY, node);
+       if (!ret) {
+               if (node == NUMA_NO_NODE) {
+                       /*
+                        * If no node was specified, use __vmalloc which is
+                        * always exported.
+                        */
+                       ret = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+               } else {
+                       /*
+                        * Otherwise, we need to select a node but __vmalloc_node
+                        * is not exported, use this fallback wrapper which uses
+                        * kallsyms if available or falls back to kmalloc_node.
+                        */
+                       ret = __lttng_vmalloc_node_fallback(size, 1,
+                                       flags | __GFP_HIGHMEM, PAGE_KERNEL, node,
+                                       __builtin_return_address(0));
+               }
+
+               /*
+                * Make sure we don't trigger recursive page faults in the
+                * tracing fast path.
+                */
+               wrapper_vmalloc_sync_all();
+       }
+       return ret;
+}
+
+static inline
+void *lttng_kvzalloc_node(unsigned long size, gfp_t flags, int node)
+{
+       return lttng_kvmalloc_node(size, flags | __GFP_ZERO, node);
+}
+
+static inline
+void *lttng_kvmalloc(unsigned long size, gfp_t flags)
+{
+       return lttng_kvmalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void *lttng_kvzalloc(unsigned long size, gfp_t flags)
+{
+       return lttng_kvzalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void lttng_kvfree(const void *addr)
+{
+       if (is_vmalloc_addr(addr)) {
+               vfree(addr);
+       } else {
+               kfree(addr);
+       }
+}
+#endif
+
  #endif /* _LTTNG_WRAPPER_VMALLOC_H */
author	Michael Jeanson <mjeanson@efficios.com>
	Mon, 25 Sep 2017 14:56:20 +0000 (10:56 -0400)
committer	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
	Tue, 26 Sep 2017 14:59:41 +0000 (10:59 -0400)
lib/prio_heap/lttng_prio_heap.c		patch \| blob \| blame \| history
lib/ringbuffer/ring_buffer_backend.c		patch \| blob \| blame \| history
lib/ringbuffer/ring_buffer_frontend.c		patch \| blob \| blame \| history
lttng-context-perf-counters.c		patch \| blob \| blame \| history
lttng-context.c		patch \| blob \| blame \| history
lttng-events.c		patch \| blob \| blame \| history
wrapper/vmalloc.h		patch \| blob \| blame \| history