DPDK  20.05.0-rc0
rte_graph_worker.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2020 Marvell International Ltd.
3  */
4 
5 #ifndef _RTE_GRAPH_WORKER_H_
6 #define _RTE_GRAPH_WORKER_H_
7 
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_prefetch.h>
21 #include <rte_memcpy.h>
22 #include <rte_memory.h>
23 
24 #include "rte_graph.h"
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
35 struct rte_graph {
36  uint32_t tail;
37  uint32_t head;
38  uint32_t cir_mask;
39  rte_node_t nb_nodes;
40  rte_graph_off_t *cir_start;
41  rte_graph_off_t nodes_start;
42  rte_graph_t id;
43  int socket;
44  char name[RTE_GRAPH_NAMESIZE];
45  uint64_t fence;
47 
53 struct rte_node {
54  /* Slow path area */
55  uint64_t fence;
56  rte_graph_off_t next;
57  rte_node_t id;
58  rte_node_t parent_id;
59  rte_edge_t nb_edges;
60  uint32_t realloc_count;
62  char parent[RTE_NODE_NAMESIZE];
63  char name[RTE_NODE_NAMESIZE];
65  /* Fast path area */
66 #define RTE_NODE_CTX_SZ 16
67  uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned;
68  uint16_t size;
69  uint16_t idx;
70  rte_graph_off_t off;
71  uint64_t total_cycles;
72  uint64_t total_calls;
73  uint64_t total_objs;
75  union {
76  void **objs;
77  uint64_t objs_u64;
78  };
80  union {
81  rte_node_process_t process;
82  uint64_t process_u64;
83  };
84  struct rte_node *nodes[] __rte_cache_min_aligned;
86 
101 __rte_experimental
102 void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);
103 
120 __rte_experimental
121 void __rte_node_stream_alloc_size(struct rte_graph *graph,
122  struct rte_node *node, uint16_t req_size);
123 
136 __rte_experimental
137 static inline void
138 rte_graph_walk(struct rte_graph *graph)
139 {
140  const rte_graph_off_t *cir_start = graph->cir_start;
141  const rte_node_t mask = graph->cir_mask;
142  uint32_t head = graph->head;
143  struct rte_node *node;
144  uint64_t start;
145  uint16_t rc;
146  void **objs;
147 
148  /*
149  * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
150  * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
151  * in a circular buffer fashion.
152  *
153  * +-----+ <= cir_start - head [number of source nodes]
154  * | |
155  * | ... | <= source nodes
156  * | |
157  * +-----+ <= cir_start [head = 0] [tail = 0]
158  * | |
159  * | ... | <= pending streams
160  * | |
161  * +-----+ <= cir_start + mask
162  */
163  while (likely(head != graph->tail)) {
164  node = RTE_PTR_ADD(graph, cir_start[(int32_t)head++]);
165  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
166  objs = node->objs;
167  rte_prefetch0(objs);
168 
170  start = rte_rdtsc();
171  rc = node->process(graph, node, objs, node->idx);
172  node->total_cycles += rte_rdtsc() - start;
173  node->total_calls++;
174  node->total_objs += rc;
175  } else {
176  node->process(graph, node, objs, node->idx);
177  }
178  node->idx = 0;
179  head = likely((int32_t)head > 0) ? head & mask : head;
180  }
181  graph->tail = 0;
182 }
183 
184 /* Fast path helper functions */
185 
196 static __rte_always_inline void
197 __rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
198 {
199  uint32_t tail;
200 
201  tail = graph->tail;
202  graph->cir_start[tail++] = node->off;
203  graph->tail = tail & graph->cir_mask;
204 }
205 
223 static __rte_always_inline void
224 __rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
225  const uint16_t idx, const uint16_t space)
226 {
227 
228  /* Add to the pending stream list if the node is new */
229  if (idx == 0)
230  __rte_node_enqueue_tail_update(graph, node);
231 
232  if (unlikely(node->size < (idx + space)))
233  __rte_node_stream_alloc(graph, node);
234 }
235 
249 static __rte_always_inline struct rte_node *
250 __rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
251 {
252  RTE_ASSERT(next < node->nb_edges);
253  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
254  node = node->nodes[next];
255  RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
256 
257  return node;
258 }
259 
278 __rte_experimental
279 static inline void
280 rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
281  rte_edge_t next, void **objs, uint16_t nb_objs)
282 {
283  node = __rte_node_next_node_get(node, next);
284  const uint16_t idx = node->idx;
285 
286  __rte_node_enqueue_prologue(graph, node, idx, nb_objs);
287 
288  rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
289  node->idx = idx + nb_objs;
290 }
291 
308 __rte_experimental
309 static inline void
310 rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
311  rte_edge_t next, void *obj)
312 {
313  node = __rte_node_next_node_get(node, next);
314  uint16_t idx = node->idx;
315 
316  __rte_node_enqueue_prologue(graph, node, idx, 1);
317 
318  node->objs[idx++] = obj;
319  node->idx = idx;
320 }
321 
341 __rte_experimental
342 static inline void
343 rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
344  rte_edge_t next, void *obj0, void *obj1)
345 {
346  node = __rte_node_next_node_get(node, next);
347  uint16_t idx = node->idx;
348 
349  __rte_node_enqueue_prologue(graph, node, idx, 2);
350 
351  node->objs[idx++] = obj0;
352  node->objs[idx++] = obj1;
353  node->idx = idx;
354 }
355 
379 __rte_experimental
380 static inline void
381 rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
382  rte_edge_t next, void *obj0, void *obj1, void *obj2,
383  void *obj3)
384 {
385  node = __rte_node_next_node_get(node, next);
386  uint16_t idx = node->idx;
387 
388  __rte_node_enqueue_prologue(graph, node, idx, 4);
389 
390  node->objs[idx++] = obj0;
391  node->objs[idx++] = obj1;
392  node->objs[idx++] = obj2;
393  node->objs[idx++] = obj3;
394  node->idx = idx;
395 }
396 
416 __rte_experimental
417 static inline void
418 rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
419  rte_edge_t *nexts, void **objs, uint16_t nb_objs)
420 {
421  uint16_t i;
422 
423  for (i = 0; i < nb_objs; i++)
424  rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
425 }
426 
449 __rte_experimental
450 static inline void **
451 rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
452  rte_edge_t next, uint16_t nb_objs)
453 {
454  node = __rte_node_next_node_get(node, next);
455  const uint16_t idx = node->idx;
456  uint16_t free_space = node->size - idx;
457 
458  if (unlikely(free_space < nb_objs))
459  __rte_node_stream_alloc_size(graph, node, nb_objs);
460 
461  return &node->objs[idx];
462 }
463 
484 __rte_experimental
485 static inline void
486 rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
487  rte_edge_t next, uint16_t idx)
488 {
489  if (unlikely(!idx))
490  return;
491 
492  node = __rte_node_next_node_get(node, next);
493  if (node->idx == 0)
494  __rte_node_enqueue_tail_update(graph, node);
495 
496  node->idx += idx;
497 }
498 
516 __rte_experimental
517 static inline void
518 rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
519  rte_edge_t next)
520 {
521  struct rte_node *dst = __rte_node_next_node_get(src, next);
522 
523  /* Let swap the pointers if dst don't have valid objs */
524  if (likely(dst->idx == 0)) {
525  void **dobjs = dst->objs;
526  uint16_t dsz = dst->size;
527  dst->objs = src->objs;
528  dst->size = src->size;
529  src->objs = dobjs;
530  src->size = dsz;
531  dst->idx = src->idx;
532  __rte_node_enqueue_tail_update(graph, dst);
533  } else { /* Move the objects from src node to dst node */
534  rte_node_enqueue(graph, src, next, src->objs, src->idx);
535  }
536 }
537 
538 #ifdef __cplusplus
539 }
540 #endif
541 
542 #endif /* _RTE_GRAPH_WORKER_H_ */
uint32_t rte_node_t
Definition: rte_graph.h:44
static __rte_experimental void rte_node_enqueue(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void **objs, uint16_t nb_objs)
#define __rte_always_inline
Definition: rte_common.h:183
static __rte_experimental void rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src, rte_edge_t next)
uint16_t rte_edge_t
Definition: rte_graph.h:45
#define __rte_cache_min_aligned
Definition: rte_common.h:350
#define likely(x)
static __rte_experimental void rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj0, void *obj1)
#define RTE_NODE_NAMESIZE
Definition: rte_graph.h:36
static __rte_always_inline int rte_graph_has_stats_feature(void)
Definition: rte_graph.h:773
uint16_t rte_graph_t
Definition: rte_graph.h:46
uint16_t(* rte_node_process_t)(struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs)
Definition: rte_graph.h:101
#define RTE_GRAPH_NAMESIZE
Definition: rte_graph.h:35
static __rte_experimental void rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, uint16_t idx)
#define RTE_PTR_ADD(ptr, x)
Definition: rte_common.h:195
static __rte_experimental void rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node, rte_edge_t *nexts, void **objs, uint16_t nb_objs)
#define unlikely(x)
static __rte_experimental void rte_graph_walk(struct rte_graph *graph)
#define RTE_GRAPH_FENCE
Definition: rte_graph.h:41
static __rte_experimental void rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj)
#define RTE_STD_C11
Definition: rte_common.h:40
#define __rte_cache_aligned
Definition: rte_common.h:347
uint32_t rte_graph_off_t
Definition: rte_graph.h:43
static __rte_experimental void ** rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, uint16_t nb_objs)
static void * rte_memcpy(void *dst, const void *src, size_t n)
static __rte_experimental void rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node, rte_edge_t next, void *obj0, void *obj1, void *obj2, void *obj3)
static void rte_prefetch0(const volatile void *p)