/* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2017 Cavium, Inc */ #ifndef _TEST_PERF_COMMON_ #define _TEST_PERF_COMMON_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "evt_common.h" #include "evt_options.h" #include "evt_test.h" #define TEST_PERF_CA_ID 0 #define TEST_PERF_DA_ID 0 struct test_perf; struct worker_data { uint64_t processed_pkts; uint64_t latency; uint8_t dev_id; uint8_t port_id; struct test_perf *t; } __rte_cache_aligned; struct crypto_adptr_data { uint8_t cdev_id; uint16_t cdev_qp_id; void **crypto_sess; }; struct dma_adptr_data { uint8_t dma_dev_id; uint16_t vchan_id; void **dma_op; }; struct prod_data { uint8_t dev_id; uint8_t port_id; uint8_t queue_id; struct crypto_adptr_data ca; struct dma_adptr_data da; struct test_perf *t; } __rte_cache_aligned; struct test_perf { /* Don't change the offset of "done". Signal handler use this memory * to terminate all lcores work. */ int done; uint64_t outstand_pkts; uint8_t nb_workers; enum evt_test_result result; uint32_t nb_flows; uint64_t nb_pkts; struct rte_mempool *pool; struct prod_data prod[EVT_MAX_PORTS]; struct worker_data worker[EVT_MAX_PORTS]; struct evt_options *opt; uint8_t sched_type_list[EVT_MAX_STAGES] __rte_cache_aligned; struct rte_event_timer_adapter *timer_adptr[ RTE_EVENT_TIMER_ADAPTER_NUM_MAX] __rte_cache_aligned; struct rte_mempool *ca_op_pool; struct rte_mempool *ca_sess_pool; struct rte_mempool *ca_asym_sess_pool; struct rte_mempool *ca_vector_pool; struct rte_mempool *da_op_pool; } __rte_cache_aligned; struct perf_elt { union { struct rte_event_timer tim; struct { char pad[offsetof(struct rte_event_timer, user_meta)]; uint64_t timestamp; }; }; } __rte_cache_aligned; #define BURST_SIZE 16 #define MAX_PROD_ENQ_BURST_SIZE 128 #define PERF_WORKER_INIT\ struct worker_data *w = arg;\ struct test_perf *t = w->t;\ struct evt_options *opt = t->opt;\ const uint8_t dev = w->dev_id;\ const uint8_t port = w->port_id;\ const uint8_t prod_timer_type = \ opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR;\ uint8_t *const sched_type_list = &t->sched_type_list[0];\ const enum evt_prod_type prod_type = opt->prod_type;\ struct rte_mempool *const pool = t->pool;\ const uint8_t nb_stages = t->opt->nb_stages;\ const uint8_t laststage = nb_stages - 1;\ uint8_t cnt = 0;\ void *bufs[16] __rte_cache_aligned;\ int const sz = RTE_DIM(bufs);\ uint8_t stage;\ struct perf_elt *pe = NULL;\ if (opt->verbose_level > 1)\ printf("%s(): lcore %d dev_id %d port=%d\n", __func__,\ rte_lcore_id(), dev, port) static __rte_always_inline void perf_mark_fwd_latency(struct perf_elt *const pe) { pe->timestamp = rte_get_timer_cycles(); } static __rte_always_inline int perf_handle_crypto_ev(struct rte_event *ev, struct perf_elt **pe, int enable_fwd_latency) { struct rte_crypto_op *op = ev->event_ptr; struct rte_mbuf *m; if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) { rte_crypto_op_free(op); return op->status; } /* Forward latency not enabled - perf data will not be accessed */ if (!enable_fwd_latency) return 0; /* Get pointer to perf data */ if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { if (op->sym->m_dst == NULL) m = op->sym->m_src; else m = op->sym->m_dst; *pe = rte_pktmbuf_mtod(m, struct perf_elt *); } else { *pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length); } return 0; } static __rte_always_inline struct perf_elt * perf_elt_from_vec_get(struct rte_event_vector *vec) { /* Timestamp for vector event stored in first element */ struct rte_crypto_op *cop = vec->ptrs[0]; struct rte_mbuf *m; if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst; return rte_pktmbuf_mtod(m, struct perf_elt *); } else { return RTE_PTR_ADD(cop->asym->modex.result.data, cop->asym->modex.result.length); } } static __rte_always_inline int perf_handle_crypto_vector_ev(struct rte_event *ev, struct perf_elt **pe, const int enable_fwd_latency) { struct rte_event_vector *vec = ev->vec; struct rte_crypto_op *cop; struct rte_mbuf *m; int i, n = 0; void *data; for (i = 0; i < vec->nb_elem; i++) { cop = vec->ptrs[i]; if (unlikely(cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) { if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { m = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst; rte_pktmbuf_free(m); } else { data = cop->asym->modex.result.data; rte_mempool_put(rte_mempool_from_obj(data), data); } rte_crypto_op_free(cop); continue; } vec->ptrs[n++] = cop; } /* All cops failed, free the vector */ if (n == 0) { rte_mempool_put(rte_mempool_from_obj(vec), vec); return -ENOENT; } vec->nb_elem = n; /* Forward latency not enabled - perf data will be not accessed */ if (!enable_fwd_latency) return 0; /* Get pointer to perf data */ *pe = perf_elt_from_vec_get(vec); return 0; } static __rte_always_inline int perf_process_last_stage(struct rte_mempool *const pool, enum evt_prod_type prod_type, struct rte_event *const ev, struct worker_data *const w, void *bufs[], int const buf_sz, uint8_t count) { void *to_free_in_bulk; /* release fence here ensures event_prt is * stored before updating the number of * processed packets for worker lcores */ rte_atomic_thread_fence(__ATOMIC_RELEASE); w->processed_pkts++; if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) { struct rte_crypto_op *op = ev->event_ptr; struct rte_mbuf *m; if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { if (op->sym->m_dst == NULL) m = op->sym->m_src; else m = op->sym->m_dst; to_free_in_bulk = m; } else { to_free_in_bulk = op->asym->modex.result.data; } rte_crypto_op_free(op); } else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) { return count; } else { to_free_in_bulk = ev->event_ptr; } bufs[count++] = to_free_in_bulk; if (unlikely(count == buf_sz)) { count = 0; rte_mempool_put_bulk(pool, bufs, buf_sz); } return count; } static __rte_always_inline uint8_t perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_type prod_type, struct rte_event *const ev, struct worker_data *const w, void *bufs[], int const buf_sz, uint8_t count) { uint64_t latency; struct perf_elt *pe; void *to_free_in_bulk; /* Release fence here ensures event_prt is stored before updating the number of processed * packets for worker lcores. */ rte_atomic_thread_fence(__ATOMIC_RELEASE); w->processed_pkts++; if (prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) { struct rte_crypto_op *op = ev->event_ptr; struct rte_mbuf *m; if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) { if (op->sym->m_dst == NULL) m = op->sym->m_src; else m = op->sym->m_dst; to_free_in_bulk = m; pe = rte_pktmbuf_mtod(m, struct perf_elt *); } else { pe = RTE_PTR_ADD(op->asym->modex.result.data, op->asym->modex.result.length); to_free_in_bulk = op->asym->modex.result.data; } rte_crypto_op_free(op); } else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) { return count; } else { pe = ev->event_ptr; to_free_in_bulk = pe; } latency = rte_get_timer_cycles() - pe->timestamp; w->latency += latency; bufs[count++] = to_free_in_bulk; if (unlikely(count == buf_sz)) { count = 0; rte_mempool_put_bulk(pool, bufs, buf_sz); } return count; } static __rte_always_inline void perf_process_vector_last_stage(struct rte_mempool *const pool, struct rte_mempool *const ca_pool, struct rte_event *const ev, struct worker_data *const w, const bool enable_fwd_latency) { struct rte_event_vector *vec = ev->vec; struct rte_crypto_op *cop; void *bufs[vec->nb_elem]; struct perf_elt *pe; uint64_t latency; int i; /* Release fence here ensures event_prt is stored before updating the number of processed * packets for worker lcores. */ rte_atomic_thread_fence(__ATOMIC_RELEASE); w->processed_pkts += vec->nb_elem; if (enable_fwd_latency) { pe = perf_elt_from_vec_get(vec); latency = rte_get_timer_cycles() - pe->timestamp; w->latency += latency; } for (i = 0; i < vec->nb_elem; i++) { cop = vec->ptrs[i]; if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) bufs[i] = cop->sym->m_dst == NULL ? cop->sym->m_src : cop->sym->m_dst; else bufs[i] = cop->asym->modex.result.data; } rte_mempool_put_bulk(pool, bufs, vec->nb_elem); rte_mempool_put_bulk(ca_pool, (void * const *)vec->ptrs, vec->nb_elem); rte_mempool_put(rte_mempool_from_obj(vec), vec); } static inline int perf_nb_event_ports(struct evt_options *opt) { return evt_nr_active_lcores(opt->wlcores) + evt_nr_active_lcores(opt->plcores); } int perf_test_result(struct evt_test *test, struct evt_options *opt); int perf_opt_check(struct evt_options *opt, uint64_t nb_queues); int perf_test_setup(struct evt_test *test, struct evt_options *opt); int perf_ethdev_setup(struct evt_test *test, struct evt_options *opt); int perf_cryptodev_setup(struct evt_test *test, struct evt_options *opt); int perf_dmadev_setup(struct evt_test *test, struct evt_options *opt); int perf_mempool_setup(struct evt_test *test, struct evt_options *opt); int perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, uint8_t stride, uint8_t nb_queues, const struct rte_event_port_conf *port_conf); int perf_event_dev_service_setup(uint8_t dev_id); int perf_launch_lcores(struct evt_test *test, struct evt_options *opt, int (*worker)(void *)); void perf_opt_dump(struct evt_options *opt, uint8_t nb_queues); void perf_test_destroy(struct evt_test *test, struct evt_options *opt); void perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt); void perf_cryptodev_destroy(struct evt_test *test, struct evt_options *opt); void perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt); void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt); void perf_ethdev_rx_stop(struct evt_test *test, struct evt_options *opt); void perf_mempool_destroy(struct evt_test *test, struct evt_options *opt); void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id, uint8_t port_id, struct rte_event events[], uint16_t nb_enq, uint16_t nb_deq); #endif /* _TEST_PERF_COMMON_ */