Skip to content

Instantly share code, notes, and snippets.

@andreybleme
Last active January 20, 2021 01:42
Show Gist options
  • Save andreybleme/f2c80a22dcb6817fc06e22265b12d976 to your computer and use it in GitHub Desktop.
Save andreybleme/f2c80a22dcb6817fc06e22265b12d976 to your computer and use it in GitHub Desktop.
DPDK application to reply ICMP packets received in a Linux server - Advanced Operating Systems DCC/UFMG (Lucas Andrey Caldeira Bleme)
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2015 Intel Corporation
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_byteorder.h>
#include <rte_icmp.h>
#include <rte_ip.h>
/* Requested RX descriptor count per queue; port_init() passes it through
 * rte_eth_dev_adjust_nb_rx_tx_desc() before setting up the queue. */
#define RX_RING_SIZE 1024
/* Requested TX descriptor count per queue; adjusted the same way as
 * RX_RING_SIZE in port_init(). */
#define TX_RING_SIZE 1024
/* Number of mbufs per available port; main() multiplies this by the port
 * count when creating the packet mbuf pool. */
#define NUM_MBUFS 8191
/* Cache size argument handed to rte_pktmbuf_pool_create() in main(). */
#define MBUF_CACHE_SIZE 250
/* Capacity of the mbuf array in lcore_main() and the maximum number of
 * packets requested from a single rte_eth_rx_burst() call. */
#define BURST_SIZE 1024
/*
 * Baseline Ethernet device configuration that port_init() copies before
 * adjusting TX offloads. Only the maximum RX packet length is set
 * explicitly; every other field is zero-initialized.
 */
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
},
};
/* ICMP echo responder built on the DPDK basicfwd.c skeleton forwarding example. */
/*
* Initializes a given port using global settings and with the RX buffers
* coming from the mbuf_pool passed as a parameter.
*/
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
struct rte_eth_conf port_conf = port_conf_default;
const uint16_t rx_rings = 1, tx_rings = 1;
uint16_t nb_rxd = RX_RING_SIZE;
uint16_t nb_txd = TX_RING_SIZE;
int retval;
uint16_t q;
struct rte_eth_dev_info dev_info;
struct rte_eth_txconf txconf;
if (!rte_eth_dev_is_valid_port(port))
return -1;
retval = rte_eth_dev_info_get(port, &dev_info);
if (retval != 0) {
printf("Error during getting device (port %u) info: %s\n",
port, strerror(-retval));
return retval;
}
if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
port_conf.txmode.offloads |=
DEV_TX_OFFLOAD_MBUF_FAST_FREE;
/* Configure the Ethernet device. */
retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
if (retval != 0)
return retval;
retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
if (retval != 0)
return retval;
/* Allocate and set up 1 RX queue per Ethernet port. */
for (q = 0; q < rx_rings; q++) {
retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
rte_eth_dev_socket_id(port), NULL, mbuf_pool);
if (retval < 0)
return retval;
}
txconf = dev_info.default_txconf;
txconf.offloads = port_conf.txmode.offloads;
/* Allocate and set up 1 TX queue per Ethernet port. */
for (q = 0; q < tx_rings; q++) {
retval = rte_eth_tx_queue_setup(port, q, nb_txd,
rte_eth_dev_socket_id(port), &txconf);
if (retval < 0)
return retval;
}
/* Start the Ethernet port. */
retval = rte_eth_dev_start(port);
if (retval < 0)
return retval;
/* Display the port MAC address. */
struct rte_ether_addr addr;
retval = rte_eth_macaddr_get(port, &addr);
if (retval != 0)
return retval;
printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
" %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
port,
addr.addr_bytes[0], addr.addr_bytes[1],
addr.addr_bytes[2], addr.addr_bytes[3],
addr.addr_bytes[4], addr.addr_bytes[5]);
/* Enable RX in promiscuous mode for the Ethernet device. */
retval = rte_eth_promiscuous_enable(port);
if (retval != 0)
return retval;
return 0;
}
/*
* The lcore main. This is the main thread that does the work, reading from
* an input port and writing to an output port.
*/
static __rte_noreturn void
lcore_main(void)
{
uint16_t port = 1;
uint16_t queue = 0;
struct rte_mbuf *pkt;
struct rte_mbuf *bufs[BURST_SIZE];
struct rte_ether_hdr *eth_h;
struct rte_ether_addr eth_addr;
struct rte_icmp_hdr *icmp_h;
struct rte_ipv4_hdr *ip_h;
uint32_t ip_addr;
uint32_t cksum;
uint16_t nb_replies;
uint16_t nb_tx;
// Performance
uint64_t hz;
uint64_t begin;
uint64_t elapsed_cycles = 0;
uint64_t microseconds = 0;
/*
* Check that the port is on the same NUMA node as the polling thread
* for best performance.
*/
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
printf("\nCore %u receiving packages. [Ctrl+C to quit]\n",
rte_lcore_id());
// Init performance counters
hz = rte_get_timer_hz();
begin = rte_rdtsc_precise();
// Run until the application is quit or killed.
for (;;) {
// Get burst of RX packets
const uint16_t nb_rx = rte_eth_rx_burst(port, queue, bufs, BURST_SIZE);
for (uint8_t i = 0; i < nb_rx; i++) {
if (likely(i < nb_rx - 1))
rte_prefetch0(rte_pktmbuf_mtod(bufs[i + 1], void *));
pkt = bufs[i];
eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
int l2_len = sizeof(struct rte_ether_hdr);
ip_h = (struct rte_ipv4_hdr *) ((char *)eth_h + l2_len);
// Check if packet is a ICMP echo
icmp_h = (struct rte_icmp_hdr *) ((char *)ip_h + sizeof(struct rte_ipv4_hdr));
if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
(icmp_h->icmp_type == RTE_IP_ICMP_ECHO_REQUEST) &&
(icmp_h->icmp_code == 0))) {
rte_pktmbuf_free(pkt);
continue;
}
printf("ICMP packet received here id=%d\n", rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
// Send back the ICMP echo reply, keeping the IP header and setting the RTE_IP_ICMP_ECHO_REPLY
rte_ether_addr_copy(&eth_h->s_addr, &eth_addr);
rte_ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr);
rte_ether_addr_copy(&eth_addr, &eth_h->d_addr);
ip_addr = ip_h->src_addr;
ip_h->src_addr = ip_h->dst_addr;
ip_h->dst_addr = ip_addr;
icmp_h->icmp_type = RTE_IP_ICMP_ECHO_REPLY;
cksum = ~icmp_h->icmp_cksum & 0xffff;
cksum += ~htons(RTE_IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
cksum += htons(RTE_IP_ICMP_ECHO_REPLY << 8);
cksum = (cksum & 0xffff) + (cksum >> 16);
cksum = (cksum & 0xffff) + (cksum >> 16);
icmp_h->icmp_cksum = ~cksum;
bufs[nb_replies++] = pkt;
}
// Send back the existing ICMP echo replies
if (nb_replies > 0) {
nb_tx = rte_eth_tx_burst(port, queue, bufs, nb_replies);
// Retry
if (unlikely(nb_tx < nb_replies)) {
uint32_t retry = 0;
while (nb_tx < nb_replies && retry++ < 64) {
rte_delay_us(1);
nb_tx += rte_eth_tx_burst(port, queue, &bufs[nb_tx], nb_replies - nb_tx);
}
}
if (unlikely(nb_tx < nb_replies)) {
do {
rte_pktmbuf_free(bufs[nb_tx]);
} while (++nb_tx < nb_replies);
}
printf("ICMP packet replied id=%d\n", rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
// Terminate performance counters
elapsed_cycles = rte_rdtsc_precise() - begin;
microseconds = elapsed_cycles * 1000000 / hz;
printf("\nICMP packet replied time: %u", microseconds);
continue;
}
}
}
/*
 * The main function, which does initialization and calls the per-lcore
 * function.
 *
 * Initializes the EAL, creates the mbuf pool, brings up port 1 and then
 * runs lcore_main() on the calling (main) lcore. Any initialization
 * failure terminates the process through rte_exit().
 */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned nb_ports;
	uint16_t portid = 1;

	/* Initialize the Environment Abstraction Layer (EAL). */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

	/* Skip the arguments consumed by the EAL. */
	argc -= ret;
	argv += ret;

	/*
	 * Only port 1 is used, so require exactly that port to be present
	 * instead of the forwarding skeleton's "even number of ports" rule,
	 * which rejected valid setups such as three available ports.
	 */
	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0 || !rte_eth_dev_is_valid_port(portid))
		rte_exit(EXIT_FAILURE, "Error: port %" PRIu16 " is not available\n",
				portid);

	/* Creates a new mempool in memory to hold the mbufs. */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
			MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* Initialize only port 1. */
	if (port_init(portid, mbuf_pool) != 0)
		rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n", portid);

	if (rte_lcore_count() > 1)
		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");

	/* Call lcore_main on the main core only; it does not return. */
	lcore_main();

	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment