From ec50dd4634ae06091e61f42b7ba975f9ed510ad0 Mon Sep 17 00:00:00 2001 From: Gonglei Date: Thu, 25 Jun 2015 14:24:10 +0800 Subject: rocker: fix memory leak Meanwhile, using g_new0 instead of g_malloc0, refer to commit 5839e53. Signed-off-by: Gonglei Message-id: 1435213450-6700-1-git-send-email-arei.gonglei@huawei.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 4d25842509..7e060153ab 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -96,7 +96,7 @@ World *rocker_get_world(Rocker *r, enum rocker_world_type type) RockerSwitch *qmp_query_rocker(const char *name, Error **errp) { - RockerSwitch *rocker = g_malloc0(sizeof(*rocker)); + RockerSwitch *rocker; Rocker *r; r = rocker_find(name); @@ -106,6 +106,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp) return NULL; } + rocker = g_new0(RockerSwitch, 1); rocker->name = g_strdup(r->name); rocker->id = r->switch_id; rocker->ports = r->fp_ports; -- cgit v1.2.3-70-g09d2 From 5df6a1855b62dc653515d919e48c5b6f00c48f32 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 25 Jun 2015 10:18:05 +0100 Subject: e1000: flush packets when link comes up e1000_can_receive() checks the link up status register bit. If the bit is clear, packets will be queued and the peer may disable receive to avoid wasting CPU reading packets that cannot be delivered. The queue must be flushed once the link comes back up again. This patch fixes broken e1000 receive with Mac OS X Snow Leopard guests and tap networking. Flushing the queue invokes the async send callback, which re-enables tap fd read. Reported-by: Jonathan Liu Signed-off-by: Stefan Hajnoczi Reviewed-by: Fam Zheng Message-id: 1435223885-12745-1-git-send-email-stefanha@redhat.com --- hw/net/e1000.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'hw') diff --git a/hw/net/e1000.c b/hw/net/e1000.c index bab8e2abfb..5c6bcd0014 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -185,6 +185,9 @@ e1000_link_up(E1000State *s) { s->mac_reg[STATUS] |= E1000_STATUS_LU; s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static bool -- cgit v1.2.3-70-g09d2 From b83b5f2ef9753713c2fb64ff4cae7cb1e080624e Mon Sep 17 00:00:00 2001 From: Brian Kress Date: Tue, 23 Jun 2015 11:49:25 -0400 Subject: vmxnet3: Fix incorrect small packet padding When running ESXi under qemu there is an issue with the ESXi guest discarding packets that are too short. The guest discards any packets under the normal minimum length for an ethernet packet (60). This results in odd behaviour where other hosts or VMs on other hosts can communicate with the ESXi guest just fine (since there's a physical NIC somewhere doing padding), but VMs on the host and the host itself cannot because the ARP request packets are too small for the ESXi host to accept. Someone in the past thought this was worth fixing, and added code to the vmxnet3 qemu emulation such that if it is receiving packets smaller than 60 bytes to pad the packet out to 60. Unfortunately this code is wrong (or at least in the wrong place). It does so BEFORE before taking into account the vnet_hdr at the front of the packet added by the tap device. As a result, it might add padding, but it never adds enough. Specifically it adds 10 less (the length of the vnet_hdr) than it needs to. The following (hopefully "obviously correct") patch simply swaps the order of processing the vnet header and the padding. With this patch an ESXi guest is able to communicate with the host or other local VMs. Signed-off-by: Brian Kress Reviewed-by: Paolo Bonzini Reviewed-by: Dmitry Fleytman Signed-off-by: Stefan Hajnoczi --- hw/net/vmxnet3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'hw') diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 104a0f599b..706e0606a9 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1879,6 +1879,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } + if (s->peer_has_vhdr) { + vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + /* Pad to minimum Ethernet frame length */ if (size < sizeof(min_buf)) { memcpy(min_buf, buf, size); @@ -1887,12 +1893,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); - buf += sizeof(struct virtio_net_hdr); - size -= sizeof(struct virtio_net_hdr); - } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); -- cgit v1.2.3-70-g09d2 From 66851f640b73a5a84160ee6ab19ab429f68bbb9f Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Tue, 30 Jun 2015 19:25:53 -0700 Subject: rocker: don't queue receive pkts when port is disabled Commit 6e99c63 ("net/socket: Drop net_socket_can_send") changed the semantics around .can_receive for sockets to now require the device to flush queued pkts when transitioning to a .can_receive=true state. Rocker device was not flushing the queue on .can_receive=true transition, so the receiver was stuck. But, turns out we really don't want any queuing at all on the port when the port is disabled, otherwise when the port transitions to enabled, we'd receive and forward stale pkts that really should have been dropped. So, let's remove .can_receive so avoid queuing and drop the pkt in .receive if the port is disabled. Signed-off-by: Scott Feldman Reviewed-by: Fam Zheng Message-id: 1435717553-36187-1-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker_fp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'hw') diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c index d8d934c396..c693ae5081 100644 --- a/hw/net/rocker/rocker_fp.c +++ b/hw/net/rocker/rocker_fp.c @@ -125,18 +125,21 @@ int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) return ROCKER_OK; } -static int fp_port_can_receive(NetClientState *nc) -{ - FpPort *port = qemu_get_nic_opaque(nc); - - return port->enabled; -} - static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { FpPort *port = qemu_get_nic_opaque(nc); + /* If the port is disabled, we want to drop this pkt + * now rather than queing it for later. We don't want + * any stale pkts getting into the device when the port + * transitions to enabled. + */ + + if (!port->enabled) { + return -1; + } + return world_ingress(port->world, port->pport, iov, iovcnt); } @@ -165,7 +168,6 @@ static void fp_port_set_link_status(NetClientState *nc) static NetClientInfo fp_port_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), - .can_receive = fp_port_can_receive, .receive = fp_port_receive, .receive_iov = fp_port_receive_iov, .cleanup = fp_port_cleanup, -- cgit v1.2.3-70-g09d2 From d1a88c96b7f94c8e12c07518f55fce8873e814d0 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:08 -0700 Subject: rocker: fix misplaced break statement Premature break in switch case block. This particular case (group L2 rewrite) will be used for L2 LAG and L3 ECMP support, neither of which are enabled in the guest driver at this time, but are under development. Signed-off-by: Scott Feldman Reported-by: Paolo Bonzini Message-id: 1435746792-41278-2-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker_of_dpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index b25a17d6d7..02b3896a76 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -2525,7 +2525,6 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->has_set_vlan_id = true; ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id); } - break; if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) { ngroup->has_set_eth_src = true; ngroup->set_eth_src = @@ -2536,6 +2535,7 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->set_eth_dst = qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a); } + break; case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: ngroup->has_vlan_id = true; -- cgit v1.2.3-70-g09d2 From f211fcd75fec96ec9850885622ed028c6f7ebdf4 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:09 -0700 Subject: rocker: fix missing break statements Signed-off-by: Scott Feldman Reported-by: Paolo Bonzini Message-id: 1435746792-41278-3-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'hw') diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 7e060153ab..6e3d35ae84 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -193,11 +193,13 @@ static int tx_consume(Rocker *r, DescInfo *info) if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { return -ROCKER_EINVAL; } + break; case ROCKER_TX_OFFLOAD_TSO: if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { return -ROCKER_EINVAL; } + break; } if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { -- cgit v1.2.3-70-g09d2 From 96497af0afd60e57749316f1bc196b417055c585 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:10 -0700 Subject: rocker: return -1 when dropping packet on ingress Signed-off-by: Scott Feldman Message-id: 1435746792-41278-4-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- hw/net/rocker/rocker_world.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c index b991e871d3..a6b18f1754 100644 --- a/hw/net/rocker/rocker_world.c +++ b/hw/net/rocker/rocker_world.c @@ -32,7 +32,7 @@ ssize_t world_ingress(World *world, uint32_t pport, return world->ops->ig(world, pport, iov, iovcnt); } - return iov_size(iov, iovcnt); + return -1; } int world_do_cmd(World *world, DescInfo *info, -- cgit v1.2.3-70-g09d2 From d0d2555852c5e684a97dce787d3c2a65b9a6d64c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 1 Jul 2015 03:33:11 -0700 Subject: rocker: mark copy-to-cpu pkts as forwarding offloaded For pkts copied to the CPU (to be processed by guest driver), mark the Rx descriptor with flag "OFFLOAD_FWD" to indicate device has already forwarded pkt. The guest driver will use this indicator to avoid duplicate forwarding in the guest OS. Examples include bcast/mcast/unknown ucast pkts flooded to bridged ports. We want to avoid both the device and the guest bridge driver flooding these pkts, which would result in duplicates pkts on the wire. Packet sampling, such as sFlow, can also use this technique to mark pkts for the guest OS to record but otherwise drop. Signed-off-by: Scott Feldman Message-id: 1435746792-41278-5-git-send-email-sfeldma@gmail.com Signed-off-by: Stefan Hajnoczi --- docs/specs/rocker.txt | 4 ++++ hw/net/rocker/rocker.c | 6 +++++- hw/net/rocker/rocker.h | 2 +- hw/net/rocker/rocker_hw.h | 1 + hw/net/rocker/rocker_of_dpa.c | 5 ++++- 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt index 0af5c61585..1c743515c1 100644 --- a/docs/specs/rocker.txt +++ b/docs/specs/rocker.txt @@ -637,6 +637,7 @@ The TLVs for Rx descriptor buffer are: (1 << 5): TCP packet (1 << 6): UDP packet (1 << 7): TCP/UDP csum good + (1 << 8): Offload forward RX_CSUM 2 IP calculated checksum: IPv4: IP payload csum IPv6: header and payload csum @@ -645,6 +646,9 @@ The TLVs for Rx descriptor buffer are: RX_FRAG_MAX_LEN 2 Packet maximum fragment length RX_FRAG_LEN 2 Actual packet fragment length after receive +Offload forward RX_FLAG indicates the device has already forwarded the packet +so the host CPU should not also forward the packet. + Possible status return codes in descriptor on completion are: DESC_COMP_ERR reason diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 6e3d35ae84..47d080fd33 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -603,7 +603,7 @@ static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, } int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt) + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) { Rocker *r = world_rocker(world); PCIDevice *dev = (PCIDevice *)r; @@ -646,6 +646,10 @@ int rx_produce(World *world, uint32_t pport, goto out; } + if (copy_to_cpu) { + rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; + } + /* XXX calc rx flags/csum */ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index b3310b61eb..f9c80f8013 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -77,7 +77,7 @@ int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, uint16_t vlan_id); int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt); + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu); int rocker_port_eg(Rocker *r, uint32_t pport, const struct iovec *iov, int iovcnt); diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index fe639badd4..8c50830325 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -250,6 +250,7 @@ enum { #define ROCKER_RX_FLAGS_TCP (1 << 5) #define ROCKER_RX_FLAGS_UDP (1 << 6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8) /* Tx msg */ enum { diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 02b3896a76..874fb01d69 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -825,6 +825,8 @@ static OfDpaGroup *of_dpa_group_alloc(uint32_t id) static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, OfDpaGroup *group) { + uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu; + if (group->l2_interface.pop_vlan) { of_dpa_flow_pkt_strip_vlan(fc); } @@ -837,7 +839,8 @@ static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, */ if (group->l2_interface.out_pport == 0) { - rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt, + copy_to_cpu); } else if (group->l2_interface.out_pport != fc->in_pport) { rocker_port_eg(world_rocker(fc->of_dpa->world), group->l2_interface.out_pport, -- cgit v1.2.3-70-g09d2