staging: hv: move hv_netvsc out of staging area

hv_netvsc has been reviewed on netdev mailing list on 6/09/2011.
All recommended changes have been made. We are requesting to move
it out of staging area.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: KY Srinivasan <kys@microsoft.com>
Signed-off-by: Mike Sterling <Mike.Sterling@microsoft.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
Haiyang Zhang
2011-11-28 13:35:35 -08:00
committed by Greg Kroah-Hartman
parent 3b724ca145
commit 95fa0405c5
11 changed files with 12 additions and 9 deletions

View File

@@ -0,0 +1,5 @@
config HYPERV_NET
tristate "Microsoft Hyper-V virtual network driver"
depends on HYPERV
help
Select this option to enable the Hyper-V virtual network driver.

View File

@@ -0,0 +1,3 @@
obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o

File diff suppressed because it is too large Load Diff

939
drivers/net/hyperv/netvsc.c Normal file
View File

@@ -0,0 +1,939 @@
/*
* Copyright (c) 2009, Microsoft Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include "hyperv_net.h"
static struct netvsc_device *alloc_net_device(struct hv_device *device)
{
struct netvsc_device *net_device;
struct net_device *ndev = hv_get_drvdata(device);
net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
if (!net_device)
return NULL;
net_device->destroy = false;
net_device->dev = device;
net_device->ndev = ndev;
hv_set_drvdata(device, net_device);
return net_device;
}
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
{
struct netvsc_device *net_device;
net_device = hv_get_drvdata(device);
if (net_device && net_device->destroy)
net_device = NULL;
return net_device;
}
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
{
struct netvsc_device *net_device;
net_device = hv_get_drvdata(device);
if (!net_device)
goto get_in_err;
if (net_device->destroy &&
atomic_read(&net_device->num_outstanding_sends) == 0)
net_device = NULL;
get_in_err:
return net_device;
}
static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
{
struct nvsp_message *revoke_packet;
int ret = 0;
struct net_device *ndev = net_device->ndev;
/*
* If we got a section count, it means we received a
* SendReceiveBufferComplete msg (ie sent
* NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
* to send a revoke msg here
*/
if (net_device->recv_section_cnt) {
/* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
revoke_packet->hdr.msg_type =
NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
revoke_packet->msg.v1_msg.
revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
ret = vmbus_sendpacket(net_device->dev->channel,
revoke_packet,
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
/*
* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
*/
if (ret != 0) {
netdev_err(ndev, "unable to send "
"revoke receive buffer to netvsp\n");
return ret;
}
}
/* Teardown the gpadl on the vsp end */
if (net_device->recv_buf_gpadl_handle) {
ret = vmbus_teardown_gpadl(net_device->dev->channel,
net_device->recv_buf_gpadl_handle);
/* If we failed here, we might as well return and have a leak
* rather than continue and a bugchk
*/
if (ret != 0) {
netdev_err(ndev,
"unable to teardown receive buffer's gpadl\n");
return ret;
}
net_device->recv_buf_gpadl_handle = 0;
}
if (net_device->recv_buf) {
/* Free up the receive buffer */
free_pages((unsigned long)net_device->recv_buf,
get_order(net_device->recv_buf_size));
net_device->recv_buf = NULL;
}
if (net_device->recv_section) {
net_device->recv_section_cnt = 0;
kfree(net_device->recv_section);
net_device->recv_section = NULL;
}
return ret;
}
static int netvsc_init_recv_buf(struct hv_device *device)
{
int ret = 0;
int t;
struct netvsc_device *net_device;
struct nvsp_message *init_packet;
struct net_device *ndev;
net_device = get_outbound_net_device(device);
if (!net_device)
return -ENODEV;
ndev = net_device->ndev;
net_device->recv_buf =
(void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(net_device->recv_buf_size));
if (!net_device->recv_buf) {
netdev_err(ndev, "unable to allocate receive "
"buffer of size %d\n", net_device->recv_buf_size);
ret = -ENOMEM;
goto cleanup;
}
/*
* Establish the gpadl handle for this buffer on this
* channel. Note: This call uses the vmbus connection rather
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
net_device->recv_buf_size,
&net_device->recv_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
"unable to establish receive buffer's gpadl\n");
goto cleanup;
}
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
init_packet->msg.v1_msg.send_recv_buf.
gpadl_handle = net_device->recv_buf_gpadl_handle;
init_packet->msg.v1_msg.
send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
/* Send the gpadl notification request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret != 0) {
netdev_err(ndev,
"unable to send receive buffer's gpadl to netvsp\n");
goto cleanup;
}
t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
BUG_ON(t == 0);
/* Check the response */
if (init_packet->msg.v1_msg.
send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
netdev_err(ndev, "Unable to complete receive buffer "
"initialization with NetVsp - status %d\n",
init_packet->msg.v1_msg.
send_recv_buf_complete.status);
ret = -EINVAL;
goto cleanup;
}
/* Parse the response */
net_device->recv_section_cnt = init_packet->msg.
v1_msg.send_recv_buf_complete.num_sections;
net_device->recv_section = kmemdup(init_packet->msg.v1_msg.send_recv_buf_complete.sections,
net_device->recv_section_cnt * sizeof(struct nvsp_1_receive_buffer_section),
GFP_KERNEL);
if (net_device->recv_section == NULL) {
ret = -EINVAL;
goto cleanup;
}
/*
* For 1st release, there should only be 1 section that represents the
* entire receive buffer
*/
if (net_device->recv_section_cnt != 1 ||
net_device->recv_section->offset != 0) {
ret = -EINVAL;
goto cleanup;
}
goto exit;
cleanup:
netvsc_destroy_recv_buf(net_device);
exit:
return ret;
}
static int netvsc_connect_vsp(struct hv_device *device)
{
int ret, t;
struct netvsc_device *net_device;
struct nvsp_message *init_packet;
int ndis_version;
struct net_device *ndev;
net_device = get_outbound_net_device(device);
if (!net_device)
return -ENODEV;
ndev = net_device->ndev;
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
init_packet->msg.init_msg.init.min_protocol_ver =
NVSP_MIN_PROTOCOL_VERSION;
init_packet->msg.init_msg.init.max_protocol_ver =
NVSP_MAX_PROTOCOL_VERSION;
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret != 0)
goto cleanup;
t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto cleanup;
}
if (init_packet->msg.init_msg.init_complete.status !=
NVSP_STAT_SUCCESS) {
ret = -EINVAL;
goto cleanup;
}
if (init_packet->msg.init_msg.init_complete.
negotiated_protocol_ver != NVSP_PROTOCOL_VERSION_1) {
ret = -EPROTO;
goto cleanup;
}
/* Send the ndis version */
memset(init_packet, 0, sizeof(struct nvsp_message));
ndis_version = 0x00050000;
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
init_packet->msg.v1_msg.
send_ndis_ver.ndis_major_ver =
(ndis_version & 0xFFFF0000) >> 16;
init_packet->msg.v1_msg.
send_ndis_ver.ndis_minor_ver =
ndis_version & 0xFFFF;
/* Send the init request */
ret = vmbus_sendpacket(device->channel, init_packet,
sizeof(struct nvsp_message),
(unsigned long)init_packet,
VM_PKT_DATA_INBAND, 0);
if (ret != 0)
goto cleanup;
/* Post the big receive buffer to NetVSP */
ret = netvsc_init_recv_buf(device);
cleanup:
return ret;
}
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
netvsc_destroy_recv_buf(net_device);
}
/*
* netvsc_device_remove - Callback when the root bus device is removed
*/
int netvsc_device_remove(struct hv_device *device)
{
struct netvsc_device *net_device;
struct hv_netvsc_packet *netvsc_packet, *pos;
unsigned long flags;
net_device = hv_get_drvdata(device);
spin_lock_irqsave(&device->channel->inbound_lock, flags);
net_device->destroy = true;
spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
/* Wait for all send completions */
while (atomic_read(&net_device->num_outstanding_sends)) {
dev_info(&device->device,
"waiting for %d requests to complete...\n",
atomic_read(&net_device->num_outstanding_sends));
udelay(100);
}
netvsc_disconnect_vsp(net_device);
/*
* Since we have already drained, we don't need to busy wait
* as was done in final_release_stor_device()
* Note that we cannot set the ext pointer to NULL until
* we have drained - to drain the outgoing packets, we need to
* allow incoming packets.
*/
spin_lock_irqsave(&device->channel->inbound_lock, flags);
hv_set_drvdata(device, NULL);
spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
/*
* At this point, no one should be accessing net_device
* except in here
*/
dev_notice(&device->device, "net device safe to remove\n");
/* Now, we can close the channel safely */
vmbus_close(device->channel);
/* Release all resources */
list_for_each_entry_safe(netvsc_packet, pos,
&net_device->recv_pkt_list, list_ent) {
list_del(&netvsc_packet->list_ent);
kfree(netvsc_packet);
}
kfree(net_device);
return 0;
}
static void netvsc_send_completion(struct hv_device *device,
struct vmpacket_descriptor *packet)
{
struct netvsc_device *net_device;
struct nvsp_message *nvsp_packet;
struct hv_netvsc_packet *nvsc_packet;
struct net_device *ndev;
net_device = get_inbound_net_device(device);
if (!net_device)
return;
ndev = net_device->ndev;
nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
(packet->offset8 << 3));
if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
(nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
(nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
/* Copy the response back */
memcpy(&net_device->channel_init_pkt, nvsp_packet,
sizeof(struct nvsp_message));
complete(&net_device->channel_init_wait);
} else if (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
/* Get the send context */
nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
packet->trans_id;
/* Notify the layer above us */
nvsc_packet->completion.send.send_completion(
nvsc_packet->completion.send.send_completion_ctx);
atomic_dec(&net_device->num_outstanding_sends);
} else {
netdev_err(ndev, "Unknown send completion packet type- "
"%d received!!\n", nvsp_packet->hdr.msg_type);
}
}
int netvsc_send(struct hv_device *device,
struct hv_netvsc_packet *packet)
{
struct netvsc_device *net_device;
int ret = 0;
struct nvsp_message sendMessage;
struct net_device *ndev;
net_device = get_outbound_net_device(device);
if (!net_device)
return -ENODEV;
ndev = net_device->ndev;
sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (packet->is_data_pkt) {
/* 0 is RMC_DATA; */
sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
} else {
/* 1 is RMC_CONTROL; */
sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
}
/* Not using send buffer section */
sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
0xFFFFFFFF;
sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
if (packet->page_buf_cnt) {
ret = vmbus_sendpacket_pagebuffer(device->channel,
packet->page_buf,
packet->page_buf_cnt,
&sendMessage,
sizeof(struct nvsp_message),
(unsigned long)packet);
} else {
ret = vmbus_sendpacket(device->channel, &sendMessage,
sizeof(struct nvsp_message),
(unsigned long)packet,
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
if (ret != 0)
netdev_err(ndev, "Unable to send packet %p ret %d\n",
packet, ret);
else
atomic_inc(&net_device->num_outstanding_sends);
return ret;
}
static void netvsc_send_recv_completion(struct hv_device *device,
u64 transaction_id)
{
struct nvsp_message recvcompMessage;
int retries = 0;
int ret;
struct net_device *ndev;
struct netvsc_device *net_device = hv_get_drvdata(device);
ndev = net_device->ndev;
recvcompMessage.hdr.msg_type =
NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
/* FIXME: Pass in the status */
recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status =
NVSP_STAT_SUCCESS;
retry_send_cmplt:
/* Send the completion */
ret = vmbus_sendpacket(device->channel, &recvcompMessage,
sizeof(struct nvsp_message), transaction_id,
VM_PKT_COMP, 0);
if (ret == 0) {
/* success */
/* no-op */
} else if (ret == -EAGAIN) {
/* no more room...wait a bit and attempt to retry 3 times */
retries++;
netdev_err(ndev, "unable to send receive completion pkt"
" (tid %llx)...retrying %d\n", transaction_id, retries);
if (retries < 4) {
udelay(100);
goto retry_send_cmplt;
} else {
netdev_err(ndev, "unable to send receive "
"completion pkt (tid %llx)...give up retrying\n",
transaction_id);
}
} else {
netdev_err(ndev, "unable to send receive "
"completion pkt - %llx\n", transaction_id);
}
}
/* Send a receive completion packet to RNDIS device (ie NetVsp) */
static void netvsc_receive_completion(void *context)
{
struct hv_netvsc_packet *packet = context;
struct hv_device *device = (struct hv_device *)packet->device;
struct netvsc_device *net_device;
u64 transaction_id = 0;
bool fsend_receive_comp = false;
unsigned long flags;
struct net_device *ndev;
/*
* Even though it seems logical to do a GetOutboundNetDevice() here to
* send out receive completion, we are using GetInboundNetDevice()
* since we may have disable outbound traffic already.
*/
net_device = get_inbound_net_device(device);
if (!net_device)
return;
ndev = net_device->ndev;
/* Overloading use of the lock. */
spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
packet->xfer_page_pkt->count--;
/*
* Last one in the line that represent 1 xfer page packet.
* Return the xfer page packet itself to the freelist
*/
if (packet->xfer_page_pkt->count == 0) {
fsend_receive_comp = true;
transaction_id = packet->completion.recv.recv_completion_tid;
list_add_tail(&packet->xfer_page_pkt->list_ent,
&net_device->recv_pkt_list);
}
/* Put the packet back */
list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
/* Send a receive completion for the xfer page packet */
if (fsend_receive_comp)
netvsc_send_recv_completion(device, transaction_id);
}
static void netvsc_receive(struct hv_device *device,
struct vmpacket_descriptor *packet)
{
struct netvsc_device *net_device;
struct vmtransfer_page_packet_header *vmxferpage_packet;
struct nvsp_message *nvsp_packet;
struct hv_netvsc_packet *netvsc_packet = NULL;
unsigned long start;
unsigned long end, end_virtual;
/* struct netvsc_driver *netvscDriver; */
struct xferpage_packet *xferpage_packet = NULL;
int i, j;
int count = 0, bytes_remain = 0;
unsigned long flags;
struct net_device *ndev;
LIST_HEAD(listHead);
net_device = get_inbound_net_device(device);
if (!net_device)
return;
ndev = net_device->ndev;
/*
* All inbound packets other than send completion should be xfer page
* packet
*/
if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
netdev_err(ndev, "Unknown packet type received - %d\n",
packet->type);
return;
}
nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
(packet->offset8 << 3));
/* Make sure this is a valid nvsp packet */
if (nvsp_packet->hdr.msg_type !=
NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
netdev_err(ndev, "Unknown nvsp packet type received-"
" %d\n", nvsp_packet->hdr.msg_type);
return;
}
vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
netdev_err(ndev, "Invalid xfer page set id - "
"expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
vmxferpage_packet->xfer_pageset_id);
return;
}
/*
* Grab free packets (range count + 1) to represent this xfer
* page packet. +1 to represent the xfer page packet itself.
* We grab it here so that we know exactly how many we can
* fulfil
*/
spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
while (!list_empty(&net_device->recv_pkt_list)) {
list_move_tail(net_device->recv_pkt_list.next, &listHead);
if (++count == vmxferpage_packet->range_cnt + 1)
break;
}
spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
/*
* We need at least 2 netvsc pkts (1 to represent the xfer
* page and at least 1 for the range) i.e. we can handled
* some of the xfer page packet ranges...
*/
if (count < 2) {
netdev_err(ndev, "Got only %d netvsc pkt...needed "
"%d pkts. Dropping this xfer page packet completely!\n",
count, vmxferpage_packet->range_cnt + 1);
/* Return it to the freelist */
spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
for (i = count; i != 0; i--) {
list_move_tail(listHead.next,
&net_device->recv_pkt_list);
}
spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
flags);
netvsc_send_recv_completion(device,
vmxferpage_packet->d.trans_id);
return;
}
/* Remove the 1st packet to represent the xfer page packet itself */
xferpage_packet = (struct xferpage_packet *)listHead.next;
list_del(&xferpage_packet->list_ent);
/* This is how much we can satisfy */
xferpage_packet->count = count - 1;
if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
"this xfer page...got %d\n",
vmxferpage_packet->range_cnt, xferpage_packet->count);
}
/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
for (i = 0; i < (count - 1); i++) {
netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
list_del(&netvsc_packet->list_ent);
/* Initialize the netvsc packet */
netvsc_packet->xfer_page_pkt = xferpage_packet;
netvsc_packet->completion.recv.recv_completion =
netvsc_receive_completion;
netvsc_packet->completion.recv.recv_completion_ctx =
netvsc_packet;
netvsc_packet->device = device;
/* Save this so that we can send it back */
netvsc_packet->completion.recv.recv_completion_tid =
vmxferpage_packet->d.trans_id;
netvsc_packet->total_data_buflen =
vmxferpage_packet->ranges[i].byte_count;
netvsc_packet->page_buf_cnt = 1;
netvsc_packet->page_buf[0].len =
vmxferpage_packet->ranges[i].byte_count;
start = virt_to_phys((void *)((unsigned long)net_device->
recv_buf + vmxferpage_packet->ranges[i].byte_offset));
netvsc_packet->page_buf[0].pfn = start >> PAGE_SHIFT;
end_virtual = (unsigned long)net_device->recv_buf
+ vmxferpage_packet->ranges[i].byte_offset
+ vmxferpage_packet->ranges[i].byte_count - 1;
end = virt_to_phys((void *)end_virtual);
/* Calculate the page relative offset */
netvsc_packet->page_buf[0].offset =
vmxferpage_packet->ranges[i].byte_offset &
(PAGE_SIZE - 1);
if ((end >> PAGE_SHIFT) != (start >> PAGE_SHIFT)) {
/* Handle frame across multiple pages: */
netvsc_packet->page_buf[0].len =
(netvsc_packet->page_buf[0].pfn <<
PAGE_SHIFT)
+ PAGE_SIZE - start;
bytes_remain = netvsc_packet->total_data_buflen -
netvsc_packet->page_buf[0].len;
for (j = 1; j < NETVSC_PACKET_MAXPAGE; j++) {
netvsc_packet->page_buf[j].offset = 0;
if (bytes_remain <= PAGE_SIZE) {
netvsc_packet->page_buf[j].len =
bytes_remain;
bytes_remain = 0;
} else {
netvsc_packet->page_buf[j].len =
PAGE_SIZE;
bytes_remain -= PAGE_SIZE;
}
netvsc_packet->page_buf[j].pfn =
virt_to_phys((void *)(end_virtual -
bytes_remain)) >> PAGE_SHIFT;
netvsc_packet->page_buf_cnt++;
if (bytes_remain == 0)
break;
}
}
/* Pass it to the upper layer */
rndis_filter_receive(device, netvsc_packet);
netvsc_receive_completion(netvsc_packet->
completion.recv.recv_completion_ctx);
}
}
static void netvsc_channel_cb(void *context)
{
int ret;
struct hv_device *device = context;
struct netvsc_device *net_device;
u32 bytes_recvd;
u64 request_id;
unsigned char *packet;
struct vmpacket_descriptor *desc;
unsigned char *buffer;
int bufferlen = NETVSC_PACKET_SIZE;
struct net_device *ndev;
packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char),
GFP_ATOMIC);
if (!packet)
return;
buffer = packet;
net_device = get_inbound_net_device(device);
if (!net_device)
goto out;
ndev = net_device->ndev;
do {
ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
&bytes_recvd, &request_id);
if (ret == 0) {
if (bytes_recvd > 0) {
desc = (struct vmpacket_descriptor *)buffer;
switch (desc->type) {
case VM_PKT_COMP:
netvsc_send_completion(device, desc);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
netvsc_receive(device, desc);
break;
default:
netdev_err(ndev,
"unhandled packet type %d, "
"tid %llx len %d\n",
desc->type, request_id,
bytes_recvd);
break;
}
/* reset */
if (bufferlen > NETVSC_PACKET_SIZE) {
kfree(buffer);
buffer = packet;
bufferlen = NETVSC_PACKET_SIZE;
}
} else {
/* reset */
if (bufferlen > NETVSC_PACKET_SIZE) {
kfree(buffer);
buffer = packet;
bufferlen = NETVSC_PACKET_SIZE;
}
break;
}
} else if (ret == -ENOBUFS) {
/* Handle large packet */
buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
if (buffer == NULL) {
/* Try again next time around */
netdev_err(ndev,
"unable to allocate buffer of size "
"(%d)!!\n", bytes_recvd);
break;
}
bufferlen = bytes_recvd;
}
} while (1);
out:
kfree(buffer);
return;
}
/*
* netvsc_device_add - Callback when the device belonging to this
* driver is added
*/
int netvsc_device_add(struct hv_device *device, void *additional_info)
{
int ret = 0;
int i;
int ring_size =
((struct netvsc_device_info *)additional_info)->ring_size;
struct netvsc_device *net_device;
struct hv_netvsc_packet *packet, *pos;
struct net_device *ndev;
net_device = alloc_net_device(device);
if (!net_device) {
ret = -ENOMEM;
goto cleanup;
}
/*
* Coming into this function, struct net_device * is
* registered as the driver private data.
* In alloc_net_device(), we register struct netvsc_device *
* as the driver private data and stash away struct net_device *
* in struct netvsc_device *.
*/
ndev = net_device->ndev;
/* Initialize the NetVSC channel extension */
net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
spin_lock_init(&net_device->recv_pkt_list_lock);
INIT_LIST_HEAD(&net_device->recv_pkt_list);
for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
packet = kzalloc(sizeof(struct hv_netvsc_packet) +
(NETVSC_RECEIVE_SG_COUNT *
sizeof(struct hv_page_buffer)), GFP_KERNEL);
if (!packet)
break;
list_add_tail(&packet->list_ent,
&net_device->recv_pkt_list);
}
init_completion(&net_device->channel_init_wait);
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
netvsc_channel_cb, device);
if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret);
goto cleanup;
}
/* Channel is opened */
pr_info("hv_netvsc channel opened successfully\n");
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
goto close;
}
return ret;
close:
/* Now, we can close the channel safely */
vmbus_close(device->channel);
cleanup:
if (net_device) {
list_for_each_entry_safe(packet, pos,
&net_device->recv_pkt_list,
list_ent) {
list_del(&packet->list_ent);
kfree(packet);
}
kfree(net_device);
}
return ret;
}

View File

@@ -0,0 +1,456 @@
/*
* Copyright (c) 2009, Microsoft Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include "hyperv_net.h"
struct net_device_context {
/* point back to our device context */
struct hv_device *device_ctx;
atomic_t avail;
struct delayed_work dwork;
};
#define PACKET_PAGES_LOWATER 8
/* Need this many pages to handle worst case fragmented packet */
#define PACKET_PAGES_HIWATER (MAX_SKB_FRAGS + 2)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
/* no-op so the netdev core doesn't return -EINVAL when modifying the the
* multicast address list in SIOCADDMULTI. hv is setup to get all multicast
* when it calls RndisFilterOnOpen() */
static void netvsc_set_multicast_list(struct net_device *net)
{
}
static int netvsc_open(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *device_obj = net_device_ctx->device_ctx;
int ret = 0;
/* Open up the device */
ret = rndis_filter_open(device_obj);
if (ret != 0) {
netdev_err(net, "unable to open device (ret %d).\n", ret);
return ret;
}
netif_start_queue(net);
return ret;
}
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *device_obj = net_device_ctx->device_ctx;
int ret;
netif_stop_queue(net);
ret = rndis_filter_close(device_obj);
if (ret != 0)
netdev_err(net, "unable to close device (ret %d).\n", ret);
return ret;
}
static void netvsc_xmit_completion(void *context)
{
struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
struct sk_buff *skb = (struct sk_buff *)
(unsigned long)packet->completion.send.send_completion_tid;
kfree(packet);
if (skb) {
struct net_device *net = skb->dev;
struct net_device_context *net_device_ctx = netdev_priv(net);
unsigned int num_pages = skb_shinfo(skb)->nr_frags + 2;
dev_kfree_skb_any(skb);
atomic_add(num_pages, &net_device_ctx->avail);
if (atomic_read(&net_device_ctx->avail) >=
PACKET_PAGES_HIWATER)
netif_wake_queue(net);
}
}
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_netvsc_packet *packet;
int ret;
unsigned int i, num_pages;
/* Add 1 for skb->data and additional one for RNDIS */
num_pages = skb_shinfo(skb)->nr_frags + 1 + 1;
if (num_pages > atomic_read(&net_device_ctx->avail))
return NETDEV_TX_BUSY;
/* Allocate a netvsc packet based on # of frags. */
packet = kzalloc(sizeof(struct hv_netvsc_packet) +
(num_pages * sizeof(struct hv_page_buffer)) +
sizeof(struct rndis_filter_packet), GFP_ATOMIC);
if (!packet) {
/* out of memory, drop packet */
netdev_err(net, "unable to allocate hv_netvsc_packet\n");
dev_kfree_skb(skb);
net->stats.tx_dropped++;
return NETDEV_TX_BUSY;
}
packet->extension = (void *)(unsigned long)packet +
sizeof(struct hv_netvsc_packet) +
(num_pages * sizeof(struct hv_page_buffer));
/* Setup the rndis header */
packet->page_buf_cnt = num_pages;
/* Initialize it from the skb */
packet->total_data_buflen = skb->len;
/* Start filling in the page buffers starting after RNDIS buffer. */
packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
packet->page_buf[1].offset
= (unsigned long)skb->data & (PAGE_SIZE - 1);
packet->page_buf[1].len = skb_headlen(skb);
/* Additional fragments are after SKB data */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
packet->page_buf[i+2].pfn = page_to_pfn(skb_frag_page(f));
packet->page_buf[i+2].offset = f->page_offset;
packet->page_buf[i+2].len = skb_frag_size(f);
}
/* Set the completion routine */
packet->completion.send.send_completion = netvsc_xmit_completion;
packet->completion.send.send_completion_ctx = packet;
packet->completion.send.send_completion_tid = (unsigned long)skb;
ret = rndis_filter_send(net_device_ctx->device_ctx,
packet);
if (ret == 0) {
net->stats.tx_bytes += skb->len;
net->stats.tx_packets++;
atomic_sub(num_pages, &net_device_ctx->avail);
if (atomic_read(&net_device_ctx->avail) < PACKET_PAGES_LOWATER)
netif_stop_queue(net);
} else {
/* we are shutting down or bus overloaded, just drop packet */
net->stats.tx_dropped++;
kfree(packet);
dev_kfree_skb_any(skb);
}
return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
void netvsc_linkstatus_callback(struct hv_device *device_obj,
unsigned int status)
{
struct net_device *net;
struct net_device_context *ndev_ctx;
struct netvsc_device *net_device;
net_device = hv_get_drvdata(device_obj);
net = net_device->ndev;
if (!net) {
netdev_err(net, "got link status but net device "
"not initialized yet\n");
return;
}
if (status == 1) {
netif_carrier_on(net);
netif_wake_queue(net);
ndev_ctx = netdev_priv(net);
schedule_delayed_work(&ndev_ctx->dwork, 0);
schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
} else {
netif_carrier_off(net);
netif_stop_queue(net);
}
}
/*
* netvsc_recv_callback - Callback when we receive a packet from the
* "wire" on the specified device.
*/
int netvsc_recv_callback(struct hv_device *device_obj,
struct hv_netvsc_packet *packet)
{
struct net_device *net = dev_get_drvdata(&device_obj->device);
struct sk_buff *skb;
void *data;
int i;
unsigned long flags;
struct netvsc_device *net_device;
net_device = hv_get_drvdata(device_obj);
net = net_device->ndev;
if (!net) {
netdev_err(net, "got receive callback but net device"
" not initialized yet\n");
return 0;
}
/* Allocate a skb - TODO direct I/O to pages? */
skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
if (unlikely(!skb)) {
++net->stats.rx_dropped;
return 0;
}
/* for kmap_atomic */
local_irq_save(flags);
/*
* Copy to skb. This copy is needed here since the memory pointed by
* hv_netvsc_packet cannot be deallocated
*/
for (i = 0; i < packet->page_buf_cnt; i++) {
data = kmap_atomic(pfn_to_page(packet->page_buf[i].pfn),
KM_IRQ1);
data = (void *)(unsigned long)data +
packet->page_buf[i].offset;
memcpy(skb_put(skb, packet->page_buf[i].len), data,
packet->page_buf[i].len);
kunmap_atomic((void *)((unsigned long)data -
packet->page_buf[i].offset), KM_IRQ1);
}
local_irq_restore(flags);
skb->protocol = eth_type_trans(skb, net);
skb->ip_summed = CHECKSUM_NONE;
net->stats.rx_packets++;
net->stats.rx_bytes += skb->len;
/*
* Pass the skb back up. Network stack will deallocate the skb when it
* is done.
* TODO - use NAPI?
*/
netif_rx(skb);
return 0;
}
static void netvsc_get_drvinfo(struct net_device *net,
struct ethtool_drvinfo *info)
{
strcpy(info->driver, "hv_netvsc");
strcpy(info->version, HV_DRV_VERSION);
strcpy(info->fw_version, "N/A");
}
static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
.get_link = ethtool_op_get_link,
};
static const struct net_device_ops device_ops = {
.ndo_open = netvsc_open,
.ndo_stop = netvsc_close,
.ndo_start_xmit = netvsc_start_xmit,
.ndo_set_rx_mode = netvsc_set_multicast_list,
.ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
};
/*
* Send GARP packet to network peers after migrations.
* After Quick Migration, the network is not immediately operational in the
* current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
* another netif_notify_peers() into a delayed work, otherwise GARP packet
* will not be sent after quick migration, and cause network disconnection.
*/
static void netvsc_send_garp(struct work_struct *w)
{
struct net_device_context *ndev_ctx;
struct net_device *net;
struct netvsc_device *net_device;
ndev_ctx = container_of(w, struct net_device_context, dwork.work);
net_device = hv_get_drvdata(ndev_ctx->device_ctx);
net = net_device->ndev;
netif_notify_peers(net);
}
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
struct net_device *net = NULL;
struct net_device_context *net_device_ctx;
struct netvsc_device_info device_info;
int ret;
net = alloc_etherdev(sizeof(struct net_device_context));
if (!net)
return -ENOMEM;
/* Set initial state */
netif_carrier_off(net);
net_device_ctx = netdev_priv(net);
net_device_ctx->device_ctx = dev;
atomic_set(&net_device_ctx->avail, ring_size);
hv_set_drvdata(dev, net);
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);
net->netdev_ops = &device_ops;
/* TODO: Add GSO and Checksum offload */
net->hw_features = NETIF_F_SG;
net->features = NETIF_F_SG;
SET_ETHTOOL_OPS(net, &ethtool_ops);
SET_NETDEV_DEV(net, &dev->device);
ret = register_netdev(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
free_netdev(net);
goto out;
}
/* Notify the netvsc driver of the new device */
device_info.ring_size = ring_size;
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
unregister_netdev(net);
free_netdev(net);
hv_set_drvdata(dev, NULL);
return ret;
}
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
netif_carrier_on(net);
out:
return ret;
}
static int netvsc_remove(struct hv_device *dev)
{
struct net_device *net;
struct net_device_context *ndev_ctx;
struct netvsc_device *net_device;
net_device = hv_get_drvdata(dev);
net = net_device->ndev;
if (net == NULL) {
dev_err(&dev->device, "No net device to remove\n");
return 0;
}
ndev_ctx = netdev_priv(net);
cancel_delayed_work_sync(&ndev_ctx->dwork);
/* Stop outbound asap */
netif_stop_queue(net);
unregister_netdev(net);
/*
* Call to the vsc driver to let it know that the device is being
* removed
*/
rndis_filter_device_remove(dev);
free_netdev(net);
return 0;
}
static const struct hv_vmbus_device_id id_table[] = {
/* Network guid */
{ VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) },
{ },
};
MODULE_DEVICE_TABLE(vmbus, id_table);
/* The one and only one */
static struct hv_driver netvsc_drv = {
.name = "netvsc",
.id_table = id_table,
.probe = netvsc_probe,
.remove = netvsc_remove,
};
static void __exit netvsc_drv_exit(void)
{
vmbus_driver_unregister(&netvsc_drv);
}
static int __init netvsc_drv_init(void)
{
return vmbus_driver_register(&netvsc_drv);
}
MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
module_init(netvsc_drv_init);
module_exit(netvsc_drv_exit);

View File

@@ -0,0 +1,855 @@
/*
* Copyright (c) 2009, Microsoft Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
* Hank Janssen <hjanssen@microsoft.com>
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include "hyperv_net.h"
enum rndis_device_state {
RNDIS_DEV_UNINITIALIZED = 0,
RNDIS_DEV_INITIALIZING,
RNDIS_DEV_INITIALIZED,
RNDIS_DEV_DATAINITIALIZED,
};
struct rndis_device {
struct netvsc_device *net_dev;
enum rndis_device_state state;
bool link_state;
atomic_t new_req_id;
spinlock_t request_lock;
struct list_head req_list;
unsigned char hw_mac_adr[ETH_ALEN];
};
struct rndis_request {
struct list_head list_ent;
struct completion wait_event;
/*
* FIXME: We assumed a fixed size response here. If we do ever need to
* handle a bigger response, we can either define a max response
* message or add a response buffer variable above this field
*/
struct rndis_message response_msg;
/* Simplify allocation by having a netvsc packet inline */
struct hv_netvsc_packet pkt;
struct hv_page_buffer buf;
/* FIXME: We assumed a fixed size request here. */
struct rndis_message request_msg;
};
static void rndis_filter_send_completion(void *ctx);
static void rndis_filter_send_request_completion(void *ctx);
static struct rndis_device *get_rndis_device(void)
{
struct rndis_device *device;
device = kzalloc(sizeof(struct rndis_device), GFP_KERNEL);
if (!device)
return NULL;
spin_lock_init(&device->request_lock);
INIT_LIST_HEAD(&device->req_list);
device->state = RNDIS_DEV_UNINITIALIZED;
return device;
}
static struct rndis_request *get_rndis_request(struct rndis_device *dev,
u32 msg_type,
u32 msg_len)
{
struct rndis_request *request;
struct rndis_message *rndis_msg;
struct rndis_set_request *set;
unsigned long flags;
request = kzalloc(sizeof(struct rndis_request), GFP_KERNEL);
if (!request)
return NULL;
init_completion(&request->wait_event);
rndis_msg = &request->request_msg;
rndis_msg->ndis_msg_type = msg_type;
rndis_msg->msg_len = msg_len;
/*
* Set the request id. This field is always after the rndis header for
* request/response packet types so we just used the SetRequest as a
* template
*/
set = &rndis_msg->msg.set_req;
set->req_id = atomic_inc_return(&dev->new_req_id);
/* Add to the request list */
spin_lock_irqsave(&dev->request_lock, flags);
list_add_tail(&request->list_ent, &dev->req_list);
spin_unlock_irqrestore(&dev->request_lock, flags);
return request;
}
static void put_rndis_request(struct rndis_device *dev,
struct rndis_request *req)
{
unsigned long flags;
spin_lock_irqsave(&dev->request_lock, flags);
list_del(&req->list_ent);
spin_unlock_irqrestore(&dev->request_lock, flags);
kfree(req);
}
static void dump_rndis_message(struct hv_device *hv_dev,
struct rndis_message *rndis_msg)
{
struct net_device *netdev;
struct netvsc_device *net_device;
net_device = hv_get_drvdata(hv_dev);
netdev = net_device->ndev;
switch (rndis_msg->ndis_msg_type) {
case REMOTE_NDIS_PACKET_MSG:
netdev_dbg(netdev, "REMOTE_NDIS_PACKET_MSG (len %u, "
"data offset %u data len %u, # oob %u, "
"oob offset %u, oob len %u, pkt offset %u, "
"pkt len %u\n",
rndis_msg->msg_len,
rndis_msg->msg.pkt.data_offset,
rndis_msg->msg.pkt.data_len,
rndis_msg->msg.pkt.num_oob_data_elements,
rndis_msg->msg.pkt.oob_data_offset,
rndis_msg->msg.pkt.oob_data_len,
rndis_msg->msg.pkt.per_pkt_info_offset,
rndis_msg->msg.pkt.per_pkt_info_len);
break;
case REMOTE_NDIS_INITIALIZE_CMPLT:
netdev_dbg(netdev, "REMOTE_NDIS_INITIALIZE_CMPLT "
"(len %u, id 0x%x, status 0x%x, major %d, minor %d, "
"device flags %d, max xfer size 0x%x, max pkts %u, "
"pkt aligned %u)\n",
rndis_msg->msg_len,
rndis_msg->msg.init_complete.req_id,
rndis_msg->msg.init_complete.status,
rndis_msg->msg.init_complete.major_ver,
rndis_msg->msg.init_complete.minor_ver,
rndis_msg->msg.init_complete.dev_flags,
rndis_msg->msg.init_complete.max_xfer_size,
rndis_msg->msg.init_complete.
max_pkt_per_msg,
rndis_msg->msg.init_complete.
pkt_alignment_factor);
break;
case REMOTE_NDIS_QUERY_CMPLT:
netdev_dbg(netdev, "REMOTE_NDIS_QUERY_CMPLT "
"(len %u, id 0x%x, status 0x%x, buf len %u, "
"buf offset %u)\n",
rndis_msg->msg_len,
rndis_msg->msg.query_complete.req_id,
rndis_msg->msg.query_complete.status,
rndis_msg->msg.query_complete.
info_buflen,
rndis_msg->msg.query_complete.
info_buf_offset);
break;
case REMOTE_NDIS_SET_CMPLT:
netdev_dbg(netdev,
"REMOTE_NDIS_SET_CMPLT (len %u, id 0x%x, status 0x%x)\n",
rndis_msg->msg_len,
rndis_msg->msg.set_complete.req_id,
rndis_msg->msg.set_complete.status);
break;
case REMOTE_NDIS_INDICATE_STATUS_MSG:
netdev_dbg(netdev, "REMOTE_NDIS_INDICATE_STATUS_MSG "
"(len %u, status 0x%x, buf len %u, buf offset %u)\n",
rndis_msg->msg_len,
rndis_msg->msg.indicate_status.status,
rndis_msg->msg.indicate_status.status_buflen,
rndis_msg->msg.indicate_status.status_buf_offset);
break;
default:
netdev_dbg(netdev, "0x%x (len %u)\n",
rndis_msg->ndis_msg_type,
rndis_msg->msg_len);
break;
}
}
static int rndis_filter_send_request(struct rndis_device *dev,
struct rndis_request *req)
{
int ret;
struct hv_netvsc_packet *packet;
/* Setup the packet to send it */
packet = &req->pkt;
packet->is_data_pkt = false;
packet->total_data_buflen = req->request_msg.msg_len;
packet->page_buf_cnt = 1;
packet->page_buf[0].pfn = virt_to_phys(&req->request_msg) >>
PAGE_SHIFT;
packet->page_buf[0].len = req->request_msg.msg_len;
packet->page_buf[0].offset =
(unsigned long)&req->request_msg & (PAGE_SIZE - 1);
packet->completion.send.send_completion_ctx = req;/* packet; */
packet->completion.send.send_completion =
rndis_filter_send_request_completion;
packet->completion.send.send_completion_tid = (unsigned long)dev;
ret = netvsc_send(dev->net_dev->dev, packet);
return ret;
}
static void rndis_filter_receive_response(struct rndis_device *dev,
struct rndis_message *resp)
{
struct rndis_request *request = NULL;
bool found = false;
unsigned long flags;
struct net_device *ndev;
ndev = dev->net_dev->ndev;
spin_lock_irqsave(&dev->request_lock, flags);
list_for_each_entry(request, &dev->req_list, list_ent) {
/*
* All request/response message contains RequestId as the 1st
* field
*/
if (request->request_msg.msg.init_req.req_id
== resp->msg.init_complete.req_id) {
found = true;
break;
}
}
spin_unlock_irqrestore(&dev->request_lock, flags);
if (found) {
if (resp->msg_len <= sizeof(struct rndis_message)) {
memcpy(&request->response_msg, resp,
resp->msg_len);
} else {
netdev_err(ndev,
"rndis response buffer overflow "
"detected (size %u max %zu)\n",
resp->msg_len,
sizeof(struct rndis_filter_packet));
if (resp->ndis_msg_type ==
REMOTE_NDIS_RESET_CMPLT) {
/* does not have a request id field */
request->response_msg.msg.reset_complete.
status = STATUS_BUFFER_OVERFLOW;
} else {
request->response_msg.msg.
init_complete.status =
STATUS_BUFFER_OVERFLOW;
}
}
complete(&request->wait_event);
} else {
netdev_err(ndev,
"no rndis request found for this response "
"(id 0x%x res type 0x%x)\n",
resp->msg.init_complete.req_id,
resp->ndis_msg_type);
}
}
static void rndis_filter_receive_indicate_status(struct rndis_device *dev,
struct rndis_message *resp)
{
struct rndis_indicate_status *indicate =
&resp->msg.indicate_status;
if (indicate->status == RNDIS_STATUS_MEDIA_CONNECT) {
netvsc_linkstatus_callback(
dev->net_dev->dev, 1);
} else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT) {
netvsc_linkstatus_callback(
dev->net_dev->dev, 0);
} else {
/*
* TODO:
*/
}
}
static void rndis_filter_receive_data(struct rndis_device *dev,
struct rndis_message *msg,
struct hv_netvsc_packet *pkt)
{
struct rndis_packet *rndis_pkt;
u32 data_offset;
int i;
rndis_pkt = &msg->msg.pkt;
/*
* FIXME: Handle multiple rndis pkt msgs that maybe enclosed in this
* netvsc packet (ie TotalDataBufferLength != MessageLength)
*/
/* Remove the rndis header and pass it back up the stack */
data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
pkt->total_data_buflen -= data_offset;
pkt->page_buf[0].offset += data_offset;
pkt->page_buf[0].len -= data_offset;
/* Drop the 0th page, if rndis data go beyond page boundary */
if (pkt->page_buf[0].offset >= PAGE_SIZE) {
pkt->page_buf[1].offset = pkt->page_buf[0].offset - PAGE_SIZE;
pkt->page_buf[1].len -= pkt->page_buf[1].offset;
pkt->page_buf_cnt--;
for (i = 0; i < pkt->page_buf_cnt; i++)
pkt->page_buf[i] = pkt->page_buf[i+1];
}
pkt->is_data_pkt = true;
netvsc_recv_callback(dev->net_dev->dev, pkt);
}
int rndis_filter_receive(struct hv_device *dev,
struct hv_netvsc_packet *pkt)
{
struct netvsc_device *net_dev = hv_get_drvdata(dev);
struct rndis_device *rndis_dev;
struct rndis_message rndis_msg;
struct rndis_message *rndis_hdr;
struct net_device *ndev;
if (!net_dev)
return -EINVAL;
ndev = net_dev->ndev;
/* Make sure the rndis device state is initialized */
if (!net_dev->extension) {
netdev_err(ndev, "got rndis message but no rndis device - "
"dropping this message!\n");
return -ENODEV;
}
rndis_dev = (struct rndis_device *)net_dev->extension;
if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) {
netdev_err(ndev, "got rndis message but rndis device "
"uninitialized...dropping this message!\n");
return -ENODEV;
}
rndis_hdr = (struct rndis_message *)kmap_atomic(
pfn_to_page(pkt->page_buf[0].pfn), KM_IRQ0);
rndis_hdr = (void *)((unsigned long)rndis_hdr +
pkt->page_buf[0].offset);
/* Make sure we got a valid rndis message */
if ((rndis_hdr->ndis_msg_type != REMOTE_NDIS_PACKET_MSG) &&
(rndis_hdr->msg_len > sizeof(struct rndis_message))) {
netdev_err(ndev, "incoming rndis message buffer overflow "
"detected (got %u, max %zu)..marking it an error!\n",
rndis_hdr->msg_len,
sizeof(struct rndis_message));
}
memcpy(&rndis_msg, rndis_hdr,
(rndis_hdr->msg_len > sizeof(struct rndis_message)) ?
sizeof(struct rndis_message) :
rndis_hdr->msg_len);
kunmap_atomic(rndis_hdr - pkt->page_buf[0].offset, KM_IRQ0);
dump_rndis_message(dev, &rndis_msg);
switch (rndis_msg.ndis_msg_type) {
case REMOTE_NDIS_PACKET_MSG:
/* data msg */
rndis_filter_receive_data(rndis_dev, &rndis_msg, pkt);
break;
case REMOTE_NDIS_INITIALIZE_CMPLT:
case REMOTE_NDIS_QUERY_CMPLT:
case REMOTE_NDIS_SET_CMPLT:
/* completion msgs */
rndis_filter_receive_response(rndis_dev, &rndis_msg);
break;
case REMOTE_NDIS_INDICATE_STATUS_MSG:
/* notification msgs */
rndis_filter_receive_indicate_status(rndis_dev, &rndis_msg);
break;
default:
netdev_err(ndev,
"unhandled rndis message (type %u len %u)\n",
rndis_msg.ndis_msg_type,
rndis_msg.msg_len);
break;
}
return 0;
}
static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
void *result, u32 *result_size)
{
struct rndis_request *request;
u32 inresult_size = *result_size;
struct rndis_query_request *query;
struct rndis_query_complete *query_complete;
int ret = 0;
int t;
if (!result)
return -EINVAL;
*result_size = 0;
request = get_rndis_request(dev, REMOTE_NDIS_QUERY_MSG,
RNDIS_MESSAGE_SIZE(struct rndis_query_request));
if (!request) {
ret = -ENOMEM;
goto cleanup;
}
/* Setup the rndis query */
query = &request->request_msg.msg.query_req;
query->oid = oid;
query->info_buf_offset = sizeof(struct rndis_query_request);
query->info_buflen = 0;
query->dev_vc_handle = 0;
ret = rndis_filter_send_request(dev, request);
if (ret != 0)
goto cleanup;
t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto cleanup;
}
/* Copy the response back */
query_complete = &request->response_msg.msg.query_complete;
if (query_complete->info_buflen > inresult_size) {
ret = -1;
goto cleanup;
}
memcpy(result,
(void *)((unsigned long)query_complete +
query_complete->info_buf_offset),
query_complete->info_buflen);
*result_size = query_complete->info_buflen;
cleanup:
if (request)
put_rndis_request(dev, request);
return ret;
}
static int rndis_filter_query_device_mac(struct rndis_device *dev)
{
u32 size = ETH_ALEN;
return rndis_filter_query_device(dev,
RNDIS_OID_802_3_PERMANENT_ADDRESS,
dev->hw_mac_adr, &size);
}
static int rndis_filter_query_device_link_status(struct rndis_device *dev)
{
u32 size = sizeof(u32);
u32 link_status;
int ret;
ret = rndis_filter_query_device(dev,
RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
&link_status, &size);
dev->link_state = (link_status != 0) ? true : false;
return ret;
}
static int rndis_filter_set_packet_filter(struct rndis_device *dev,
u32 new_filter)
{
struct rndis_request *request;
struct rndis_set_request *set;
struct rndis_set_complete *set_complete;
u32 status;
int ret, t;
struct net_device *ndev;
ndev = dev->net_dev->ndev;
request = get_rndis_request(dev, REMOTE_NDIS_SET_MSG,
RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
sizeof(u32));
if (!request) {
ret = -ENOMEM;
goto cleanup;
}
/* Setup the rndis set */
set = &request->request_msg.msg.set_req;
set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
set->info_buflen = sizeof(u32);
set->info_buf_offset = sizeof(struct rndis_set_request);
memcpy((void *)(unsigned long)set + sizeof(struct rndis_set_request),
&new_filter, sizeof(u32));
ret = rndis_filter_send_request(dev, request);
if (ret != 0)
goto cleanup;
t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
if (t == 0) {
netdev_err(ndev,
"timeout before we got a set response...\n");
/*
* We can't deallocate the request since we may still receive a
* send completion for it.
*/
goto exit;
} else {
set_complete = &request->response_msg.msg.set_complete;
status = set_complete->status;
}
cleanup:
if (request)
put_rndis_request(dev, request);
exit:
return ret;
}
static int rndis_filter_init_device(struct rndis_device *dev)
{
struct rndis_request *request;
struct rndis_initialize_request *init;
struct rndis_initialize_complete *init_complete;
u32 status;
int ret, t;
request = get_rndis_request(dev, REMOTE_NDIS_INITIALIZE_MSG,
RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
if (!request) {
ret = -ENOMEM;
goto cleanup;
}
/* Setup the rndis set */
init = &request->request_msg.msg.init_req;
init->major_ver = RNDIS_MAJOR_VERSION;
init->minor_ver = RNDIS_MINOR_VERSION;
/* FIXME: Use 1536 - rounded ethernet frame size */
init->max_xfer_size = 2048;
dev->state = RNDIS_DEV_INITIALIZING;
ret = rndis_filter_send_request(dev, request);
if (ret != 0) {
dev->state = RNDIS_DEV_UNINITIALIZED;
goto cleanup;
}
t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto cleanup;
}
init_complete = &request->response_msg.msg.init_complete;
status = init_complete->status;
if (status == RNDIS_STATUS_SUCCESS) {
dev->state = RNDIS_DEV_INITIALIZED;
ret = 0;
} else {
dev->state = RNDIS_DEV_UNINITIALIZED;
ret = -EINVAL;
}
cleanup:
if (request)
put_rndis_request(dev, request);
return ret;
}
static void rndis_filter_halt_device(struct rndis_device *dev)
{
struct rndis_request *request;
struct rndis_halt_request *halt;
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, REMOTE_NDIS_HALT_MSG,
RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
if (!request)
goto cleanup;
/* Setup the rndis set */
halt = &request->request_msg.msg.halt_req;
halt->req_id = atomic_inc_return(&dev->new_req_id);
/* Ignore return since this msg is optional. */
rndis_filter_send_request(dev, request);
dev->state = RNDIS_DEV_UNINITIALIZED;
cleanup:
if (request)
put_rndis_request(dev, request);
return;
}
static int rndis_filter_open_device(struct rndis_device *dev)
{
int ret;
if (dev->state != RNDIS_DEV_INITIALIZED)
return 0;
ret = rndis_filter_set_packet_filter(dev,
NDIS_PACKET_TYPE_BROADCAST |
NDIS_PACKET_TYPE_ALL_MULTICAST |
NDIS_PACKET_TYPE_DIRECTED);
if (ret == 0)
dev->state = RNDIS_DEV_DATAINITIALIZED;
return ret;
}
static int rndis_filter_close_device(struct rndis_device *dev)
{
int ret;
if (dev->state != RNDIS_DEV_DATAINITIALIZED)
return 0;
ret = rndis_filter_set_packet_filter(dev, 0);
if (ret == 0)
dev->state = RNDIS_DEV_INITIALIZED;
return ret;
}
int rndis_filter_device_add(struct hv_device *dev,
void *additional_info)
{
int ret;
struct netvsc_device *net_device;
struct rndis_device *rndis_device;
struct netvsc_device_info *device_info = additional_info;
rndis_device = get_rndis_device();
if (!rndis_device)
return -ENODEV;
/*
* Let the inner driver handle this first to create the netvsc channel
* NOTE! Once the channel is created, we may get a receive callback
* (RndisFilterOnReceive()) before this call is completed
*/
ret = netvsc_device_add(dev, additional_info);
if (ret != 0) {
kfree(rndis_device);
return ret;
}
/* Initialize the rndis device */
net_device = hv_get_drvdata(dev);
net_device->extension = rndis_device;
rndis_device->net_dev = net_device;
/* Send the rndis initialization message */
ret = rndis_filter_init_device(rndis_device);
if (ret != 0) {
/*
* TODO: If rndis init failed, we will need to shut down the
* channel
*/
}
/* Get the mac address */
ret = rndis_filter_query_device_mac(rndis_device);
if (ret != 0) {
/*
* TODO: shutdown rndis device and the channel
*/
}
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
rndis_filter_query_device_link_status(rndis_device);
device_info->link_state = rndis_device->link_state;
dev_info(&dev->device, "Device MAC %pM link state %s\n",
rndis_device->hw_mac_adr,
device_info->link_state ? "down" : "up");
return ret;
}
void rndis_filter_device_remove(struct hv_device *dev)
{
struct netvsc_device *net_dev = hv_get_drvdata(dev);
struct rndis_device *rndis_dev = net_dev->extension;
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);
kfree(rndis_dev);
net_dev->extension = NULL;
netvsc_device_remove(dev);
}
int rndis_filter_open(struct hv_device *dev)
{
struct netvsc_device *net_device = hv_get_drvdata(dev);
if (!net_device)
return -EINVAL;
return rndis_filter_open_device(net_device->extension);
}
int rndis_filter_close(struct hv_device *dev)
{
struct netvsc_device *netDevice = hv_get_drvdata(dev);
if (!netDevice)
return -EINVAL;
return rndis_filter_close_device(netDevice->extension);
}
int rndis_filter_send(struct hv_device *dev,
struct hv_netvsc_packet *pkt)
{
int ret;
struct rndis_filter_packet *filterPacket;
struct rndis_message *rndisMessage;
struct rndis_packet *rndisPacket;
u32 rndisMessageSize;
/* Add the rndis header */
filterPacket = (struct rndis_filter_packet *)pkt->extension;
memset(filterPacket, 0, sizeof(struct rndis_filter_packet));
rndisMessage = &filterPacket->msg;
rndisMessageSize = RNDIS_MESSAGE_SIZE(struct rndis_packet);
rndisMessage->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
rndisMessage->msg_len = pkt->total_data_buflen +
rndisMessageSize;
rndisPacket = &rndisMessage->msg.pkt;
rndisPacket->data_offset = sizeof(struct rndis_packet);
rndisPacket->data_len = pkt->total_data_buflen;
pkt->is_data_pkt = true;
pkt->page_buf[0].pfn = virt_to_phys(rndisMessage) >> PAGE_SHIFT;
pkt->page_buf[0].offset =
(unsigned long)rndisMessage & (PAGE_SIZE-1);
pkt->page_buf[0].len = rndisMessageSize;
/* Save the packet send completion and context */
filterPacket->completion = pkt->completion.send.send_completion;
filterPacket->completion_ctx =
pkt->completion.send.send_completion_ctx;
/* Use ours */
pkt->completion.send.send_completion = rndis_filter_send_completion;
pkt->completion.send.send_completion_ctx = filterPacket;
ret = netvsc_send(dev, pkt);
if (ret != 0) {
/*
* Reset the completion to originals to allow retries from
* above
*/
pkt->completion.send.send_completion =
filterPacket->completion;
pkt->completion.send.send_completion_ctx =
filterPacket->completion_ctx;
}
return ret;
}
static void rndis_filter_send_completion(void *ctx)
{
struct rndis_filter_packet *filterPacket = ctx;
/* Pass it back to the original handler */
filterPacket->completion(filterPacket->completion_ctx);
}
static void rndis_filter_send_request_completion(void *ctx)
{
/* Noop */
}