#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
+#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include "hyperv_vmbus.h"
-struct vmbus_channel_message_table_entry {
- enum vmbus_channel_message_type message_type;
- void (*message_handler)(struct vmbus_channel_message_header *msg);
+struct vmbus_rescind_work {
+ struct work_struct work;
+ struct vmbus_channel *channel;
};
-
/**
* vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
* @icmsghdrp: Pointer to msg header structure
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
+static void vmbus_sc_creation_cb(struct work_struct *work)
+{
+ struct vmbus_channel *newchannel = container_of(work,
+ struct vmbus_channel,
+ work);
+ struct vmbus_channel *primary_channel = newchannel->primary_channel;
+
+ /*
+ * On entry sc_creation_callback has been already verified to
+ * be non-NULL.
+ */
+ primary_channel->sc_creation_callback(newchannel);
+}
+
/*
* alloc_channel - Allocate and initialize a vmbus channel object
*/
static struct vmbus_channel *alloc_channel(void)
{
+ static atomic_t chan_num = ATOMIC_INIT(0);
struct vmbus_channel *channel;
channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
if (!channel)
return NULL;
+ channel->id = atomic_inc_return(&chan_num);
spin_lock_init(&channel->inbound_lock);
spin_lock_init(&channel->lock);
INIT_LIST_HEAD(&channel->sc_list);
INIT_LIST_HEAD(&channel->percpu_list);
- channel->controlwq = create_workqueue("hv_vmbus_ctl");
+ channel->controlwq = alloc_workqueue("hv_vmbus_ctl/%d", WQ_MEM_RECLAIM,
+ 1, channel->id);
if (!channel->controlwq) {
kfree(channel);
return NULL;
queue_work(vmbus_connection.work_queue, &channel->work);
}
+static void process_rescind_fn(struct work_struct *work)
+{
+ struct vmbus_rescind_work *rc_work;
+ struct vmbus_channel *channel;
+ struct device *dev;
+
+ rc_work = container_of(work, struct vmbus_rescind_work, work);
+ channel = rc_work->channel;
+
+ /*
+ * We have already acquired a reference on the channel
+ * and so it cannot vanish underneath us.
+ * It is possible (while very unlikely) that we may
+ * get here while the processing of the initial offer
+ * is still not complete. Deal with this situation by
+ * just waiting until the channel is in the correct state.
+ */
+
+ while (channel->work.func != release_channel)
+ msleep(1000);
+
+ if (channel->device_obj) {
+ dev = get_device(&channel->device_obj->device);
+ if (dev) {
+ vmbus_device_unregister(channel->device_obj);
+ put_device(dev);
+ }
+ } else {
+ hv_process_channel_removal(channel,
+ channel->offermsg.child_relid);
+ }
+ kfree(work);
+}
+
static void percpu_channel_enq(void *arg)
{
struct vmbus_channel *channel = arg;
list_del(&channel->percpu_list);
}
-/*
- * vmbus_process_rescind_offer -
- * Rescind the offer by initiating a device removal
- */
-static void vmbus_process_rescind_offer(struct work_struct *work)
+
+void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
- struct vmbus_channel *channel = container_of(work,
- struct vmbus_channel,
- work);
+ struct vmbus_channel_relid_released msg;
unsigned long flags;
struct vmbus_channel *primary_channel;
- struct vmbus_channel_relid_released msg;
- struct device *dev;
-
- if (channel->device_obj) {
- dev = get_device(&channel->device_obj->device);
- if (dev) {
- vmbus_device_unregister(channel->device_obj);
- put_device(dev);
- }
- }
memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
- msg.child_relid = channel->offermsg.child_relid;
+ msg.child_relid = relid;
msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
+ if (channel == NULL)
+ return;
+
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
vmbus_device_unregister(channel->device_obj);
- kfree(channel->device_obj);
free_channel(channel);
}
}
+static void vmbus_do_device_register(struct work_struct *work)
+{
+ struct hv_device *device_obj;
+ int ret;
+ unsigned long flags;
+ struct vmbus_channel *newchannel = container_of(work,
+ struct vmbus_channel,
+ work);
+
+ ret = vmbus_device_register(newchannel->device_obj);
+ if (ret != 0) {
+ pr_err("unable to add child device object (relid %d)\n",
+ newchannel->offermsg.child_relid);
+ spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ list_del(&newchannel->listentry);
+ device_obj = newchannel->device_obj;
+ newchannel->device_obj = NULL;
+ spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+
+ if (newchannel->target_cpu != get_cpu()) {
+ put_cpu();
+ smp_call_function_single(newchannel->target_cpu,
+ percpu_channel_deq, newchannel, true);
+ } else {
+ percpu_channel_deq(newchannel);
+ put_cpu();
+ }
+
+ kfree(device_obj);
+ if (!newchannel->rescind) {
+ free_channel(newchannel);
+ return;
+ }
+ }
+ /*
+ * The next state for this channel is to be freed.
+ */
+ INIT_WORK(&newchannel->work, release_channel);
+}
+
/*
* vmbus_process_offer - Process the offer by creating a channel/device
* associated with this offer
*/
-static void vmbus_process_offer(struct work_struct *work)
+static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
- struct vmbus_channel *newchannel = container_of(work,
- struct vmbus_channel,
- work);
struct vmbus_channel *channel;
bool fnew = true;
bool enq = false;
- int ret;
unsigned long flags;
/* Make sure this is a new offer */
}
newchannel->state = CHANNEL_OPEN_STATE;
- if (channel->sc_creation_callback != NULL)
- channel->sc_creation_callback(newchannel);
+ channel->num_sc++;
+ if (channel->sc_creation_callback != NULL) {
+ /*
+ * We need to invoke the sub-channel creation
+ * callback; invoke this in a seperate work
+ * context since we are currently running on
+ * the global work context in which we handle
+ * messages from the host.
+ */
+ INIT_WORK(&newchannel->work,
+ vmbus_sc_creation_cb);
+ queue_work(newchannel->controlwq,
+ &newchannel->work);
+ }
- goto done_init_rescind;
+ return;
}
goto err_free_chan;
&newchannel->offermsg.offer.if_instance,
newchannel);
if (!newchannel->device_obj)
- goto err_free_chan;
+ goto err_deq_chan;
/*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
* method.
+ * Invoke this call on the per-channel work context.
+ * Until we return from this function, rescind offer message
+ * cannot be processed as we are running on the global message
+ * handling work.
*/
- ret = vmbus_device_register(newchannel->device_obj);
- if (ret != 0) {
- pr_err("unable to add child device object (relid %d)\n",
- newchannel->offermsg.child_relid);
+ INIT_WORK(&newchannel->work, vmbus_do_device_register);
+ queue_work(newchannel->controlwq, &newchannel->work);
+ return;
- spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
- list_del(&newchannel->listentry);
- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
- kfree(newchannel->device_obj);
- goto err_free_chan;
+err_deq_chan:
+ spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ list_del(&newchannel->listentry);
+ spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+
+ if (newchannel->target_cpu != get_cpu()) {
+ put_cpu();
+ smp_call_function_single(newchannel->target_cpu,
+ percpu_channel_deq, newchannel, true);
+ } else {
+ percpu_channel_deq(newchannel);
+ put_cpu();
}
-done_init_rescind:
- spin_lock_irqsave(&newchannel->lock, flags);
- /* The next possible work is rescind handling */
- INIT_WORK(&newchannel->work, vmbus_process_rescind_offer);
- /* Check if rescind offer was already received */
- if (newchannel->rescind)
- queue_work(newchannel->controlwq, &newchannel->work);
- spin_unlock_irqrestore(&newchannel->lock, flags);
- return;
+
err_free_chan:
free_channel(newchannel);
}
{ HV_SCSI_GUID, },
/* Network */
{ HV_NIC_GUID, },
+ /* NetworkDirect Guest RDMA */
+ { HV_ND_GUID, },
};
newchannel->monitor_grp = (u8)offer->monitorid / 32;
newchannel->monitor_bit = (u8)offer->monitorid % 32;
- INIT_WORK(&newchannel->work, vmbus_process_offer);
- queue_work(newchannel->controlwq, &newchannel->work);
+ vmbus_process_offer(newchannel);
}
/*
{
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
- unsigned long flags;
+ struct vmbus_rescind_work *rc_work;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
- channel = relid2channel(rescind->child_relid);
+ channel = relid2channel(rescind->child_relid, true);
- if (channel == NULL)
- /* Just return here, no channel found */
+ if (channel == NULL) {
+ hv_process_channel_removal(NULL, rescind->child_relid);
return;
+ }
- spin_lock_irqsave(&channel->lock, flags);
- channel->rescind = true;
/*
- * channel->work.func != vmbus_process_rescind_offer means we are still
- * processing offer request and the rescind offer processing should be
- * postponed. It will be done at the very end of vmbus_process_offer()
- * as rescind flag is being checked there.
+ * We have acquired a reference on the channel and have posted
+ * the rescind state. Perform further cleanup in a work context
+ * that is different from the global work context in which
+ * we process messages from the host (we are currently executing
+ * on that global context.
*/
- if (channel->work.func == vmbus_process_rescind_offer)
- /* work is initialized for vmbus_process_rescind_offer() from
- * vmbus_process_offer() where the channel got created */
- queue_work(channel->controlwq, &channel->work);
-
- spin_unlock_irqrestore(&channel->lock, flags);
+ rc_work = kzalloc(sizeof(struct vmbus_rescind_work), GFP_KERNEL);
+ if (!rc_work) {
+ pr_err("Unable to allocate memory for rescind processing ");
+ return;
+ }
+ rc_work->channel = channel;
+ INIT_WORK(&rc_work->work, process_rescind_fn);
+ schedule_work(&rc_work->work);
}
/*
}
/* Channel message dispatch table */
-static struct vmbus_channel_message_table_entry
+struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
- {CHANNELMSG_INVALID, NULL},
- {CHANNELMSG_OFFERCHANNEL, vmbus_onoffer},
- {CHANNELMSG_RESCIND_CHANNELOFFER, vmbus_onoffer_rescind},
- {CHANNELMSG_REQUESTOFFERS, NULL},
- {CHANNELMSG_ALLOFFERS_DELIVERED, vmbus_onoffers_delivered},
- {CHANNELMSG_OPENCHANNEL, NULL},
- {CHANNELMSG_OPENCHANNEL_RESULT, vmbus_onopen_result},
- {CHANNELMSG_CLOSECHANNEL, NULL},
- {CHANNELMSG_GPADL_HEADER, NULL},
- {CHANNELMSG_GPADL_BODY, NULL},
- {CHANNELMSG_GPADL_CREATED, vmbus_ongpadl_created},
- {CHANNELMSG_GPADL_TEARDOWN, NULL},
- {CHANNELMSG_GPADL_TORNDOWN, vmbus_ongpadl_torndown},
- {CHANNELMSG_RELID_RELEASED, NULL},
- {CHANNELMSG_INITIATE_CONTACT, NULL},
- {CHANNELMSG_VERSION_RESPONSE, vmbus_onversion_response},
- {CHANNELMSG_UNLOAD, NULL},
+ {CHANNELMSG_INVALID, 0, NULL},
+ {CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer},
+ {CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind},
+ {CHANNELMSG_REQUESTOFFERS, 0, NULL},
+ {CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered},
+ {CHANNELMSG_OPENCHANNEL, 0, NULL},
+ {CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result},
+ {CHANNELMSG_CLOSECHANNEL, 0, NULL},
+ {CHANNELMSG_GPADL_HEADER, 0, NULL},
+ {CHANNELMSG_GPADL_BODY, 0, NULL},
+ {CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created},
+ {CHANNELMSG_GPADL_TEARDOWN, 0, NULL},
+ {CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown},
+ {CHANNELMSG_RELID_RELEASED, 0, NULL},
+ {CHANNELMSG_INITIATE_CONTACT, 0, NULL},
+ {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response},
+ {CHANNELMSG_UNLOAD, 0, NULL},
};
/*
{
struct vmbus_channel_message_header *msg;
struct vmbus_channel_msginfo *msginfo;
- int ret, t;
+ int ret;
msginfo = kmalloc(sizeof(*msginfo) +
sizeof(struct vmbus_channel_message_header),
if (!msginfo)
return -ENOMEM;
- init_completion(&msginfo->waitevent);
-
msg = (struct vmbus_channel_message_header *)msginfo->msg;
msg->msgtype = CHANNELMSG_REQUESTOFFERS;
goto cleanup;
}
- t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ);
- if (t == 0) {
- ret = -ETIMEDOUT;
- goto cleanup;
- }
-
-
-
cleanup:
kfree(msginfo);
/*
* Retrieve the (sub) channel on which to send an outgoing request.
- * When a primary channel has multiple sub-channels, we choose a
- * channel whose VCPU binding is closest to the VCPU on which
- * this call is being made.
+ * When a primary channel has multiple sub-channels, we try to
+ * distribute the load equally amongst all available channels.
*/
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
int cur_cpu;
struct vmbus_channel *cur_channel;
struct vmbus_channel *outgoing_channel = primary;
- int cpu_distance, new_cpu_distance;
+ int next_channel;
+ int i = 1;
if (list_empty(&primary->sc_list))
return outgoing_channel;
+ next_channel = primary->next_oc++;
+
+ if (next_channel > (primary->num_sc)) {
+ primary->next_oc = 0;
+ return outgoing_channel;
+ }
+
cur_cpu = hv_context.vp_index[get_cpu()];
put_cpu();
list_for_each_safe(cur, tmp, &primary->sc_list) {
if (cur_channel->target_vp == cur_cpu)
return cur_channel;
- cpu_distance = ((outgoing_channel->target_vp > cur_cpu) ?
- (outgoing_channel->target_vp - cur_cpu) :
- (cur_cpu - outgoing_channel->target_vp));
-
- new_cpu_distance = ((cur_channel->target_vp > cur_cpu) ?
- (cur_channel->target_vp - cur_cpu) :
- (cur_cpu - cur_channel->target_vp));
-
- if (cpu_distance < new_cpu_distance)
- continue;
+ if (i == next_channel)
+ return cur_channel;
- outgoing_channel = cur_channel;
+ i++;
}
return outgoing_channel;