1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright (c) 2009, Microsoft Corporation.
4   *
5   * Authors:
6   *   Haiyang Zhang <haiyangz@microsoft.com>
7   *   Hank Janssen  <hjanssen@microsoft.com>
8   */
9  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10  
11  #include <linux/kernel.h>
12  #include <linux/sched.h>
13  #include <linux/wait.h>
14  #include <linux/mm.h>
15  #include <linux/delay.h>
16  #include <linux/io.h>
17  #include <linux/slab.h>
18  #include <linux/netdevice.h>
19  #include <linux/if_ether.h>
20  #include <linux/vmalloc.h>
21  #include <linux/rtnetlink.h>
22  #include <linux/prefetch.h>
23  #include <linux/filter.h>
24  
25  #include <asm/sync_bitops.h>
26  #include <asm/mshyperv.h>
27  
28  #include "hyperv_net.h"
29  #include "netvsc_trace.h"
30  
31  /*
32   * Switch the data path from the synthetic interface to the VF
33   * interface.
34   */
35  int netvsc_switch_datapath(struct net_device *ndev, bool vf)
36  {
37  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
38  	struct hv_device *dev = net_device_ctx->device_ctx;
39  	struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
40  	struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
41  	int ret, retry = 0;
42  
43  	/* Block sending traffic to VF if it's about to be removed */
44  	if (!vf)
45  		net_device_ctx->data_path_is_vf = vf;
46  
47  	memset(init_pkt, 0, sizeof(struct nvsp_message));
48  	init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
49  	if (vf)
50  		init_pkt->msg.v4_msg.active_dp.active_datapath =
51  			NVSP_DATAPATH_VF;
52  	else
53  		init_pkt->msg.v4_msg.active_dp.active_datapath =
54  			NVSP_DATAPATH_SYNTHETIC;
55  
56  again:
57  	trace_nvsp_send(ndev, init_pkt);
58  
59  	ret = vmbus_sendpacket(dev->channel, init_pkt,
60  			       sizeof(struct nvsp_message),
61  			       (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
62  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
63  
64  	/* If we failed to switch to/from the VF, leave data_path_is_vf false,
65  	 * so we use the synthetic path to send data.
66  	 */
67  	if (ret) {
68  		if (ret != -EAGAIN) {
69  			netdev_err(ndev,
70  				   "Unable to send sw datapath msg, err: %d\n",
71  				   ret);
72  			return ret;
73  		}
74  
75  		if (retry++ < RETRY_MAX) {
76  			usleep_range(RETRY_US_LO, RETRY_US_HI);
77  			goto again;
78  		} else {
79  			netdev_err(
80  				ndev,
81  				"Retry failed to send sw datapath msg, err: %d\n",
82  				ret);
83  			return ret;
84  		}
85  	}
86  
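	/* Wait for the host to complete the switch request before
	 * recording the new data path.
	 */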
87  	wait_for_completion(&nv_dev->channel_init_wait);
88  	net_device_ctx->data_path_is_vf = vf;
89  
90  	return 0;
91  }
92  
93  /* Worker to set up subchannels on initial setup.
94   * The initial hotplug event occurs in softirq context
95   * and can't wait for the channels.
96   */
97  static void netvsc_subchan_work(struct work_struct *w)
98  {
99  	struct netvsc_device *nvdev =
100  		container_of(w, struct netvsc_device, subchan_work);
101  	struct rndis_device *rdev;
102  	int i, ret;
103  
104  	/* Avoid deadlock with device removal already under RTNL */
105  	if (!rtnl_trylock()) {
106  		schedule_work(w);
107  		return;
108  	}
109  
110  	rdev = nvdev->extension;
111  	if (rdev) {
112  		ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
113  		if (ret == 0) {
114  			netif_device_attach(rdev->ndev);
115  		} else {
116  			/* fallback to only primary channel */
117  			for (i = 1; i < nvdev->num_chn; i++)
118  				netif_napi_del(&nvdev->chan_table[i].napi);
119  
120  			nvdev->max_chn = 1;
121  			nvdev->num_chn = 1;
122  		}
123  	}
124  
125  	rtnl_unlock();
126  }
127  
128  static struct netvsc_device *alloc_net_device(void)
129  {
130  	struct netvsc_device *net_device;
131  
132  	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
133  	if (!net_device)
134  		return NULL;
135  
136  	init_waitqueue_head(&net_device->wait_drain);
137  	net_device->destroy = false;
138  	net_device->tx_disable = true;
139  
140  	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
141  	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
142  
143  	init_completion(&net_device->channel_init_wait);
144  	init_waitqueue_head(&net_device->subchan_open);
145  	INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
146  
147  	return net_device;
148  }
149  
150  static void free_netvsc_device(struct rcu_head *head)
151  {
152  	struct netvsc_device *nvdev
153  		= container_of(head, struct netvsc_device, rcu);
154  	int i;
155  
156  	kfree(nvdev->extension);
157  
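	/* If re-encrypting a buffer failed during GPADL teardown, it may
	 * still be visible to the host (decrypted); leak it rather than
	 * free memory the host could still access.
	 */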
158  	if (!nvdev->recv_buf_gpadl_handle.decrypted)
159  		vfree(nvdev->recv_buf);
160  	if (!nvdev->send_buf_gpadl_handle.decrypted)
161  		vfree(nvdev->send_buf);
162  	bitmap_free(nvdev->send_section_map);
163  
164  	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
165  		xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
166  		kfree(nvdev->chan_table[i].recv_buf);
167  		vfree(nvdev->chan_table[i].mrc.slots);
168  	}
169  
170  	kfree(nvdev);
171  }
172  
173  static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
174  {
175  	call_rcu(&nvdev->rcu, free_netvsc_device);
176  }
177  
178  static void netvsc_revoke_recv_buf(struct hv_device *device,
179  				   struct netvsc_device *net_device,
180  				   struct net_device *ndev)
181  {
182  	struct nvsp_message *revoke_packet;
183  	int ret;
184  
185  	/*
186  	 * If we got a section count, it means we received a
187  	 * SendReceiveBufferComplete msg (i.e. we sent a
188  	 * NvspMessage1TypeSendReceiveBuffer msg), so we need
189  	 * to send a revoke msg here.
190  	 */
191  	if (net_device->recv_section_cnt) {
192  		/* Send the revoke receive buffer */
193  		revoke_packet = &net_device->revoke_packet;
194  		memset(revoke_packet, 0, sizeof(struct nvsp_message));
195  
196  		revoke_packet->hdr.msg_type =
197  			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
198  		revoke_packet->msg.v1_msg.revoke_recv_buf.id =
199  			NETVSC_RECEIVE_BUFFER_ID;
200  
201  		trace_nvsp_send(ndev, revoke_packet);
202  
203  		ret = vmbus_sendpacket(device->channel,
204  				       revoke_packet,
205  				       sizeof(struct nvsp_message),
206  				       VMBUS_RQST_ID_NO_RESPONSE,
207  				       VM_PKT_DATA_INBAND, 0);
208  		/* If the failure is because the channel has been rescinded,
209  		 * ignore it since we cannot send on a rescinded channel.
210  		 * This allows us to clean up properly even when the
211  		 * channel is rescinded.
212  		 */
213  		if (device->channel->rescind)
214  			ret = 0;
215  		/*
216  		 * If we failed here, we might as well return and
217  		 * have a leak rather than continue and risk a bugcheck
218  		 */
219  		if (ret != 0) {
220  			netdev_err(ndev,
221  				   "unable to send revoke receive buffer to netvsp\n");
222  			return;
223  		}
224  		net_device->recv_section_cnt = 0;
225  	}
226  }
227  
228  static void netvsc_revoke_send_buf(struct hv_device *device,
229  				   struct netvsc_device *net_device,
230  				   struct net_device *ndev)
231  {
232  	struct nvsp_message *revoke_packet;
233  	int ret;
234  
235  	/* Deal with the send buffer we may have set up.
236  	 * If we got a send section size, it means we received a
237  	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
238  	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg), so we need
239  	 * to send a revoke msg here.
240  	 */
241  	if (net_device->send_section_cnt) {
242  		/* Send the revoke send buffer */
243  		revoke_packet = &net_device->revoke_packet;
244  		memset(revoke_packet, 0, sizeof(struct nvsp_message));
245  
246  		revoke_packet->hdr.msg_type =
247  			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
248  		revoke_packet->msg.v1_msg.revoke_send_buf.id =
249  			NETVSC_SEND_BUFFER_ID;
250  
251  		trace_nvsp_send(ndev, revoke_packet);
252  
253  		ret = vmbus_sendpacket(device->channel,
254  				       revoke_packet,
255  				       sizeof(struct nvsp_message),
256  				       VMBUS_RQST_ID_NO_RESPONSE,
257  				       VM_PKT_DATA_INBAND, 0);
258  
259  		/* If the failure is because the channel has been rescinded,
260  		 * ignore it since we cannot send on a rescinded channel.
261  		 * This allows us to clean up properly even when the
262  		 * channel is rescinded.
263  		 */
264  		if (device->channel->rescind)
265  			ret = 0;
266  
267  		/* If we failed here, we might as well return and
268  		 * have a leak rather than continue and risk a bugcheck
269  		 */
270  		if (ret != 0) {
271  			netdev_err(ndev,
272  				   "unable to send revoke send buffer to netvsp\n");
273  			return;
274  		}
275  		net_device->send_section_cnt = 0;
276  	}
277  }
278  
279  static void netvsc_teardown_recv_gpadl(struct hv_device *device,
280  				       struct netvsc_device *net_device,
281  				       struct net_device *ndev)
282  {
283  	int ret;
284  
285  	if (net_device->recv_buf_gpadl_handle.gpadl_handle) {
286  		ret = vmbus_teardown_gpadl(device->channel,
287  					   &net_device->recv_buf_gpadl_handle);
288  
289  		/* If we failed here, we might as well return and have a leak
290  		 * rather than continue and risk a bugcheck
291  		 */
292  		if (ret != 0) {
293  			netdev_err(ndev,
294  				   "unable to teardown receive buffer's gpadl\n");
295  			return;
296  		}
297  	}
298  }
299  
300  static void netvsc_teardown_send_gpadl(struct hv_device *device,
301  				       struct netvsc_device *net_device,
302  				       struct net_device *ndev)
303  {
304  	int ret;
305  
306  	if (net_device->send_buf_gpadl_handle.gpadl_handle) {
307  		ret = vmbus_teardown_gpadl(device->channel,
308  					   &net_device->send_buf_gpadl_handle);
309  
310  		/* If we failed here, we might as well return and have a leak
311  		 * rather than continue and risk a bugcheck
312  		 */
313  		if (ret != 0) {
314  			netdev_err(ndev,
315  				   "unable to teardown send buffer's gpadl\n");
316  			return;
317  		}
318  	}
319  }
320  
321  int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
322  {
323  	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
324  	int node = cpu_to_node(nvchan->channel->target_cpu);
325  	size_t size;
326  
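	/* One slot per possible outstanding completion; prefer memory on the
	 * channel's NUMA node and fall back to any node if that fails.
	 */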
327  	size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
328  	nvchan->mrc.slots = vzalloc_node(size, node);
329  	if (!nvchan->mrc.slots)
330  		nvchan->mrc.slots = vzalloc(size);
331  
332  	return nvchan->mrc.slots ? 0 : -ENOMEM;
333  }
334  
335  static int netvsc_init_buf(struct hv_device *device,
336  			   struct netvsc_device *net_device,
337  			   const struct netvsc_device_info *device_info)
338  {
339  	struct nvsp_1_message_send_receive_buffer_complete *resp;
340  	struct net_device *ndev = hv_get_drvdata(device);
341  	struct nvsp_message *init_packet;
342  	unsigned int buf_size;
343  	int i, ret = 0;
344  
345  	/* Get receive buffer area. */
346  	buf_size = device_info->recv_sections * device_info->recv_section_size;
347  	buf_size = roundup(buf_size, PAGE_SIZE);
348  
349  	/* Legacy hosts only allow a smaller receive buffer */
350  	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
351  		buf_size = min_t(unsigned int, buf_size,
352  				 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
353  
354  	net_device->recv_buf = vzalloc(buf_size);
355  	if (!net_device->recv_buf) {
356  		netdev_err(ndev,
357  			   "unable to allocate receive buffer of size %u\n",
358  			   buf_size);
359  		ret = -ENOMEM;
360  		goto cleanup;
361  	}
362  
363  	net_device->recv_buf_size = buf_size;
364  
365  	/*
366  	 * Establish the gpadl handle for this buffer on this
367  	 * channel.  Note: This call uses the vmbus connection rather
368  	 * than the channel to establish the gpadl handle.
369  	 */
370  	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
371  				    buf_size,
372  				    &net_device->recv_buf_gpadl_handle);
373  	if (ret != 0) {
374  		netdev_err(ndev,
375  			"unable to establish receive buffer's gpadl\n");
376  		goto cleanup;
377  	}
378  
379  	/* Notify the NetVsp of the gpadl handle */
380  	init_packet = &net_device->channel_init_pkt;
381  	memset(init_packet, 0, sizeof(struct nvsp_message));
382  	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
383  	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
384  		net_device->recv_buf_gpadl_handle.gpadl_handle;
385  	init_packet->msg.v1_msg.send_recv_buf.id =
386  		NETVSC_RECEIVE_BUFFER_ID;
387  
388  	trace_nvsp_send(ndev, init_packet);
389  
390  	/* Send the gpadl notification request */
391  	ret = vmbus_sendpacket(device->channel, init_packet,
392  			       sizeof(struct nvsp_message),
393  			       (unsigned long)init_packet,
394  			       VM_PKT_DATA_INBAND,
395  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
396  	if (ret != 0) {
397  		netdev_err(ndev,
398  			"unable to send receive buffer's gpadl to netvsp\n");
399  		goto cleanup;
400  	}
401  
402  	wait_for_completion(&net_device->channel_init_wait);
403  
404  	/* Check the response */
405  	resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
406  	if (resp->status != NVSP_STAT_SUCCESS) {
407  		netdev_err(ndev,
408  			   "Unable to complete receive buffer initialization with NetVsp - status %d\n",
409  			   resp->status);
410  		ret = -EINVAL;
411  		goto cleanup;
412  	}
413  
414  	/* Parse the response */
415  	netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
416  		   resp->num_sections, resp->sections[0].sub_alloc_size,
417  		   resp->sections[0].num_sub_allocs);
418  
419  	/* There should only be one section for the entire receive buffer */
420  	if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
421  		ret = -EINVAL;
422  		goto cleanup;
423  	}
424  
425  	net_device->recv_section_size = resp->sections[0].sub_alloc_size;
426  	net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
427  
428  	/* Ensure buffer will not overflow */
429  	if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size *
430  	    (u64)net_device->recv_section_cnt > (u64)buf_size) {
431  		netdev_err(ndev, "invalid recv_section_size %u\n",
432  			   net_device->recv_section_size);
433  		ret = -EINVAL;
434  		goto cleanup;
435  	}
436  
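	/* Per-channel staging buffer, one receive section in size, that
	 * incoming packet data is copied into (see netvsc_receive()).
	 */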
437  	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
438  		struct netvsc_channel *nvchan = &net_device->chan_table[i];
439  
440  		nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
441  		if (nvchan->recv_buf == NULL) {
442  			ret = -ENOMEM;
443  			goto cleanup;
444  		}
445  	}
446  
447  	/* Setup receive completion ring.
448  	 * Add 1 to the recv_section_cnt because at least one entry in a
449  	 * ring buffer has to be empty.
450  	 */
451  	net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
452  	ret = netvsc_alloc_recv_comp_ring(net_device, 0);
453  	if (ret)
454  		goto cleanup;
455  
456  	/* Now setup the send buffer. */
457  	buf_size = device_info->send_sections * device_info->send_section_size;
458  	buf_size = round_up(buf_size, PAGE_SIZE);
459  
460  	net_device->send_buf = vzalloc(buf_size);
461  	if (!net_device->send_buf) {
462  		netdev_err(ndev, "unable to allocate send buffer of size %u\n",
463  			   buf_size);
464  		ret = -ENOMEM;
465  		goto cleanup;
466  	}
467  	net_device->send_buf_size = buf_size;
468  
469  	/* Establish the gpadl handle for this buffer on this
470  	 * channel.  Note: This call uses the vmbus connection rather
471  	 * than the channel to establish the gpadl handle.
472  	 */
473  	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
474  				    buf_size,
475  				    &net_device->send_buf_gpadl_handle);
476  	if (ret != 0) {
477  		netdev_err(ndev,
478  			   "unable to establish send buffer's gpadl\n");
479  		goto cleanup;
480  	}
481  
482  	/* Notify the NetVsp of the gpadl handle */
483  	init_packet = &net_device->channel_init_pkt;
484  	memset(init_packet, 0, sizeof(struct nvsp_message));
485  	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
486  	init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
487  		net_device->send_buf_gpadl_handle.gpadl_handle;
488  	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
489  
490  	trace_nvsp_send(ndev, init_packet);
491  
492  	/* Send the gpadl notification request */
493  	ret = vmbus_sendpacket(device->channel, init_packet,
494  			       sizeof(struct nvsp_message),
495  			       (unsigned long)init_packet,
496  			       VM_PKT_DATA_INBAND,
497  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
498  	if (ret != 0) {
499  		netdev_err(ndev,
500  			   "unable to send send buffer's gpadl to netvsp\n");
501  		goto cleanup;
502  	}
503  
504  	wait_for_completion(&net_device->channel_init_wait);
505  
506  	/* Check the response */
507  	if (init_packet->msg.v1_msg.
508  	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
509  		netdev_err(ndev,
510  			   "Unable to complete send buffer initialization with NetVsp - status %d\n",
511  			   init_packet->msg.v1_msg.
512  			   send_send_buf_complete.status);
513  		ret = -EINVAL;
514  		goto cleanup;
515  	}
516  
517  	/* Parse the response */
518  	net_device->send_section_size =
519  		init_packet->msg.v1_msg.send_send_buf_complete.section_size;
520  	if (net_device->send_section_size < NETVSC_MTU_MIN) {
521  		netdev_err(ndev, "invalid send_section_size %u\n",
522  			   net_device->send_section_size);
523  		ret = -EINVAL;
524  		goto cleanup;
525  	}
526  
527  	/* Section count is simply the size divided by the section size. */
528  	net_device->send_section_cnt = buf_size / net_device->send_section_size;
529  
530  	netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
531  		   net_device->send_section_size, net_device->send_section_cnt);
532  
533  	/* Setup state for managing the send buffer. */
534  	net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
535  						     GFP_KERNEL);
536  	if (!net_device->send_section_map) {
537  		ret = -ENOMEM;
538  		goto cleanup;
539  	}
540  
541  	goto exit;
542  
543  cleanup:
544  	netvsc_revoke_recv_buf(device, net_device, ndev);
545  	netvsc_revoke_send_buf(device, net_device, ndev);
546  	netvsc_teardown_recv_gpadl(device, net_device, ndev);
547  	netvsc_teardown_send_gpadl(device, net_device, ndev);
548  
549  exit:
550  	return ret;
551  }
552  
553  /* Negotiate NVSP protocol version */
554  static int negotiate_nvsp_ver(struct hv_device *device,
555  			      struct netvsc_device *net_device,
556  			      struct nvsp_message *init_packet,
557  			      u32 nvsp_ver)
558  {
559  	struct net_device *ndev = hv_get_drvdata(device);
560  	int ret;
561  
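	/* Propose a single version (min == max == nvsp_ver); the host either
	 * accepts it or the caller falls back to the next lower version.
	 */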
562  	memset(init_packet, 0, sizeof(struct nvsp_message));
563  	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
564  	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
565  	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
566  	trace_nvsp_send(ndev, init_packet);
567  
568  	/* Send the init request */
569  	ret = vmbus_sendpacket(device->channel, init_packet,
570  			       sizeof(struct nvsp_message),
571  			       (unsigned long)init_packet,
572  			       VM_PKT_DATA_INBAND,
573  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
574  
575  	if (ret != 0)
576  		return ret;
577  
578  	wait_for_completion(&net_device->channel_init_wait);
579  
580  	if (init_packet->msg.init_msg.init_complete.status !=
581  	    NVSP_STAT_SUCCESS)
582  		return -EINVAL;
583  
584  	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
585  		return 0;
586  
587  	/* NVSPv2 or later: Send NDIS config */
588  	memset(init_packet, 0, sizeof(struct nvsp_message));
589  	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
590  	init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
591  	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
592  
593  	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
594  		if (hv_is_isolation_supported())
595  			netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n");
596  		else
597  			init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
598  
599  		/* Teaming bit is needed to receive link speed updates */
600  		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
601  	}
602  
603  	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
604  		init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
605  
606  	trace_nvsp_send(ndev, init_packet);
607  
608  	ret = vmbus_sendpacket(device->channel, init_packet,
609  				sizeof(struct nvsp_message),
610  				VMBUS_RQST_ID_NO_RESPONSE,
611  				VM_PKT_DATA_INBAND, 0);
612  
613  	return ret;
614  }
615  
616  static int netvsc_connect_vsp(struct hv_device *device,
617  			      struct netvsc_device *net_device,
618  			      const struct netvsc_device_info *device_info)
619  {
620  	struct net_device *ndev = hv_get_drvdata(device);
621  	static const u32 ver_list[] = {
622  		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
623  		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
624  		NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
625  	};
626  	struct nvsp_message *init_packet;
627  	int ndis_version, i, ret;
628  
629  	init_packet = &net_device->channel_init_pkt;
630  
631  	/* Negotiate the latest NVSP protocol supported */
632  	for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
633  		if (negotiate_nvsp_ver(device, net_device, init_packet,
634  				       ver_list[i])  == 0) {
635  			net_device->nvsp_version = ver_list[i];
636  			break;
637  		}
638  
639  	if (i < 0) {
640  		ret = -EPROTO;
641  		goto cleanup;
642  	}
643  
644  	if (hv_is_isolation_supported() && net_device->nvsp_version < NVSP_PROTOCOL_VERSION_61) {
645  		netdev_err(ndev, "Invalid NVSP version 0x%x (expected >= 0x%x) from the host supporting isolation\n",
646  			   net_device->nvsp_version, NVSP_PROTOCOL_VERSION_61);
647  		ret = -EPROTO;
648  		goto cleanup;
649  	}
650  
651  	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
652  
653  	/* Send the ndis version */
654  	memset(init_packet, 0, sizeof(struct nvsp_message));
655  
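	/* Report NDIS 6.1 to hosts at NVSP 4 or earlier, NDIS 6.30 otherwise */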
656  	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
657  		ndis_version = 0x00060001;
658  	else
659  		ndis_version = 0x0006001e;
660  
661  	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
662  	init_packet->msg.v1_msg.
663  		send_ndis_ver.ndis_major_ver =
664  				(ndis_version & 0xFFFF0000) >> 16;
665  	init_packet->msg.v1_msg.
666  		send_ndis_ver.ndis_minor_ver =
667  				ndis_version & 0xFFFF;
668  
669  	trace_nvsp_send(ndev, init_packet);
670  
671  	/* Send the init request */
672  	ret = vmbus_sendpacket(device->channel, init_packet,
673  				sizeof(struct nvsp_message),
674  				VMBUS_RQST_ID_NO_RESPONSE,
675  				VM_PKT_DATA_INBAND, 0);
676  	if (ret != 0)
677  		goto cleanup;
678  
679  
680  	ret = netvsc_init_buf(device, net_device, device_info);
681  
682  cleanup:
683  	return ret;
684  }
685  
686  /*
687   * netvsc_device_remove - Callback when the root bus device is removed
688   */
689  void netvsc_device_remove(struct hv_device *device)
690  {
691  	struct net_device *ndev = hv_get_drvdata(device);
692  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
693  	struct netvsc_device *net_device
694  		= rtnl_dereference(net_device_ctx->nvdev);
695  	int i;
696  
697  	/*
698  	 * Revoke receive buffer. If host is pre-Win2016 then tear down
699  	 * receive buffer GPADL. Do the same for send buffer.
700  	 */
701  	netvsc_revoke_recv_buf(device, net_device, ndev);
702  	if (vmbus_proto_version < VERSION_WIN10)
703  		netvsc_teardown_recv_gpadl(device, net_device, ndev);
704  
705  	netvsc_revoke_send_buf(device, net_device, ndev);
706  	if (vmbus_proto_version < VERSION_WIN10)
707  		netvsc_teardown_send_gpadl(device, net_device, ndev);
708  
709  	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
710  
711  	/* Disable NAPI and disassociate its context from the device. */
712  	for (i = 0; i < net_device->num_chn; i++) {
713  		/* See also vmbus_reset_channel_cb(). */
714  		/* only disable enabled NAPI channel */
715  		if (i < ndev->real_num_rx_queues) {
716  			netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_TX,
717  					     NULL);
718  			netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_RX,
719  					     NULL);
720  			napi_disable(&net_device->chan_table[i].napi);
721  		}
722  
723  		netif_napi_del(&net_device->chan_table[i].napi);
724  	}
725  
726  	/*
727  	 * At this point, no one should be accessing net_device
728  	 * except in here
729  	 */
730  	netdev_dbg(ndev, "net device safe to remove\n");
731  
732  	/* Now, we can close the channel safely */
733  	vmbus_close(device->channel);
734  
735  	/*
736  	 * If host is Win2016 or higher then we do the GPADL tear down
737  	 * here after VMBus is closed.
738  	 */
739  	if (vmbus_proto_version >= VERSION_WIN10) {
740  		netvsc_teardown_recv_gpadl(device, net_device, ndev);
741  		netvsc_teardown_send_gpadl(device, net_device, ndev);
742  	}
743  
744  	/* Release all resources */
745  	free_netvsc_device_rcu(net_device);
746  }
747  
748  #define RING_AVAIL_PERCENT_HIWATER 20
749  #define RING_AVAIL_PERCENT_LOWATER 10
750  
751  static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
752  					 u32 index)
753  {
754  	sync_change_bit(index, net_device->send_section_map);
755  }
756  
757  static void netvsc_send_tx_complete(struct net_device *ndev,
758  				    struct netvsc_device *net_device,
759  				    struct vmbus_channel *channel,
760  				    const struct vmpacket_descriptor *desc,
761  				    int budget)
762  {
763  	struct net_device_context *ndev_ctx = netdev_priv(ndev);
764  	struct sk_buff *skb;
765  	u16 q_idx = 0;
766  	int queue_sends;
767  	u64 cmd_rqst;
768  
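	/* The transaction ID maps back to the request id we passed to
	 * vmbus_sendpacket(); for data packets that is the skb pointer
	 * (see netvsc_send_pkt()).
	 */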
769  	cmd_rqst = channel->request_addr_callback(channel, desc->trans_id);
770  	if (cmd_rqst == VMBUS_RQST_ERROR) {
771  		netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
772  		return;
773  	}
774  
775  	skb = (struct sk_buff *)(unsigned long)cmd_rqst;
776  
777  	/* Notify the layer above us */
778  	if (likely(skb)) {
779  		struct hv_netvsc_packet *packet
780  			= (struct hv_netvsc_packet *)skb->cb;
781  		u32 send_index = packet->send_buf_index;
782  		struct netvsc_stats_tx *tx_stats;
783  
784  		if (send_index != NETVSC_INVALID_INDEX)
785  			netvsc_free_send_slot(net_device, send_index);
786  		q_idx = packet->q_idx;
787  
788  		tx_stats = &net_device->chan_table[q_idx].tx_stats;
789  
790  		u64_stats_update_begin(&tx_stats->syncp);
791  		tx_stats->packets += packet->total_packets;
792  		tx_stats->bytes += packet->total_bytes;
793  		u64_stats_update_end(&tx_stats->syncp);
794  
795  		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
796  		napi_consume_skb(skb, budget);
797  	}
798  
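	/* If the device is being torn down, wake the drain waiter once this
	 * queue has no sends outstanding; otherwise restart the queue when
	 * enough ring space is available again.
	 */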
799  	queue_sends =
800  		atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
801  
802  	if (unlikely(net_device->destroy)) {
803  		if (queue_sends == 0)
804  			wake_up(&net_device->wait_drain);
805  	} else {
806  		struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
807  
808  		if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
809  		    (hv_get_avail_to_write_percent(&channel->outbound) >
810  		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
811  			netif_tx_wake_queue(txq);
812  			ndev_ctx->eth_stats.wake_queue++;
813  		}
814  	}
815  }
816  
817  static void netvsc_send_completion(struct net_device *ndev,
818  				   struct netvsc_device *net_device,
819  				   struct vmbus_channel *incoming_channel,
820  				   const struct vmpacket_descriptor *desc,
821  				   int budget)
822  {
823  	const struct nvsp_message *nvsp_packet;
824  	u32 msglen = hv_pkt_datalen(desc);
825  	struct nvsp_message *pkt_rqst;
826  	u64 cmd_rqst;
827  	u32 status;
828  
829  	/* First check if this is a VMBUS completion without data payload */
830  	if (!msglen) {
831  		cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
832  								   desc->trans_id);
833  		if (cmd_rqst == VMBUS_RQST_ERROR) {
834  			netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
835  			return;
836  		}
837  
838  		pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
839  		switch (pkt_rqst->hdr.msg_type) {
840  		case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
841  			complete(&net_device->channel_init_wait);
842  			break;
843  
844  		default:
845  			netdev_err(ndev, "Unexpected VMBUS completion!!\n");
846  		}
847  		return;
848  	}
849  
850  	/* Ensure packet is big enough to read header fields */
851  	if (msglen < sizeof(struct nvsp_message_header)) {
852  		netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
853  		return;
854  	}
855  
856  	nvsp_packet = hv_pkt_data(desc);
857  	switch (nvsp_packet->hdr.msg_type) {
858  	case NVSP_MSG_TYPE_INIT_COMPLETE:
859  		if (msglen < sizeof(struct nvsp_message_header) +
860  				sizeof(struct nvsp_message_init_complete)) {
861  			netdev_err(ndev, "nvsp_msg length too small: %u\n",
862  				   msglen);
863  			return;
864  		}
865  		break;
866  
867  	case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
868  		if (msglen < sizeof(struct nvsp_message_header) +
869  				struct_size_t(struct nvsp_1_message_send_receive_buffer_complete,
870  					      sections, 1)) {
871  			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
872  				   msglen);
873  			return;
874  		}
875  		break;
876  
877  	case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
878  		if (msglen < sizeof(struct nvsp_message_header) +
879  				sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
880  			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
881  				   msglen);
882  			return;
883  		}
884  		break;
885  
886  	case NVSP_MSG5_TYPE_SUBCHANNEL:
887  		if (msglen < sizeof(struct nvsp_message_header) +
888  				sizeof(struct nvsp_5_subchannel_complete)) {
889  			netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
890  				   msglen);
891  			return;
892  		}
893  		break;
894  
895  	case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
896  		if (msglen < sizeof(struct nvsp_message_header) +
897  		    sizeof(struct nvsp_1_message_send_rndis_packet_complete)) {
898  			if (net_ratelimit())
899  				netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
900  					   msglen);
901  			return;
902  		}
903  
904  		/* If status indicates an error, output a message so we know
905  		 * there's a problem. But process the completion anyway so the
906  		 * resources are released.
907  		 */
908  		status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status;
909  		if (status != NVSP_STAT_SUCCESS && net_ratelimit())
910  			netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
911  				   status);
912  
913  		netvsc_send_tx_complete(ndev, net_device, incoming_channel,
914  					desc, budget);
915  		return;
916  
917  	default:
918  		netdev_err(ndev,
919  			   "Unknown send completion type %d received!!\n",
920  			   nvsp_packet->hdr.msg_type);
921  		return;
922  	}
923  
924  	/* Copy the response back */
925  	memcpy(&net_device->channel_init_pkt, nvsp_packet,
926  	       sizeof(struct nvsp_message));
927  	complete(&net_device->channel_init_wait);
928  }
929  
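/* Find and claim a free send-buffer section with an atomic test-and-set;
 * returns NETVSC_INVALID_INDEX when all sections are in use.
 */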
930  static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
931  {
932  	unsigned long *map_addr = net_device->send_section_map;
933  	unsigned int i;
934  
935  	for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
936  		if (sync_test_and_set_bit(i, map_addr) == 0)
937  			return i;
938  	}
939  
940  	return NETVSC_INVALID_INDEX;
941  }
942  
943  static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
944  				    unsigned int section_index,
945  				    u32 pend_size,
946  				    struct hv_netvsc_packet *packet,
947  				    struct rndis_message *rndis_msg,
948  				    struct hv_page_buffer *pb,
949  				    bool xmit_more)
950  {
951  	char *start = net_device->send_buf;
952  	char *dest = start + (section_index * net_device->send_section_size)
953  		     + pend_size;
954  	int i;
955  	u32 padding = 0;
956  	u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
957  		packet->page_buf_cnt;
958  	u32 remain;
959  
960  	/* Add padding */
961  	remain = packet->total_data_buflen & (net_device->pkt_align - 1);
962  	if (xmit_more && remain) {
963  		padding = net_device->pkt_align - remain;
964  		rndis_msg->msg_len += padding;
965  		packet->total_data_buflen += padding;
966  	}
967  
968  	for (i = 0; i < page_count; i++) {
969  		char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
970  		u32 offset = pb[i].offset;
971  		u32 len = pb[i].len;
972  
973  		memcpy(dest, (src + offset), len);
974  		dest += len;
975  	}
976  
977  	if (padding)
978  		memset(dest, 0, padding);
979  }
980  
981  void netvsc_dma_unmap(struct hv_device *hv_dev,
982  		      struct hv_netvsc_packet *packet)
983  {
984  	int i;
985  
986  	if (!hv_is_isolation_supported())
987  		return;
988  
989  	if (!packet->dma_range)
990  		return;
991  
992  	for (i = 0; i < packet->page_buf_cnt; i++)
993  		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
994  				 packet->dma_range[i].mapping_size,
995  				 DMA_TO_DEVICE);
996  
997  	kfree(packet->dma_range);
998  }
999  
1000  /* netvsc_dma_map - Map the data pages of a packet sent by
1001   * vmbus_sendpacket_pagebuffer() through the swiotlb bounce buffer
1002   * in an Isolation VM.
1003   *
1004   * In an isolation VM, the netvsc send buffer has been marked visible
1005   * to the host, so data copied into the send buffer doesn't need a
1006   * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
1007   * may not be copied into the send buffer, so those pages need to be
1008   * mapped through the swiotlb bounce buffer, which is what
1009   * netvsc_dma_map() does. The pfns in struct hv_page_buffer are
1010   * converted to the bounce buffer's pfns. The loop here is necessary
1011   * because the entries in the page buffer array are not necessarily
1012   * full pages of data. Each entry has its own offset and len, which
1013   * may be non-zero even for entries in the middle of the array, and
1014   * the entries are not physically contiguous. So each entry must be
1015   * mapped individually rather than as one contiguous unit, which is
1016   * why dma_map_sg() is not used here.
1017   */
1018  static int netvsc_dma_map(struct hv_device *hv_dev,
1019  			  struct hv_netvsc_packet *packet,
1020  			  struct hv_page_buffer *pb)
1021  {
1022  	u32 page_count = packet->page_buf_cnt;
1023  	dma_addr_t dma;
1024  	int i;
1025  
1026  	if (!hv_is_isolation_supported())
1027  		return 0;
1028  
1029  	packet->dma_range = kcalloc(page_count,
1030  				    sizeof(*packet->dma_range),
1031  				    GFP_ATOMIC);
1032  	if (!packet->dma_range)
1033  		return -ENOMEM;
1034  
1035  	for (i = 0; i < page_count; i++) {
1036  		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
1037  					 + pb[i].offset);
1038  		u32 len = pb[i].len;
1039  
1040  		dma = dma_map_single(&hv_dev->device, src, len,
1041  				     DMA_TO_DEVICE);
1042  		if (dma_mapping_error(&hv_dev->device, dma)) {
1043  			kfree(packet->dma_range);
1044  			return -ENOMEM;
1045  		}
1046  
1047  		/* pb[].offset and pb[].len are not changed during dma mapping
1048  		 * and so are not reassigned.
1049  		 */
1050  		packet->dma_range[i].dma = dma;
1051  		packet->dma_range[i].mapping_size = len;
1052  		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
1053  	}
1054  
1055  	return 0;
1056  }
1057  
1058  static inline int netvsc_send_pkt(
1059  	struct hv_device *device,
1060  	struct hv_netvsc_packet *packet,
1061  	struct netvsc_device *net_device,
1062  	struct hv_page_buffer *pb,
1063  	struct sk_buff *skb)
1064  {
1065  	struct nvsp_message nvmsg;
1066  	struct nvsp_1_message_send_rndis_packet *rpkt =
1067  		&nvmsg.msg.v1_msg.send_rndis_pkt;
1068  	struct netvsc_channel * const nvchan =
1069  		&net_device->chan_table[packet->q_idx];
1070  	struct vmbus_channel *out_channel = nvchan->channel;
1071  	struct net_device *ndev = hv_get_drvdata(device);
1072  	struct net_device_context *ndev_ctx = netdev_priv(ndev);
1073  	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
1074  	u64 req_id;
1075  	int ret;
1076  	u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
1077  
1078  	memset(&nvmsg, 0, sizeof(struct nvsp_message));
1079  	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
1080  	if (skb)
1081  		rpkt->channel_type = 0;		/* 0 is RMC_DATA */
1082  	else
1083  		rpkt->channel_type = 1;		/* 1 is RMC_CONTROL */
1084  
1085  	rpkt->send_buf_section_index = packet->send_buf_index;
1086  	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
1087  		rpkt->send_buf_section_size = 0;
1088  	else
1089  		rpkt->send_buf_section_size = packet->total_data_buflen;
1090  
1091  	req_id = (ulong)skb;
1092  
1093  	if (out_channel->rescind)
1094  		return -ENODEV;
1095  
1096  	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
1097  
1098  	packet->dma_range = NULL;
1099  	if (packet->page_buf_cnt) {
1100  		if (packet->cp_partial)
1101  			pb += packet->rmsg_pgcnt;
1102  
1103  		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
1104  		if (ret) {
1105  			ret = -EAGAIN;
1106  			goto exit;
1107  		}
1108  
1109  		ret = vmbus_sendpacket_pagebuffer(out_channel,
1110  						  pb, packet->page_buf_cnt,
1111  						  &nvmsg, sizeof(nvmsg),
1112  						  req_id);
1113  
1114  		if (ret)
1115  			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
1116  	} else {
1117  		ret = vmbus_sendpacket(out_channel,
1118  				       &nvmsg, sizeof(nvmsg),
1119  				       req_id, VM_PKT_DATA_INBAND,
1120  				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
1121  	}
1122  
1123  exit:
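	/* Flow control: stop the queue when the ring is nearly full or the
	 * send returned -EAGAIN; if nothing is outstanding, wake the queue
	 * right away and report -ENOSPC instead of -EAGAIN.
	 */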
1124  	if (ret == 0) {
1125  		atomic_inc_return(&nvchan->queue_sends);
1126  
1127  		if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
1128  			netif_tx_stop_queue(txq);
1129  			ndev_ctx->eth_stats.stop_queue++;
1130  		}
1131  	} else if (ret == -EAGAIN) {
1132  		netif_tx_stop_queue(txq);
1133  		ndev_ctx->eth_stats.stop_queue++;
1134  	} else {
1135  		netdev_err(ndev,
1136  			   "Unable to send packet pages %u len %u, ret %d\n",
1137  			   packet->page_buf_cnt, packet->total_data_buflen,
1138  			   ret);
1139  	}
1140  
1141  	if (netif_tx_queue_stopped(txq) &&
1142  	    atomic_read(&nvchan->queue_sends) < 1 &&
1143  	    !net_device->tx_disable) {
1144  		netif_tx_wake_queue(txq);
1145  		ndev_ctx->eth_stats.wake_queue++;
1146  		if (ret == -EAGAIN)
1147  			ret = -ENOSPC;
1148  	}
1149  
1150  	return ret;
1151  }
1152  
1153  /* Move packet out of multi send data (msd), and clear msd */
1154  static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
1155  				struct sk_buff **msd_skb,
1156  				struct multi_send_data *msdp)
1157  {
1158  	*msd_skb = msdp->skb;
1159  	*msd_send = msdp->pkt;
1160  	msdp->skb = NULL;
1161  	msdp->pkt = NULL;
1162  	msdp->count = 0;
1163  }
1164  
1165  /* RCU already held by caller */
1166  /* Batching/bouncing logic is designed to attempt to optimize
1167   * performance.
1168   *
1169   * For small, non-LSO packets we copy the packet to a send buffer
1170   * which is pre-registered with the Hyper-V side. This enables the
1171   * hypervisor to avoid remapping the aperture to access the packet
1172   * descriptor and data.
1173   *
1174   * If we already started using a buffer and the netdev is transmitting
1175   * a burst of packets, keep on copying into the buffer until it is
1176   * full or we are done collecting a burst. If there is an existing
1177   * buffer with space for the RNDIS descriptor but not the packet, copy
1178   * the RNDIS descriptor to the buffer, keeping the packet in place.
1179   *
1180   * If we do batching and send more than one packet using a single
1181   * NetVSC message, free the SKBs of the packets copied, except for the
1182   * last packet. This is done to streamline the handling of the case
1183   * where the last packet only had the RNDIS descriptor copied to the
1184   * send buffer, with the data pointers included in the NetVSC message.
1185   */
1186  int netvsc_send(struct net_device *ndev,
1187  		struct hv_netvsc_packet *packet,
1188  		struct rndis_message *rndis_msg,
1189  		struct hv_page_buffer *pb,
1190  		struct sk_buff *skb,
1191  		bool xdp_tx)
1192  {
1193  	struct net_device_context *ndev_ctx = netdev_priv(ndev);
1194  	struct netvsc_device *net_device
1195  		= rcu_dereference_bh(ndev_ctx->nvdev);
1196  	struct hv_device *device = ndev_ctx->device_ctx;
1197  	int ret = 0;
1198  	struct netvsc_channel *nvchan;
1199  	u32 pktlen = packet->total_data_buflen, msd_len = 0;
1200  	unsigned int section_index = NETVSC_INVALID_INDEX;
1201  	struct multi_send_data *msdp;
1202  	struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
1203  	struct sk_buff *msd_skb = NULL;
1204  	bool try_batch, xmit_more;
1205  
1206  	/* If device is rescinded, return error and packet will get dropped. */
1207  	if (unlikely(!net_device || net_device->destroy))
1208  		return -ENODEV;
1209  
1210  	nvchan = &net_device->chan_table[packet->q_idx];
1211  	packet->send_buf_index = NETVSC_INVALID_INDEX;
1212  	packet->cp_partial = false;
1213  
1214  	/* Send a control message or XDP packet directly without accessing
1215  	 * msd (Multi-Send Data) field which may be changed during data packet
1216  	 * processing.
1217  	 */
1218  	if (!skb || xdp_tx)
1219  		return netvsc_send_pkt(device, packet, net_device, pb, skb);
1220  
1221  	/* batch packets in send buffer if possible */
1222  	msdp = &nvchan->msd;
1223  	if (msdp->pkt)
1224  		msd_len = msdp->pkt->total_data_buflen;
1225  
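	/* Pick a send-buffer section: reuse the pending packet's section if
	 * the whole packet fits, reuse it for only the RNDIS header if just
	 * that fits (cp_partial), or claim a fresh section for a small
	 * standalone packet.
	 */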
1226  	try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
1227  	if (try_batch && msd_len + pktlen + net_device->pkt_align <
1228  	    net_device->send_section_size) {
1229  		section_index = msdp->pkt->send_buf_index;
1230  
1231  	} else if (try_batch && msd_len + packet->rmsg_size <
1232  		   net_device->send_section_size) {
1233  		section_index = msdp->pkt->send_buf_index;
1234  		packet->cp_partial = true;
1235  
1236  	} else if (pktlen + net_device->pkt_align <
1237  		   net_device->send_section_size) {
1238  		section_index = netvsc_get_next_send_section(net_device);
1239  		if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
1240  			++ndev_ctx->eth_stats.tx_send_full;
1241  		} else {
1242  			move_pkt_msd(&msd_send, &msd_skb, msdp);
1243  			msd_len = 0;
1244  		}
1245  	}
1246  
1247  	/* Keep aggregating only if the stack says more data is coming,
1248  	 * we're not doing a mixed-mode (partial copy) send, and not flow blocked.
1249  	 */
1250  	xmit_more = netdev_xmit_more() &&
1251  		!packet->cp_partial &&
1252  		!netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
1253  
1254  	if (section_index != NETVSC_INVALID_INDEX) {
1255  		netvsc_copy_to_send_buf(net_device,
1256  					section_index, msd_len,
1257  					packet, rndis_msg, pb, xmit_more);
1258  
1259  		packet->send_buf_index = section_index;
1260  
1261  		if (packet->cp_partial) {
1262  			packet->page_buf_cnt -= packet->rmsg_pgcnt;
1263  			packet->total_data_buflen = msd_len + packet->rmsg_size;
1264  		} else {
1265  			packet->page_buf_cnt = 0;
1266  			packet->total_data_buflen += msd_len;
1267  		}
1268  
1269  		if (msdp->pkt) {
1270  			packet->total_packets += msdp->pkt->total_packets;
1271  			packet->total_bytes += msdp->pkt->total_bytes;
1272  		}
1273  
1274  		if (msdp->skb)
1275  			dev_consume_skb_any(msdp->skb);
1276  
1277  		if (xmit_more) {
1278  			msdp->skb = skb;
1279  			msdp->pkt = packet;
1280  			msdp->count++;
1281  		} else {
1282  			cur_send = packet;
1283  			msdp->skb = NULL;
1284  			msdp->pkt = NULL;
1285  			msdp->count = 0;
1286  		}
1287  	} else {
1288  		move_pkt_msd(&msd_send, &msd_skb, msdp);
1289  		cur_send = packet;
1290  	}
1291  
1292  	if (msd_send) {
1293  		int m_ret = netvsc_send_pkt(device, msd_send, net_device,
1294  					    NULL, msd_skb);
1295  
1296  		if (m_ret != 0) {
1297  			netvsc_free_send_slot(net_device,
1298  					      msd_send->send_buf_index);
1299  			dev_kfree_skb_any(msd_skb);
1300  		}
1301  	}
1302  
1303  	if (cur_send)
1304  		ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
1305  
1306  	if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
1307  		netvsc_free_send_slot(net_device, section_index);
1308  
1309  	return ret;
1310  }
1311  
1312  /* Send pending recv completions */
1313  static int send_recv_completions(struct net_device *ndev,
1314  				 struct netvsc_device *nvdev,
1315  				 struct netvsc_channel *nvchan)
1316  {
1317  	struct multi_recv_comp *mrc = &nvchan->mrc;
1318  	struct recv_comp_msg {
1319  		struct nvsp_message_header hdr;
1320  		u32 status;
1321  	}  __packed;
1322  	struct recv_comp_msg msg = {
1323  		.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
1324  	};
1325  	int ret;
1326  
1327  	while (mrc->first != mrc->next) {
1328  		const struct recv_comp_data *rcd
1329  			= mrc->slots + mrc->first;
1330  
1331  		msg.status = rcd->status;
1332  		ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
1333  				       rcd->tid, VM_PKT_COMP, 0);
1334  		if (unlikely(ret)) {
1335  			struct net_device_context *ndev_ctx = netdev_priv(ndev);
1336  
1337  			++ndev_ctx->eth_stats.rx_comp_busy;
1338  			return ret;
1339  		}
1340  
1341  		if (++mrc->first == nvdev->recv_completion_cnt)
1342  			mrc->first = 0;
1343  	}
1344  
1345  	/* receive completion ring has been emptied */
1346  	if (unlikely(nvdev->destroy))
1347  		wake_up(&nvdev->wait_drain);
1348  
1349  	return 0;
1350  }
1351  
1352  /* Count outstanding receive completions and the slots still available */
1353  static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
1354  				 const struct multi_recv_comp *mrc,
1355  				 u32 *filled, u32 *avail)
1356  {
1357  	u32 count = nvdev->recv_completion_cnt;
1358  
1359  	if (mrc->next >= mrc->first)
1360  		*filled = mrc->next - mrc->first;
1361  	else
1362  		*filled = (count - mrc->first) + mrc->next;
1363  
1364  	*avail = count - *filled - 1;
1365  }
1366  
1367  /* Add receive complete to ring to send to host. */
1368  static void enq_receive_complete(struct net_device *ndev,
1369  				 struct netvsc_device *nvdev, u16 q_idx,
1370  				 u64 tid, u32 status)
1371  {
1372  	struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
1373  	struct multi_recv_comp *mrc = &nvchan->mrc;
1374  	struct recv_comp_data *rcd;
1375  	u32 filled, avail;
1376  
1377  	recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1378  
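	/* Flush to the host once a NAPI budget's worth of completions has
	 * accumulated.
	 */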
1379  	if (unlikely(filled > NAPI_POLL_WEIGHT)) {
1380  		send_recv_completions(ndev, nvdev, nvchan);
1381  		recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1382  	}
1383  
1384  	if (unlikely(!avail)) {
1385  		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
1386  			   q_idx, tid);
1387  		return;
1388  	}
1389  
1390  	rcd = mrc->slots + mrc->next;
1391  	rcd->tid = tid;
1392  	rcd->status = status;
1393  
1394  	if (++mrc->next == nvdev->recv_completion_cnt)
1395  		mrc->next = 0;
1396  }
1397  
1398  static int netvsc_receive(struct net_device *ndev,
1399  			  struct netvsc_device *net_device,
1400  			  struct netvsc_channel *nvchan,
1401  			  const struct vmpacket_descriptor *desc)
1402  {
1403  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1404  	struct vmbus_channel *channel = nvchan->channel;
1405  	const struct vmtransfer_page_packet_header *vmxferpage_packet
1406  		= container_of(desc, const struct vmtransfer_page_packet_header, d);
1407  	const struct nvsp_message *nvsp = hv_pkt_data(desc);
1408  	u32 msglen = hv_pkt_datalen(desc);
1409  	u16 q_idx = channel->offermsg.offer.sub_channel_index;
1410  	char *recv_buf = net_device->recv_buf;
1411  	u32 status = NVSP_STAT_SUCCESS;
1412  	int i;
1413  	int count = 0;
1414  
1415  	/* Ensure packet is big enough to read header fields */
1416  	if (msglen < sizeof(struct nvsp_message_header)) {
1417  		netif_err(net_device_ctx, rx_err, ndev,
1418  			  "invalid nvsp header, length too small: %u\n",
1419  			  msglen);
1420  		return 0;
1421  	}
1422  
1423  	/* Make sure this is a valid nvsp packet */
1424  	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
1425  		netif_err(net_device_ctx, rx_err, ndev,
1426  			  "Unknown nvsp packet type received %u\n",
1427  			  nvsp->hdr.msg_type);
1428  		return 0;
1429  	}
1430  
1431  	/* Validate xfer page pkt header */
1432  	if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) {
1433  		netif_err(net_device_ctx, rx_err, ndev,
1434  			  "Invalid xfer page pkt, offset too small: %u\n",
1435  			  desc->offset8 << 3);
1436  		return 0;
1437  	}
1438  
1439  	if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
1440  		netif_err(net_device_ctx, rx_err, ndev,
1441  			  "Invalid xfer page set id - expecting %x got %x\n",
1442  			  NETVSC_RECEIVE_BUFFER_ID,
1443  			  vmxferpage_packet->xfer_pageset_id);
1444  		return 0;
1445  	}
1446  
1447  	count = vmxferpage_packet->range_cnt;
1448  
1449  	/* Check count for a valid value */
1450  	if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
1451  		netif_err(net_device_ctx, rx_err, ndev,
1452  			  "Range count is not valid: %d\n",
1453  			  count);
1454  		return 0;
1455  	}
1456  
1457  	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1458  	for (i = 0; i < count; i++) {
1459  		u32 offset = vmxferpage_packet->ranges[i].byte_offset;
1460  		u32 buflen = vmxferpage_packet->ranges[i].byte_count;
1461  		void *data;
1462  		int ret;
1463  
1464  		if (unlikely(offset > net_device->recv_buf_size ||
1465  			     buflen > net_device->recv_buf_size - offset)) {
1466  			nvchan->rsc.cnt = 0;
1467  			status = NVSP_STAT_FAIL;
1468  			netif_err(net_device_ctx, rx_err, ndev,
1469  				  "Packet offset:%u + len:%u too big\n",
1470  				  offset, buflen);
1471  
1472  			continue;
1473  		}
1474  
1475  		/* We're going to copy (sections of) the packet into nvchan->recv_buf;
1476  		 * make sure that nvchan->recv_buf is large enough to hold the packet.
1477  		 */
1478  		if (unlikely(buflen > net_device->recv_section_size)) {
1479  			nvchan->rsc.cnt = 0;
1480  			status = NVSP_STAT_FAIL;
1481  			netif_err(net_device_ctx, rx_err, ndev,
1482  				  "Packet too big: buflen=%u recv_section_size=%u\n",
1483  				  buflen, net_device->recv_section_size);
1484  
1485  			continue;
1486  		}
1487  
1488  		data = recv_buf + offset;
1489  
1490  		nvchan->rsc.is_last = (i == count - 1);
1491  
1492  		trace_rndis_recv(ndev, q_idx, data);
1493  
1494  		/* Pass it to the upper layer */
1495  		ret = rndis_filter_receive(ndev, net_device,
1496  					   nvchan, data, buflen);
1497  
1498  		if (unlikely(ret != NVSP_STAT_SUCCESS)) {
1499  			/* Drop incomplete packet */
1500  			nvchan->rsc.cnt = 0;
1501  			status = NVSP_STAT_FAIL;
1502  		}
1503  	}
1504  
1505  	enq_receive_complete(ndev, net_device, q_idx,
1506  			     vmxferpage_packet->d.trans_id, status);
1507  
1508  	return count;
1509  }
1510  
1511  static void netvsc_send_table(struct net_device *ndev,
1512  			      struct netvsc_device *nvscdev,
1513  			      const struct nvsp_message *nvmsg,
1514  			      u32 msglen)
1515  {
1516  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1517  	u32 count, offset, *tab;
1518  	int i;
1519  
1520  	/* Ensure packet is big enough to read send_table fields */
1521  	if (msglen < sizeof(struct nvsp_message_header) +
1522  		     sizeof(struct nvsp_5_send_indirect_table)) {
1523  		netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
1524  		return;
1525  	}
1526  
1527  	count = nvmsg->msg.v5_msg.send_table.count;
1528  	offset = nvmsg->msg.v5_msg.send_table.offset;
1529  
1530  	if (count != VRSS_SEND_TAB_SIZE) {
1531  		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1532  		return;
1533  	}
1534  
1535  	/* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be
1536  	 * wrong due to a host bug. So fix the offset here.
1537  	 */
1538  	if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
1539  	    msglen >= sizeof(struct nvsp_message_header) +
1540  	    sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
1541  		offset = sizeof(struct nvsp_message_header) +
1542  			 sizeof(union nvsp_6_message_uber);
1543  
1544  	/* Boundary check for all versions */
1545  	if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
1546  		netdev_err(ndev, "Received send-table offset too big:%u\n",
1547  			   offset);
1548  		return;
1549  	}
1550  
1551  	tab = (void *)nvmsg + offset;
1552  
1553  	for (i = 0; i < count; i++)
1554  		net_device_ctx->tx_table[i] = tab[i];
1555  }
1556  
1557  static void netvsc_send_vf(struct net_device *ndev,
1558  			   const struct nvsp_message *nvmsg,
1559  			   u32 msglen)
1560  {
1561  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1562  
1563  	/* Ensure packet is big enough to read its fields */
1564  	if (msglen < sizeof(struct nvsp_message_header) +
1565  		     sizeof(struct nvsp_4_send_vf_association)) {
1566  		netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
1567  		return;
1568  	}
1569  
1570  	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1571  	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1572  
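	/* Wake up any thread waiting on vf_add for the VF association */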
1573  	if (net_device_ctx->vf_alloc)
1574  		complete(&net_device_ctx->vf_add);
1575  
1576  	netdev_info(ndev, "VF slot %u %s\n",
1577  		    net_device_ctx->vf_serial,
1578  		    net_device_ctx->vf_alloc ? "added" : "removed");
1579  }
1580  
1581  static void netvsc_receive_inband(struct net_device *ndev,
1582  				  struct netvsc_device *nvscdev,
1583  				  const struct vmpacket_descriptor *desc)
1584  {
1585  	const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1586  	u32 msglen = hv_pkt_datalen(desc);
1587  
1588  	/* Ensure packet is big enough to read header fields */
1589  	if (msglen < sizeof(struct nvsp_message_header)) {
1590  		netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
1591  		return;
1592  	}
1593  
1594  	switch (nvmsg->hdr.msg_type) {
1595  	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1596  		netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
1597  		break;
1598  
1599  	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1600  		if (hv_is_isolation_supported())
1601  			netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n");
1602  		else
1603  			netvsc_send_vf(ndev, nvmsg, msglen);
1604  		break;
1605  	}
1606  }
1607  
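/* Dispatch a single VMBus packet based on its descriptor type.  Returns
 * the number of data packets processed (non-zero only for transfer-page
 * receives) so that netvsc_poll() can charge it against the NAPI budget.
 */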
1608  static int netvsc_process_raw_pkt(struct hv_device *device,
1609  				  struct netvsc_channel *nvchan,
1610  				  struct netvsc_device *net_device,
1611  				  struct net_device *ndev,
1612  				  const struct vmpacket_descriptor *desc,
1613  				  int budget)
1614  {
1615  	struct vmbus_channel *channel = nvchan->channel;
1616  	const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1617  
1618  	trace_nvsp_recv(ndev, channel, nvmsg);
1619  
1620  	switch (desc->type) {
1621  	case VM_PKT_COMP:
1622  		netvsc_send_completion(ndev, net_device, channel, desc, budget);
1623  		break;
1624  
1625  	case VM_PKT_DATA_USING_XFER_PAGES:
1626  		return netvsc_receive(ndev, net_device, nvchan, desc);
1627  
1628  	case VM_PKT_DATA_INBAND:
1629  		netvsc_receive_inband(ndev, net_device, desc);
1630  		break;
1631  
1632  	default:
1633  		netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
1634  			   desc->type, desc->trans_id);
1635  		break;
1636  	}
1637  
1638  	return 0;
1639  }
1640  
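/* Map a VMBus channel (primary or sub-channel) to its hv_device. */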
1641  static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
1642  {
1643  	struct vmbus_channel *primary = channel->primary_channel;
1644  
1645  	return primary ? primary->device_obj : channel->device_obj;
1646  }
1647  
1648  /* Network processing softirq (NAPI poll).
1649   * Processes data in the incoming ring buffer from the host.
1650   * Stops when the ring is empty or the budget is met or exceeded.
1651   */
1652  int netvsc_poll(struct napi_struct *napi, int budget)
1653  {
1654  	struct netvsc_channel *nvchan
1655  		= container_of(napi, struct netvsc_channel, napi);
1656  	struct netvsc_device *net_device = nvchan->net_device;
1657  	struct vmbus_channel *channel = nvchan->channel;
1658  	struct hv_device *device = netvsc_channel_to_device(channel);
1659  	struct net_device *ndev = hv_get_drvdata(device);
1660  	int work_done = 0;
1661  	int ret;
1662  
1663  	/* If starting a new interval */
1664  	if (!nvchan->desc)
1665  		nvchan->desc = hv_pkt_iter_first(channel);
1666  
1667  	nvchan->xdp_flush = false;
1668  
1669  	while (nvchan->desc && work_done < budget) {
1670  		work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
1671  						    ndev, nvchan->desc, budget);
1672  		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
1673  	}
1674  
1675  	if (nvchan->xdp_flush)
1676  		xdp_do_flush();
1677  
1678  	/* Send any pending receive completions */
1679  	ret = send_recv_completions(ndev, net_device, nvchan);
1680  
1681  	/* If the NAPI budget was not exhausted this time
1682  	 * and we are not busy polling,
1683  	 * then re-enable host interrupts
1684  	 * and reschedule if the ring is not empty
1685  	 * or sending a receive completion failed.
1686  	 */
1687  	if (work_done < budget &&
1688  	    napi_complete_done(napi, work_done) &&
1689  	    (ret || hv_end_read(&channel->inbound)) &&
1690  	    napi_schedule_prep(napi)) {
1691  		hv_begin_read(&channel->inbound);
1692  		__napi_schedule(napi);
1693  	}
1694  
1695  	/* Driver may overshoot since a descriptor can carry multiple packets */
1696  	return min(work_done, budget);
1697  }
1698  
1699  /* Callback invoked when data is available in the host ring buffer.
1700   * Processing is deferred to the network softirq (NAPI).
1701   */
1702  void netvsc_channel_cb(void *context)
1703  {
1704  	struct netvsc_channel *nvchan = context;
1705  	struct vmbus_channel *channel = nvchan->channel;
1706  	struct hv_ring_buffer_info *rbi = &channel->inbound;
1707  
1708  	/* preload first vmpacket descriptor */
1709  	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
1710  
1711  	if (napi_schedule_prep(&nvchan->napi)) {
1712  		/* disable interrupts from host */
1713  		hv_begin_read(rbi);
1714  
1715  		__napi_schedule_irqoff(&nvchan->napi);
1716  	}
1717  }
1718  
1719  /*
1720   * netvsc_device_add - Callback when the device belonging to this
1721   * driver is added
1722   */
1723  struct netvsc_device *netvsc_device_add(struct hv_device *device,
1724  				const struct netvsc_device_info *device_info)
1725  {
1726  	int i, ret = 0;
1727  	struct netvsc_device *net_device;
1728  	struct net_device *ndev = hv_get_drvdata(device);
1729  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1730  
1731  	net_device = alloc_net_device();
1732  	if (!net_device)
1733  		return ERR_PTR(-ENOMEM);
1734  
1735  	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
1736  		net_device_ctx->tx_table[i] = 0;
1737  
1738  	/* Because the device uses NAPI, all interrupt batching and
1739  	 * control is done via the net softirq, not the channel handling.
1740  	 */
1741  	set_channel_read_mode(device->channel, HV_CALL_ISR);
1742  
1743  	/* If we're reopening the device we may have multiple queues; fill
1744  	 * the chn_table with the default channel so it can be used before
1745  	 * the subchannels are opened.
1746  	 * Initialize the channel state before we open the channel;
1747  	 * we can be interrupted as soon as it is open.
1748  	 */
1749  
1750  	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
1751  		struct netvsc_channel *nvchan = &net_device->chan_table[i];
1752  
1753  		nvchan->channel = device->channel;
1754  		nvchan->net_device = net_device;
1755  		u64_stats_init(&nvchan->tx_stats.syncp);
1756  		u64_stats_init(&nvchan->rx_stats.syncp);
1757  
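		/* Register XDP RX queue info for this channel and use the
		 * shared page memory model for its XDP buffers.
		 */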
1758  		ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i, 0);
1759  
1760  		if (ret) {
1761  			netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
1762  			goto cleanup2;
1763  		}
1764  
1765  		ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
1766  						 MEM_TYPE_PAGE_SHARED, NULL);
1767  
1768  		if (ret) {
1769  			netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
1770  			goto cleanup2;
1771  		}
1772  	}
1773  
1774  	/* Enable NAPI handler before init callbacks */
1775  	netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);
1776  
1777  	/* Open the channel */
1778  	device->channel->next_request_id_callback = vmbus_next_request_id;
1779  	device->channel->request_addr_callback = vmbus_request_addr;
1780  	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
1781  	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
1782  
1783  	ret = vmbus_open(device->channel, netvsc_ring_bytes,
1784  			 netvsc_ring_bytes,  NULL, 0,
1785  			 netvsc_channel_cb, net_device->chan_table);
1786  
1787  	if (ret != 0) {
1788  		netdev_err(ndev, "unable to open channel: %d\n", ret);
1789  		goto cleanup;
1790  	}
1791  
1792  	/* Channel is opened */
1793  	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
1794  
1795  	napi_enable(&net_device->chan_table[0].napi);
1796  	netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX,
1797  			     &net_device->chan_table[0].napi);
1798  	netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX,
1799  			     &net_device->chan_table[0].napi);
1800  
1801  	/* Connect with the NetVsp */
1802  	ret = netvsc_connect_vsp(device, net_device, device_info);
1803  	if (ret != 0) {
1804  		netdev_err(ndev,
1805  			"unable to connect to NetVSP - %d\n", ret);
1806  		goto close;
1807  	}
1808  
1809  	/* Writing the nvdev pointer unlocks netvsc_send(); make sure
1810  	 * chn_table is populated before doing so.
1811  	 */
1812  	rcu_assign_pointer(net_device_ctx->nvdev, net_device);
1813  
1814  	return net_device;
1815  
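/* Error paths: unwind the initialization in reverse order */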
1816  close:
1817  	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
1818  	netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL);
1819  	netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL);
1820  	napi_disable(&net_device->chan_table[0].napi);
1821  
1822  	/* Now, we can close the channel safely */
1823  	vmbus_close(device->channel);
1824  
1825  cleanup:
1826  	netif_napi_del(&net_device->chan_table[0].napi);
1827  
1828  cleanup2:
1829  	free_netvsc_device(&net_device->rcu);
1830  
1831  	return ERR_PTR(ret);
1832  }
1833