1 // Copyright (C) 2021 Red Hat, Inc. All rights reserved.
2 // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
3 
4 //! Kernel-based vhost-vdpa backend.
5 
6 use std::fs::{File, OpenOptions};
7 use std::io::Error as IOError;
8 use std::os::raw::{c_uchar, c_uint};
9 use std::os::unix::fs::OpenOptionsExt;
10 use std::os::unix::io::{AsRawFd, RawFd};
11 
12 use vm_memory::GuestAddressSpace;
13 use vmm_sys_util::eventfd::EventFd;
14 use vmm_sys_util::fam::*;
15 use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref};
16 
17 use super::vhost_binding::*;
18 use super::{ioctl_result, Error, Result, VhostKernBackend, VhostKernFeatures};
19 use crate::vdpa::*;
20 use crate::{VhostAccess, VhostIotlbBackend, VhostIotlbMsg, VhostIotlbType, VringConfigData};
21 
// Implement the FamStruct trait for vhost_vdpa_config.
//
// The invocation names `buf` (element type `c_uchar`) and `len` (type
// `c_uint`) as the two fields the generated implementation works with, and
// passes `c_uint::MAX` as the final, size-limiting argument.
// NOTE(review): the exact meaning of each macro argument comes from the
// vmm-sys-util `generate_fam_struct_impl!` documentation — confirm against
// the version this crate pins.
generate_fam_struct_impl!(
    vhost_vdpa_config,
    c_uchar,
    buf,
    c_uint,
    len,
    c_uint::MAX as usize
);

/// Wrapper around `vhost_vdpa_config` used by `get_config()` and
/// `set_config()` to build the argument of the config ioctls.
type VhostVdpaConfig = FamStructWrapper<vhost_vdpa_config>;
33 
/// Handle for running VHOST_VDPA ioctls.
///
/// The handle owns the device file; every ioctl in this module is issued on
/// the descriptor returned by its `AsRawFd` implementation.
pub struct VhostKernVdpa<AS: GuestAddressSpace> {
    /// Device file; `as_raw_fd()` returns this file's raw descriptor.
    fd: File,
    /// Guest address space supplied at construction and returned by `mem()`.
    mem: AS,
    /// Value read and written through the `VhostKernFeatures` accessors.
    backend_features_acked: u64,
}
40 
41 impl<AS: GuestAddressSpace> VhostKernVdpa<AS> {
42     /// Open a handle to a new VHOST-VDPA instance.
new(path: &str, mem: AS) -> Result<Self>43     pub fn new(path: &str, mem: AS) -> Result<Self> {
44         Ok(VhostKernVdpa {
45             fd: OpenOptions::new()
46                 .read(true)
47                 .write(true)
48                 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
49                 .open(path)
50                 .map_err(Error::VhostOpen)?,
51             mem,
52             backend_features_acked: 0,
53         })
54     }
55 
56     /// Create a `VhostKernVdpa` object with given content.
with(fd: File, mem: AS, backend_features_acked: u64) -> Self57     pub fn with(fd: File, mem: AS, backend_features_acked: u64) -> Self {
58         VhostKernVdpa {
59             fd,
60             mem,
61             backend_features_acked,
62         }
63     }
64 
65     /// Set the addresses for a given vring.
66     ///
67     /// # Arguments
68     /// * `queue_index` - Index of the queue to set addresses for.
69     /// * `config_data` - Vring config data, addresses of desc_table, avail_ring
70     ///     and used_ring are in the guest address space.
set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()>71     pub fn set_vring_addr(&self, queue_index: usize, config_data: &VringConfigData) -> Result<()> {
72         if !self.is_valid(config_data) {
73             return Err(Error::InvalidQueue);
74         }
75 
76         // vDPA backends expect IOVA (that can be mapped 1:1 with
77         // GPA when no IOMMU is involved).
78         let vring_addr = vhost_vring_addr {
79             index: queue_index as u32,
80             flags: config_data.flags,
81             desc_user_addr: config_data.desc_table_addr,
82             used_user_addr: config_data.used_ring_addr,
83             avail_user_addr: config_data.avail_ring_addr,
84             log_guest_addr: config_data.get_log_addr(),
85         };
86 
87         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
88         // return value checked.
89         let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ADDR(), &vring_addr) };
90         ioctl_result(ret, ())
91     }
92 }
93 
94 impl<AS: GuestAddressSpace> VhostVdpa for VhostKernVdpa<AS> {
get_device_id(&self) -> Result<u32>95     fn get_device_id(&self) -> Result<u32> {
96         let mut device_id: u32 = 0;
97 
98         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
99         // return value checked.
100         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_DEVICE_ID(), &mut device_id) };
101         ioctl_result(ret, device_id)
102     }
103 
get_status(&self) -> Result<u8>104     fn get_status(&self) -> Result<u8> {
105         let mut status: u8 = 0;
106 
107         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
108         // return value checked.
109         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_STATUS(), &mut status) };
110         ioctl_result(ret, status)
111     }
112 
set_status(&self, status: u8) -> Result<()>113     fn set_status(&self, status: u8) -> Result<()> {
114         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
115         // return value checked.
116         let ret = unsafe { ioctl_with_ref(self, VHOST_VDPA_SET_STATUS(), &status) };
117         ioctl_result(ret, ())
118     }
119 
get_config(&self, offset: u32, buffer: &mut [u8]) -> Result<()>120     fn get_config(&self, offset: u32, buffer: &mut [u8]) -> Result<()> {
121         let mut config = VhostVdpaConfig::new(buffer.len())
122             .map_err(|_| Error::IoctlError(IOError::from_raw_os_error(libc::ENOMEM)))?;
123 
124         config.as_mut_fam_struct().off = offset;
125 
126         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
127         // return value checked.
128         let ret = unsafe {
129             ioctl_with_ptr(
130                 self,
131                 VHOST_VDPA_GET_CONFIG(),
132                 config.as_mut_fam_struct_ptr(),
133             )
134         };
135 
136         buffer.copy_from_slice(config.as_slice());
137 
138         ioctl_result(ret, ())
139     }
140 
set_config(&self, offset: u32, buffer: &[u8]) -> Result<()>141     fn set_config(&self, offset: u32, buffer: &[u8]) -> Result<()> {
142         let mut config = VhostVdpaConfig::new(buffer.len())
143             .map_err(|_| Error::IoctlError(IOError::from_raw_os_error(libc::ENOMEM)))?;
144 
145         config.as_mut_fam_struct().off = offset;
146         config.as_mut_slice().copy_from_slice(buffer);
147 
148         let ret =
149             // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
150             // return value checked.
151             unsafe { ioctl_with_ptr(self, VHOST_VDPA_SET_CONFIG(), config.as_fam_struct_ptr()) };
152         ioctl_result(ret, ())
153     }
154 
set_vring_enable(&self, queue_index: usize, enabled: bool) -> Result<()>155     fn set_vring_enable(&self, queue_index: usize, enabled: bool) -> Result<()> {
156         let vring_state = vhost_vring_state {
157             index: queue_index as u32,
158             num: enabled as u32,
159         };
160 
161         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
162         // return value checked.
163         let ret = unsafe { ioctl_with_ref(self, VHOST_VDPA_SET_VRING_ENABLE(), &vring_state) };
164         ioctl_result(ret, ())
165     }
166 
get_vring_num(&self) -> Result<u16>167     fn get_vring_num(&self) -> Result<u16> {
168         let mut vring_num: u16 = 0;
169 
170         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
171         // return value checked.
172         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_VRING_NUM(), &mut vring_num) };
173         ioctl_result(ret, vring_num)
174     }
175 
set_config_call(&self, fd: &EventFd) -> Result<()>176     fn set_config_call(&self, fd: &EventFd) -> Result<()> {
177         let event_fd: ::std::os::raw::c_int = fd.as_raw_fd();
178 
179         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
180         // return value checked.
181         let ret = unsafe { ioctl_with_ref(self, VHOST_VDPA_SET_CONFIG_CALL(), &event_fd) };
182         ioctl_result(ret, ())
183     }
184 
get_iova_range(&self) -> Result<VhostVdpaIovaRange>185     fn get_iova_range(&self) -> Result<VhostVdpaIovaRange> {
186         let mut low_iova_range = vhost_vdpa_iova_range { first: 0, last: 0 };
187 
188         let ret =
189             // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
190             // return value checked.
191             unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_IOVA_RANGE(), &mut low_iova_range) };
192 
193         let iova_range = VhostVdpaIovaRange {
194             first: low_iova_range.first,
195             last: low_iova_range.last,
196         };
197 
198         ioctl_result(ret, iova_range)
199     }
200 
get_config_size(&self) -> Result<u32>201     fn get_config_size(&self) -> Result<u32> {
202         let mut config_size: u32 = 0;
203 
204         let ret =
205             // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
206             // return value checked.
207             unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_CONFIG_SIZE(), &mut config_size) };
208         ioctl_result(ret, config_size)
209     }
210 
get_vqs_count(&self) -> Result<u32>211     fn get_vqs_count(&self) -> Result<u32> {
212         let mut vqs_count: u32 = 0;
213 
214         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
215         // return value checked.
216         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_VQS_COUNT(), &mut vqs_count) };
217         ioctl_result(ret, vqs_count)
218     }
219 
get_group_num(&self) -> Result<u32>220     fn get_group_num(&self) -> Result<u32> {
221         let mut group_num: u32 = 0;
222 
223         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
224         // return value checked.
225         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_GROUP_NUM(), &mut group_num) };
226         ioctl_result(ret, group_num)
227     }
228 
get_as_num(&self) -> Result<u32>229     fn get_as_num(&self) -> Result<u32> {
230         let mut as_num: u32 = 0;
231 
232         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
233         // return value checked.
234         let ret = unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_AS_NUM(), &mut as_num) };
235         ioctl_result(ret, as_num)
236     }
237 
get_vring_group(&self, queue_index: u32) -> Result<u32>238     fn get_vring_group(&self, queue_index: u32) -> Result<u32> {
239         let mut vring_state = vhost_vring_state {
240             index: queue_index,
241             ..Default::default()
242         };
243 
244         let ret =
245             // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
246             // return value checked.
247             unsafe { ioctl_with_mut_ref(self, VHOST_VDPA_GET_VRING_GROUP(), &mut vring_state) };
248         ioctl_result(ret, vring_state.num)
249     }
250 
set_group_asid(&self, group_index: u32, asid: u32) -> Result<()>251     fn set_group_asid(&self, group_index: u32, asid: u32) -> Result<()> {
252         let vring_state = vhost_vring_state {
253             index: group_index,
254             num: asid,
255         };
256 
257         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
258         // return value checked.
259         let ret = unsafe { ioctl_with_ref(self, VHOST_VDPA_GET_VRING_GROUP(), &vring_state) };
260         ioctl_result(ret, ())
261     }
262 
suspend(&self) -> Result<()>263     fn suspend(&self) -> Result<()> {
264         // SAFETY: This ioctl is called on a valid vhost-vdpa fd and has its
265         // return value checked.
266         let ret = unsafe { ioctl(self, VHOST_VDPA_SUSPEND()) };
267         ioctl_result(ret, ())
268     }
269 
dma_map(&self, iova: u64, size: u64, vaddr: *const u8, readonly: bool) -> Result<()>270     fn dma_map(&self, iova: u64, size: u64, vaddr: *const u8, readonly: bool) -> Result<()> {
271         let iotlb = VhostIotlbMsg {
272             iova,
273             size,
274             userspace_addr: vaddr as u64,
275             perm: match readonly {
276                 true => VhostAccess::ReadOnly,
277                 false => VhostAccess::ReadWrite,
278             },
279             msg_type: VhostIotlbType::Update,
280         };
281 
282         self.send_iotlb_msg(&iotlb)
283     }
284 
dma_unmap(&self, iova: u64, size: u64) -> Result<()>285     fn dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
286         let iotlb = VhostIotlbMsg {
287             iova,
288             size,
289             msg_type: VhostIotlbType::Invalidate,
290             ..Default::default()
291         };
292 
293         self.send_iotlb_msg(&iotlb)
294     }
295 }
296 
297 impl<AS: GuestAddressSpace> VhostKernBackend for VhostKernVdpa<AS> {
298     type AS = AS;
299 
mem(&self) -> &Self::AS300     fn mem(&self) -> &Self::AS {
301         &self.mem
302     }
303 
304     /// Check whether the ring configuration is valid.
is_valid(&self, config_data: &VringConfigData) -> bool305     fn is_valid(&self, config_data: &VringConfigData) -> bool {
306         let queue_size = config_data.queue_size;
307         if queue_size > config_data.queue_max_size
308             || queue_size == 0
309             || (queue_size & (queue_size - 1)) != 0
310         {
311             return false;
312         }
313 
314         // Since vDPA could be dealing with IOVAs corresponding to GVAs, it
315         // wouldn't make sense to go through the validation of the descriptor
316         // table address, available ring address and used ring address against
317         // the guest memory representation we have access to.
318 
319         config_data.is_log_addr_valid()
320     }
321 }
322 
323 impl<AS: GuestAddressSpace> AsRawFd for VhostKernVdpa<AS> {
as_raw_fd(&self) -> RawFd324     fn as_raw_fd(&self) -> RawFd {
325         self.fd.as_raw_fd()
326     }
327 }
328 
329 impl<AS: GuestAddressSpace> VhostKernFeatures for VhostKernVdpa<AS> {
get_backend_features_acked(&self) -> u64330     fn get_backend_features_acked(&self) -> u64 {
331         self.backend_features_acked
332     }
333 
set_backend_features_acked(&mut self, features: u64)334     fn set_backend_features_acked(&mut self, features: u64) {
335         self.backend_features_acked = features;
336     }
337 }
338 
#[cfg(test)]
mod tests {
    /// Device node the tests try to open; tests are skipped when it is absent.
    const VHOST_VDPA_PATH: &str = "/dev/vhost-vdpa-0";

    use std::alloc::{alloc, dealloc, Layout};
    use vm_memory::{GuestAddress, GuestMemory, GuestMemoryMmap};
    use vmm_sys_util::eventfd::EventFd;

    use super::*;
    use crate::{
        VhostBackend, VhostUserDirtyLogRegion, VhostUserMemoryRegionInfo, VringConfigData,
    };
    use serial_test::serial;
    use std::io::ErrorKind;

    /// macro to skip test if vhost-vdpa device path is not found.
    ///
    /// vDPA simulators are available since Linux 5.7, but the CI may have
    /// an older kernel, so for now we skip the test if we don't find
    /// the device.
    ///
    /// Any error other than `Error::VhostOpen` with `ErrorKind::NotFound`
    /// makes the test panic.
    macro_rules! unwrap_not_found {
        ( $e:expr ) => {
            match $e {
                Ok(v) => v,
                Err(error) => match error {
                    Error::VhostOpen(ref e) if e.kind() == ErrorKind::NotFound => {
                        println!("Err: {:?} SKIPPED", e);
                        return;
                    }
                    e => panic!("Err: {:?}", e),
                },
            }
        };
    }

    /// macro to compare the result of an ioctl wrapper with a reference value.
    ///
    /// An `Error::IoctlError` carrying `ENOTTY` only prints a "SKIPPED" line;
    /// any other error makes the test panic.
    macro_rules! validate_ioctl {
        ( $e:expr, $ref_value:expr ) => {
            match $e {
                Ok(v) => assert_eq!(v, $ref_value),
                Err(error) => match error {
                    // Compare against `Some(..)` so an error without an OS
                    // code falls through to the descriptive panic below
                    // instead of failing inside `unwrap()`.
                    Error::IoctlError(e) if e.raw_os_error() == Some(libc::ENOTTY) => {
                        println!("Err: {:?} SKIPPED", e);
                    }
                    e => panic!("Err: {:?}", e),
                },
            }
        };
    }

    /// Opening the device yields a usable fd and keeps the guest memory.
    #[test]
    #[serial]
    fn test_vdpa_kern_new_device() {
        let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10_0000)]).unwrap();
        let vdpa = unwrap_not_found!(VhostKernVdpa::new(VHOST_VDPA_PATH, &m));

        assert!(vdpa.as_raw_fd() >= 0);
        assert!(vdpa.mem().find_region(GuestAddress(0x100)).is_some());
        assert!(vdpa.mem().find_region(GuestAddress(0x10_0000)).is_none());
    }

    /// `is_valid()` accepts a power-of-two queue size within the maximum and
    /// rejects zero, non-power-of-two and oversized queues.
    #[test]
    #[serial]
    fn test_vdpa_kern_is_valid() {
        let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10_0000)]).unwrap();
        let vdpa = unwrap_not_found!(VhostKernVdpa::new(VHOST_VDPA_PATH, &m));

        let mut config = VringConfigData {
            queue_max_size: 32,
            queue_size: 32,
            flags: 0,
            desc_table_addr: 0x1000,
            used_ring_addr: 0x2000,
            avail_ring_addr: 0x3000,
            log_addr: None,
        };
        assert!(vdpa.is_valid(&config));

        config.queue_size = 0;
        assert!(!vdpa.is_valid(&config));
        config.queue_size = 31;
        assert!(!vdpa.is_valid(&config));
        config.queue_size = 33;
        assert!(!vdpa.is_valid(&config));
    }

    /// Walk through the ioctl wrappers against a vDPA simulator device.
    #[test]
    #[serial]
    fn test_vdpa_kern_ioctls() {
        let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10_0000)]).unwrap();
        let vdpa = unwrap_not_found!(VhostKernVdpa::new(VHOST_VDPA_PATH, &m));

        let features = vdpa.get_features().unwrap();
        // VIRTIO_F_VERSION_1 (bit 32) should be set
        assert_ne!(features & (1 << 32), 0);
        vdpa.set_features(features).unwrap();

        vdpa.set_owner().unwrap();

        vdpa.set_mem_table(&[]).unwrap_err();

        let region = VhostUserMemoryRegionInfo::new(
            0x0,
            0x10_0000,
            m.get_host_address(GuestAddress(0x0)).unwrap() as u64,
            0,
            -1,
        );
        vdpa.set_mem_table(&[region]).unwrap();

        let device_id = vdpa.get_device_id().unwrap();
        assert!(device_id > 0);

        assert_eq!(vdpa.get_status().unwrap(), 0x0);
        vdpa.set_status(0x1).unwrap();
        assert_eq!(vdpa.get_status().unwrap(), 0x1);

        let mut vec = vec![0u8; 8];
        vdpa.get_config(0, &mut vec).unwrap();
        vdpa.set_config(0, &vec).unwrap();

        let eventfd = EventFd::new(0).unwrap();

        // set_log_base() and set_log_fd() are not supported by vhost-vdpa
        vdpa.set_log_base(
            0x4000,
            Some(VhostUserDirtyLogRegion {
                mmap_size: 0x1000,
                mmap_offset: 0x10,
                mmap_handle: 1,
            }),
        )
        .unwrap_err();
        vdpa.set_log_base(0x4000, None).unwrap_err();
        vdpa.set_log_fd(eventfd.as_raw_fd()).unwrap_err();

        let max_queues = vdpa.get_vring_num().unwrap();
        vdpa.set_vring_num(0, max_queues + 1).unwrap_err();

        vdpa.set_vring_num(0, 32).unwrap();

        let config = VringConfigData {
            queue_max_size: 32,
            queue_size: 32,
            flags: 0,
            desc_table_addr: 0x1000,
            used_ring_addr: 0x2000,
            avail_ring_addr: 0x3000,
            log_addr: None,
        };
        vdpa.set_vring_addr(0, &config).unwrap();
        vdpa.set_vring_base(0, 1).unwrap();
        vdpa.set_vring_call(0, &eventfd).unwrap();
        vdpa.set_vring_kick(0, &eventfd).unwrap();
        vdpa.set_vring_err(0, &eventfd).unwrap();

        vdpa.set_config_call(&eventfd).unwrap();

        let iova_range = vdpa.get_iova_range().unwrap();
        // vDPA-block simulator returns [0, u64::MAX] range
        assert_eq!(iova_range.first, 0);
        assert_eq!(iova_range.last, u64::MAX);

        // Reference values expected for each supported device id.
        let (config_size, vqs_count, group_num, as_num, vring_group) = if device_id == 1 {
            (24, 3, 2, 2, 0)
        } else if device_id == 2 {
            (60, 1, 1, 1, 0)
        } else {
            panic!("Unexpected device id {}", device_id)
        };

        validate_ioctl!(vdpa.get_config_size(), config_size);
        validate_ioctl!(vdpa.get_vqs_count(), vqs_count);
        validate_ioctl!(vdpa.get_group_num(), group_num);
        validate_ioctl!(vdpa.get_as_num(), as_num);
        validate_ioctl!(vdpa.get_vring_group(0), vring_group);
        // Use an address space id below `as_num` for every device listed
        // above, so the request is one the device can be expected to accept.
        validate_ioctl!(vdpa.set_group_asid(0, 0), ());

        if vdpa.get_backend_features().unwrap() & (1 << VHOST_BACKEND_F_SUSPEND)
            == (1 << VHOST_BACKEND_F_SUSPEND)
        {
            validate_ioctl!(vdpa.suspend(), ());
        }

        assert_eq!(vdpa.get_vring_base(0).unwrap(), 1);

        vdpa.set_vring_enable(0, true).unwrap();
        vdpa.set_vring_enable(0, false).unwrap();
    }

    /// Map and unmap a heap buffer through the IOTLB message interface.
    #[test]
    #[serial]
    fn test_vdpa_kern_dma() {
        let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10_0000)]).unwrap();
        let mut vdpa = unwrap_not_found!(VhostKernVdpa::new(VHOST_VDPA_PATH, &m));

        let features = vdpa.get_features().unwrap();
        // VIRTIO_F_VERSION_1 (bit 32) should be set
        assert_ne!(features & (1 << 32), 0);
        vdpa.set_features(features).unwrap();

        let backend_features = vdpa.get_backend_features().unwrap();
        assert_ne!(backend_features & (1 << VHOST_BACKEND_F_IOTLB_MSG_V2), 0);
        vdpa.set_backend_features(backend_features).unwrap();

        vdpa.set_owner().unwrap();

        // Mapping a null userspace address is expected to be rejected.
        vdpa.dma_map(0xFFFF_0000, 0xFFFF, std::ptr::null::<u8>(), false)
            .unwrap_err();

        let layout = Layout::from_size_align(0xFFFF, 1).unwrap();

        // SAFETY: Safe because layout has non-zero size.
        let ptr = unsafe { alloc(layout) };
        // `alloc` signals failure with a null pointer; stop here with a clear
        // message instead of passing null on to the mapping call.
        assert!(!ptr.is_null(), "allocation of the DMA test buffer failed");

        vdpa.dma_map(0xFFFF_0000, 0xFFFF, ptr, false).unwrap();
        vdpa.dma_unmap(0xFFFF_0000, 0xFFFF).unwrap();

        // SAFETY: Safe because `ptr` is allocated with the same allocator
        // using the same `layout`.
        unsafe { dealloc(ptr, layout) };
    }
}
561