Lines Matching +full:write +full:- +full:to +full:- +full:write

1 // SPDX-License-Identifier: GPL-2.0
15 #include <linux/blk-mq.h>
21 #include "blk-mq-sched.h"
22 #include "blk-mq-debugfs.h"
38 * Per-zone write plug.
40 * @ref: Zone write plug reference counter. A zone write plug reference is
43 * submitted and when a function needs to manipulate a plug. The
46 * reference is dropped whenever the zone of the zone write plug is reset,
47 * finished and when the zone becomes full (last write BIO to the zone
49 * @lock: Spinlock to atomically manipulate the plug.
52 * @wp_offset: The zone write pointer location relative to the start of the zone
55 * @bio_work: Work struct to handle issuing of plugged BIOs
56 * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
57 * @disk: The gendisk the plug belongs to.
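
Read together with the accessors used later in the file, the member list above implies a plug structure roughly like the following. This is a sketch reconstructed from the documentation, not the verbatim definition; member order and exact types are assumptions.

	struct blk_zone_wplug {
		struct hlist_node	node;		/* disk hash table linkage */
		refcount_t		ref;		/* see @ref above */
		spinlock_t		lock;		/* see @lock above */
		unsigned int		flags;		/* BLK_ZONE_WPLUG_* bits */
		unsigned int		zone_no;	/* zone number on the disk */
		unsigned int		wp_offset;	/* see @wp_offset above */
		struct bio_list		bio_list;	/* plugged (delayed) BIOs */
		struct work_struct	bio_work;	/* see @bio_work above */
		struct rcu_head		rcu_head;	/* see @rcu_head above */
		struct gendisk		*disk;		/* see @disk above */
	};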
 73  * Zone write plug flag bits:
74 * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
75 * that is, that write BIOs are being throttled due to a write BIO already
76 * being executed or the zone write plug bio list is not empty.
77 * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
78 * write pointer offset and need to update it.
79 * - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
80 * from the disk hash table and that the initial reference to the zone
81 * write plug set when the plug was first added to the hash table has been
 83  * to prevent new references to the zone write plug from being taken for
84 * newly incoming BIOs. A zone write plug flagged with this flag will be
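
A minimal sketch of the flag definitions the list above implies; the exact bit positions are assumptions.

	#define BLK_ZONE_WPLUG_PLUGGED		(1U << 0)
	#define BLK_ZONE_WPLUG_NEED_WP_UPDATE	(1U << 1)
	#define BLK_ZONE_WPLUG_UNHASHED		(1U << 2)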
92 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
95 * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX
123 struct gendisk *disk = args->disk; in disk_report_zones_cb()
125 if (disk->zone_wplugs_hash) in disk_report_zones_cb()
128 if (!args->user_cb) in disk_report_zones_cb()
131 return args->user_cb(zone, idx, args->user_data); in disk_report_zones_cb()
135 * blkdev_report_zones - Get zones information
137 * @sector: Sector from which to report zones
138 * @nr_zones: Maximum number of zones to report
145 * To report all zones in a device starting from @sector, the BLK_ALL_ZONES
146 * constant can be passed to @nr_zones.
150 * Note: The caller must use memalloc_noXX_save/restore() calls to control
156 struct gendisk *disk = bdev->bd_disk; in blkdev_report_zones()
164 if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) in blkdev_report_zones()
165 return -EOPNOTSUPP; in blkdev_report_zones()
170 return disk->fops->report_zones(disk, sector, nr_zones, in blkdev_report_zones()
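
For illustration, a hedged caller sketch: walk every zone of a device and count the empty ones. The callback type matches report_zones_cb; the counting helpers themselves are hypothetical.

	static int count_empty_zone_cb(struct blk_zone *zone, unsigned int idx,
				       void *data)
	{
		unsigned int *nr_empty = data;

		if (zone->cond == BLK_ZONE_COND_EMPTY)
			(*nr_empty)++;
		return 0;
	}

	static int count_empty_zones(struct block_device *bdev,
				     unsigned int *nr_empty)
	{
		*nr_empty = 0;
		/* Returns the number of zones reported, or a negative error. */
		return blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
					   count_empty_zone_cb, nr_empty);
	}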
184 * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
186 * @op: Operation to be performed on the zones
187 * @sector: Start sector of the first zone to operate on
195 * The operation to execute on each zone can be a zone reset, open, close
208 return -EOPNOTSUPP; in blkdev_zone_mgmt()
211 return -EPERM; in blkdev_zone_mgmt()
214 return -EOPNOTSUPP; in blkdev_zone_mgmt()
218 return -EINVAL; in blkdev_zone_mgmt()
222 return -EINVAL; in blkdev_zone_mgmt()
225 return -EINVAL; in blkdev_zone_mgmt()
236 bio->bi_iter.bi_sector = sector; in blkdev_zone_mgmt()
239 /* This may take a while, so be nice to others */ in blkdev_zone_mgmt()
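
A hedged usage sketch: resetting a single zone through the management helper. The four-argument form matches recent kernels; older kernels take an extra GFP mask argument.

	static int reset_one_zone(struct block_device *bdev, sector_t zone_start)
	{
		/* @zone_start must be the first sector of the zone. */
		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, zone_start,
					bdev_zone_sectors(bdev));
	}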
259 if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) in blkdev_copy_zone_to_user()
260 return -EFAULT; in blkdev_copy_zone_to_user()
277 return -EINVAL; in blkdev_report_zones_ioctl()
280 return -ENOTTY; in blkdev_report_zones_ioctl()
283 return -EFAULT; in blkdev_report_zones_ioctl()
286 return -EINVAL; in blkdev_report_zones_ioctl()
297 return -EFAULT; in blkdev_report_zones_ioctl()
306 if (zrange->sector + zrange->nr_sectors <= zrange->sector || in blkdev_truncate_zone_range()
307 zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) in blkdev_truncate_zone_range()
309 return -EINVAL; in blkdev_truncate_zone_range()
311 start = zrange->sector << SECTOR_SHIFT; in blkdev_truncate_zone_range()
312 end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1; in blkdev_truncate_zone_range()
330 return -EINVAL; in blkdev_zone_mgmt_ioctl()
333 return -ENOTTY; in blkdev_zone_mgmt_ioctl()
336 return -EBADF; in blkdev_zone_mgmt_ioctl()
339 return -EFAULT; in blkdev_zone_mgmt_ioctl()
346 filemap_invalidate_lock(bdev->bd_mapping); in blkdev_zone_mgmt_ioctl()
361 return -ENOTTY; in blkdev_zone_mgmt_ioctl()
368 filemap_invalidate_unlock(bdev->bd_mapping); in blkdev_zone_mgmt_ioctl()
375 return zone->start + zone->len >= get_capacity(disk); in disk_zone_is_last()
381 if (zno < disk->nr_zones - 1) in disk_zone_is_full()
382 return offset_in_zone >= disk->zone_capacity; in disk_zone_is_full()
383 return offset_in_zone >= disk->last_zone_capacity; in disk_zone_is_full()
389 return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset); in disk_zone_wplug_is_full()
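
The matched lines show these helpers almost completely; assembled, with assumed signatures, they read:

	static inline bool disk_zone_is_last(struct gendisk *disk,
					     struct blk_zone *zone)
	{
		return zone->start + zone->len >= get_capacity(disk);
	}

	static bool disk_zone_is_full(struct gendisk *disk,
				      unsigned int zno,
				      unsigned int offset_in_zone)
	{
		/* Only the last zone may have a smaller capacity. */
		if (zno < disk->nr_zones - 1)
			return offset_in_zone >= disk->zone_capacity;
		return offset_in_zone >= disk->last_zone_capacity;
	}

	static bool disk_zone_wplug_is_full(struct gendisk *disk,
					    struct blk_zone_wplug *zwplug)
	{
		return disk_zone_is_full(disk, zwplug->zone_no,
					 zwplug->wp_offset);
	}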
398 hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits); in disk_insert_zone_wplug()
401 * Add the new zone write plug to the hash table, but carefully as we in disk_insert_zone_wplug()
403 * zone write plug for the same zone. in disk_insert_zone_wplug()
405 spin_lock_irqsave(&disk->zone_wplugs_lock, flags); in disk_insert_zone_wplug()
406 hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) { in disk_insert_zone_wplug()
407 if (zwplg->zone_no == zwplug->zone_no) { in disk_insert_zone_wplug()
408 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); in disk_insert_zone_wplug()
412 hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); in disk_insert_zone_wplug()
413 atomic_inc(&disk->nr_zone_wplugs); in disk_insert_zone_wplug()
414 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); in disk_insert_zone_wplug()
423 unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits); in disk_get_hashed_zone_wplug()
428 hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) { in disk_get_hashed_zone_wplug()
429 if (zwplug->zone_no == zno && in disk_get_hashed_zone_wplug()
430 refcount_inc_not_zero(&zwplug->ref)) { in disk_get_hashed_zone_wplug()
444 if (!atomic_read(&disk->nr_zone_wplugs)) in disk_get_zone_wplug()
455 mempool_free(zwplug, zwplug->disk->zone_wplugs_pool); in disk_free_zone_wplug_rcu()
460 if (refcount_dec_and_test(&zwplug->ref)) { in disk_put_zone_wplug()
461 WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list)); in disk_put_zone_wplug()
462 WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED); in disk_put_zone_wplug()
463 WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)); in disk_put_zone_wplug()
465 call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu); in disk_put_zone_wplug()
472 lockdep_assert_held(&zwplug->lock); in disk_should_remove_zone_wplug()
474 /* If the zone write plug was already removed, we are done. */ in disk_should_remove_zone_wplug()
475 if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) in disk_should_remove_zone_wplug()
478 /* If the zone write plug is still plugged, it cannot be removed. */ in disk_should_remove_zone_wplug()
479 if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) in disk_should_remove_zone_wplug()
487 * should not attempt to remove the zone write plug until all BIO in disk_should_remove_zone_wplug()
488 * completions are seen. Check by looking at the zone write plug in disk_should_remove_zone_wplug()
493 if (refcount_read(&zwplug->ref) > 2) in disk_should_remove_zone_wplug()
496 /* We can remove zone write plugs for zones that are empty or full. */ in disk_should_remove_zone_wplug()
497 return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug); in disk_should_remove_zone_wplug()
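
Pulled together, the removal policy sketched by the matched lines is: never remove an unhashed or still-plugged plug, keep plugs with outstanding BIO completions, and otherwise remove only for empty or full zones. The surrounding code is an assumption.

	static bool disk_should_remove_zone_wplug(struct gendisk *disk,
						  struct blk_zone_wplug *zwplug)
	{
		lockdep_assert_held(&zwplug->lock);

		/* Already removed from the hash table: nothing to do. */
		if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
			return false;

		/* Still plugged: BIOs are pending, keep the plug. */
		if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
			return false;

		/*
		 * 2 references == hash table + caller. More means BIO
		 * completions have not all been seen yet.
		 */
		if (refcount_read(&zwplug->ref) > 2)
			return false;

		return !zwplug->wp_offset ||
			disk_zone_wplug_is_full(disk, zwplug);
	}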
505 /* If the zone write plug was already removed, we have nothing to do. */ in disk_remove_zone_wplug()
506 if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) in disk_remove_zone_wplug()
510 * Mark the zone write plug as unhashed and drop the extra reference we in disk_remove_zone_wplug()
513 zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; in disk_remove_zone_wplug()
514 spin_lock_irqsave(&disk->zone_wplugs_lock, flags); in disk_remove_zone_wplug()
515 hlist_del_init_rcu(&zwplug->node); in disk_remove_zone_wplug()
516 atomic_dec(&disk->nr_zone_wplugs); in disk_remove_zone_wplug()
517 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); in disk_remove_zone_wplug()
524 * Get a reference on the write plug for the zone containing @sector.
526 * Return a pointer to the zone write plug with the plug spinlock held.
540 * operation has not already removed the zone write plug from in disk_get_and_lock_zone_wplug()
542 * we need to get a new plug so start over from the beginning. in disk_get_and_lock_zone_wplug()
544 spin_lock_irqsave(&zwplug->lock, *flags); in disk_get_and_lock_zone_wplug()
545 if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) { in disk_get_and_lock_zone_wplug()
546 spin_unlock_irqrestore(&zwplug->lock, *flags); in disk_get_and_lock_zone_wplug()
554 * Allocate and initialize a zone write plug with an extra reference in disk_get_and_lock_zone_wplug()
555 * so that it is not freed when the zone write plug becomes idle without in disk_get_and_lock_zone_wplug()
558 zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask); in disk_get_and_lock_zone_wplug()
562 INIT_HLIST_NODE(&zwplug->node); in disk_get_and_lock_zone_wplug()
563 refcount_set(&zwplug->ref, 2); in disk_get_and_lock_zone_wplug()
564 spin_lock_init(&zwplug->lock); in disk_get_and_lock_zone_wplug()
565 zwplug->flags = 0; in disk_get_and_lock_zone_wplug()
566 zwplug->zone_no = zno; in disk_get_and_lock_zone_wplug()
567 zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector); in disk_get_and_lock_zone_wplug()
568 bio_list_init(&zwplug->bio_list); in disk_get_and_lock_zone_wplug()
569 INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work); in disk_get_and_lock_zone_wplug()
570 zwplug->disk = disk; in disk_get_and_lock_zone_wplug()
572 spin_lock_irqsave(&zwplug->lock, *flags); in disk_get_and_lock_zone_wplug()
575 * Insert the new zone write plug in the hash table. This can fail only in disk_get_and_lock_zone_wplug()
580 spin_unlock_irqrestore(&zwplug->lock, *flags); in disk_get_and_lock_zone_wplug()
581 mempool_free(zwplug, disk->zone_wplugs_pool); in disk_get_and_lock_zone_wplug()
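
The matched lines outline a lookup-or-allocate loop: try the hash table first, detect a concurrent removal under the plug lock and retry, otherwise allocate a plug with two references (hash table + caller) and insert it, again retrying on an insertion race. A hedged sketch of that flow; the again: label and the zone number computation via disk_zone_no() are assumptions.

	again:
		zwplug = disk_get_zone_wplug(disk, sector);
		if (zwplug) {
			spin_lock_irqsave(&zwplug->lock, *flags);
			/* Raced with removal (e.g. zone reset): start over. */
			if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
				spin_unlock_irqrestore(&zwplug->lock, *flags);
				disk_put_zone_wplug(zwplug);
				goto again;
			}
			return zwplug;
		}

		zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
		if (!zwplug)
			return NULL;

		INIT_HLIST_NODE(&zwplug->node);
		refcount_set(&zwplug->ref, 2);
		spin_lock_init(&zwplug->lock);
		zwplug->flags = 0;
		zwplug->zone_no = disk_zone_no(disk, sector);
		zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0,
								sector);
		bio_list_init(&zwplug->bio_list);
		INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
		zwplug->disk = disk;

		spin_lock_irqsave(&zwplug->lock, *flags);
		if (disk_insert_zone_wplug(disk, zwplug))
			return zwplug;

		/* Lost the race with a concurrent insertion: retry. */
		spin_unlock_irqrestore(&zwplug->lock, *flags);
		mempool_free(zwplug, disk->zone_wplugs_pool);
		goto again;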
591 struct request_queue *q = zwplug->disk->queue; in blk_zone_wplug_bio_io_error()
601 * Abort (fail) all plugged BIOs of a zone write plug.
607 if (bio_list_empty(&zwplug->bio_list)) in disk_zone_wplug_abort()
611 zwplug->disk->disk_name, zwplug->zone_no); in disk_zone_wplug_abort()
612 while ((bio = bio_list_pop(&zwplug->bio_list))) in disk_zone_wplug_abort()
617 * Set a zone write plug write pointer offset to the specified value.
620 * update from a report zone after a write error.
626 lockdep_assert_held(&zwplug->lock); in disk_zone_wplug_set_wp_offset()
628 /* Update the zone write pointer and abort all plugged BIOs. */ in disk_zone_wplug_set_wp_offset()
629 zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE; in disk_zone_wplug_set_wp_offset()
630 zwplug->wp_offset = wp_offset; in disk_zone_wplug_set_wp_offset()
634 * The zone write plug now has no BIO plugged: remove it from the in disk_zone_wplug_set_wp_offset()
644 switch (zone->cond) { in blk_zone_wp_offset()
648 return zone->wp - zone->start; in blk_zone_wp_offset()
650 return zone->len; in blk_zone_wp_offset()
658 * Conventional, offline and read-only zones do not have a valid in blk_zone_wp_offset()
659 * write pointer. in blk_zone_wp_offset()
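
Assembled from the matched case bodies, the write pointer helper plausibly reads as below; the EMPTY case and the sentinel value for zones without a valid write pointer are assumptions.

	static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
	{
		switch (zone->cond) {
		case BLK_ZONE_COND_IMP_OPEN:
		case BLK_ZONE_COND_EXP_OPEN:
		case BLK_ZONE_COND_CLOSED:
			return zone->wp - zone->start;
		case BLK_ZONE_COND_FULL:
			return zone->len;
		case BLK_ZONE_COND_EMPTY:
			return 0;
		default:
			/*
			 * Conventional, offline and read-only zones do not
			 * have a valid write pointer.
			 */
			return UINT_MAX;
		}
	}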
671 zwplug = disk_get_zone_wplug(disk, zone->start); in disk_zone_wplug_sync_wp_offset()
675 spin_lock_irqsave(&zwplug->lock, flags); in disk_zone_wplug_sync_wp_offset()
676 if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) in disk_zone_wplug_sync_wp_offset()
679 spin_unlock_irqrestore(&zwplug->lock, flags); in disk_zone_wplug_sync_wp_offset()
690 return disk->fops->report_zones(disk, sector, 1, in disk_zone_sync_wp_offset()
697 struct gendisk *disk = bio->bi_bdev->bd_disk; in blk_zone_wplug_handle_reset_or_finish()
698 sector_t sector = bio->bi_iter.bi_sector; in blk_zone_wplug_handle_reset_or_finish()
703 if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { in blk_zone_wplug_handle_reset_or_finish()
709 * No-wait reset or finish BIOs do not make much sense as the callers in blk_zone_wplug_handle_reset_or_finish()
710 * issue these as blocking operations in most cases. To avoid issues in blk_zone_wplug_handle_reset_or_finish()
714 if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) in blk_zone_wplug_handle_reset_or_finish()
715 bio->bi_opf &= ~REQ_NOWAIT; in blk_zone_wplug_handle_reset_or_finish()
718 * If we have a zone write plug, set its write pointer offset to 0 in blk_zone_wplug_handle_reset_or_finish()
719 * (reset case) or to the zone size (finish case). This will abort all in blk_zone_wplug_handle_reset_or_finish()
721 * finishing zones while writes are still in-flight will result in the in blk_zone_wplug_handle_reset_or_finish()
726 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_wplug_handle_reset_or_finish()
728 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_handle_reset_or_finish()
737 struct gendisk *disk = bio->bi_bdev->bd_disk; in blk_zone_wplug_handle_reset_all()
743 * Set the write pointer offset of all zone write plugs to 0. This will in blk_zone_wplug_handle_reset_all()
745 * are still in-flight will result in the writes failing anyway. in blk_zone_wplug_handle_reset_all()
748 sector += disk->queue->limits.chunk_sectors) { in blk_zone_wplug_handle_reset_all()
751 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_wplug_handle_reset_all()
753 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_handle_reset_all()
765 * Take a reference on the zone write plug and schedule the submission in disk_zone_wplug_schedule_bio_work()
769 WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); in disk_zone_wplug_schedule_bio_work()
770 refcount_inc(&zwplug->ref); in disk_zone_wplug_schedule_bio_work()
771 queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); in disk_zone_wplug_schedule_bio_work()
782 * This reference will be reused to submit a request for the BIO for in disk_zone_wplug_add_bio()
783 * blk-mq devices and dropped when the BIO is failed and after in disk_zone_wplug_add_bio()
784 * it is issued in the case of BIO-based devices. in disk_zone_wplug_add_bio()
786 percpu_ref_get(&bio->bi_bdev->bd_disk->queue->q_usage_counter); in disk_zone_wplug_add_bio()
789 * The BIO is being plugged and thus will have to wait for the on-going in disk_zone_wplug_add_bio()
790 * write and for all other writes already plugged. So polling makes in disk_zone_wplug_add_bio()
796 * REQ_NOWAIT BIOs are always handled using the zone write plug BIO in disk_zone_wplug_add_bio()
800 if (bio->bi_opf & REQ_NOWAIT) { in disk_zone_wplug_add_bio()
801 schedule_bio_work = !(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED); in disk_zone_wplug_add_bio()
802 bio->bi_opf &= ~REQ_NOWAIT; in disk_zone_wplug_add_bio()
806 * Reuse the poll cookie field to store the number of segments when in disk_zone_wplug_add_bio()
807 * split to the hardware limits. in disk_zone_wplug_add_bio()
809 bio->__bi_nr_segments = nr_segs; in disk_zone_wplug_add_bio()
812 * We always receive BIOs after they are split and ready to be issued. in disk_zone_wplug_add_bio()
 814  	 * user must also issue writes sequentially. So simply add the new BIO in disk_zone_wplug_add_bio()
815 * at the tail of the list to preserve the sequential write order. in disk_zone_wplug_add_bio()
817 bio_list_add(&zwplug->bio_list, bio); in disk_zone_wplug_add_bio()
819 zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; in disk_zone_wplug_add_bio()
835 * blk_zone_write_plug_init_request() -> blk_attempt_bio_merge(). in blk_zone_write_plug_bio_merged()
836 * For this case, we already hold a reference on the zone write plug for in blk_zone_write_plug_bio_merged()
838 * zone write pointer offset update. in blk_zone_write_plug_bio_merged()
846 * Get a reference on the zone write plug of the target zone and advance in blk_zone_write_plug_bio_merged()
847 * the zone write pointer offset. Given that this is a merge, we already in blk_zone_write_plug_bio_merged()
848 * have at least one request and one BIO referencing the zone write in blk_zone_write_plug_bio_merged()
851 zwplug = disk_get_zone_wplug(bio->bi_bdev->bd_disk, in blk_zone_write_plug_bio_merged()
852 bio->bi_iter.bi_sector); in blk_zone_write_plug_bio_merged()
856 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_write_plug_bio_merged()
857 zwplug->wp_offset += bio_sectors(bio); in blk_zone_write_plug_bio_merged()
858 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_write_plug_bio_merged()
862 * Attempt to merge plugged BIOs with a newly prepared request for a BIO that
863 * already went through zone write plugging (either a new BIO or one that was
869 struct request_queue *q = req->q; in blk_zone_write_plug_init_request()
870 struct gendisk *disk = q->disk; in blk_zone_write_plug_init_request()
880 * Indicate that completion of this request needs to be handled with in blk_zone_write_plug_init_request()
882 * on the zone write plug we took above on entry to this function. in blk_zone_write_plug_init_request()
884 req->rq_flags |= RQF_ZONE_WRITE_PLUGGING; in blk_zone_write_plug_init_request()
890 * Walk through the list of plugged BIOs to check if they can be merged in blk_zone_write_plug_init_request()
893 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_write_plug_init_request()
895 bio = bio_list_peek(&zwplug->bio_list); in blk_zone_write_plug_init_request()
899 if (bio->bi_iter.bi_sector != req_back_sector || in blk_zone_write_plug_init_request()
904 !bio->__bi_nr_segments); in blk_zone_write_plug_init_request()
906 bio_list_pop(&zwplug->bio_list); in blk_zone_write_plug_init_request()
907 if (bio_attempt_back_merge(req, bio, bio->__bi_nr_segments) != in blk_zone_write_plug_init_request()
909 bio_list_add_head(&zwplug->bio_list, bio); in blk_zone_write_plug_init_request()
915 zwplug->wp_offset += bio_sectors(bio); in blk_zone_write_plug_init_request()
919 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_write_plug_init_request()
923 * Check and prepare a BIO for submission by incrementing the write pointer
924 * offset of its zone write plug and changing zone append operations into
 925  * regular writes when zone append emulation is needed.
930 struct gendisk *disk = bio->bi_bdev->bd_disk; in blk_zone_wplug_prepare_bio()
932 lockdep_assert_held(&zwplug->lock); in blk_zone_wplug_prepare_bio()
935 * If we lost track of the zone write pointer due to a write error, in blk_zone_wplug_prepare_bio()
 937  	 * the zone to recover a reliable write pointer position. Fail BIOs if the in blk_zone_wplug_prepare_bio()
941 if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) in blk_zone_wplug_prepare_bio()
945 * Check that the user is not attempting to write to a full zone. in blk_zone_wplug_prepare_bio()
947 * write pointer offset beyond the end of the zone. in blk_zone_wplug_prepare_bio()
954 * Use a regular write starting at the current write pointer. in blk_zone_wplug_prepare_bio()
955 * Similarly to native zone append operations, do not allow in blk_zone_wplug_prepare_bio()
958 bio->bi_opf &= ~REQ_OP_MASK; in blk_zone_wplug_prepare_bio()
959 bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE; in blk_zone_wplug_prepare_bio()
960 bio->bi_iter.bi_sector += zwplug->wp_offset; in blk_zone_wplug_prepare_bio()
969 * Check for non-sequential writes early as we know that BIOs in blk_zone_wplug_prepare_bio()
 970  		 * with a start sector not aligned to the zone write pointer in blk_zone_wplug_prepare_bio()
973 if (bio_offset_from_zone_start(bio) != zwplug->wp_offset) in blk_zone_wplug_prepare_bio()
977 /* Advance the zone write pointer offset. */ in blk_zone_wplug_prepare_bio()
978 zwplug->wp_offset += bio_sectors(bio); in blk_zone_wplug_prepare_bio()
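
Condensing the matched lines, the zone append emulation rewrites the BIO into an unmergeable regular write aimed at the current write pointer; the original opcode is restored at completion in blk_zone_write_plug_bio_endio() further down. A hedged restatement:

	/* Emulate a zone append with a regular write at the write pointer. */
	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
	bio->bi_iter.bi_sector += zwplug->wp_offset;

	/* All prepared writes then advance the write pointer offset. */
	zwplug->wp_offset += bio_sectors(bio);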
985 struct gendisk *disk = bio->bi_bdev->bd_disk; in blk_zone_wplug_handle_write()
986 sector_t sector = bio->bi_iter.bi_sector; in blk_zone_wplug_handle_write()
993 * zone write plug for the entire BIO. For blk-mq devices, the block in blk_zone_wplug_handle_write()
994 * layer should already have done any splitting required to ensure this in blk_zone_wplug_handle_write()
996 * BIO-based devices, it is the responsibility of the driver to split in blk_zone_wplug_handle_write()
1004 /* Conventional zones do not need write plugging. */ in blk_zone_wplug_handle_write()
1005 if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { in blk_zone_wplug_handle_write()
1006 /* Zone append to conventional zones is not allowed. */ in blk_zone_wplug_handle_write()
1014 if (bio->bi_opf & REQ_NOWAIT) in blk_zone_wplug_handle_write()
1019 if (bio->bi_opf & REQ_NOWAIT) in blk_zone_wplug_handle_write()
1026 /* Indicate that this BIO is being handled using zone write plugging. */ in blk_zone_wplug_handle_write()
1030 * If the zone is already plugged, add the BIO to the plug BIO list. in blk_zone_wplug_handle_write()
1031 * Do the same for REQ_NOWAIT BIOs to ensure that we will not see a in blk_zone_wplug_handle_write()
1035 if ((zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) || in blk_zone_wplug_handle_write()
1036 (bio->bi_opf & REQ_NOWAIT)) in blk_zone_wplug_handle_write()
1040 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_handle_write()
1045 zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; in blk_zone_wplug_handle_write()
1047 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_handle_write()
1054 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_handle_write()
1061 struct gendisk *disk = bio->bi_bdev->bd_disk; in blk_zone_wplug_handle_native_zone_append()
1067 * going to handle @bio through plugging. However, we may already have a in blk_zone_wplug_handle_native_zone_append()
1068 * zone write plug for the target zone if that zone was previously in blk_zone_wplug_handle_native_zone_append()
1071 * zone append operations. Avoid this by removing the zone write plug. in blk_zone_wplug_handle_native_zone_append()
1073 zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); in blk_zone_wplug_handle_native_zone_append()
1077 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_wplug_handle_native_zone_append()
1080 * We are about to remove the zone write plug. But if the user in blk_zone_wplug_handle_native_zone_append()
1084 * return NULL after the plug is removed. Aborting the plugged write in blk_zone_wplug_handle_native_zone_append()
1087 * operations and regular write operations. in blk_zone_wplug_handle_native_zone_append()
1089 if (!bio_list_empty(&zwplug->bio_list)) { in blk_zone_wplug_handle_native_zone_append()
1091 disk->disk_name, zwplug->zone_no); in blk_zone_wplug_handle_native_zone_append()
1095 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_handle_native_zone_append()
1101 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
1105 * Handle write, write zeroes and zone append operations requiring emulation
1106 * using zone write plugging.
1108 * Return true whenever @bio execution needs to be delayed through the zone
1109 * write plug. Otherwise, return false to let the submission path process
1114 struct block_device *bdev = bio->bi_bdev; in blk_zone_plug_bio()
1116 if (!bdev->bd_disk->zone_wplugs_hash) in blk_zone_plug_bio()
 1128  	 * We do not need to do anything special for empty flush BIOs, e.g. in blk_zone_plug_bio()
1130 * the responsibility of the user to first wait for the completion of in blk_zone_plug_bio()
1131 * write operations for flush to have any effect on the persistence of in blk_zone_plug_bio()
1134 if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) in blk_zone_plug_bio()
1138 * Regular writes and write zeroes need to be handled through the target in blk_zone_plug_bio()
1139 * zone write plug. This includes writes with REQ_FUA | REQ_PREFLUSH in blk_zone_plug_bio()
1140 * which may need to go through the flush machinery depending on the in blk_zone_plug_bio()
1144 * completion, which will handle zone write plugging. in blk_zone_plug_bio()
1147 * write BIOs. in blk_zone_plug_bio()
1149 * to correctly track the write pointer offset of zones. These commands in blk_zone_plug_bio()
1150 * are not plugged as we do not need serialization with write in blk_zone_plug_bio()
1151 * operations. It is the responsibility of the user to not issue reset in blk_zone_plug_bio()
1152 * and finish commands when write operations are in flight. in blk_zone_plug_bio()
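
For context, a hedged sketch of how a BIO-based driver (the function name is hypothetical) would use this hook in its submit path:

	static void my_zoned_dev_submit_bio(struct bio *bio)
	{
		/*
		 * Let zone write plugging delay the BIO if its target zone
		 * already has a write in flight; plugged BIOs are resubmitted
		 * later from the zone write plug BIO work.
		 */
		if (blk_zone_plug_bio(bio, 0))
			return;

		/* ... issue @bio to the device ... */
	}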
1184 spin_lock_irqsave(&zwplug->lock, flags); in disk_zone_wplug_unplug_bio()
1187 if (!bio_list_empty(&zwplug->bio_list)) { in disk_zone_wplug_unplug_bio()
1189 spin_unlock_irqrestore(&zwplug->lock, flags); in disk_zone_wplug_unplug_bio()
1193 zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; in disk_zone_wplug_unplug_bio()
1197 * (it was reset), remove its zone write plug from the hash table. in disk_zone_wplug_unplug_bio()
1202 spin_unlock_irqrestore(&zwplug->lock, flags); in disk_zone_wplug_unplug_bio()
1207 struct gendisk *disk = bio->bi_bdev->bd_disk; in blk_zone_write_plug_bio_endio()
1209 disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); in blk_zone_write_plug_bio_endio()
1219 * If this is a regular write emulating a zone append operation, in blk_zone_write_plug_bio_endio()
1223 bio->bi_opf &= ~REQ_OP_MASK; in blk_zone_write_plug_bio_endio()
1224 bio->bi_opf |= REQ_OP_ZONE_APPEND; in blk_zone_write_plug_bio_endio()
1229 * needing a write pointer update. in blk_zone_write_plug_bio_endio()
1231 if (bio->bi_status != BLK_STS_OK) { in blk_zone_write_plug_bio_endio()
1232 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_write_plug_bio_endio()
1234 zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE; in blk_zone_write_plug_bio_endio()
1235 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_write_plug_bio_endio()
1242 * For BIO-based devices, blk_zone_write_plug_finish_request() in blk_zone_write_plug_bio_endio()
1243 * is not called. So we need to schedule execution of the next in blk_zone_write_plug_bio_endio()
1246 if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) in blk_zone_write_plug_bio_endio()
1255 struct gendisk *disk = req->q->disk; in blk_zone_write_plug_finish_request()
1258 zwplug = disk_get_zone_wplug(disk, req->__sector); in blk_zone_write_plug_finish_request()
1262 req->rq_flags &= ~RQF_ZONE_WRITE_PLUGGING; in blk_zone_write_plug_finish_request()
1288 spin_lock_irqsave(&zwplug->lock, flags); in blk_zone_wplug_bio_work()
1291 bio = bio_list_pop(&zwplug->bio_list); in blk_zone_wplug_bio_work()
1293 zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; in blk_zone_wplug_bio_work()
1294 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_bio_work()
1303 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_zone_wplug_bio_work()
1305 bdev = bio->bi_bdev; in blk_zone_wplug_bio_work()
1309 * blk-mq devices will reuse the extra reference on the request queue in blk_zone_wplug_bio_work()
1311 * path for BIO-based devices will not do that. So drop this extra in blk_zone_wplug_bio_work()
1315 blk_queue_exit(bdev->bd_disk->queue); in blk_zone_wplug_bio_work()
1324 return 1U << disk->zone_wplugs_hash_bits; in disk_zone_wplugs_hash_size()
1329 spin_lock_init(&disk->zone_wplugs_lock); in disk_init_zone_resources()
1333 * For the size of a disk zone write plug hash table, use the size of the
1334 * zone write plug mempool, which is the maximum of the disk open zones and
1336 * 9 bits. For a disk that has no limits, mempool size defaults to 128.
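
A sketch of the sizing rule the comment describes; the constant names and the +1 rounding are assumptions. 9 bits caps the table at 512 buckets (4KB of hlist heads).

	#define BLK_ZONE_WPLUG_MAX_HASH_BITS		9
	#define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE	128

		disk->zone_wplugs_hash_bits =
			min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);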
1346 atomic_set(&disk->nr_zone_wplugs, 0); in disk_alloc_zone_resources()
1347 disk->zone_wplugs_hash_bits = in disk_alloc_zone_resources()
1350 disk->zone_wplugs_hash = in disk_alloc_zone_resources()
1353 if (!disk->zone_wplugs_hash) in disk_alloc_zone_resources()
1354 return -ENOMEM; in disk_alloc_zone_resources()
1357 INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]); in disk_alloc_zone_resources()
1359 disk->zone_wplugs_pool = mempool_create_kmalloc_pool(pool_size, in disk_alloc_zone_resources()
1361 if (!disk->zone_wplugs_pool) in disk_alloc_zone_resources()
1364 disk->zone_wplugs_wq = in disk_alloc_zone_resources()
1366 pool_size, disk->disk_name); in disk_alloc_zone_resources()
1367 if (!disk->zone_wplugs_wq) in disk_alloc_zone_resources()
1373 mempool_destroy(disk->zone_wplugs_pool); in disk_alloc_zone_resources()
1374 disk->zone_wplugs_pool = NULL; in disk_alloc_zone_resources()
1376 kfree(disk->zone_wplugs_hash); in disk_alloc_zone_resources()
1377 disk->zone_wplugs_hash = NULL; in disk_alloc_zone_resources()
1378 disk->zone_wplugs_hash_bits = 0; in disk_alloc_zone_resources()
1379 return -ENOMEM; in disk_alloc_zone_resources()
1387 if (!disk->zone_wplugs_hash) in disk_destroy_zone_wplugs_hash_table()
1390 /* Free all the zone write plugs we have. */ in disk_destroy_zone_wplugs_hash_table()
1392 while (!hlist_empty(&disk->zone_wplugs_hash[i])) { in disk_destroy_zone_wplugs_hash_table()
1393 zwplug = hlist_entry(disk->zone_wplugs_hash[i].first, in disk_destroy_zone_wplugs_hash_table()
1395 refcount_inc(&zwplug->ref); in disk_destroy_zone_wplugs_hash_table()
1401 WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs)); in disk_destroy_zone_wplugs_hash_table()
1402 kfree(disk->zone_wplugs_hash); in disk_destroy_zone_wplugs_hash_table()
1403 disk->zone_wplugs_hash = NULL; in disk_destroy_zone_wplugs_hash_table()
1404 disk->zone_wplugs_hash_bits = 0; in disk_destroy_zone_wplugs_hash_table()
1413 spin_lock_irqsave(&disk->zone_wplugs_lock, flags); in disk_set_conv_zones_bitmap()
1415 nr_conv_zones = bitmap_weight(bitmap, disk->nr_zones); in disk_set_conv_zones_bitmap()
1416 bitmap = rcu_replace_pointer(disk->conv_zones_bitmap, bitmap, in disk_set_conv_zones_bitmap()
1417 lockdep_is_held(&disk->zone_wplugs_lock)); in disk_set_conv_zones_bitmap()
1418 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); in disk_set_conv_zones_bitmap()
1427 if (!disk->zone_wplugs_pool) in disk_free_zone_resources()
1430 if (disk->zone_wplugs_wq) { in disk_free_zone_resources()
1431 destroy_workqueue(disk->zone_wplugs_wq); in disk_free_zone_resources()
1432 disk->zone_wplugs_wq = NULL; in disk_free_zone_resources()
1438 * Wait for the zone write plugs to be RCU-freed before in disk_free_zone_resources()
1443 mempool_destroy(disk->zone_wplugs_pool); in disk_free_zone_resources()
1444 disk->zone_wplugs_pool = NULL; in disk_free_zone_resources()
1447 disk->zone_capacity = 0; in disk_free_zone_resources()
1448 disk->last_zone_capacity = 0; in disk_free_zone_resources()
1449 disk->nr_zones = 0; in disk_free_zone_resources()
1456 * can automatically handle write BIO plugging. BIO-based device drivers in disk_need_zone_resources()
1457 * (e.g. DM devices) are normally responsible for handling zone write in disk_need_zone_resources()
1461 return queue_is_mq(disk->queue) || in disk_need_zone_resources()
1462 queue_emulates_zone_append(disk->queue); in disk_need_zone_resources()
1468 struct queue_limits *lim = &disk->queue->limits; in disk_revalidate_zone_resources()
1478 pool_size = max(lim->max_open_zones, lim->max_active_zones); in disk_revalidate_zone_resources()
1482 if (!disk->zone_wplugs_hash) in disk_revalidate_zone_resources()
1504 struct request_queue *q = disk->queue; in disk_update_zone_resources()
1509 disk->nr_zones = args->nr_zones; in disk_update_zone_resources()
1510 disk->zone_capacity = args->zone_capacity; in disk_update_zone_resources()
1511 disk->last_zone_capacity = args->last_zone_capacity; in disk_update_zone_resources()
1513 disk_set_conv_zones_bitmap(disk, args->conv_zones_bitmap); in disk_update_zone_resources()
1514 if (nr_conv_zones >= disk->nr_zones) { in disk_update_zone_resources()
1516 disk->disk_name, nr_conv_zones, disk->nr_zones); in disk_update_zone_resources()
1517 return -ENODEV; in disk_update_zone_resources()
1528 nr_seq_zones = disk->nr_zones - nr_conv_zones; in disk_update_zone_resources()
1534 if (!disk->zone_wplugs_pool) in disk_update_zone_resources()
1539 * zones, set its max open zone limit to the mempool size to indicate in disk_update_zone_resources()
1540 * to the user that there is a potential performance impact due to in disk_update_zone_resources()
1541 * dynamic zone write plug allocation when simultaneously writing to in disk_update_zone_resources()
1548 mempool_resize(disk->zone_wplugs_pool, pool_size); in disk_update_zone_resources()
1564 struct gendisk *disk = args->disk; in blk_revalidate_conv_zone()
1566 if (zone->capacity != zone->len) { in blk_revalidate_conv_zone()
1568 disk->disk_name); in blk_revalidate_conv_zone()
1569 return -ENODEV; in blk_revalidate_conv_zone()
1573 args->last_zone_capacity = zone->capacity; in blk_revalidate_conv_zone()
1578 if (!args->conv_zones_bitmap) { in blk_revalidate_conv_zone()
1579 args->conv_zones_bitmap = in blk_revalidate_conv_zone()
1580 bitmap_zalloc(args->nr_zones, GFP_NOIO); in blk_revalidate_conv_zone()
1581 if (!args->conv_zones_bitmap) in blk_revalidate_conv_zone()
1582 return -ENOMEM; in blk_revalidate_conv_zone()
1585 set_bit(idx, args->conv_zones_bitmap); in blk_revalidate_conv_zone()
1593 struct gendisk *disk = args->disk; in blk_revalidate_seq_zone()
1603 if (!args->zone_capacity) in blk_revalidate_seq_zone()
1604 args->zone_capacity = zone->capacity; in blk_revalidate_seq_zone()
1606 args->last_zone_capacity = zone->capacity; in blk_revalidate_seq_zone()
1607 } else if (zone->capacity != args->zone_capacity) { in blk_revalidate_seq_zone()
1609 disk->disk_name); in blk_revalidate_seq_zone()
1610 return -ENODEV; in blk_revalidate_seq_zone()
1614 * If the device needs zone append emulation, we need to track the in blk_revalidate_seq_zone()
 1615  	 * write pointer of all zones that are neither empty nor full. So make sure in blk_revalidate_seq_zone()
 1616  	 * we have a zone write plug for such zones if the device has a zone in blk_revalidate_seq_zone()
1617 * write plug hash table. in blk_revalidate_seq_zone()
1619 if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash) in blk_revalidate_seq_zone()
1625 if (!wp_offset || wp_offset >= zone->capacity) in blk_revalidate_seq_zone()
1628 zwplug = disk_get_and_lock_zone_wplug(disk, zone->wp, GFP_NOIO, &flags); in blk_revalidate_seq_zone()
1630 return -ENOMEM; in blk_revalidate_seq_zone()
1631 spin_unlock_irqrestore(&zwplug->lock, flags); in blk_revalidate_seq_zone()
1638 * Helper function to check the validity of zones of a zoned block device.
1644 struct gendisk *disk = args->disk; in blk_revalidate_zone_cb()
1645 sector_t zone_sectors = disk->queue->limits.chunk_sectors; in blk_revalidate_zone_cb()
1649 if (zone->start != args->sector) { in blk_revalidate_zone_cb()
1651 disk->disk_name, args->sector, zone->start); in blk_revalidate_zone_cb()
1652 return -ENODEV; in blk_revalidate_zone_cb()
1655 if (zone->start >= get_capacity(disk) || !zone->len) { in blk_revalidate_zone_cb()
1657 disk->disk_name, zone->start, zone->len); in blk_revalidate_zone_cb()
1658 return -ENODEV; in blk_revalidate_zone_cb()
1666 if (zone->len != zone_sectors) { in blk_revalidate_zone_cb()
1668 disk->disk_name); in blk_revalidate_zone_cb()
1669 return -ENODEV; in blk_revalidate_zone_cb()
1671 } else if (zone->len > zone_sectors) { in blk_revalidate_zone_cb()
1673 disk->disk_name); in blk_revalidate_zone_cb()
1674 return -ENODEV; in blk_revalidate_zone_cb()
1677 if (!zone->capacity || zone->capacity > zone->len) { in blk_revalidate_zone_cb()
1679 disk->disk_name); in blk_revalidate_zone_cb()
1680 return -ENODEV; in blk_revalidate_zone_cb()
1684 switch (zone->type) { in blk_revalidate_zone_cb()
1694 disk->disk_name, (int)zone->type, zone->start); in blk_revalidate_zone_cb()
1695 ret = -ENODEV; in blk_revalidate_zone_cb()
1699 args->sector += zone->len; in blk_revalidate_zone_cb()
1705 * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs
 1708  * Helper function for low-level device drivers to check, (re)allocate and
1710 * normally be called by blk-mq based drivers when a zoned gendisk is probed
1719 struct request_queue *q = disk->queue; in blk_revalidate_disk_zones()
1720 sector_t zone_sectors = q->limits.chunk_sectors; in blk_revalidate_disk_zones()
1724 int ret = -ENOMEM; in blk_revalidate_disk_zones()
1727 return -EIO; in blk_revalidate_disk_zones()
1730 return -ENODEV; in blk_revalidate_disk_zones()
1738 disk->disk_name, zone_sectors); in blk_revalidate_disk_zones()
1739 return -ENODEV; in blk_revalidate_disk_zones()
1747 args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors); in blk_revalidate_disk_zones()
1755 ret = disk->fops->report_zones(disk, 0, UINT_MAX, in blk_revalidate_disk_zones()
1758 pr_warn("%s: No zones reported\n", disk->disk_name); in blk_revalidate_disk_zones()
1759 ret = -ENODEV; in blk_revalidate_disk_zones()
1769 disk->disk_name, args.sector); in blk_revalidate_disk_zones()
1770 ret = -ENODEV; in blk_revalidate_disk_zones()
1780 pr_warn("%s: failed to revalidate zones\n", disk->disk_name); in blk_revalidate_disk_zones()
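
Typical call site, hedged: a zoned blk-mq driver revalidates zones after probe or a capacity change. Recent kernels take only the gendisk; older ones also took a driver callback.

	/* From a zoned blk-mq driver's probe/revalidate path. */
	int ret = blk_revalidate_disk_zones(disk);

	if (ret)
		pr_warn("%s: zone revalidation failed %d\n",
			disk->disk_name, ret);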
1793 * blk_zone_issue_zeroout - zero-fill a block range in a zone
1794 * @bdev: blockdev to write
1796 * @nr_sects: number of sectors to write
1800 * Zero-fill a block range in a zone (@sector must be equal to the zone write
1801 * pointer), handling potential errors due to the (initially unknown) lack of
1810 return -EIO; in blk_zone_issue_zeroout()
1814 if (ret != -EOPNOTSUPP) in blk_zone_issue_zeroout()
1818 * The failed call to blkdev_issue_zeroout() advanced the zone write in blk_zone_issue_zeroout()
1819 * pointer. Undo this using a report zone to update the zone write in blk_zone_issue_zeroout()
1820 * pointer to the correct current value. in blk_zone_issue_zeroout()
1822 ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector); in blk_zone_issue_zeroout()
1824 return ret < 0 ? ret : -EIO; in blk_zone_issue_zeroout()
1827 * Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a in blk_zone_issue_zeroout()
1828 * regular write with zero-pages. in blk_zone_issue_zeroout()
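
Assembled from the matched lines, the whole error handling path plausibly reads as follows; the first call's BLKDEV_ZERO_NOFALLBACK flag is inferred from the retry comment above.

	int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
				   sector_t nr_sects, gfp_t gfp_mask)
	{
		int ret;

		if (WARN_ON_ONCE(!bdev_is_zoned(bdev)))
			return -EIO;

		ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
					   BLKDEV_ZERO_NOFALLBACK);
		if (ret != -EOPNOTSUPP)
			return ret;

		/* Resync the zone write pointer after the failed attempt. */
		ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector);
		if (ret != 1)
			return ret < 0 ? ret : -EIO;

		/* Retry, this time allowing the zero-page write fallback. */
		return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0);
	}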
1843 spin_lock_irqsave(&zwplug->lock, flags); in queue_zone_wplug_show()
1844 zwp_zone_no = zwplug->zone_no; in queue_zone_wplug_show()
1845 zwp_flags = zwplug->flags; in queue_zone_wplug_show()
1846 zwp_ref = refcount_read(&zwplug->ref); in queue_zone_wplug_show()
1847 zwp_wp_offset = zwplug->wp_offset; in queue_zone_wplug_show()
1848 zwp_bio_list_size = bio_list_size(&zwplug->bio_list); in queue_zone_wplug_show()
1849 spin_unlock_irqrestore(&zwplug->lock, flags); in queue_zone_wplug_show()
1858 struct gendisk *disk = q->disk; in queue_zone_wplugs_show()
1862 if (!disk->zone_wplugs_hash) in queue_zone_wplugs_show()
1867 hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i], in queue_zone_wplugs_show()