| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-24 18:52:35 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-24 18:52:35 -0700 |
| commit | bd1b7c1384ec15294ee45bf3add7b7036e146dad | |
| tree | 5b8efc004782d52f8697b2831bdcce9c9a884988 /fs/btrfs/ctree.c | |
| parent | 3842007b1a33589d57f67eac479b132b77767514 | |
| parent | 0a05fafe9def0d9f0fbef3dfc8094925af9e3185 | |
Merge tag 'for-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Features:
- subpage:
- support for PAGE_SIZE > 4K (previously only 64K)
- make it work with raid56
- repair super block num_devices automatically if it does not match
the number of device items
- defrag can convert inline extents to regular extents; until now inline
files were skipped, but the setting of the mount option max_inline could
affect the decision logic
- zoned:
- minimal accepted zone size is explicitly set to 4MiB
- make zone reclaim less aggressive and don't reclaim if there are
enough free zones
- add per-profile sysfs tunable of the reclaim threshold
- allow automatic block group reclaim for non-zoned filesystems, with
sysfs tunables
- tree-checker: new check, compare extent buffer owner against owner
rootid
Performance:
- avoid blocking on space reservation when doing nowait direct io
writes (+7% throughput for reads and writes)
- NOCOW write throughput improvement due to refined locking (+3%)
- send: reduce pressure on the page cache by dropping extent pages right
after they're processed
Core:
- convert all radix trees to xarray
- add iterators for b-tree node items
- support printk message index
- use bulk page allocation for extent buffers
- switch to bio_alloc API, use on-stack bios where convenient, other
bio cleanups
- use rw lock for block groups to favor concurrent reads
- simplify workqueues, don't allocate high priority threads for all
normal queues as we need only one
- refactor scrub, process chunks based on their constraints and
similarity
- allocate direct io structures on stack and pass around only
pointers, avoids allocation and reduces potential error handling
Fixes:
- fix count of reserved transaction items for various inode
operations
- fix deadlock between concurrent dio writes when low on free data
space
- fix a few cases when zones need to be finished
VFS, iomap:
- add helper to check if sb write has started (usable for assertions)
- new helper iomap_dio_alloc_bio, export iomap_dio_bio_end_io"
* tag 'for-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (173 commits)
btrfs: zoned: introduce a minimal zone size 4M and reject mount
btrfs: allow defrag to convert inline extents to regular extents
btrfs: add "0x" prefix for unsupported optional features
btrfs: do not account twice for inode ref when reserving metadata units
btrfs: zoned: fix comparison of alloc_offset vs meta_write_pointer
btrfs: send: avoid trashing the page cache
btrfs: send: keep the current inode open while processing it
btrfs: allocate the btrfs_dio_private as part of the iomap dio bio
btrfs: move struct btrfs_dio_private to inode.c
btrfs: remove the disk_bytenr in struct btrfs_dio_private
btrfs: allocate dio_data on stack
iomap: add per-iomap_iter private data
iomap: allow the file system to provide a bio_set for direct I/O
btrfs: add a btrfs_dio_rw wrapper
btrfs: zoned: zone finish unused block group
btrfs: zoned: properly finish block group on metadata write
btrfs: zoned: finish block group when there are no more allocatable bytes left
btrfs: zoned: consolidate zone finish functions
btrfs: zoned: introduce btrfs_zoned_bg_is_full
btrfs: improve error reporting in lookup_inline_extent_backref
...
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 102 |
1 files changed, 82 insertions, 20 deletions
```diff
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0eecf98d0abb..6e556031a8f3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -16,6 +16,7 @@
 #include "volumes.h"
 #include "qgroup.h"
 #include "tree-mod-log.h"
+#include "tree-checker.h"
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_path *path, int level);
@@ -342,7 +343,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 		int level = btrfs_header_level(buf);
 
 		ret = btrfs_set_disk_extent_flags(trans, buf,
-						  new_flags, level, 0);
+						  new_flags, level);
 		if (ret)
 			return ret;
 	}
@@ -1390,12 +1391,13 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 }
 
 /*
- * helper function for btrfs_search_slot. The goal is to find a block
- * in cache without setting the path to blocking. If we find the block
- * we return zero and the path is unchanged.
+ * Helper function for btrfs_search_slot() and other functions that do a search
+ * on a btree. The goal is to find a tree block in the cache (the radix tree at
+ * fs_info->buffer_radix), but if we can't find it, or it's not up to date, read
+ * its pages from disk.
  *
- * If we can't find the block, we set the path blocking and do some
- * reada. -EAGAIN is returned and the search must be repeated.
+ * Returns -EAGAIN, with the path unlocked, if the caller needs to repeat the
+ * whole btree search, starting again from the current root node.
  */
 static int
 read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
@@ -1409,12 +1411,21 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	struct btrfs_key first_key;
 	int ret;
 	int parent_level;
+	bool unlock_up;
 
+	unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
 	blocknr = btrfs_node_blockptr(*eb_ret, slot);
 	gen = btrfs_node_ptr_generation(*eb_ret, slot);
 	parent_level = btrfs_header_level(*eb_ret);
 	btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
 
+	/*
+	 * If we need to read an extent buffer from disk and we are holding locks
+	 * on upper level nodes, we unlock all the upper nodes before reading the
+	 * extent buffer, and then return -EAGAIN to the caller as it needs to
+	 * restart the search. We don't release the lock on the current level
+	 * because we need to walk this node to figure out which blocks to read.
+	 */
 	tmp = find_extent_buffer(fs_info, blocknr);
 	if (tmp) {
 		if (p->reada == READA_FORWARD_ALWAYS)
@@ -1436,30 +1447,38 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 			return 0;
 		}
 
+		if (unlock_up)
+			btrfs_unlock_up_safe(p, level + 1);
+
 		/* now we're allowed to do a blocking uptodate check */
-		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
+		ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key);
 		if (ret) {
 			free_extent_buffer(tmp);
 			btrfs_release_path(p);
 			return -EIO;
 		}
-		*eb_ret = tmp;
-		return 0;
+
+		if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
+			free_extent_buffer(tmp);
+			btrfs_release_path(p);
+			return -EUCLEAN;
+		}
+
+		if (unlock_up)
+			ret = -EAGAIN;
+
+		goto out;
 	}
 
-	/*
-	 * reduce lock contention at high levels
-	 * of the btree by dropping locks before
-	 * we read. Don't release the lock on the current
-	 * level because we need to walk this node to figure
-	 * out which blocks to read.
-	 */
-	btrfs_unlock_up_safe(p, level + 1);
+	if (unlock_up) {
+		btrfs_unlock_up_safe(p, level + 1);
+		ret = -EAGAIN;
+	} else {
+		ret = 0;
+	}
 
 	if (p->reada != READA_NONE)
 		reada_for_search(fs_info, p, level, slot, key->objectid);
 
-	ret = -EAGAIN;
 	tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid, gen,
 			      parent_level - 1, &first_key);
 	if (IS_ERR(tmp)) {
@@ -1474,9 +1493,15 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	 */
 	if (!extent_buffer_uptodate(tmp))
 		ret = -EIO;
-	free_extent_buffer(tmp);
 
-	btrfs_release_path(p);
+out:
+	if (ret == 0) {
+		*eb_ret = tmp;
+	} else {
+		free_extent_buffer(tmp);
+		btrfs_release_path(p);
+	}
+
 	return ret;
 }
 
@@ -2279,6 +2304,43 @@ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
 	return ret;
 }
 
+/**
+ * Search for a valid slot for the given path.
+ *
+ * @root:	The root node of the tree.
+ * @key:	Will contain a valid item if found.
+ * @path:	The starting point to validate the slot.
+ *
+ * Return: 0  if the item is valid
+ *         1  if not found
+ *         <0 if error.
+ */
+int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
+			      struct btrfs_path *path)
+{
+	while (1) {
+		int ret;
+		const int slot = path->slots[0];
+		const struct extent_buffer *leaf = path->nodes[0];
+
+		/* This is where we start walking the path. */
+		if (slot >= btrfs_header_nritems(leaf)) {
+			/*
+			 * If we've reached the last slot in this leaf we need
+			 * to go to the next leaf and reset the path.
+			 */
+			ret = btrfs_next_leaf(root, path);
+			if (ret)
+				return ret;
+			continue;
+		}
+		/* Store the found, valid item in @key. */
+		btrfs_item_key_to_cpu(leaf, key, slot);
+		break;
+	}
+	return 0;
+}
+
 /*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
```
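The reworked read_block_for_search() above now drops the upper-level locks before going to disk and returns -EAGAIN, with the path already released, so the caller has to restart the whole search from the current root. A minimal sketch of that restart pattern follows; the helper name search_one_level() is hypothetical (in the real code the retry is the `again:` loop inside btrfs_search_slot()), and the snippet assumes the btrfs internal headers.

```c
/*
 * Sketch only: how a caller reacts to -EAGAIN from read_block_for_search().
 * search_one_level() is a hypothetical helper; in btrfs the retry happens
 * inside btrfs_search_slot() via its "again:" label. Assumes btrfs internal
 * headers such as "ctree.h".
 */
static int search_with_restart(struct btrfs_root *root,
			       const struct btrfs_key *key,
			       struct btrfs_path *path)
{
	int ret;

again:
	ret = search_one_level(root, key, path);	/* hypothetical helper */
	if (ret == -EAGAIN) {
		/*
		 * read_block_for_search() already unlocked and released the
		 * path before reading from disk, so any cached parent
		 * pointers may be stale: start over from the root node.
		 */
		goto again;
	}
	return ret;
}
```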
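The new btrfs_get_next_valid_item() added at the end of the diff is the building block for the leaf-item iteration mentioned under "Core" ("add iterators for b-tree node items"). Below is a hedged sketch of a caller that walks every item of a tree with it; walk_all_items() and the starting key are illustrative and not code from this merge.

```c
/*
 * Sketch: iterate over all leaf items of a tree using the new
 * btrfs_get_next_valid_item() helper. walk_all_items() is a hypothetical
 * caller; real users go through the iterator helpers built on top of this
 * function. Assumes btrfs internal headers such as "ctree.h".
 */
static int walk_all_items(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key = { .objectid = 0, .type = 0, .offset = 0 };
	struct btrfs_key found_key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Position the path at (or just before) the smallest key. */
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		/* 0: valid item found, 1: no more leaves, <0: error. */
		ret = btrfs_get_next_valid_item(root, &found_key, path);
		if (ret)
			break;

		/* ... use found_key / path->nodes[0] at path->slots[0] ... */

		path->slots[0]++;	/* move on to the next slot */
	}
	if (ret > 0)	/* ran past the last leaf: not an error */
		ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
```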