summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-23 10:05:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-23 10:05:41 -0700
commitb3f391fddf3cfaadda59ec8da8fd17f4520bbf42 (patch)
tree42a950e8661c3f4a3ec2754190323a56a94f71c6 /lib
parentf8ffbc365f703d74ecca8ca787318d05bbee2bf7 (diff)
parent025c55a4c7f11ea38521c6e797f3192ad8768c93 (diff)
Merge tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs
Pull bcachefs updates from Kent Overstreet: - rcu_pending, btree key cache rework: this solves lock contenting in the key cache, eliminating the biggest source of the srcu lock hold time warnings, and drastically improving performance on some metadata heavy workloads - on multithreaded creates we're now 3-4x faster than xfs. - We're now using an rhashtable instead of the system inode hash table; this is another significant performance improvement on multithreaded metadata workloads, eliminating more lock contention. - for_each_btree_key_in_subvolume_upto(): new helper for iterating over keys within a specific subvolume, eliminating a lot of open coded "subvolume_get_snapshot()" and also fixing another source of srcu lock time warnings, by running each loop iteration in its own transaction (as the existing for_each_btree_key() does). - More work on btree_trans locking asserts; we now assert that we don't hold btree node locks when trans->locked is false, which is important because we don't use lockdep for tracking individual btree node locks. - Some cleanups and improvements in the bset.c btree node lookup code, from Alan. - Rework of btree node pinning, which we use in backpointers fsck. The old hacky implementation, where the shrinker just skipped over nodes in the pinned range, was causing OOMs; instead we now use another shrinker with a much higher seeks number for pinned nodes. - Rebalance now uses BCH_WRITE_ONLY_SPECIFIED_DEVS; this fixes an issue where rebalance would sometimes fall back to allocating from the full filesystem, which is not what we want when it's trying to move data to a specific target. - Use __GFP_ACCOUNT, GFP_RECLAIMABLE for btree node, key cache allocations. - Idmap mounts are now supported (Hongbo Li) - Rename whiteouts are now supported (Hongbo Li) - Erasure coding can now handle devices being marked as failed, or forcibly removed. We still need the evacuate path for erasure coding, but it's getting very close to ready for people to start using. * tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs: (99 commits) bcachefs: return err ptr instead of null in read sb clean bcachefs: Remove duplicated include in backpointers.c bcachefs: Don't drop devices with stripe pointers bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices bcachefs: bch_fs.rw_devs_change_count bcachefs: bch2_dev_remove_stripes() bcachefs: bch2_trigger_ptr() calculates sectors even when no device bcachefs: improve error messages in bch2_ec_read_extent() bcachefs: improve error message on too few devices for ec bcachefs: improve bch2_new_stripe_to_text() bcachefs: ec_stripe_head.nr_created bcachefs: bch_stripe.disk_label bcachefs: stripe_to_mem() bcachefs: EIO errcode cleanup bcachefs: Rework btree node pinning bcachefs: split up btree cache counters for live, freeable bcachefs: btree cache counters should be size_t bcachefs: Don't count "skipped access bit" as touched in btree cache scan bcachefs: Failed devices no longer require mounting in degraded mode bcachefs: bch2_dev_rcu_noerror() ...
Diffstat (limited to 'lib')
-rw-r--r--lib/generic-radix-tree.c80
1 files changed, 6 insertions, 74 deletions
diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
index fa692c86f069..79e067b51488 100644
--- a/lib/generic-radix-tree.c
+++ b/lib/generic-radix-tree.c
@@ -5,99 +5,31 @@
#include <linux/gfp.h>
#include <linux/kmemleak.h>
-#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *))
-#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
-
-struct genradix_node {
- union {
- /* Interior node: */
- struct genradix_node *children[GENRADIX_ARY];
-
- /* Leaf: */
- u8 data[GENRADIX_NODE_SIZE];
- };
-};
-
-static inline int genradix_depth_shift(unsigned depth)
-{
- return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth;
-}
-
-/*
- * Returns size (of data, in bytes) that a tree of a given depth holds:
- */
-static inline size_t genradix_depth_size(unsigned depth)
-{
- return 1UL << genradix_depth_shift(depth);
-}
-
-/* depth that's needed for a genradix that can address up to ULONG_MAX: */
-#define GENRADIX_MAX_DEPTH \
- DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT)
-
-#define GENRADIX_DEPTH_MASK \
- ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
-
-static inline unsigned genradix_root_to_depth(struct genradix_root *r)
-{
- return (unsigned long) r & GENRADIX_DEPTH_MASK;
-}
-
-static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
-{
- return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
-}
-
/*
* Returns pointer to the specified byte @offset within @radix, or NULL if not
* allocated
*/
void *__genradix_ptr(struct __genradix *radix, size_t offset)
{
- struct genradix_root *r = READ_ONCE(radix->root);
- struct genradix_node *n = genradix_root_to_node(r);
- unsigned level = genradix_root_to_depth(r);
-
- if (ilog2(offset) >= genradix_depth_shift(level))
- return NULL;
-
- while (1) {
- if (!n)
- return NULL;
- if (!level)
- break;
-
- level--;
-
- n = n->children[offset >> genradix_depth_shift(level)];
- offset &= genradix_depth_size(level) - 1;
- }
-
- return &n->data[offset];
+ return __genradix_ptr_inlined(radix, offset);
}
EXPORT_SYMBOL(__genradix_ptr);
-static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
-{
- return kzalloc(GENRADIX_NODE_SIZE, gfp_mask);
-}
-
-static inline void genradix_free_node(struct genradix_node *node)
-{
- kfree(node);
-}
-
/*
* Returns pointer to the specified byte @offset within @radix, allocating it if
* necessary - newly allocated slots are always zeroed out:
*/
void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+ struct genradix_node **preallocated,
gfp_t gfp_mask)
{
struct genradix_root *v = READ_ONCE(radix->root);
struct genradix_node *n, *new_node = NULL;
unsigned level;
+ if (preallocated)
+ swap(new_node, *preallocated);
+
/* Increase tree depth if necessary: */
while (1) {
struct genradix_root *r = v, *new_root;
@@ -281,7 +213,7 @@ int __genradix_prealloc(struct __genradix *radix, size_t size,
size_t offset;
for (offset = 0; offset < size; offset += GENRADIX_NODE_SIZE)
- if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
+ if (!__genradix_ptr_alloc(radix, offset, NULL, gfp_mask))
return -ENOMEM;
return 0;