author    Linus Torvalds <torvalds@linux-foundation.org>    2024-09-29 08:51:30 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>    2024-09-29 08:51:30 -0700
commit    ec03de73b1e11e5b0c30d3c35e09dffe7496bb06 (patch)
tree      948d5d9052f2c3f21fc48820bdd77f0ef5aed7aa
parent    68e4b0e024e3ee4d4bc8bf212f4db63e0e3ad094 (diff)
parent    ae39e0bd150bd2739582f72ad70b5a0b15e74576 (diff)
Merge tag 'locking-urgent-2024-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
 "lockdep:

   - Fix potential deadlock between lockdep and RCU (Zhiguo Niu)

   - Use str_plural() to address Coccinelle warning (Thorsten Blum)

   - Add debuggability enhancement (Luis Claudio R. Goncalves)

  static keys & calls:

   - Fix static_key_slow_dec() yet again (Peter Zijlstra)

   - Handle module init failure correctly in static_call_del_module()
     (Thomas Gleixner)

   - Replace pointless WARN_ON() in static_call_module_notify()
     (Thomas Gleixner)

  <linux/cleanup.h>:

   - Add usage and style documentation (Dan Williams)

  rwsems:

   - Move is_rwsem_reader_owned() and rwsem_owner() under
     CONFIG_DEBUG_RWSEMS (Waiman Long)

  atomic ops, x86:

   - Redeclare x86_32 arch_atomic64_{add,sub}() as void (Uros Bizjak)

   - Introduce the read64_nonatomic macro to x86_32 with cx8 (Uros Bizjak)"

Signed-off-by: Ingo Molnar <mingo@kernel.org>

* tag 'locking-urgent-2024-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/rwsem: Move is_rwsem_reader_owned() and rwsem_owner() under CONFIG_DEBUG_RWSEMS
  jump_label: Fix static_key_slow_dec() yet again
  static_call: Replace pointless WARN_ON() in static_call_module_notify()
  static_call: Handle module init failure correctly in static_call_del_module()
  locking/lockdep: Simplify character output in seq_line()
  lockdep: fix deadlock issue between lockdep and rcu
  lockdep: Use str_plural() to fix Coccinelle warning
  cleanup: Add usage and style documentation
  lockdep: suggest the fix for "lockdep bfs error:-1" on print_bfs_bug
  locking/atomic/x86: Redeclare x86_32 arch_atomic64_{add,sub}() as void
  locking/atomic/x86: Introduce the read64_nonatomic macro to x86_32 with cx8
-rw-r--r--  Documentation/core-api/cleanup.rst      8
-rw-r--r--  Documentation/core-api/index.rst        1
-rw-r--r--  arch/x86/include/asm/atomic64_32.h      6
-rw-r--r--  arch/x86/lib/atomic64_cx8_32.S          9
-rw-r--r--  include/linux/cleanup.h               136
-rw-r--r--  kernel/jump_label.c                    34
-rw-r--r--  kernel/locking/lockdep.c               53
-rw-r--r--  kernel/locking/lockdep_proc.c           2
-rw-r--r--  kernel/locking/rwsem.c                 22
-rw-r--r--  kernel/static_call_inline.c            13
10 files changed, 240 insertions, 44 deletions
diff --git a/Documentation/core-api/cleanup.rst b/Documentation/core-api/cleanup.rst
new file mode 100644
index 000000000000..527eb2f8ec6e
--- /dev/null
+++ b/Documentation/core-api/cleanup.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Scope-based Cleanup Helpers
+===========================
+
+.. kernel-doc:: include/linux/cleanup.h
+ :doc: scope-based cleanup helpers
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index e18a2ffe0787..a331d2c814f5 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -35,6 +35,7 @@ Library functionality that is used throughout the kernel.
kobject
kref
+ cleanup
assoc_array
xarray
maple_tree
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 8db2ec4d6cda..1f650b4dde50 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -163,20 +163,18 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
ASM_NO_INPUT_CLOBBER("memory"));
- return i;
}
-static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
ASM_NO_INPUT_CLOBBER("memory"));
- return i;
}
static __always_inline void arch_atomic64_inc(atomic64_t *v)
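
With this change arch_atomic64_add() and arch_atomic64_sub() match the rest of the atomic API on x86_32: the plain add/sub operations return nothing, and callers that need the resulting value must use the *_return variants. A minimal caller-side sketch against the generic atomic64 wrappers (the counter and functions below are illustrative only, not part of this patch):

	#include <linux/atomic.h>

	static atomic64_t demo_counter = ATOMIC64_INIT(0);

	static void demo_add(s64 delta)
	{
		/* Plain add: nothing is returned, matching the void prototype. */
		atomic64_add(delta, &demo_counter);
	}

	static s64 demo_add_and_read(s64 delta)
	{
		/* Callers that need the updated value use the _return form. */
		return atomic64_add_return(delta, &demo_counter);
	}
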
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 90afb488b396..b2eff07d65e4 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -16,6 +16,11 @@
cmpxchg8b (\reg)
.endm
+.macro read64_nonatomic reg
+ movl (\reg), %eax
+ movl 4(\reg), %edx
+.endm
+
SYM_FUNC_START(atomic64_read_cx8)
read64 %ecx
RET
@@ -51,7 +56,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
movl %edx, %edi
movl %ecx, %ebp
- read64 %ecx
+ read64_nonatomic %ecx
1:
movl %eax, %ebx
movl %edx, %ecx
@@ -79,7 +84,7 @@ addsub_return sub sub sbb
SYM_FUNC_START(atomic64_\func\()_return_cx8)
pushl %ebx
- read64 %esi
+ read64_nonatomic %esi
1:
movl %eax, %ebx
movl %edx, %ecx
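
The non-atomic read is sufficient above because the loaded value only seeds a cmpxchg8b retry loop: if the 64-bit value was updated (or torn across its two halves) between the read and the cmpxchg8b, the compare fails and the loop retries with the fresh value. A rough C analogue of that retry shape, assuming the generic try_cmpxchg64() helper (the function below is a sketch, not kernel code):

	#include <linux/atomic.h>

	static s64 sketch_add_return(s64 i, atomic64_t *v)
	{
		s64 old, new;

		/* Possibly torn 32-bit halves: only a starting guess. */
		old = v->counter;
		do {
			new = old + i;
			/*
			 * The cmpxchg itself is atomic; on a stale or torn
			 * guess it fails and refreshes @old, so the loop
			 * converges on a consistent update.
			 */
		} while (!try_cmpxchg64(&v->counter, &old, new));

		return new;
	}
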
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index a3d3e888cf1f..038b2d523bf8 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -4,6 +4,142 @@
#include <linux/compiler.h>
+/**
+ * DOC: scope-based cleanup helpers
+ *
+ * The "goto error" pattern is notorious for introducing subtle resource
+ * leaks. It is tedious and error prone to add new resource acquisition
+ * constraints into code paths that already have several unwind
+ * conditions. The "cleanup" helpers enable the compiler to help with
+ * this tedium and can aid in maintaining LIFO (last in first out)
+ * unwind ordering to avoid unintentional leaks.
+ *
+ * As drivers make up the majority of the kernel code base, here is an
+ * example of using these helpers to clean up PCI drivers. The targets of
+ * the cleanups are occasions where a goto is used to unwind a device
+ * reference (pci_dev_put()), or unlock the device (pci_dev_unlock())
+ * before returning.
+ *
+ * The DEFINE_FREE() macro can arrange for PCI device references to be
+ * dropped when the associated variable goes out of scope::
+ *
+ * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
+ * ...
+ * struct pci_dev *dev __free(pci_dev_put) =
+ * pci_get_slot(parent, PCI_DEVFN(0, 0));
+ *
+ * The above will automatically call pci_dev_put() if @dev is non-NULL
+ * when @dev goes out of scope (automatic variable scope). If a function
+ * wants to invoke pci_dev_put() on error, but return @dev (i.e. without
+ * freeing it) on success, it can do::
+ *
+ * return no_free_ptr(dev);
+ *
+ * ...or::
+ *
+ * return_ptr(dev);
+ *
+ * The DEFINE_GUARD() macro can arrange for the PCI device lock to be
+ * dropped when the scope where guard() is invoked ends::
+ *
+ * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
+ * ...
+ * guard(pci_dev)(dev);
+ *
+ * The lifetime of the lock obtained by the guard() helper follows the
+ * scope of automatic variable declaration. Take the following example::
+ *
+ *	func(...)
+ *	{
+ *		if (...) {
+ *			...
+ *			guard(pci_dev)(dev); // pci_dev_lock() invoked here
+ *			...
+ *		} // <- implied pci_dev_unlock() triggered here
+ *	}
+ *
+ * Observe the lock is held for the remainder of the "if ()" block, not
+ * the remainder of "func()".
+ *
+ * Now, when a function uses both __free() and guard(), or multiple
+ * instances of __free(), the LIFO order of variable definition
+ * matters. GCC documentation says:
+ *
+ * "When multiple variables in the same scope have cleanup attributes,
+ * at exit from the scope their associated cleanup functions are run in
+ * reverse order of definition (last defined, first cleanup)."
+ *
+ * When the unwind order matters, it requires that variables be defined
+ * mid-function scope rather than at the top of the function. Take the
+ * following example and notice the bug highlighted by "!!"::
+ *
+ *	LIST_HEAD(list);
+ *	DEFINE_MUTEX(lock);
+ *
+ *	struct object {
+ *		struct list_head node;
+ *	};
+ *
+ *	static struct object *alloc_add(void)
+ *	{
+ *		struct object *obj;
+ *
+ *		lockdep_assert_held(&lock);
+ *		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ *		if (obj) {
+ *			INIT_LIST_HEAD(&obj->node);
+ *			list_add(&obj->node, &list);
+ *		}
+ *		return obj;
+ *	}
+ *
+ *	static void remove_free(struct object *obj)
+ *	{
+ *		lockdep_assert_held(&lock);
+ *		list_del(&obj->node);
+ *		kfree(obj);
+ *	}
+ *
+ *	DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T))
+ *	static int init(void)
+ *	{
+ *		struct object *obj __free(remove_free) = NULL;
+ *		int err;
+ *
+ *		guard(mutex)(&lock);
+ *		obj = alloc_add();
+ *
+ *		if (!obj)
+ *			return -ENOMEM;
+ *
+ *		err = other_init(obj);
+ *		if (err)
+ *			return err; // remove_free() called without the lock!!
+ *
+ *		no_free_ptr(obj);
+ *		return 0;
+ *	}
+ *
+ * That bug is fixed by changing init() to call guard() and define +
+ * initialize @obj in this order::
+ *
+ * guard(mutex)(&lock);
+ * struct object *obj __free(remove_free) = alloc_add();
+ *
+ * Given that the "__free(...) = NULL" pattern for variables defined at
+ * the top of the function poses this potential interdependency problem,
+ * the recommendation is to always define and assign variables in one
+ * statement and not group variable definitions at the top of the
+ * function when __free() is used.
+ *
+ * Lastly, given that the benefit of cleanup helpers is removal of
+ * "goto", and that the "goto" statement can jump between scopes, the
+ * expectation is that usage of "goto" and cleanup helpers is never
+ * mixed in the same function. I.e. for a given routine, convert all
+ * resources that need a "goto" cleanup to scope-based cleanup, or
+ * convert none of them.
+ */
+
/*
* DEFINE_FREE(name, type, free):
* simple helper macro that defines the required wrapper for a __free()
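
The DOC block added above can be condensed into one hedged, self-contained sketch (demo_lock, demo_obj and demo_create() are invented for illustration; only DEFINE_FREE(), __free(), guard() and no_free_ptr() come from <linux/cleanup.h>):

	#include <linux/cleanup.h>
	#include <linux/mutex.h>
	#include <linux/slab.h>

	static DEFINE_MUTEX(demo_lock);

	struct demo_obj {
		int id;
	};

	/* Teach __free() how to drop a struct demo_obj. */
	DEFINE_FREE(demo_free, struct demo_obj *, if (_T) kfree(_T))

	static struct demo_obj *demo_create(int id)
	{
		/*
		 * Define the guard first: cleanups run in reverse order of
		 * definition, so obj is freed (if still owned) before the
		 * mutex is dropped at the end of this scope.
		 */
		guard(mutex)(&demo_lock);

		struct demo_obj *obj __free(demo_free) =
			kzalloc(sizeof(*obj), GFP_KERNEL);
		if (!obj)
			return NULL;

		obj->id = id;

		/* Success: transfer ownership to the caller, __free() is disarmed. */
		return no_free_ptr(obj);
	}
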
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 6dc76b590703..93a822d3c468 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -168,7 +168,7 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key)
jump_label_update(key);
/*
* Ensure that when static_key_fast_inc_not_disabled() or
- * static_key_slow_try_dec() observe the positive value,
+ * static_key_dec_not_one() observe the positive value,
* they must also observe all the text changes.
*/
atomic_set_release(&key->enabled, 1);
@@ -250,7 +250,7 @@ void static_key_disable(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_disable);
-static bool static_key_slow_try_dec(struct static_key *key)
+static bool static_key_dec_not_one(struct static_key *key)
{
int v;
@@ -274,6 +274,14 @@ static bool static_key_slow_try_dec(struct static_key *key)
* enabled. This suggests an ordering problem on the user side.
*/
WARN_ON_ONCE(v < 0);
+
+ /*
+ * Warn about underflow, and lie about success in an attempt to
+ * not make things worse.
+ */
+ if (WARN_ON_ONCE(v == 0))
+ return true;
+
if (v <= 1)
return false;
} while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1)));
@@ -284,15 +292,27 @@ static bool static_key_slow_try_dec(struct static_key *key)
static void __static_key_slow_dec_cpuslocked(struct static_key *key)
{
lockdep_assert_cpus_held();
+ int val;
- if (static_key_slow_try_dec(key))
+ if (static_key_dec_not_one(key))
return;
guard(mutex)(&jump_label_mutex);
- if (atomic_cmpxchg(&key->enabled, 1, 0) == 1)
+ val = atomic_read(&key->enabled);
+ /*
+ * It should be impossible to observe -1 with jump_label_mutex held,
+ * see static_key_slow_inc_cpuslocked().
+ */
+ if (WARN_ON_ONCE(val == -1))
+ return;
+ /*
+ * Cannot already be 0, something went sideways.
+ */
+ if (WARN_ON_ONCE(val == 0))
+ return;
+
+ if (atomic_dec_and_test(&key->enabled))
jump_label_update(key);
- else
- WARN_ON_ONCE(!static_key_slow_try_dec(key));
}
static void __static_key_slow_dec(struct static_key *key)
@@ -329,7 +349,7 @@ void __static_key_slow_dec_deferred(struct static_key *key,
{
STATIC_KEY_CHECK_USE(key);
- if (static_key_slow_try_dec(key))
+ if (static_key_dec_not_one(key))
return;
schedule_delayed_work(work, timeout);
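
The rename from static_key_slow_try_dec() to static_key_dec_not_one() reflects what the helper actually does: it decrements the enable count locklessly only while the result stays above zero, and leaves the final 1 -> 0 transition (and the text patching it implies) to the slow path under jump_label_mutex. A stripped-down sketch of that lockless part (the function and counter below are illustrative, not the kernel's):

	#include <linux/atomic.h>

	/*
	 * Decrement @cnt unless that would move it from 1 to 0 (or it is
	 * already <= 0).  Returns true when the decrement was done.
	 */
	static bool sketch_dec_not_one(atomic_t *cnt)
	{
		int v = atomic_read(cnt);

		do {
			if (v <= 1)
				return false;	/* leave 1 -> 0 to the slow path */
		} while (!atomic_try_cmpxchg(cnt, &v, v - 1));

		return true;
	}
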
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7963deac33c3..536bd471557f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -788,7 +788,7 @@ static void lockdep_print_held_locks(struct task_struct *p)
printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
else
printk("%d lock%s held by %s/%d:\n", depth,
- depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+ str_plural(depth), p->comm, task_pid_nr(p));
/*
* It's not reliable to print a task's held locks if it's not sleeping
* and it's not the current task.
@@ -2084,6 +2084,9 @@ static noinline void print_bfs_bug(int ret)
/*
* Breadth-first-search failed, graph got corrupted?
*/
+ if (ret == BFS_EQUEUEFULL)
+ pr_warn("Increase LOCKDEP_CIRCULAR_QUEUE_BITS to avoid this warning:\n");
+
WARN(1, "lockdep bfs error:%d\n", ret);
}
@@ -6263,25 +6266,27 @@ static struct pending_free *get_pending_free(void)
static void free_zapped_rcu(struct rcu_head *cb);
/*
- * Schedule an RCU callback if no RCU callback is pending. Must be called with
- * the graph lock held.
- */
-static void call_rcu_zapped(struct pending_free *pf)
+ * See if we need to queue an RCU callback, must be called with
+ * the lockdep lock held, returns false if either we don't have
+ * any pending free or the callback is already scheduled.
+ * Otherwise, a call_rcu() must follow this function call.
+ */
+static bool prepare_call_rcu_zapped(struct pending_free *pf)
{
WARN_ON_ONCE(inside_selftest());
if (list_empty(&pf->zapped))
- return;
+ return false;
if (delayed_free.scheduled)
- return;
+ return false;
delayed_free.scheduled = true;
WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
delayed_free.index ^= 1;
- call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+ return true;
}
/* The caller must hold the graph lock. May be called from RCU context. */
@@ -6307,6 +6312,7 @@ static void free_zapped_rcu(struct rcu_head *ch)
{
struct pending_free *pf;
unsigned long flags;
+ bool need_callback;
if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
return;
@@ -6318,14 +6324,18 @@ static void free_zapped_rcu(struct rcu_head *ch)
pf = delayed_free.pf + (delayed_free.index ^ 1);
__free_zapped_classes(pf);
delayed_free.scheduled = false;
+ need_callback =
+ prepare_call_rcu_zapped(delayed_free.pf + delayed_free.index);
+ lockdep_unlock();
+ raw_local_irq_restore(flags);
/*
- * If there's anything on the open list, close and start a new callback.
- */
- call_rcu_zapped(delayed_free.pf + delayed_free.index);
+ * If there's pending free and its callback has not been scheduled,
+ * queue an RCU callback.
+ */
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
- lockdep_unlock();
- raw_local_irq_restore(flags);
}
/*
@@ -6365,6 +6375,7 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
{
struct pending_free *pf;
unsigned long flags;
+ bool need_callback;
init_data_structures_once();
@@ -6372,10 +6383,11 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
lockdep_lock();
pf = get_pending_free();
__lockdep_free_key_range(pf, start, size);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
lockdep_unlock();
raw_local_irq_restore(flags);
-
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
/*
* Wait for any possible iterators from look_up_lock_class() to pass
* before continuing to free the memory they refer to.
@@ -6469,6 +6481,7 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
struct pending_free *pf;
unsigned long flags;
int locked;
+ bool need_callback = false;
raw_local_irq_save(flags);
locked = graph_lock();
@@ -6477,11 +6490,13 @@ static void lockdep_reset_lock_reg(struct lockdep_map *lock)
pf = get_pending_free();
__lockdep_reset_lock(pf, lock);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
graph_unlock();
out_irq:
raw_local_irq_restore(flags);
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
}
/*
@@ -6525,6 +6540,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
struct pending_free *pf;
unsigned long flags;
bool found = false;
+ bool need_callback = false;
might_sleep();
@@ -6545,11 +6561,14 @@ void lockdep_unregister_key(struct lock_class_key *key)
if (found) {
pf = get_pending_free();
__lockdep_free_key_range(pf, key, 1);
- call_rcu_zapped(pf);
+ need_callback = prepare_call_rcu_zapped(pf);
}
lockdep_unlock();
raw_local_irq_restore(flags);
+ if (need_callback)
+ call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+
/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
synchronize_rcu();
}
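
The common thread in the lockdep hunks above is moving call_rcu() out from under the lockdep graph lock: the decision to queue a callback is made (and recorded) while the lock is held, but the call_rcu() itself only happens after the lock and IRQ state are restored, which breaks the ordering cycle between the lockdep lock and RCU. A generic sketch of that shape, with made-up names standing in for the lockdep internals:

	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);
	static struct rcu_head demo_rcu_head;
	static bool demo_callback_scheduled;

	static void demo_rcu_free(struct rcu_head *head)
	{
		/* ... free the zapped objects ... */
	}

	static void demo_zap(void)
	{
		unsigned long flags;
		bool need_callback = false;

		spin_lock_irqsave(&demo_lock, flags);
		/* Decide under the lock, but do not call call_rcu() yet. */
		if (!demo_callback_scheduled) {
			demo_callback_scheduled = true;
			need_callback = true;
		}
		spin_unlock_irqrestore(&demo_lock, flags);

		/* Queue the callback only after every lock is released. */
		if (need_callback)
			call_rcu(&demo_rcu_head, demo_rcu_free);
	}
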
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index e2bfb1db589d..6db0f43fc4df 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -424,7 +424,7 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
for (i = 0; i < offset; i++)
seq_puts(m, " ");
for (i = 0; i < length; i++)
- seq_printf(m, "%c", c);
+ seq_putc(m, c);
seq_puts(m, "\n");
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 5ded7dff46ef..2bbb6eca5144 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -181,12 +181,21 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
__rwsem_set_reader_owned(sem, current);
}
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * Return just the real task structure pointer of the owner
+ */
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
+{
+ return (struct task_struct *)
+ (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
/*
* Return true if the rwsem is owned by a reader.
*/
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
-#ifdef CONFIG_DEBUG_RWSEMS
/*
* Check the count to see if it is write-locked.
*/
@@ -194,11 +203,9 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
if (count & RWSEM_WRITER_MASK)
return false;
-#endif
return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}
-#ifdef CONFIG_DEBUG_RWSEMS
/*
* With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
* is a task pointer in owner of a reader-owned rwsem, it will be the
@@ -266,15 +273,6 @@ static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
}
/*
- * Return just the real task structure pointer of the owner
- */
-static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
-{
- return (struct task_struct *)
- (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
-}
-
-/*
* Return the real task structure pointer of the owner and the embedded
* flags in the owner. pflags must be non-NULL.
*/
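
rwsem_owner() works because sem->owner stores a task_struct pointer with a few state bits OR'ed into its low, always-zero alignment bits; masking with ~RWSEM_OWNER_FLAGS_MASK recovers the bare pointer. A small sketch of that pointer-plus-flags packing outside the rwsem code (the names below are invented for illustration):

	#include <linux/sched.h>

	#define DEMO_FLAG_READER	0x1UL
	#define DEMO_FLAG_HANDOFF	0x2UL
	#define DEMO_FLAGS_MASK		(DEMO_FLAG_READER | DEMO_FLAG_HANDOFF)

	/* task_struct is word-aligned, so the low two bits are free for flags. */
	static unsigned long demo_pack(struct task_struct *owner,
				       unsigned long flags)
	{
		return (unsigned long)owner | (flags & DEMO_FLAGS_MASK);
	}

	static struct task_struct *demo_unpack(unsigned long val,
					       unsigned long *pflags)
	{
		*pflags = val & DEMO_FLAGS_MASK;
		return (struct task_struct *)(val & ~DEMO_FLAGS_MASK);
	}
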
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index 639397b5491c..5259cda486d0 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -411,6 +411,17 @@ static void static_call_del_module(struct module *mod)
for (site = start; site < stop; site++) {
key = static_call_key(site);
+
+ /*
+ * If the key was not updated due to a memory allocation
+ * failure in __static_call_init() then treating key::sites
+ * as key::mods in the code below would cause random memory
+ * access and #GP. In that case all subsequent sites have
+ * not been touched either, so stop iterating.
+ */
+ if (!static_call_key_has_mods(key))
+ break;
+
if (key == prev_key)
continue;
@@ -442,7 +453,7 @@ static int static_call_module_notify(struct notifier_block *nb,
case MODULE_STATE_COMING:
ret = static_call_add_module(mod);
if (ret) {
- WARN(1, "Failed to allocate memory for static calls");
+ pr_warn("Failed to allocate memory for static calls\n");
static_call_del_module(mod);
}
break;