From ee7dc86b6d3e3b86c2c487f713eda657850de238 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi
Date: Tue, 15 Nov 2022 17:45:52 -0500
Subject: wait: Return number of exclusive waiters awakened

Sbitmap code will need to know how many waiters were actually woken for
its batched wakeups implementation. Return the number of woken
exclusive waiters from __wake_up() to facilitate that.

Suggested-by: Jan Kara
Signed-off-by: Gabriel Krisman Bertazi
Reviewed-by: Jan Kara
Link: https://lore.kernel.org/r/20221115224553.23594-3-krisman@suse.de
Signed-off-by: Jens Axboe
---
 kernel/sched/wait.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 9860bb9a847c..133b74730738 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -121,11 +121,12 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 	return nr_exclusive;
 }
 
-static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
+static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
 	unsigned long flags;
 	wait_queue_entry_t bookmark;
+	int remaining = nr_exclusive;
 
 	bookmark.flags = 0;
 	bookmark.private = NULL;
@@ -134,10 +135,12 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
 
 	do {
 		spin_lock_irqsave(&wq_head->lock, flags);
-		nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
+		remaining = __wake_up_common(wq_head, mode, remaining,
 						wake_flags, key, &bookmark);
 		spin_unlock_irqrestore(&wq_head->lock, flags);
 	} while (bookmark.flags & WQ_FLAG_BOOKMARK);
+
+	return nr_exclusive - remaining;
 }
 
 /**
@@ -147,13 +150,14 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
  *
- * If this function wakes up a task, it executes a full memory barrier before
- * accessing the task state.
+ * If this function wakes up a task, it executes a full memory barrier
+ * before accessing the task state. Returns the number of exclusive
+ * tasks that were awakened.
  */
-void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
-	       int nr_exclusive, void *key)
+int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
+	      int nr_exclusive, void *key)
 {
-	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
+	return __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
 }
 EXPORT_SYMBOL(__wake_up);
--
cgit v1.2.3-70-g09d2
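A quick illustration of what the new return value buys a caller. This is a hedged sketch, not the sbitmap code the commit message refers to; my_batched_wake() and the missed_wakeups counter are made-up names.

/* Hypothetical consumer of the value now returned by __wake_up(). */
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/wait.h>

static atomic_t missed_wakeups = ATOMIC_INIT(0);	/* made-up bookkeeping */

static void my_batched_wake(struct wait_queue_head *wq, int batch)
{
	/* Wake up to 'batch' exclusive waiters; 'woken' is how many actually were. */
	int woken = __wake_up(wq, TASK_NORMAL, batch, NULL);

	/*
	 * Carry unconsumed wakeups over to a later batch instead of
	 * losing them, which is the accounting this patch makes possible.
	 */
	if (woken < batch)
		atomic_add(batch - woken, &missed_wakeups);
}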
From dae590a6c96c799434e0ff8156ef29b88c257e60 Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Fri, 4 Nov 2022 20:59:02 -0400
Subject: blk-cgroup: Flush stats at blkgs destruction path

As noted by Michal, the blkg_iostat_set's in the lockless list hold a
reference to blkg's to protect against their removal. Those blkg's hold
a reference to the blkcg. When a cgroup is being destroyed,
cgroup_rstat_flush() is only called at css_release_work_fn(), which is
called when the blkcg reference count reaches 0. This circular
dependency will prevent the blkcg from being freed until some other
event causes cgroup_rstat_flush() to be called to flush out the pending
blkcg stats.

To prevent this delayed blkcg removal, add a new
cgroup_rstat_css_cpu_flush() function to flush stats for a given css
and cpu, and call it in the blkgs destruction path,
blkcg_destroy_blkgs(), whenever there are still pending stats to be
flushed. This will ensure that the blkcg reference count can reach 0
ASAP.

Signed-off-by: Waiman Long
Acked-by: Tejun Heo
Link: https://lore.kernel.org/r/20221105005902.407297-4-longman@redhat.com
Signed-off-by: Jens Axboe
---
 block/blk-cgroup.c     | 15 ++++++++++++++-
 include/linux/cgroup.h |  1 +
 kernel/cgroup/rstat.c  | 20 ++++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 3e03c0d13253..57941d2a8ba3 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1084,10 +1084,12 @@ struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
  */
 static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
+	int cpu;
+
 	might_sleep();
 
+	css_get(&blkcg->css);
 	spin_lock_irq(&blkcg->lock);
-
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
 						struct blkcg_gq, blkcg_node);
@@ -1110,6 +1112,17 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 	}
 
 	spin_unlock_irq(&blkcg->lock);
+
+	/*
+	 * Flush all the non-empty percpu lockless lists.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
+
+		if (!llist_empty(lhead))
+			cgroup_rstat_css_cpu_flush(&blkcg->css, cpu);
+	}
+
 	css_put(&blkcg->css);
 }
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 528bd44b59e2..6c4e66b3fa84 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -766,6 +766,7 @@ void cgroup_rstat_flush(struct cgroup *cgrp);
 void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
 void cgroup_rstat_flush_hold(struct cgroup *cgrp);
 void cgroup_rstat_flush_release(void);
+void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu);
 
 /*
  * Basic resource stats.
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 793ecff29038..910e633869b0 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -281,6 +281,26 @@ void cgroup_rstat_flush_release(void)
 	spin_unlock_irq(&cgroup_rstat_lock);
 }
 
+/**
+ * cgroup_rstat_css_cpu_flush - flush stats for the given css and cpu
+ * @css: target css to be flushed
+ * @cpu: the cpu that holds the stats to be flushed
+ *
+ * A lightweight rstat flush operation for a given css and cpu.
+ * Only the cpu_lock is being held for mutual exclusion, the cgroup_rstat_lock
+ * isn't used.
+ */
+void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu)
+{
+	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
+
+	raw_spin_lock_irq(cpu_lock);
+	rcu_read_lock();
+	css->ss->css_rstat_flush(css, cpu);
+	rcu_read_unlock();
+	raw_spin_unlock_irq(cpu_lock);
+}
+
 int cgroup_rstat_init(struct cgroup *cgrp)
 {
 	int cpu;
--
cgit v1.2.3-70-g09d2
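For context, the pattern the new helper enables (and which the next commit reverts) looks like this from a controller's point of view: flush only the CPUs whose lockless lists actually have pending entries, so the css refcount can reach zero without waiting for a global rstat flush. A minimal sketch mirroring the blkcg code above; my_controller_flush_pending() and the per-cpu lheads layout are hypothetical.

#include <linux/cgroup.h>
#include <linux/llist.h>
#include <linux/percpu.h>

/* Hypothetical teardown helper for a controller batching per-cpu updates. */
static void my_controller_flush_pending(struct cgroup_subsys_state *css,
					struct llist_head __percpu *lheads)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct llist_head *lhead = per_cpu_ptr(lheads, cpu);

		/* Skip CPUs with nothing queued; flushing them is wasted work. */
		if (!llist_empty(lhead))
			cgroup_rstat_css_cpu_flush(css, cpu);
	}
}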
Link: https://lore.kernel.org/linux-block/CA+QYu4oxiRKC6hJ7F27whXy-PRBx=Tvb+-7TQTONN8qTtV3aDA@mail.gmail.com/
Link: https://lore.kernel.org/linux-block/69af7ccb-6901-c84c-0e95-5682ccfb750c@acm.org/
Signed-off-by: Jens Axboe
---
 block/blk-cgroup.c     | 15 +--------------
 include/linux/cgroup.h |  1 -
 kernel/cgroup/rstat.c  | 20 --------------------
 3 files changed, 1 insertion(+), 35 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 57941d2a8ba3..3e03c0d13253 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1084,12 +1084,10 @@ struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
  */
 static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
-	int cpu;
-
 	might_sleep();
 
-	css_get(&blkcg->css);
 	spin_lock_irq(&blkcg->lock);
+
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
 						struct blkcg_gq, blkcg_node);
@@ -1112,17 +1110,6 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg)
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-
-	/*
-	 * Flush all the non-empty percpu lockless lists.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
-
-		if (!llist_empty(lhead))
-			cgroup_rstat_css_cpu_flush(&blkcg->css, cpu);
-	}
-
 	css_put(&blkcg->css);
 }
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 6c4e66b3fa84..528bd44b59e2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -766,7 +766,6 @@ void cgroup_rstat_flush(struct cgroup *cgrp);
 void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
 void cgroup_rstat_flush_hold(struct cgroup *cgrp);
 void cgroup_rstat_flush_release(void);
-void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu);
 
 /*
  * Basic resource stats.
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 910e633869b0..793ecff29038 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -281,26 +281,6 @@ void cgroup_rstat_flush_release(void)
 	spin_unlock_irq(&cgroup_rstat_lock);
 }
 
-/**
- * cgroup_rstat_css_cpu_flush - flush stats for the given css and cpu
- * @css: target css to be flushed
- * @cpu: the cpu that holds the stats to be flushed
- *
- * A lightweight rstat flush operation for a given css and cpu.
- * Only the cpu_lock is being held for mutual exclusion, the cgroup_rstat_lock
- * isn't used.
- */
-void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu)
-{
-	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
-
-	raw_spin_lock_irq(cpu_lock);
-	rcu_read_lock();
-	css->ss->css_rstat_flush(css, cpu);
-	rcu_read_unlock();
-	raw_spin_unlock_irq(cpu_lock);
-}
-
 int cgroup_rstat_init(struct cgroup *cgrp)
 {
 	int cpu;
--
cgit v1.2.3-70-g09d2

From 2e833c8c8c42a3b6e22d6b3a9d2d18e425551261 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Wed, 30 Nov 2022 23:03:31 -0800
Subject: block: bdev & blktrace: use consistent function doc. notation

Use only one hyphen in kernel-doc notation between the function name
and its short description. This is the documented kernel-doc format.
It also fixes the HTML presentation to be consistent with other
functions.
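For reference, the kernel-doc shape this patch converges on, shown with a made-up function; note the single hyphen between the name and the short description:

/**
 * my_example_func - short one-line description of the function
 * @arg: description of the argument
 *
 * Optional longer description. Variants such as "name -- description"
 * or "name: - description" are what the patch above removes; they
 * render inconsistently in the generated HTML.
 *
 * Return: 0 on success, or a negative errno on failure.
 */
static int my_example_func(int arg)
{
	return arg ? 0 : -EINVAL;
}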
Signed-off-by: Randy Dunlap
Cc: Jens Axboe
Cc: linux-block@vger.kernel.org
Link: https://lore.kernel.org/r/20221201070331.25685-1-rdunlap@infradead.org
Signed-off-by: Jens Axboe
---
 block/bdev.c            | 4 ++--
 kernel/trace/blktrace.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index d699ecdb3260..edc110d90df4 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -224,7 +224,7 @@ int fsync_bdev(struct block_device *bdev)
 EXPORT_SYMBOL(fsync_bdev);
 
 /**
- * freeze_bdev -- lock a filesystem and force it into a consistent state
+ * freeze_bdev - lock a filesystem and force it into a consistent state
  * @bdev: blockdevice to lock
  *
  * If a superblock is found on this device, we take the s_umount semaphore
@@ -268,7 +268,7 @@ done:
 EXPORT_SYMBOL(freeze_bdev);
 
 /**
- * thaw_bdev -- unlock filesystem
+ * thaw_bdev - unlock filesystem
  * @bdev: blockdevice to unlock
  *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index a995ea1ef849..94fde027bfc0 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -721,7 +721,7 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop);
  */
 
 /**
- * blk_trace_ioctl: - handle the ioctls associated with tracing
+ * blk_trace_ioctl - handle the ioctls associated with tracing
  * @bdev: the block device
  * @cmd: the ioctl cmd
  * @arg: the argument data, if any
@@ -769,7 +769,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 }
 
 /**
- * blk_trace_shutdown: - stop and cleanup trace structures
+ * blk_trace_shutdown - stop and cleanup trace structures
  * @q: the request queue associated with the device
  *
 **/
--
cgit v1.2.3-70-g09d2

From f596da3efaf4130ff61cd029558845808df9bf99 Mon Sep 17 00:00:00 2001
From: Yang Jihong
Date: Tue, 22 Nov 2022 12:04:10 +0800
Subject: blktrace: Fix output of non-blktrace events when blk_classic option
 is enabled

When the blk_classic option is enabled, non-blktrace events must be
filtered out. Otherwise, events of other types are output in the
blktrace classic format, which is unexpected.

The problem can be triggered as follows:

  # echo 1 > /sys/kernel/debug/tracing/options/blk_classic
  # echo 1 > /sys/kernel/debug/tracing/events/enable
  # echo blk > /sys/kernel/debug/tracing/current_tracer
  # cat /sys/kernel/debug/tracing/trace_pipe

Fixes: c71a89615411 ("blktrace: add ftrace plugin")
Signed-off-by: Yang Jihong
Link: https://lore.kernel.org/r/20221122040410.85113-1-yangjihong1@huawei.com
Signed-off-by: Jens Axboe
---
 kernel/trace/blktrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 94fde027bfc0..918a7d12df8f 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1548,7 +1548,8 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
 
 static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
 {
-	if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
+	if ((iter->ent->type != TRACE_BLK) ||
+	    !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
 		return TRACE_TYPE_UNHANDLED;
 
 	return print_one_line(iter, true);
--
cgit v1.2.3-70-g09d2
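The guard generalizes to any ftrace plugin tracer: a print_line callback should return TRACE_TYPE_UNHANDLED for entries it does not own, letting the tracing core fall back to its default formatting. A minimal sketch; TRACE_MYEVT and my_print_one_line() are made-up names standing in for a tracer's own event type and formatter.

#include <linux/trace_events.h>

#define TRACE_MYEVT	64	/* hypothetical event type id */

/* Stub formatter for events this tracer owns. */
static enum print_line_t my_print_one_line(struct trace_iterator *iter)
{
	return TRACE_TYPE_HANDLED;
}

static enum print_line_t my_tracer_print_line(struct trace_iterator *iter)
{
	/* Bail out early for events owned by other tracers, as in the fix above. */
	if (iter->ent->type != TRACE_MYEVT)
		return TRACE_TYPE_UNHANDLED;

	return my_print_one_line(iter);
}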