From 9484ad6c17a735284d4ff75ed3140ded9e0065fd Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Thu, 11 Jul 2024 14:09:14 +0200 Subject: copy-before-write: allow specifying minimum cluster size In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. The type 'size' (corresponding to uint64_t in C) is used in QAPI to rule out negative inputs and for consistency with already existing @cluster-size parameters. Since block_copy_calculate_cluster_size() uses int64_t for its result, a check that the input is not too large is added in block_copy_state_new() before calling it. The calculation in block_copy_calculate_cluster_size() is done in the target int64_t type. Suggested-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Fiona Ebner Message-Id: <20240711120915.310243-2-f.ebner@proxmox.com> [vsementsov: switch version to 9.2 in QAPI doc] Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/block-copy.c | 36 ++++++++++++++++++++++++++---------- block/copy-before-write.c | 5 ++++- include/block/block-copy.h | 1 + qapi/block-core.json | 8 +++++++- 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/block/block-copy.c b/block/block-copy.c index cc618e4561..93eb1b2664 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range, } static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, + int64_t min_cluster_size, Error **errp) { int ret; @@ -319,6 +320,9 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, GLOBAL_STATE_CODE(); GRAPH_RDLOCK_GUARD_MAINLOOP(); + min_cluster_size = MAX(min_cluster_size, + (int64_t)BLOCK_COPY_CLUSTER_SIZE_DEFAULT); + target_does_cow = bdrv_backing_chain_next(target); /* @@ -329,13 +333,13 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, ret = bdrv_get_info(target, &bdi); if (ret == -ENOTSUP && !target_does_cow) { /* Cluster size is not defined */ - warn_report("The target block device doesn't provide " - "information about the block size and it doesn't have a " - "backing file. The default block size of %u bytes is " - "used. If the actual block size of the target exceeds " - "this default, the backup may be unusable", - BLOCK_COPY_CLUSTER_SIZE_DEFAULT); - return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; + warn_report("The target block device doesn't provide information about " + "the block size and it doesn't have a backing file. The " + "(default) block size of %" PRIi64 " bytes is used. If the " + "actual block size of the target exceeds this value, the " + "backup may be unusable", + min_cluster_size); + return min_cluster_size; } else if (ret < 0 && !target_does_cow) { error_setg_errno(errp, -ret, "Couldn't determine the cluster size of the target image, " @@ -345,16 +349,17 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, return ret; } else if (ret < 0 && target_does_cow) { /* Not fatal; just trudge on ahead. */ - return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; + return min_cluster_size; } - return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); + return MAX(min_cluster_size, bdi.cluster_size); } BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, BlockDriverState *copy_bitmap_bs, const BdrvDirtyBitmap *bitmap, bool discard_source, + uint64_t min_cluster_size, Error **errp) { ERRP_GUARD(); @@ -365,7 +370,18 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, GLOBAL_STATE_CODE(); - cluster_size = block_copy_calculate_cluster_size(target->bs, errp); + if (min_cluster_size > INT64_MAX) { + error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64, + min_cluster_size, INT64_MAX); + return NULL; + } else if (min_cluster_size && !is_power_of_2(min_cluster_size)) { + error_setg(errp, "min-cluster-size needs to be a power of 2"); + return NULL; + } + + cluster_size = block_copy_calculate_cluster_size(target->bs, + (int64_t)min_cluster_size, + errp); if (cluster_size < 0) { return NULL; } diff --git a/block/copy-before-write.c b/block/copy-before-write.c index 853e01a1eb..a919b1f41b 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -417,6 +417,7 @@ static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp) qdict_extract_subqdict(options, NULL, "bitmap"); qdict_del(options, "on-cbw-error"); qdict_del(options, "cbw-timeout"); + qdict_del(options, "min-cluster-size"); out: visit_free(v); @@ -476,8 +477,10 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, bs->file->bs->supported_zero_flags); s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE; + s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, - flags & BDRV_O_CBW_DISCARD_SOURCE, errp); + flags & BDRV_O_CBW_DISCARD_SOURCE, + opts->min_cluster_size, errp); if (!s->bcs) { error_prepend(errp, "Cannot create block-copy-state: "); return -EINVAL; diff --git a/include/block/block-copy.h b/include/block/block-copy.h index bdc703bacd..dd5cc82f3b 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, BlockDriverState *copy_bitmap_bs, const BdrvDirtyBitmap *bitmap, bool discard_source, + uint64_t min_cluster_size, Error **errp); /* Function should be called prior any actual copy request */ diff --git a/qapi/block-core.json b/qapi/block-core.json index 9f6dd59298..6751022428 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4639,12 +4639,18 @@ # @on-cbw-error parameter will decide how this failure is handled. # Default 0. (Since 7.1) # +# @min-cluster-size: Minimum size of blocks used by copy-before-write +# operations. Has to be a power of 2. No effect if smaller than +# the maximum of the target's cluster size and 64 KiB. Default 0. +# (Since 9.2) +# # Since: 6.2 ## { 'struct': 'BlockdevOptionsCbw', 'base': 'BlockdevOptionsGenericFormat', 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap', - '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } } + '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32', + '*min-cluster-size': 'size' } } ## # @BlockdevOptions: -- cgit v1.2.3-70-g09d2 From 6252deb244b2d60d03545c17eec548a43eb3aefa Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Thu, 11 Jul 2024 14:09:15 +0200 Subject: backup: add minimum cluster size to performance options In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. Suggested-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Fiona Ebner Message-Id: <20240711120915.310243-3-f.ebner@proxmox.com> [vsementsov: switch version to 9.2 in QAPI doc] Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/backup.c | 2 +- block/copy-before-write.c | 9 +++++++++ block/copy-before-write.h | 1 + blockdev.c | 3 +++ qapi/block-core.json | 9 +++++++-- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/block/backup.c b/block/backup.c index 3dd2e229d2..a1292c01ec 100644 --- a/block/backup.c +++ b/block/backup.c @@ -458,7 +458,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source, - &bcs, errp); + perf->min_cluster_size, &bcs, errp); if (!cbw) { goto error; } diff --git a/block/copy-before-write.c b/block/copy-before-write.c index a919b1f41b..e835987e52 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -548,6 +548,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, bool discard_source, + uint64_t min_cluster_size, BlockCopyState **bcs, Error **errp) { @@ -567,6 +568,14 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, qdict_put_str(opts, "file", bdrv_get_node_name(source)); qdict_put_str(opts, "target", bdrv_get_node_name(target)); + if (min_cluster_size > INT64_MAX) { + error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64, + min_cluster_size, INT64_MAX); + qobject_unref(opts); + return NULL; + } + qdict_put_int(opts, "min-cluster-size", (int64_t)min_cluster_size); + top = bdrv_insert_node(source, opts, flags, errp); if (!top) { return NULL; diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 01af0cd3c4..2a5d4ba693 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, bool discard_source, + uint64_t min_cluster_size, BlockCopyState **bcs, Error **errp); void bdrv_cbw_drop(BlockDriverState *bs); diff --git a/blockdev.c b/blockdev.c index 835064ed03..6740663fda 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2655,6 +2655,9 @@ static BlockJob *do_backup_common(BackupCommon *backup, if (backup->x_perf->has_max_chunk) { perf.max_chunk = backup->x_perf->max_chunk; } + if (backup->x_perf->has_min_cluster_size) { + perf.min_cluster_size = backup->x_perf->min_cluster_size; + } } if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || diff --git a/qapi/block-core.json b/qapi/block-core.json index 6751022428..c3b0a2376b 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1551,11 +1551,16 @@ # it should not be less than job cluster size which is calculated # as maximum of target image cluster size and 64k. Default 0. # +# @min-cluster-size: Minimum size of blocks used by copy-before-write +# and background copy operations. Has to be a power of 2. No +# effect if smaller than the maximum of the target's cluster size +# and 64 KiB. Default 0. (Since 9.2) +# # Since: 6.0 ## { 'struct': 'BackupPerf', - 'data': { '*use-copy-range': 'bool', - '*max-workers': 'int', '*max-chunk': 'int64' } } + 'data': { '*use-copy-range': 'bool', '*max-workers': 'int', + '*max-chunk': 'int64', '*min-cluster-size': 'size' } } ## # @BackupCommon: -- cgit v1.2.3-70-g09d2 From 6475155d519209c80fdda53e05130365aa769838 Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Fri, 12 Jul 2024 16:07:16 +0200 Subject: block/reqlist: allow adding overlapping requests Allow overlapping request by removing the assert that made it impossible. There are only two callers: 1. block_copy_task_create() It already asserts the very same condition before calling reqlist_init_req(). 2. cbw_snapshot_read_lock() There is no need to have read requests be non-overlapping in copy-before-write when used for snapshot-access. In fact, there was no protection against two callers of cbw_snapshot_read_lock() calling reqlist_init_req() with overlapping ranges and this could lead to an assertion failure [1]. In particular, with the reproducer script below [0], two cbw_co_snapshot_block_status() callers could race, with the second calling reqlist_init_req() before the first one finishes and removes its conflicting request. [0]: > #!/bin/bash -e > dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024 > ./qemu-img create /tmp/fleecing.raw -f raw 1G > ( > ./qemu-system-x86_64 --qmp stdio \ > --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \ > --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \ > < {"execute": "qmp_capabilities"} > {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", "file": "node0", "target": "node1", "node-name": "node3" } } > {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", "file": "node3", "node-name": "snap0" } } > {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", "data": { "path": "/tmp/nbd.socket" } } } } > {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": "snap0", "type": "nbd", "name": "exp0"}} > EOF > ) & > sleep 5 > while true; do > ./qemu-nbd -d /dev/nbd0 > ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r > nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket' > done [1]: > #5 0x000071e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101 > #6 0x0000615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23 > #7 0x00006152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237 > #8 0x00006152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304 > #9 0x00006152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726 > #10 0x000061528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48 > #11 0x00006152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474 > #12 0x00006152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652 > #13 0x00006152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732 > #14 0x00006152853d9a86 in blk_co_block_status_above (...) at ../block/block-backend.c:1473 > #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374 > #16 0x000061528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481 > #17 0x000061528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978 > #18 0x000061528538f906 in nbd_trip (...) at ../nbd/server.c:3121 > #19 0x00006152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175 Cc: qemu-stable@nongnu.org Suggested-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Fiona Ebner Message-Id: <20240712140716.517911-1-f.ebner@proxmox.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/copy-before-write.c | 3 ++- block/reqlist.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index e835987e52..81afeff1c7 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -66,7 +66,8 @@ typedef struct BDRVCopyBeforeWriteState { /* * @frozen_read_reqs: current read requests for fleecing user in bs->file - * node. These areas must not be rewritten by guest. + * node. These areas must not be rewritten by guest. There can be multiple + * overlapping read requests. */ BlockReqList frozen_read_reqs; diff --git a/block/reqlist.c b/block/reqlist.c index 08cb57cfa4..098e807378 100644 --- a/block/reqlist.c +++ b/block/reqlist.c @@ -20,8 +20,6 @@ void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset, int64_t bytes) { - assert(!reqlist_find_conflict(reqs, offset, bytes)); - *req = (BlockReq) { .offset = offset, .bytes = bytes, -- cgit v1.2.3-70-g09d2 From e84af3eb727d2c6de00693c48fbec1d6e68fbf9a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 17 Sep 2024 01:20:07 +0100 Subject: block: Remove unused aio_task_pool_empty aio_task_pool_empty has been unused since it was added in 6e9b225f73 ("block: introduce aio task pool") Remove it. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20240917002007.330689-1-dave@treblig.org> Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/aio_task.c | 5 ----- include/block/aio_task.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/block/aio_task.c b/block/aio_task.c index 9bd17ea2c1..bb5c05f455 100644 --- a/block/aio_task.c +++ b/block/aio_task.c @@ -119,8 +119,3 @@ int aio_task_pool_status(AioTaskPool *pool) return pool->status; } - -bool aio_task_pool_empty(AioTaskPool *pool) -{ - return pool->busy_tasks == 0; -} diff --git a/include/block/aio_task.h b/include/block/aio_task.h index 18a9c41f4e..c81d637617 100644 --- a/include/block/aio_task.h +++ b/include/block/aio_task.h @@ -40,8 +40,6 @@ void aio_task_pool_free(AioTaskPool *); /* error code of failed task or 0 if all is OK */ int aio_task_pool_status(AioTaskPool *pool); -bool aio_task_pool_empty(AioTaskPool *pool); - /* User provides filled @task, however task->pool will be set automatically */ void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task); -- cgit v1.2.3-70-g09d2 From b74987cd3bf9bd4bf452ed371d864cbd1dccb08e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 18 Sep 2024 13:42:20 +0100 Subject: util/co-shared-resource: Remove unused co_try_get_from_shres co_try_get_from_shres hasn't been used since it was added in 55fa54a789 ("co-shared-resource: protect with a mutex") (Everyone uses the _locked version) Remove it. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20240918124220.27871-1-dave@treblig.org> Signed-off-by: Vladimir Sementsov-Ogievskiy --- include/qemu/co-shared-resource.h | 7 ------- util/qemu-co-shared-resource.c | 6 ------ 2 files changed, 13 deletions(-) diff --git a/include/qemu/co-shared-resource.h b/include/qemu/co-shared-resource.h index 78ca5850f8..41be1a8131 100644 --- a/include/qemu/co-shared-resource.h +++ b/include/qemu/co-shared-resource.h @@ -44,13 +44,6 @@ SharedResource *shres_create(uint64_t total); */ void shres_destroy(SharedResource *s); -/* - * Try to allocate an amount of @n. Return true on success, and false - * if there is too little left of the collective resource to fulfill - * the request. - */ -bool co_try_get_from_shres(SharedResource *s, uint64_t n); - /* * Allocate an amount of @n, and, if necessary, yield until * that becomes possible. diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c index a66cc07e75..752eb5a1c5 100644 --- a/util/qemu-co-shared-resource.c +++ b/util/qemu-co-shared-resource.c @@ -66,12 +66,6 @@ static bool co_try_get_from_shres_locked(SharedResource *s, uint64_t n) return false; } -bool co_try_get_from_shres(SharedResource *s, uint64_t n) -{ - QEMU_LOCK_GUARD(&s->lock); - return co_try_get_from_shres_locked(s, n); -} - void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n) { assert(n <= s->total); -- cgit v1.2.3-70-g09d2