summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Radul <axch@google.com>2023-11-17 15:35:11 -0500
committerAlexey Radul <axch@google.com>2023-11-17 15:35:11 -0500
commit7b17756fcb6f27f64ad346718bbc98917e22f40c (patch)
tree9767189e16e38cb267399d958240dafeb049523f
parent941d7f84c433a6d4125c930ff7e75f7caad73349 (diff)
Better comments and variable names.
-rw-r--r--src/lib/work-stealing.c33
1 files changed, 20 insertions, 13 deletions
diff --git a/src/lib/work-stealing.c b/src/lib/work-stealing.c
index 3a79fcd5..a4d8357a 100644
--- a/src/lib/work-stealing.c
+++ b/src/lib/work-stealing.c
@@ -286,39 +286,46 @@ Work* increase_cont_capacity(Work* cont, int joiners) {
Work* execute_pure_loop_task(int id, Work* self);
+// The recursive workhorse for running a pure loop.
Work* execute_pure_loop(int thread_id, Work* cont, GenLoopBody body, void** env, int start_iter, int end_iter) {
- if (end_iter - start_iter <= 1) {
+ int grain_size = 1
+ if (end_iter - start_iter <= grain_size) {
// Few enough iterations; just do them.
for (int i = start_iter; i < end_iter; i++) {
do_work(thread_id, body(thread_id, i, env));
}
return join_work(cont);
} else {
- // Create Works that represent schedulable pieces of the loop.
+ // Break the loop up into chunks of iterations, and schedule those.
int branching_factor = 2;
div_t iters_per_branch = div(end_iter - start_iter, branching_factor);
int this_iter = start_iter;
+ // We are increasing the number of `Work`s that are permitted to join our
+ // continuation `cont`. Account for that.
Work* subcont = increase_cont_capacity(cont, branching_factor);
- // Queue up all but one chunk of the loop
+ // Queue up all but one chunk of the loop for idle workers to potentially
+ // steal.
for (int i = 0; i < branching_factor - 1; i++) {
int next_iter = this_iter + iters_per_branch.quot;
if (i < iters_per_branch.rem) {
+ // The chunks may be slightly uneven if the trip count is not evenly
+ // divisible by the branching factor.
next_iter++;
}
- Work* section = (Work*) malloc(sizeof(Work) + 5 * sizeof(int*));
- section->code = &execute_pure_loop_task;
- section->join_count = 0;
- section->args[0] = subcont;
- section->args[1] = body;
- section->args[2] = env;
+ Work* chunk = (Work*) malloc(sizeof(Work) + 5 * sizeof(int*));
+ chunk->code = &execute_pure_loop_task;
+ chunk->join_count = 0;
+ chunk->args[0] = subcont;
+ chunk->args[1] = body;
+ chunk->args[2] = env;
// TODO Is just casting ok here, or do I have to heap-allocate these ints?
// gcc complains about the integer and the pointer having different sizes.
- section->args[3] = (void*)this_iter;
- section->args[4] = (void*)next_iter;
- push(&thread_queues[thread_id], section);
+ chunk->args[3] = (void*)this_iter;
+ chunk->args[4] = (void*)next_iter;
+ push(&thread_queues[thread_id], chunk);
this_iter = next_iter;
}
- // Do the last chunk directly yourself
+ // Do the last chunk immediately, to avoid allocating a `Work` for it.
return execute_pure_loop(thread_id, subcont, body, env, this_iter, end_iter);
}
}