Skip to content

Commit a4a6329

Browse files
Fixes to HPX runtime code path. (#773)
Details: - Fixed the hpx::for_each invocation and replaced it with hpx::for_loop. The HPX runtime was initialized using hpx::start, but hpx::for_each was being called from a non-HPX thread (i.e., the standard BLIS runtime's single main thread). To run the parallel algorithm on the HPX runtime correctly, the code now uses hpx::run_as_hpx_thread(func, args...). - Replacing hpx::for_each with hpx::for_loop also eliminates the use of hpx::util::counting_iterator. - Employ hpx::execution::chunk_size(1) to make sure that a thread resides on a particular core. - Replaced hpx::apply() with its updated version, hpx::post(). - Initialize tdata->id to 0 in test_libblis.c, as it is the main thread and this is needed for writing results to the output file. - By default, if not specified, the HPX runtime uses all N threads/cores available in the system. To use only n_threads out of the N available threads, we use hpx::execution::experimental::num_cores(n_threads).
1 parent c6546c1 commit a4a6329

File tree

3 files changed

+17
-11
lines changed

3 files changed

+17
-11
lines changed

CREDITS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ but many others have contributed code, ideas, and feedback, including
130130
Benda Xu @heroxbd
131131
Guodong Xu @docularxu (Linaro.org)
132132
RuQing Xu @xrq-phys (The University of Tokyo)
133+
Srinivas Yadav @srinivasyadav18
133134
Costas Yamin @cosstas
134135
Chenhan Yu @ChenhanYu (The University of Texas at Austin)
135136
Roman Yurchak @rth (Symerio)

frame/thread/bli_thread_hpx.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,10 @@
3636

3737
#ifdef BLIS_ENABLE_HPX
3838

39-
#include <hpx/local/execution.hpp>
40-
#include <hpx/parallel/algorithms/for_each.hpp>
39+
#include <hpx/execution.hpp>
4140
#include <hpx/hpx_start.hpp>
41+
#include <hpx/parallel/algorithms/for_loop.hpp>
42+
#include <hpx/runtime_local/run_as_hpx_thread.hpp>
4243

4344
extern "C"
4445
{
@@ -55,13 +56,16 @@ void bli_thread_launch_hpx
5556
// Allocate a global communicator for the root thrinfo_t structures.
5657
pool_t* gl_comm_pool = nullptr;
5758
thrcomm_t* gl_comm = bli_thrcomm_create( ti, gl_comm_pool, n_threads );
58-
59-
auto irange = hpx::util::counting_shape(n_threads);
60-
61-
hpx::for_each(hpx::execution::par, hpx::util::begin(irange), hpx::util::end(irange),
62-
[&gl_comm, &func, &params](const dim_t tid)
59+
hpx::threads::run_as_hpx_thread([&]()
6360
{
64-
func( gl_comm, tid, params );
61+
hpx::execution::experimental::num_cores num_cores_(n_threads);
62+
hpx::execution::static_chunk_size chunk_size_(1);
63+
hpx::experimental::for_loop(
64+
hpx::execution::par.with(num_cores_).with(chunk_size_), 0, n_threads,
65+
[&gl_comm, &func, &params](const dim_t tid)
66+
{
67+
func( gl_comm, tid, params );
68+
});
6569
});
6670

6771
// Free the global communicator, because the root thrinfo_t node
@@ -76,7 +80,7 @@ void bli_thread_initialize_hpx( int argc, char** argv )
7680

7781
int bli_thread_finalize_hpx()
7882
{
79-
hpx::apply([]() { hpx::finalize(); });
83+
hpx::post([]() { hpx::finalize(); });
8084
return hpx::stop();
8185
}
8286

testsuite/src/test_libblis.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,16 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
135135
err_t r_val;
136136

137137
#ifdef BLIS_ENABLE_HPX
138+
size_t nt = ( size_t )params->n_app_threads;
138139

139-
size_t tdata_size = ( size_t )params->n_app_threads *
140+
size_t tdata_size = ( size_t )nt *
140141
( size_t )sizeof( thread_data_t );
141142
thread_data_t* tdata = bli_malloc_user( tdata_size, &r_val );
142143

143144
tdata->params = params;
144145
tdata->ops = ops;
145146
tdata->nt = nt;
146-
tdata->id = 1;
147+
tdata->id = 0;
147148
tdata->xc = 0;
148149

149150
// Walk through all test modules.

0 commit comments

Comments
 (0)