Skip to content

Commit a8b0b93

Browse files
authored
Merge branch 'develop' into NEMO_opt_init
2 parents d62b1b7 + df2469f commit a8b0b93

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

55 files changed

+2005
-1937
lines changed

.github/workflows/regression.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
strategy:
6161
fail-fast: false
6262
matrix:
63-
testscript: ['tutorials.py', 'parallel_regression.py', 'parallel_regression_AD.py', 'serial_regression.py', 'serial_regression_AD.py', 'hybrid_regression.py']
63+
testscript: ['tutorials.py', 'parallel_regression.py', 'parallel_regression_AD.py', 'serial_regression.py', 'serial_regression_AD.py', 'hybrid_regression.py', 'hybrid_regression_AD.py']
6464
include:
6565
- testscript: 'tutorials.py'
6666
tag: MPI
@@ -74,6 +74,8 @@ jobs:
7474
tag: NoMPI
7575
- testscript: 'hybrid_regression.py'
7676
tag: OMP
77+
- testscript: 'hybrid_regression_AD.py'
78+
tag: OMP
7779
steps:
7880
- name: Download All artifact
7981
uses: actions/download-artifact@v2

Common/include/basic_types/ad_structure.hpp

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ namespace AD{
252252

253253
/*!
254254
* \brief Start a passive region, i.e. stop recording.
255-
* \return True is tape was active.
255+
* \return True if tape was active.
256256
*/
257257
inline bool BeginPassive() { return false; }
258258

@@ -262,6 +262,28 @@ namespace AD{
262262
*/
263263
inline void EndPassive(bool wasActive) {}
264264

265+
/*!
266+
* \brief Pause the use of preaccumulation.
267+
* \return True if preaccumulation was active.
268+
*/
269+
inline bool PausePreaccumulation() { return false; }
270+
271+
/*!
272+
* \brief Resume the use of preaccumulation.
273+
* \param[in] wasActive - Whether preaccumulation was active before pausing.
274+
*/
275+
inline void ResumePreaccumulation(bool wasActive) {}
276+
277+
/*!
278+
* \brief Begin a hybrid parallel adjoint evaluation mode that assumes an inherently safe reverse path.
279+
*/
280+
inline void StartNoSharedReading() {}
281+
282+
/*!
283+
* \brief End the "no shared reading" adjoint evaluation mode.
284+
*/
285+
inline void EndNoSharedReading() {}
286+
265287
#else
266288
using CheckpointHandler = codi::DataStore;
267289

@@ -271,9 +293,10 @@ namespace AD{
271293

272294
extern ExtFuncHelper* FuncHelper;
273295

274-
extern bool Status;
275-
276296
extern bool PreaccActive;
297+
#ifdef HAVE_OPDI
298+
SU2_OMP(threadprivate(PreaccActive))
299+
#endif
277300

278301
extern bool PreaccEnabled;
279302

@@ -290,6 +313,9 @@ namespace AD{
290313
extern std::vector<TapePosition> TapePositions;
291314

292315
extern codi::PreaccumulationHelper<su2double> PreaccHelper;
316+
#ifdef HAVE_OPDI
317+
SU2_OMP(threadprivate(PreaccHelper))
318+
#endif
293319

294320
/*--- Reference to the tape. ---*/
295321

@@ -446,6 +472,7 @@ namespace AD{
446472
FORCEINLINE void EndPreacc(){
447473
if (PreaccActive) {
448474
PreaccHelper.finish(false);
475+
PreaccActive = false;
449476
}
450477
}
451478

@@ -522,6 +549,39 @@ namespace AD{
522549

523550
FORCEINLINE void EndPassive(bool wasActive) { if(wasActive) StartRecording(); }
524551

552+
/*!
 * \brief Pause the use of preaccumulation.
 *
 * Reads PreaccEnabled; if it is already false, returns false immediately without
 * reaching either barrier. Otherwise PreaccEnabled is set to false inside the
 * SU2_OMP_MASTER section, which is enclosed by two SU2_OMP_BARRIER statements.
 *
 * \note NOTE(review): because of the early return, this presumably relies on every
 *       thread of the team observing the same value of PreaccEnabled on entry - confirm.
 * \return True if preaccumulation was active (PreaccEnabled was true on entry).
 */
FORCEINLINE bool PausePreaccumulation() {
553+
const auto current = PreaccEnabled;
554+
if (!current) return false;
555+
SU2_OMP_BARRIER
556+
SU2_OMP_MASTER
557+
PreaccEnabled = false;
558+
END_SU2_OMP_MASTER
559+
SU2_OMP_BARRIER
560+
return true;
561+
}
562+
563+
/*!
 * \brief Resume the use of preaccumulation.
 *
 * Does nothing when wasActive is false. Otherwise PreaccEnabled is set back to true
 * inside the SU2_OMP_MASTER section, which is enclosed by two SU2_OMP_BARRIER statements.
 *
 * \param[in] wasActive - Whether preaccumulation was active before pausing
 *            (i.e. the value returned by PausePreaccumulation).
 */
FORCEINLINE void ResumePreaccumulation(bool wasActive) {
564+
if (!wasActive) return;
565+
SU2_OMP_BARRIER
566+
SU2_OMP_MASTER
567+
PreaccEnabled = true;
568+
END_SU2_OMP_MASTER
569+
SU2_OMP_BARRIER
570+
}
571+
572+
/*!
 * \brief Begin a hybrid parallel adjoint evaluation mode that assumes an inherently safe reverse path.
 *
 * When HAVE_OPDI is defined, switches the OpDiLib adjoint access mode to Classical
 * and then adds a reverse barrier. Without HAVE_OPDI this function is a no-op.
 */
FORCEINLINE void StartNoSharedReading() {
573+
#ifdef HAVE_OPDI
574+
opdi::logic->setAdjointAccessMode(opdi::LogicInterface::AdjointAccessMode::Classical);
575+
opdi::logic->addReverseBarrier();
576+
#endif
577+
}
578+
579+
/*!
 * \brief End the "no shared reading" adjoint evaluation mode.
 *
 * When HAVE_OPDI is defined, switches the OpDiLib adjoint access mode to Atomic
 * and then adds a reverse barrier. Without HAVE_OPDI this function is a no-op.
 */
FORCEINLINE void EndNoSharedReading() {
580+
#ifdef HAVE_OPDI
581+
opdi::logic->setAdjointAccessMode(opdi::LogicInterface::AdjointAccessMode::Atomic);
582+
opdi::logic->addReverseBarrier();
583+
#endif
584+
}
525585
#endif // CODI_REVERSE_TYPE
526586

527587
} // namespace AD

Common/include/code_config.hpp

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -79,25 +79,15 @@ using su2conditional_t = typename su2conditional<B,T,F>::type;
7979
#include "codi.hpp"
8080
#include "codi/tools/dataStore.hpp"
8181

82-
#ifndef CODI_INDEX_TAPE
83-
#define CODI_INDEX_TAPE 0
84-
#endif
85-
#ifndef CODI_PRIMAL_TAPE
86-
#define CODI_PRIMAL_TAPE 0
87-
#endif
88-
#ifndef CODI_PRIMAL_INDEX_TAPE
89-
#define CODI_PRIMAL_INDEX_TAPE 0
90-
#endif
91-
9282
#if defined(HAVE_OMP)
9383
using su2double = codi::RealReverseIndexParallel;
9484
#else
95-
#if CODI_INDEX_TAPE
85+
#if defined(CODI_INDEX_TAPE)
9686
using su2double = codi::RealReverseIndex;
97-
#elif CODI_PRIMAL_TAPE
98-
using su2double = codi::RealReversePrimal;
99-
#elif CODI_PRIMAL_INDEX_TAPE
100-
using su2double = codi::RealReversePrimalIndex;
87+
//#elif defined(CODI_PRIMAL_TAPE)
88+
//using su2double = codi::RealReversePrimal;
89+
//#elif defined(CODI_PRIMAL_INDEX_TAPE)
90+
//using su2double = codi::RealReversePrimalIndex;
10191
#else
10292
using su2double = codi::RealReverse;
10393
#endif

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ class CSysSolve {
256256
void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {
257257

258258
/*--- Reset the pointers. ---*/
259+
SU2_OMP_BARRIER
259260
SU2_OMP_MASTER {
260261
LinSysRes_ptr = nullptr;
261262
LinSysSol_ptr = nullptr;
@@ -276,6 +277,7 @@ class CSysSolve {
276277
LinSysSol.PassiveCopy(LinSysSol_tmp);
277278

278279
/*--- Reset the pointers. ---*/
280+
SU2_OMP_BARRIER
279281
SU2_OMP_MASTER {
280282
LinSysRes_ptr = nullptr;
281283
LinSysSol_ptr = nullptr;

Common/include/toolboxes/graph_toolbox.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ T createNaturalColoring(Index_t numInnerIndexes)
527527
* \param[out] indexColor - Optional, vector with colors given to the outer indices.
528528
* \return Coloring in the same type of the input pattern.
529529
*/
530-
template<class T, typename Color_t = char, size_t MaxColors = 32, size_t MaxMB = 128>
530+
template<class T, typename Color_t = char, size_t MaxColors = 64, size_t MaxMB = 128>
531531
T colorSparsePattern(const T& pattern, size_t groupSize = 1, bool balanceColors = false,
532532
std::vector<Color_t>* indexColor = nullptr)
533533
{

Common/include/toolboxes/printing_toolbox.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ namespace PrintingToolbox {
7171
class CTablePrinter{
7272
public:
7373
CTablePrinter(std::ostream * output, const std::string & separator = "|");
74-
~CTablePrinter();
7574

7675
enum alignment {
7776
CENTER,

Common/src/CConfig.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4477,11 +4477,7 @@ void CConfig::SetPostprocessing(SU2_COMPONENT val_software, unsigned short val_i
44774477
#if defined CODI_REVERSE_TYPE
44784478
AD_Mode = YES;
44794479

4480-
#if defined HAVE_OMP
4481-
AD::PreaccEnabled = false;
4482-
#else
44834480
AD::PreaccEnabled = AD_Preaccumulation;
4484-
#endif
44854481

44864482
#else
44874483
if (AD_Mode == YES) {

Common/src/basic_types/ad_structure.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,16 @@ namespace AD {
3535
std::vector<TapePosition> TapePositions;
3636

3737
bool PreaccActive = false;
38+
#ifdef HAVE_OPDI
39+
SU2_OMP(threadprivate(PreaccActive))
40+
#endif
41+
3842
bool PreaccEnabled = true;
3943

4044
codi::PreaccumulationHelper<su2double> PreaccHelper;
45+
#ifdef HAVE_OPDI
46+
SU2_OMP(threadprivate(PreaccHelper))
47+
#endif
4148

4249
ExtFuncHelper* FuncHelper;
4350

Common/src/geometry/CPhysicalGeometry.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7701,7 +7701,8 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
77017701

77027702
const auto nNodes = bound[iMarker][iElem]->GetnNodes();
77037703

7704-
AD::StartPreacc();
7704+
/*--- Cannot preaccumulate if hybrid parallel due to shared reading. ---*/
7705+
if (omp_get_num_threads() == 1) AD::StartPreacc();
77057706

77067707
/*--- Get pointers to the coordinates of all the element nodes ---*/
77077708
array<const su2double*, N_POINTS_MAXIMUM> Coord;

Common/src/linear_algebra/CSysMatrix.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,17 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
185185
/*--- This is akin to the row_ptr. ---*/
186186
omp_partitions = new unsigned long [omp_num_parts+1];
187187

188-
/// TODO: Use a work estimate to produce more balanced partitions.
189-
auto pts_per_part = roundUpDiv(nPointDomain, omp_num_parts);
190-
for(auto part = 0ul; part < omp_num_parts; ++part)
191-
omp_partitions[part] = part * pts_per_part;
188+
/*--- Work estimate based on non-zeros to produce balanced partitions. ---*/
189+
190+
const auto row_ptr_prec = ilu_needed? row_ptr_ilu : row_ptr;
191+
const auto nnz_prec = row_ptr_prec[nPointDomain];
192+
193+
const auto nnz_per_part = roundUpDiv(nnz_prec, omp_num_parts);
194+
195+
for (auto iPoint = 0ul, part = 0ul; iPoint < nPointDomain; ++iPoint) {
196+
if (row_ptr_prec[iPoint] >= part*nnz_per_part)
197+
omp_partitions[part++] = iPoint;
198+
}
192199
omp_partitions[omp_num_parts] = nPointDomain;
193200

194201
/*--- Generate MKL Kernels ---*/

0 commit comments

Comments
 (0)