Snippet that reproduces the issue:
#include<stdio.h>
#include<stdlib.h>
#include "cblas.h"
/*
 * Reproducer: calls cblas_ddot() from inside nested OpenMP parallel regions.
 *
 * Fills a vector of n ones, computes dot(x, x) ten times inside a
 * `parallel for` that is itself enclosed in a `parallel` region, and prints
 * the value that was stored last.
 *
 * Returns EXIT_FAILURE if the vector cannot be allocated, 0 otherwise.
 */
int main(void)
{
    int n = 14000;
    int incx = 1;
    double ddt = 0.0;

    /* Allocation can fail; bail out instead of writing through NULL. */
    double *x = malloc((size_t)n * sizeof *x);
    if (x == NULL) {
        fprintf(stderr, "failed to allocate %d doubles\n", n);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < n; i++) {
        x[i] = 1.0;
    }

    /*
     * NOTE(review): the `parallel for` nested inside `parallel` is kept
     * exactly as reported -- presumably this nesting is what triggers the
     * dead-lock, so do not flatten it when editing the reproducer.
     */
    #pragma omp parallel
    {
        #pragma omp parallel for
        for (int i = 0; i < 10; i++) {
            /* Compute into a per-iteration local, then publish atomically:
             * ddt is shared by every thread, so a plain store is a data race. */
            double dot = cblas_ddot(n, x, incx, x, incx);
            #pragma omp atomic write
            ddt = dot;
        }
    }

    printf("dot(x, x) = %f", ddt);
    free(x);
    return 0;
}
The deadlock is due to #4359.
Context: