diff --git a/check/features.frm b/check/features.frm index c7bd4488..aafc5384 100644 --- a/check/features.frm +++ b/check/features.frm @@ -307,9 +307,6 @@ assert succeeded? # assert anything. We only test whether this hits the test suite timeout limit, # or not. TODO in principle this can be considered to be a bug, to be looked at # in the future. -# For parform, there is no attempt to ensure the master process recieves SIGALRM -# and not the workers. -#pend_if mpi? *--#] TimeoutAfter_2 : *--#[ dedup : * Test deduplication @@ -2350,7 +2347,6 @@ Local F = mzv_(2,1); Evaluate mzv_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Error: Weight of Euler/MZV sum greater than 2.") assert runtime_error?("Please increase the maximum weight in #startfloat.") *--#] mzv_error_1 : @@ -2360,7 +2356,6 @@ Local F = mzv_(1,2); Evaluate mzv_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Divergent MZV in CalculateMZV") *--#] mzv_error_2 : *--#[ mzv_error_3: @@ -2369,7 +2364,6 @@ Local F = mzv_(-2,1); Evaluate mzv_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Illegal index[0] in CalculateMZV: -2") *--#] mzv_error_3 : *--#[ mzv_error_4: @@ -2378,7 +2372,6 @@ Local F = mzvhalf_(2,-1); Evaluate mzvhalf_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Illegal index[1] in CalculateMZVhalf: -1") *--#] mzv_error_4 : *--#[ mzv_error_5: @@ -2387,7 +2380,6 @@ Local F = euler_(1,-2); Evaluate euler_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Divergent Euler sum in CalculateEuler") *--#] mzv_error_5 : *--#[ humanstats : @@ -3227,7 +3219,6 @@ Model PHI3; Vertex phi,phi,phi:1; EndModel; .end -#pend_if mpi? assert runtime_error?('Invalid coupling constant in vertex statement.') *--#] diagrams_err_1 : *--#[ diagrams_err_2 : @@ -3236,7 +3227,6 @@ Model PHI3; Vertex phi,phi,phi:g^-1; EndModel; .end -#pend_if mpi? assert runtime_error?('Invalid negative power of coupling constant.') *--#] diagrams_err_2 : *--#[ diagrams_err_3 : @@ -3247,7 +3237,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{},{p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Insufficient external momenta in diagrams_') *--#] diagrams_err_3 : *--#[ diagrams_err_4 : @@ -3258,7 +3247,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q2},{},1,0); .end -#pend_if mpi? assert runtime_error?('Insufficient internal momenta in diagrams_') *--#] diagrams_err_4 : *--#[ diagrams_err_5 : @@ -3269,7 +3257,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,-q2},{p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Invalid negative external momentum in diagrams_: -q2') *--#] diagrams_err_5 : *--#[ diagrams_err_6 : @@ -3280,7 +3267,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q2},{-p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Invalid negative internal momentum in diagrams_: -p1') *--#] diagrams_err_6 : *--#[ diagrams_err_7 : @@ -3291,7 +3277,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q1},{p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Invalid repeated momentum in diagrams_: q1') *--#] diagrams_err_7 : *--#[ diagrams_err_8 : @@ -3302,6 +3287,5 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q2},{q1,p2},1,0); .end -#pend_if mpi? 
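
Reviewer note: every test in this batch ends in a deliberate runtime error. The `#pend_if mpi?` guards (and the SIGALRM caveat removed from TimeoutAfter_2) existed because, under ParFORM, an error on one rank used to leave the other ranks blocked in MPI receives until the suite timeout. This PR adds an error rendezvous (PF_RaiseRuntimeError in sources/parallel.c, at the end of this diff), which is why the guards can go. A minimal sketch of the worker side of that rendezvous in plain MPI; the tag value and function name here are stand-ins, not FORM code:

```c
#include <mpi.h>

enum { TAG_RUNTIME_ERROR = 42 };  /* stands in for PF_RUNTIME_ERROR_MSGTAG */

/* A worker that hits a fatal error notifies rank 0 (the master) and then
 * waits for the master's acknowledgement, so it does not exit while
 * messages are still in flight. Mirrors PF_RaiseRuntimeError below. */
void raise_runtime_error(MPI_Comm comm)
{
    int dummy = 0;
    MPI_Ssend(&dummy, 0, MPI_BYTE, 0, TAG_RUNTIME_ERROR, comm);
    MPI_Recv(&dummy, 0, MPI_BYTE, 0, TAG_RUNTIME_ERROR, comm,
             MPI_STATUS_IGNORE);
}
```
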
assert runtime_error?('Invalid repeated momentum in diagrams_: q1') *--#] diagrams_err_8 : diff --git a/check/fixes.frm b/check/fixes.frm index e3a22cd5..d7a2e655 100644 --- a/check/fixes.frm +++ b/check/fixes.frm @@ -389,8 +389,6 @@ PolyRatFun rat; L F = rat(a.a,1); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue39 : *--#[ Issue41 : @@ -1004,7 +1002,6 @@ L F6 = f(1000*g5_); L F7 = f(10000*g5_); .end # Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue94 : *--#[ Issue95 : @@ -1809,8 +1806,6 @@ L F = f(,...,); transform f,mulargs(1,last); * silent crash P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("Term too complex during normalization") *--#] Issue183 : *--#[ Issue185 : @@ -2144,8 +2139,6 @@ Print; # For now it fails because # "Currently Stage 4 sorts are not allowed for function arguments or $ variables." assert runtime_error? -# Runtime errors may freeze ParFORM. -#pend_if mpi? #assert succeeded? #assert result("test1") =~ expr("0") #assert result("test2") =~ expr("g(0)") @@ -2385,16 +2378,12 @@ assert result("F40") =~ expr("0") L F11 = div_(1,0); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_2 : *--#[ Issue261_3 : L F23 = rem_(0,0); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_3 : *--#[ Issue261_4 : @@ -2404,8 +2393,6 @@ S x; L F34 = inverse_($x,$z); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_4 : *--#[ Issue261_5 : @@ -2413,8 +2400,6 @@ assert runtime_error? L F16 = div_($z,$z); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_5 : *--#[ Issue261_6 : @@ -2423,8 +2408,6 @@ S x; L F27 = rem_($x,0); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_6 : *--#[ Issue261_7 : @@ -2432,8 +2415,6 @@ assert runtime_error? L F39 = inverse_(1,$z); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_7 : *--#[ Issue267 : @@ -3068,7 +3049,7 @@ Identify f(x?) = prf(n-x,n+x); .end # Fails due to polynomial size on 32bit builds #require wordsize >= 4 -# Runtime errors may freeze ParFORM. +# This set of buffer sizes does not cause the error in ParFORM. #pend_if mpi? assert runtime_error?("Please increase SmallExtension setup parameter.") *--#] Issue512_1 : @@ -3097,8 +3078,6 @@ EndTerm; .end # Fails due to polynomial size on 32bit builds #require wordsize >= 4 -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("Please increase SubSmallExtension setup parameter.") *--#] Issue512_2 : *--#[ Issue512_3 : @@ -3709,8 +3688,6 @@ Vector v; PolyRatFun rat; Local F = rat(v,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_1 : *--#[ Issue567_2 : @@ -3719,8 +3696,6 @@ Index i; PolyRatFun rat; Local F = rat(i,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_2 : *--#[ Issue567_3a : @@ -3729,8 +3704,6 @@ Function f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? 
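
Reviewer note: the recurring "# Runtime errors may freeze ParFORM." pends removed throughout fixes.frm all trace back to the same defect: Terminate() on one rank did not inform the others. The fix is the PF_PreTerminate() hook added in sources/parallel.c below, documented there as being called by Terminate(). A hedged sketch of the assumed call chain; only PF_PreTerminate() and PF_Terminate() are from this diff, the surrounding body is illustrative:

```c
#include <stdlib.h>

extern void PF_PreTerminate(int errorcode);  /* new: tells the other ranks about the error */
extern int  PF_Terminate(int errorcode);     /* existing ParFORM finalization */

/* Illustrative only; the real Terminate() in FORM does far more cleanup.
 * The point is the ordering: notify the other ranks first, then shut down. */
void Terminate(int errorcode)
{
    PF_PreTerminate(errorcode);  /* no-op unless errorcode != 0 during parallel processing */
    PF_Terminate(errorcode);
    exit(errorcode < 0 ? 1 : errorcode);
}
```
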
assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3a : *--#[ Issue567_3b : @@ -3739,8 +3712,6 @@ CFunction f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3b : *--#[ Issue567_3c : @@ -3749,8 +3720,6 @@ Table f(1); PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3c : *--#[ Issue567_3d : @@ -3759,8 +3728,6 @@ CTable f(1); PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3d : *--#[ Issue567_3e : @@ -3769,8 +3736,6 @@ Tensor f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3e : *--#[ Issue567_3f : @@ -3779,8 +3744,6 @@ CTensor f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3f : *--#[ Issue577_1 : @@ -3829,7 +3792,7 @@ Multiply 2; print; .end # ParFORM has valgrind errors with this. See discussion in PR 586. -#pend_if mpi? +#pend_if valgrind? && mpi? assert succeeded? assert result("test2") =~ expr("4") assert result("test3") =~ expr("6") @@ -3858,7 +3821,7 @@ Local test3 = 3; #endif .end # ParFORM has valgrind errors with this. See discussion in PR 586. -#pend_if mpi? +#pend_if valgrind? && mpi? assert runtime_error?("isnumerical: expression is not yet defined!") *--#] Issue577_2 : *--#[ Issue599 : @@ -4165,7 +4128,6 @@ ModuleOption inparallel; Print; .end -#pend_if mpi? assert succeeded? assert result("diff1") =~ expr("0") assert result("diff2") =~ expr("0") @@ -4474,7 +4436,7 @@ Local expr = 1; .sort $a = 1; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $a") *--#] PullReq649_1 : *--#[ PullReq649_2 : @@ -4483,7 +4445,7 @@ Local expr = 1; .sort $n1MdWu6rNU1d29yW3ukhzV7YuY = 1; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $n1MdWu6rNU1d29yW3ukhzV7YuY") *--#] PullReq649_2 : *--#[ PullReq649_3 : @@ -4501,7 +4463,7 @@ Local expr = 1; Symbol x; id x?$a = x; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $a") *--#] PullReq649_4 : *--#[ PullReq649_5 : @@ -4545,7 +4507,7 @@ Local expr = 1; $a = 1; moduleoption local $b; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $a") *--#] PullReq649_9 : *--#[ PullReq652 : diff --git a/check/polynomial.frm b/check/polynomial.frm index e5928dca..c551cac9 100644 --- a/check/polynomial.frm +++ b/check/polynomial.frm @@ -580,7 +580,7 @@ Drop; #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaik(gcd,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? 
|| warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("gcd: OK") *--#] polynomial_gcd_nvar_1 : @@ -596,7 +596,7 @@ assert stdout =~ exact_pattern("gcd: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaik(gcd,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("gcd: OK") *--#] polynomial_gcd_nvar_2 : @@ -612,7 +612,7 @@ assert stdout =~ exact_pattern("gcd: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaik(gcd,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("gcd: OK") *--#] polynomial_gcd_nvar_5 : @@ -628,7 +628,7 @@ assert stdout =~ exact_pattern("gcd: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testiaj(mul,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("mul: OK") *--#] polynomial_mul_nvar_1 : @@ -644,7 +644,7 @@ assert stdout =~ exact_pattern("mul: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testiaj(mul,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("mul: OK") *--#] polynomial_mul_nvar_2 : @@ -660,7 +660,7 @@ assert stdout =~ exact_pattern("mul: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testiaj(mul,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("mul: OK") *--#] polynomial_mul_nvar_5 : @@ -676,7 +676,7 @@ assert stdout =~ exact_pattern("mul: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaj(div,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("div: OK") *--#] polynomial_div_nvar_1 : @@ -692,7 +692,7 @@ assert stdout =~ exact_pattern("div: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaj(div,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("div: OK") *--#] polynomial_div_nvar_2 : @@ -708,7 +708,7 @@ assert stdout =~ exact_pattern("div: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaj(div,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("div: OK") *--#] polynomial_div_nvar_5 : @@ -724,7 +724,7 @@ assert stdout =~ exact_pattern("div: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijak(rem,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? 
|| warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("rem: OK") *--#] polynomial_rem_nvar_1 : @@ -740,7 +740,7 @@ assert stdout =~ exact_pattern("rem: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijak(rem,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("rem: OK") *--#] polynomial_rem_nvar_2 : @@ -756,7 +756,7 @@ assert stdout =~ exact_pattern("rem: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijak(rem,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("rem: OK") *--#] polynomial_rem_nvar_5 : @@ -772,7 +772,7 @@ assert stdout =~ exact_pattern("rem: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -792,7 +792,7 @@ assert stdout =~ exact_pattern("factarg: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -812,7 +812,7 @@ assert stdout =~ exact_pattern("factarg: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. # This one is not valgrind clean for flint < 3.2.1 ! #pend_if valgrind? @@ -833,7 +833,7 @@ assert stdout =~ exact_pattern("factarg: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactdolij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -853,7 +853,7 @@ assert stdout =~ exact_pattern("factdollar: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactdolij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -873,7 +873,7 @@ assert stdout =~ exact_pattern("factdollar: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactdolij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -893,7 +893,7 @@ assert stdout =~ exact_pattern("factdollar: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(norm,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? 
+#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf norm: OK") *--#] polynomial_prf_norm_nvar_1 : @@ -909,7 +909,7 @@ assert stdout =~ exact_pattern("prf norm: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(norm,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf norm: OK") *--#] polynomial_prf_norm_nvar_2 : @@ -925,7 +925,7 @@ assert stdout =~ exact_pattern("prf norm: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(norm,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf norm: OK") *--#] polynomial_prf_norm_nvar_5 : @@ -941,7 +941,7 @@ assert stdout =~ exact_pattern("prf norm: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(add,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf add: OK") *--#] polynomial_prf_add_nvar_1 : @@ -957,7 +957,7 @@ assert stdout =~ exact_pattern("prf add: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(add,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf add: OK") *--#] polynomial_prf_add_nvar_2 : @@ -973,7 +973,7 @@ assert stdout =~ exact_pattern("prf add: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(add,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf add: OK") *--#] polynomial_prf_add_nvar_5 : @@ -989,7 +989,7 @@ assert stdout =~ exact_pattern("prf add: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testinv(1,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("inv: OK") *--#] polynomial_inverse_nvar_1 : @@ -1031,7 +1031,6 @@ PolyRatFun prf; Print; .end -#pend_if mpi? assert succeeded? assert result("test1") =~ expr("prf(x*y + x*z + 1,w*x)") assert result("test2") =~ expr("prf(x^2 + x + 1,x^2)") @@ -1052,7 +1051,6 @@ CFunction f,prf; PolyRatFun prf; Local test = prf(f(1),1); .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: polynomials and polyratfuns must contain symbols only") *--#] polynomial_error_1 : @@ -1062,7 +1060,6 @@ PolyRatFun prf; * Fast notation version Local test = prf(f,1); .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: polynomials and polyratfuns must contain symbols only") *--#] polynomial_error_2 : @@ -1070,7 +1067,6 @@ assert stdout =~ exact_pattern("ERROR: polynomials and polyratfuns must contain Symbol x; Local test = inverse_(x+1,x+1); .end -#pend_if mpi? assert runtime_error? 
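
Reviewer note: in polynomial_error_3 above, inverse_(x+1,x+1) asks for the inverse of a = x+1 modulo m = x+1. Over the rationals such an inverse exists precisely when the two polynomials are coprime,

\[ \exists\,u:\ u\,a \equiv 1 \pmod{m} \iff \gcd(a,m)=1, \qquad \gcd(x+1,\,x+1)=x+1 \neq 1, \]

so the request must fail in any backend; that is why the assert just below accepts either the FLINT message or the built-in poly one.
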
assert stdout =~ exact_pattern("flint::inverse_poly error: inverse does not exist") || stdout =~ exact_pattern("ERROR: polynomial inverse does not exist") *--#] polynomial_error_3 : @@ -1079,7 +1075,6 @@ CFunction prf; PolyRatFun prf; Local test = prf; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: PolyRatFun cannot have zero arguments") *--#] polynomial_error_4 : @@ -1089,7 +1084,6 @@ PolyRatFun prf; Local test = prf(1,2,3); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: PolyRatFun cannot have more than two arguments") *--#] polynomial_error_5 : @@ -1100,7 +1094,6 @@ PolyRatFun prf; Local test = prf(x,y,3); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: PolyRatFun cannot have more than two arguments") *--#] polynomial_error_6 : @@ -1115,7 +1108,6 @@ PolyRatFun prf; Local test = prf((x+1)^`N',(x+2)^`N')*prf((x^`N'-1)^`N',(x^`N'-2)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::ratfun_normalize: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_7 : @@ -1128,7 +1120,6 @@ PolyRatFun prf; Local test = prf((x+1)^`N',(x+2)^`N')*prf((x^`N'-1)^`N',(x^`N'-2)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::to_argument_poly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_8 : @@ -1142,7 +1133,6 @@ PolyRatFun prf; Local test = prf((x+1)^`N',(x+2)^`N')*prf((x^`N'-1)^`N',(x^`N'-2)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::to_argument_poly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_9 : @@ -1156,7 +1146,6 @@ PolyRatFun prf; Local test = prf((x+y)^`N',(x+2*y)^`N')*prf((x^`N'-y)^`N',(x^`N'-2*y)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::ratfun_normalize: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_10 : @@ -1169,7 +1158,6 @@ PolyRatFun prf; Local test = prf((x+y)^`N',(x+2*y)^`N')*prf((x^`N'-y)^`N',(x^`N'-2*y)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::to_argument_mpoly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_11 : @@ -1183,7 +1171,6 @@ PolyRatFun prf; Local test = prf((x+y+z)^`N',(x+2*y)^`N')*prf((x^`N'-y-z)^`N',(x^`N'-2*y)^`N'); Print; .end -#pend_if mpi? assert runtime_error? 
assert stdout =~ exact_pattern("flint::to_argument_mpoly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_12 : diff --git a/sources/mpi.c b/sources/mpi.c index 2a8aa573..80cf5310 100644 --- a/sources/mpi.c +++ b/sources/mpi.c @@ -36,6 +36,11 @@ #[ Includes and variables : */ +/* +#define MPIDEBUGGING +#define MPIDEBUGGING_DELAY_US 10000 +*/ + #include #include "form3.h" @@ -338,7 +343,14 @@ int PF_RecvWbuf(WORD *b, LONG *s, int *src) { int i, r = 0; - r = MPI_Recv(b,(int)*s,PF_WORD,*src,PF_ANY_MSGTAG,PF_COMM,&PF_status); + for (;;) { + r = MPI_Probe(*src,PF_ANY_MSGTAG,PF_COMM,&PF_status); + if ( r != MPI_SUCCESS ) { if ( r > 0 ) r *= -1; return(r); } + if ( PF_status.MPI_TAG != PF_RUNTIME_ERROR_MSGTAG ) break; + PF_ReceiveRuntimeError(); + } + + r = MPI_Recv(b,(int)*s,PF_WORD,PF_status.MPI_SOURCE,PF_status.MPI_TAG,PF_COMM,&PF_status); if ( r != MPI_SUCCESS ) { if ( r > 0 ) r *= -1; return(r); } r = MPI_Get_count(&PF_status,PF_WORD,&i); @@ -446,6 +458,29 @@ int PF_Bcast(void *buffer, int count) /* #] PF_Bcast : + #[ PF_Reduce : +*/ + +/** + * Performs a reduce operation across all processes. + * + * @param[in] sendbuf the buffer containing the data to be reduced. + * @param[out] recvbuf the buffer to store the reduced result (only for the root process). + * @param count the number of elements in the buffers. + * @param type the datatype of the elements. + * @param op the operation to apply. + * @param root the root process number. + * @return 0 if OK, nonzero on error. + */ +int PF_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype type, MPI_Op op, int root) +{ + if ( MPI_Reduce(sendbuf,recvbuf,count,type,op,root,PF_COMM) != MPI_SUCCESS ) + return(-1); + return(0); +} + +/* + #] PF_Reduce : #[ PF_RawSend : */ @@ -459,7 +494,6 @@ int PF_Bcast(void *buffer, int count) * @param tag the message tag. * @return 0 if OK, nonzero on error. */ - int PF_RawSend(int dest, void *buf, LONG l, int tag) { int ret=MPI_Ssend(buf,(int)l,MPI_BYTE,dest,tag,PF_COMM); @@ -523,6 +557,85 @@ int PF_RawProbe(int *src, int *tag, int *bytesize) /* #] PF_RawProbe : + #[ PF_RawIsend : +*/ + +/** + * Performs a nonblocking send. + * + * @param dest the destination process number. + * @param[in] buf the send buffer. + * @param count the number of elements in the send buffer. + * @param type the datatype of the data in the send buffer. + * @param tag the message tag. + * @param[out] request the request handle for the nonblocking operation. + * @return 0 if OK, nonzero on error. + */ +int PF_RawIsend(int dest, const void *buf, int count, MPI_Datatype type, int tag, MPI_Request *request) +{ + int ret = MPI_Isend(buf, count, type, dest, tag, PF_COMM, request); + if ( ret != MPI_SUCCESS ) return(-1); + return(0); +} + +/* + #] PF_RawIsend : + #[ PF_RawWaitAll : +*/ + +/** + * Waits for all the given requests to complete. + * + * @param count the number of requests. + * @param[in,out] request the array of request handles. + * @param[out] status the array of status objects to store the status of each completed request. + * @return 0 if OK, nonzero on error. + */ +int PF_RawWaitAll(int count, MPI_Request *request, MPI_Status *status) +{ + int ret = MPI_Waitall(count, request, status); + if ( ret != MPI_SUCCESS ) return(-1); + return(0); +} + +/* + #] PF_RawWaitAll : + #[ PF_Discard : +*/ + +/** + * Discards an incoming message. + * + * @param[in,out] src the source process number. + * On output, that of the actual received message. 
+ * @param[in,out] tag the message tag.
+ *                    On output, that of the actual received message.
+ * @return 0 if OK, nonzero on error.
+ */
+int PF_Discard(int *src, int *tag)
+{
+	enum { DEFAULT_BUF_SIZE = 1024 };
+	MPI_Status stat;
+	int count;
+	void *buf;
+	char default_buf[DEFAULT_BUF_SIZE];
+	int srcval = src != NULL ? *src : PF_ANY_SOURCE;
+	int tagval = tag != NULL ? *tag : PF_ANY_MSGTAG;
+	int ret = MPI_Probe(srcval, tagval, PF_COMM, &stat);
+	if ( ret != MPI_SUCCESS ) return -1;
+	if ( src != NULL ) *src = stat.MPI_SOURCE;
+	if ( tag != NULL ) *tag = stat.MPI_TAG;
+	ret = MPI_Get_count(&stat, MPI_BYTE, &count);
+	if ( ret != MPI_SUCCESS ) return -1;
+	buf = count <= DEFAULT_BUF_SIZE ? default_buf : Malloc1(count, "PF_Discard");
+	ret = MPI_Recv(buf, count, MPI_BYTE, stat.MPI_SOURCE, stat.MPI_TAG, PF_COMM, MPI_STATUS_IGNORE);
+	if ( buf != default_buf ) M_free(buf, "PF_Discard");
+	if ( ret != MPI_SUCCESS ) return -1;
+	return 0;
+}
+
+/*
+ 	#] PF_Discard :
  	#[ The pack buffer :
  	 #[ Variables :
 */
@@ -620,7 +733,6 @@ int PF_PrintPackBuf(char *s, int size)
  *
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_PreparePack(void)
 {
 	return PF_InitPackBuf();
@@ -818,7 +930,6 @@ int PF_UnpackString(UBYTE *str)
  * @param tag the message tag.
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_Send(int to, int tag)
 {
 	int err;
@@ -1447,7 +1558,6 @@ static inline int PF_longMultiReset(int is_sender)
  *
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_PrepareLongSinglePack(void)
 {
 	return PF_longSingleReset(1);
@@ -1639,7 +1749,6 @@ int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
  *
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_PrepareLongMultiPack(void)
 {
 	return PF_longMultiReset(1);
diff --git a/sources/mpidbg.h b/sources/mpidbg.h
index 924096e6..036bef56 100644
--- a/sources/mpidbg.h
+++ b/sources/mpidbg.h
@@ -42,6 +42,12 @@
 #include
 #include
 #include
+#if defined(MPIDEBUGGING_DELAY_US) && MPIDEBUGGING_DELAY_US > 0
+#include <unistd.h> // for usleep()
+#endif
+
+#define MPIDBG_LINESIZE 1024
+#define MPIDBG_BUFSIZE 128
 
 /*
  	#] Includes :
@@ -60,11 +66,11 @@ static inline int MPIDBG_Get_rank(void) {
 */
 static inline void MPIDBG_Out(const char *file, int line, const char *func, const char *fmt, ...)
 {
-	char buf[1024]; /* Enough. */
+	char buf[MPIDBG_LINESIZE];
 	va_list ap;
 	va_start(ap, fmt);
-	snprintf(buf,1024, "*** [%d] %10s %4d @ %-16s: ", MPIDBG_RANK, file, line, func);
-	vsnprintf(buf + strlen(buf),1024-strlen(buf), fmt, ap);
+	snprintf(buf,MPIDBG_LINESIZE, "*** [%d] %10s %4d @ %-16s: ", MPIDBG_RANK, file, line, func);
+	vsnprintf(buf + strlen(buf),MPIDBG_LINESIZE-strlen(buf), fmt, ap);
 	va_end(ap);
 	/* Assume fprintf with a line will work well even in multi-processes.
*/ fprintf(stderr, "%s\n", buf); @@ -73,6 +79,18 @@ static inline void MPIDBG_Out(const char *file, int line, const char *func, cons /* #] MPIDBG_Out : + #[ MPIDBG_insert_delay : +*/ + +static inline void MPIDBG_insert_delay(void) +{ +#if defined(MPIDEBUGGING_DELAY_US) && MPIDEBUGGING_DELAY_US > 0 + usleep(MPIDEBUGGING_DELAY_US); +#endif +} + +/* + #] MPIDBG_insert_delay : #[ MPIDBG_sprint_requests : */ @@ -139,6 +157,43 @@ static inline void MPIDBG_sprint_statuses(char *buf, int count, const MPI_Reques #define MPIDBG_EXTARG const char *file, int line, const char *func /* + #[ MPI_Init : +*/ + +static inline int MPIDBG_Init(int* argc, char*** argv, MPIDBG_EXTARG) +{ + int ret = MPI_Init(argc, argv); + if ( ret == MPI_SUCCESS ) { + MPIDBG_Out("MPI_Init: OK"); + } + else { + MPIDBG_Out("MPI_Init: Failed"); + } + return ret; +} +#define MPI_Init(...) MPIDBG_Init(__VA_ARGS__, __FILE__, __LINE__, __func__) + +/* + #] MPI_Init : + #[ MPI_Finalize : +*/ + +static inline int MPIDBG_Finalize(MPIDBG_EXTARG) +{ + MPIDBG_Out("MPI_Finalize"); + int ret = MPI_Finalize(); + if ( ret == MPI_SUCCESS ) { + MPIDBG_Out("MPI_Finalize: OK"); + } + else { + MPIDBG_Out("MPI_Finalize: Failed"); + } + return ret; +} +#define MPI_Finalize() MPIDBG_Finalize(__FILE__, __LINE__, __func__) + +/* + #] MPI_Finalize : #[ MPI_Send : */ @@ -147,10 +202,10 @@ static inline int MPIDBG_Send(void *buf, int count, MPI_Datatype datatype, int d MPIDBG_Out("MPI_Send: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Send(buf, count, datatype, dest, tag, comm); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Send: OK"); + MPIDBG_Out("MPI_Send: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Send: Failed"); + MPIDBG_Out("MPI_Send: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -163,6 +218,8 @@ static inline int MPIDBG_Send(void *buf, int count, MPI_Datatype datatype, int d static inline int MPIDBG_Recv(void* buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status, MPIDBG_EXTARG) { + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; MPIDBG_Out("MPI_Recv: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); int ret = MPI_Recv(buf, count, datatype, source, tag, comm, status); if ( ret == MPI_SUCCESS ) { @@ -171,7 +228,7 @@ static inline int MPIDBG_Recv(void* buf, int count, MPI_Datatype datatype, int s MPIDBG_Out("MPI_Recv: OK src=%d dest=%d tag=%d count=%d", status->MPI_SOURCE, MPIDBG_RANK, status->MPI_TAG, ret_count); } else { - MPIDBG_Out("MPI_Recv: Failed"); + MPIDBG_Out("MPI_Recv: Failed src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); } return ret; } @@ -187,10 +244,10 @@ static inline int MPIDBG_Bsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Bsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Bsend(buf, count, datatype, dest, tag, comm); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Bsend: OK"); + MPIDBG_Out("MPI_Bsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Bsend: Failed"); + MPIDBG_Out("MPI_Bsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -206,10 +263,10 @@ static inline int MPIDBG_Ssend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Ssend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Ssend(buf, count, datatype, dest, tag, comm); if ( ret == 
MPI_SUCCESS ) { - MPIDBG_Out("MPI_Ssend: OK"); + MPIDBG_Out("MPI_Ssend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Ssend: Failed"); + MPIDBG_Out("MPI_Ssend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -225,10 +282,10 @@ static inline int MPIDBG_Rsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Rsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Rsend(buf, count, datatype, dest, tag, comm); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Rsend: OK"); + MPIDBG_Out("MPI_Rsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Rsend: Failed"); + MPIDBG_Out("MPI_Rsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -239,15 +296,15 @@ static inline int MPIDBG_Rsend(void* buf, int count, MPI_Datatype datatype, int #[ MPI_Isend : */ -static inline int MPIDBG_Isend(void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, MPIDBG_EXTARG) +static inline int MPIDBG_Isend(const void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, MPIDBG_EXTARG) { MPIDBG_Out("MPI_Isend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Isend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Isend: OK"); + MPIDBG_Out("MPI_Isend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Isend: Failed"); + MPIDBG_Out("MPI_Isend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -263,10 +320,10 @@ static inline int MPIDBG_Ibsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Ibsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Ibsend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Ibsend: OK"); + MPIDBG_Out("MPI_Ibsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Ibsend: Failed"); + MPIDBG_Out("MPI_Ibsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -282,10 +339,10 @@ static inline int MPIDBG_Issend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Issend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Issend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Issend: OK"); + MPIDBG_Out("MPI_Issend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Issend: Failed"); + MPIDBG_Out("MPI_Issend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -301,10 +358,10 @@ static inline int MPIDBG_Irsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Irsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Irsend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Irsend: OK"); + MPIDBG_Out("MPI_Irsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Irsend: Failed"); + MPIDBG_Out("MPI_Irsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -320,10 +377,10 @@ static inline int MPIDBG_Irecv(void* buf, int count, 
MPI_Datatype datatype, int MPIDBG_Out("MPI_Irecv: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); int ret = MPI_Irecv(buf, count, datatype, source, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Irecv: OK dest=%d", MPIDBG_RANK); + MPIDBG_Out("MPI_Irecv: OK src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); } else { - MPIDBG_Out("MPI_Irecv: Failed"); + MPIDBG_Out("MPI_Irecv: Failed src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); } return ret; } @@ -336,17 +393,19 @@ static inline int MPIDBG_Irecv(void* buf, int count, MPI_Datatype datatype, int static inline int MPIDBG_Wait(MPI_Request *request, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256 * 1]; /* Enough. */ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_request = *request; - MPIDBG_sprint_requests(buf, 1, request); - MPIDBG_Out("MPI_Wait: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, 1, request); + MPIDBG_Out("MPI_Wait: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Wait(request, status); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, 1, request, &old_request, status); - MPIDBG_Out("MPI_Wait: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, 1, request, &old_request, status); + MPIDBG_Out("MPI_Wait: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Wait: Failed"); + MPIDBG_Out("MPI_Wait: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -359,22 +418,24 @@ static inline int MPIDBG_Wait(MPI_Request *request, MPI_Status *status, MPIDBG_E static inline int MPIDBG_Test(MPI_Request *request, int *flag, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256 * 1]; /* Enough. */ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_request = *request; - MPIDBG_sprint_requests(buf, 1, request); - MPIDBG_Out("MPI_Test: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, 1, request); + MPIDBG_Out("MPI_Test: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Test(request, flag, status); if ( ret == MPI_SUCCESS ) { if ( *flag ) { - MPIDBG_sprint_statuses(buf, 1, request, &old_request, status); - MPIDBG_Out("MPI_Test: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, 1, request, &old_request, status); + MPIDBG_Out("MPI_Test: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Test: OK flag=false"); + MPIDBG_Out("MPI_Test: OK rank=%d request=%s flag=false", MPIDBG_RANK, buf1); } } else { - MPIDBG_Out("MPI_Test: Failed"); + MPIDBG_Out("MPI_Test: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -387,20 +448,22 @@ static inline int MPIDBG_Test(MPI_Request *request, int *flag, MPI_Status *statu static inline int MPIDBG_Waitany(int count, MPI_Request *array_of_requests, int *index, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256]; /* Enough. 
*/ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Waitany: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Waitany: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Waitany(count, array_of_requests, index, status); if ( ret == MPI_SUCCESS ) { MPI_Status statuses[count]; statuses[*index] = *status; - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, statuses); - MPIDBG_Out("MPI_Waitany: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, statuses); + MPIDBG_Out("MPI_Waitany: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Waitany: Failed"); + MPIDBG_Out("MPI_Waitany: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -413,25 +476,27 @@ static inline int MPIDBG_Waitany(int count, MPI_Request *array_of_requests, int static inline int MPIDBG_Testany(int count, MPI_Request *array_of_requests, int *index, int *flag, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256]; /* Enough. */ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Testany: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Testany: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Testany(count, array_of_requests, index, flag, status); if ( ret == MPI_SUCCESS ) { if ( *flag ) { MPI_Status statuses[count]; statuses[*index] = *status; - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, statuses); - MPIDBG_Out("MPI_Testany: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, statuses); + MPIDBG_Out("MPI_Testany: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Testany: OK flag=false"); + MPIDBG_Out("MPI_Testany: OK rank=%d request=%s flag=false", MPIDBG_RANK, buf1); } } else { - MPIDBG_Out("MPI_Testany: Failed"); + MPIDBG_Out("MPI_Testany: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -444,18 +509,20 @@ static inline int MPIDBG_Testany(int count, MPI_Request *array_of_requests, int static inline int MPIDBG_Waitall(int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * count]; /* Enough. 
*/ + MPI_Status st[count]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * count], buf2[MPIDBG_BUFSIZE * count]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Waitall: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Waitall: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Waitall(count, array_of_requests, array_of_statuses); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Waitall: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Waitall: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Waitall: Failed"); + MPIDBG_Out("MPI_Waitall: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -468,23 +535,25 @@ static inline int MPIDBG_Waitall(int count, MPI_Request *array_of_requests, MPI_ static inline int MPIDBG_Testall(int count, MPI_Request *array_of_requests, int *flag, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * count]; /* Enough. */ + MPI_Status st[count]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * count], buf2[MPIDBG_BUFSIZE * count]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Testall: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Testall: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Testall(count, array_of_requests, flag, array_of_statuses); if ( ret == MPI_SUCCESS ) { if ( *flag ) { - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Testall: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Testall: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Testall: OK flag=false"); + MPIDBG_Out("MPI_Testall: OK rank=%d request=%s flag=false", MPIDBG_RANK, buf1); } } else { - MPIDBG_Out("MPI_Testall: Failed"); + MPIDBG_Out("MPI_Testall: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -497,18 +566,20 @@ static inline int MPIDBG_Testall(int count, MPI_Request *array_of_requests, int static inline int MPIDBG_Waitsome(int incount, MPI_Request *array_of_requests, int *outcount, int *array_of_indices, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * incount]; /* Enough. 
*/ + MPI_Status st[incount]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * incount], buf2[MPIDBG_BUFSIZE * incount]; MPI_Request old_requests[incount]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * incount); - MPIDBG_sprint_requests(buf, incount, array_of_requests); - MPIDBG_Out("MPI_Waitsome: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, incount, array_of_requests); + MPIDBG_Out("MPI_Waitsome: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, incount, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Waitsome: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, incount, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Waitsome: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Waitsome: Failed"); + MPIDBG_Out("MPI_Waitsome: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -521,18 +592,20 @@ static inline int MPIDBG_Waitsome(int incount, MPI_Request *array_of_requests, i static inline int MPIDBG_Testsome(int incount, MPI_Request *array_of_requests, int *outcount, int *array_of_indices, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * incount]; /* Enough. */ + MPI_Status st[incount]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * incount], buf2[MPIDBG_BUFSIZE * incount]; MPI_Request old_requests[incount]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * incount); - MPIDBG_sprint_requests(buf, incount, array_of_requests); - MPIDBG_Out("MPI_Testsome: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, incount, array_of_requests); + MPIDBG_Out("MPI_Testsome: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, incount, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Testsome: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, incount, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Testsome: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Testsome: Failed"); + MPIDBG_Out("MPI_Testsome: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -545,7 +618,10 @@ static inline int MPIDBG_Testsome(int incount, MPI_Request *array_of_requests, i static inline int MPIDBG_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status, MPIDBG_EXTARG) { + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; MPIDBG_Out("MPI_Iprobe: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); + MPIDBG_insert_delay(); int ret = MPI_Iprobe(source, tag, comm, flag, status); if ( ret == MPI_SUCCESS ) { if ( *flag ) { @@ -554,11 +630,11 @@ static inline int MPIDBG_Iprobe(int source, int tag, MPI_Comm comm, int *flag, M MPIDBG_Out("MPI_Iprobe: OK src=%d dest=%d tag=%d size=%d", status->MPI_SOURCE, MPIDBG_RANK, status->MPI_TAG, ret_size); } else { - MPIDBG_Out("MPI_Iprobe: OK flag=false"); + MPIDBG_Out("MPI_Iprobe: OK src=%d dest=%d tag=%d flag=false", source, MPIDBG_RANK, tag); } } else { - MPIDBG_Out("MPI_Iprobe: Failed"); + MPIDBG_Out("MPI_Iprobe: Failed src=%d dest=%d tag=%d", 
source, MPIDBG_RANK, tag);
 	}
 	return ret;
 }
@@ -571,6 +647,8 @@ static inline int MPIDBG_Iprobe(int source, int tag, MPI_Comm comm, int *flag, M
 
 static inline int MPIDBG_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status, MPIDBG_EXTARG)
 {
+	MPI_Status st;
+	if ( status == MPI_STATUS_IGNORE ) status = &st;
 	MPIDBG_Out("MPI_Probe: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag);
 	int ret = MPI_Probe(source, tag, comm, status);
 	if ( ret == MPI_SUCCESS ) {
@@ -579,7 +657,7 @@ static inline int MPIDBG_Probe(int source, int tag, MPI_Comm comm, MPI_Status *s
 		MPIDBG_Out("MPI_Probe: OK src=%d dest=%d tag=%d size=%d", status->MPI_SOURCE, MPIDBG_RANK, status->MPI_TAG, ret_size);
 	}
 	else {
-		MPIDBG_Out("MPI_Probe: Failed");
+		MPIDBG_Out("MPI_Probe: Failed src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag);
 	}
 	return ret;
 }
@@ -592,7 +670,7 @@ static inline int MPIDBG_Probe(int source, int tag, MPI_Comm comm, MPI_Status *s
 
 static inline int MPIDBG_Cancel(MPI_Request *request, MPIDBG_EXTARG)
 {
-	MPIDBG_Out("MPI_Cancel: rank=%d", MPIDBG_RANK);
+	MPIDBG_Out("MPI_Cancel");
 	int ret = MPI_Cancel(request);
 	if ( ret == MPI_SUCCESS ) {
 		MPIDBG_Out("MPI_Cancel: OK");
@@ -611,7 +689,7 @@ static inline int MPIDBG_Cancel(MPI_Request *request, MPIDBG_EXTARG)
 
 static inline int MPIDBG_Test_cancelled(MPI_Status *status, int *flag, MPIDBG_EXTARG)
 {
-	MPIDBG_Out("MPI_Test_cancelled: rank=%d", MPIDBG_RANK);
+	MPIDBG_Out("MPI_Test_cancelled");
 	int ret = MPI_Test_cancelled(status, flag);
 	if ( ret == MPI_SUCCESS ) {
 		if ( *flag ) {
@@ -635,7 +713,7 @@ static inline int MPIDBG_Test_cancelled(MPI_Status *status, int *flag, MPIDBG_EX
 
 static inline int MPIDBG_Barrier(MPI_Comm comm, MPIDBG_EXTARG)
 {
-	MPIDBG_Out("MPI_Barrier: rank=%d", MPIDBG_RANK);
+	MPIDBG_Out("MPI_Barrier");
 	int ret = MPI_Barrier(comm);
 	if ( ret == MPI_SUCCESS ) {
 		MPIDBG_Out("MPI_Barrier: OK");
@@ -657,10 +735,10 @@ static inline int MPIDBG_Bcast(void* buffer, int count, MPI_Datatype datatype, i
 	MPIDBG_Out("MPI_Bcast: root=%d count=%d", root, count);
 	int ret = MPI_Bcast(buffer, count, datatype, root, comm);
 	if ( ret == MPI_SUCCESS ) {
-		MPIDBG_Out("MPI_Bcast: OK");
+		MPIDBG_Out("MPI_Bcast: OK root=%d count=%d", root, count);
 	}
 	else {
-		MPIDBG_Out("MPI_Bcast: Failed");
+		MPIDBG_Out("MPI_Bcast: Failed root=%d count=%d", root, count);
 	}
 	return ret;
 }
@@ -668,6 +746,25 @@ static inline int MPIDBG_Bcast(void* buffer, int count, MPI_Datatype datatype, i
 
 /*
  	#] MPI_Bcast :
+ 	#[ MPI_Reduce :
+*/
+
+static inline int MPIDBG_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPIDBG_EXTARG)
+{
+	MPIDBG_Out("MPI_Reduce: root=%d count=%d", root, count);
+	int ret = MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+	if ( ret == MPI_SUCCESS ) {
+		MPIDBG_Out("MPI_Reduce: OK root=%d count=%d", root, count);
+	}
+	else {
+		MPIDBG_Out("MPI_Reduce: Failed root=%d count=%d", root, count);
+	}
+	return ret;
+}
+#define MPI_Reduce(...) 
MPIDBG_Reduce(__VA_ARGS__, __FILE__, __LINE__, __func__) + +/* + #] MPI_Reduce : #] MPI APIs : */ diff --git a/sources/parallel.c b/sources/parallel.c index 34c1e991..e2fd504b 100644 --- a/sources/parallel.c +++ b/sources/parallel.c @@ -40,6 +40,8 @@ #include "form3.h" #include "vector.h" +#include // must come after form3.h + /* #define PF_DEBUG_BCAST_LONG #define PF_DEBUG_BCAST_BUF @@ -63,6 +65,9 @@ int PF_WaitRbuf(PF_BUFFER *,int,LONG *); int PF_RawSend(int dest, void *buf, LONG l, int tag); LONG PF_RawRecv(int *src,void *buf,LONG thesize,int *tag); int PF_RawProbe(int *src, int *tag, int *bytesize); +int PF_RawIsend(int dest, const void *buf, int count, MPI_Datatype type, int tag, MPI_Request *request); +int PF_RawWaitAll(int count, MPI_Request *request, MPI_Status *status); +int PF_Discard(int *src, int *tag); /* Private functions */ @@ -85,13 +90,15 @@ static void PF_CatchErrorMessages(int *src, int *tag); static void PF_CatchErrorMessagesForAll(void); static int PF_ProbeWithCatchingErrorMessages(int *src); +static void PF_RaiseRuntimeError(void); +static void PF_BroadcastRuntimeError(void); +static void PF_PostEndSortBarrier(void); + /* Variables */ PARALLELVARS PF; -#ifdef MPI2 - WORD *PF_shared_buff; -#endif +static int PF_processing; /* Flag indicating that parallel processing of terms is in progress */ static LONG PF_goutterms; /* (master) Total out terms at PF_EndSort(), used in PF_Statistics(). */ static POSITION PF_exprsize; /* (master) The size of the expression at PF_EndSort(), used in PF_Processor(). */ @@ -277,7 +284,7 @@ typedef struct NoDe { static NODE *PF_root; /* root of tree of losers */ static WORD PF_loser; /* this is the last loser */ static WORD **PF_term; /* these point to the active terms */ -static WORD **PF_newcpos; /* new coeffs of merged terms */ +static WORD **PF_newcpos; /* new coefficients of merged terms */ static WORD *PF_newclen; /* length of new coefficients */ /* @@ -540,6 +547,9 @@ static WORD *PF_PutIn(int src) very first term from this src */ tag = PF_WaitRbuf(rbuf,a,&size); + if ( tag == PF_RUNTIME_ERROR_MSGTAG ) { + PF_ReceiveRuntimeError(); + } rbuf->full[a] += size; if ( tag == PF_ENDBUFFER_MSGTAG ) *rbuf->full[a]++ = 0; else if ( rbuf->numbufs > 1 ) { @@ -1005,13 +1015,12 @@ static WORD *PF_CurrentBracket; */ static WORD PF_GetTerm(WORD *term) { + assert(PF.me != MASTER); GETIDENTITY FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile; WORD i; WORD *next, *np, *last, *lp = 0, *nextstop, *tp=term; - /* Only on the slaves. 
*/ - AN.deferskipped = 0; if ( fi->POfill >= fi->POfull || fi->POfull == fi->PObuffer ) { ReceiveNew: @@ -1050,22 +1059,6 @@ static WORD PF_GetTerm(WORD *term) } size = fi->POstop - fi->PObuffer - 1; -#ifdef AbsolutelyExtra - PF_Receive(MASTER,PF_ANY_MSGTAG,&src,&tag); -#ifdef MPI2 - if ( tag == PF_TERM_MSGTAG ) { - PF_Unpack(&size, 1, PF_LONG); - if ( PF_Put_target(src) == 0 ) { - printf("PF_Put_target error ...\n"); - } - } - else { - PF_RecvWbuf(fi->PObuffer,&size,&src); - } -#else - PF_RecvWbuf(fi->PObuffer,&size,&src); -#endif -#endif tag=PF_RecvWbuf(fi->PObuffer,&size,&src); fi->POfill = fi->PObuffer; @@ -1549,15 +1542,6 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) int k, src, tag; FILEHANDLE *oldoutfile = AR.outfile; -#ifdef MPI2 - if ( PF_shared_buff == NULL ) { - if ( PF_SMWin_Init() == 0 ) { - MesPrint("PF_SMWin_Init error"); - exit(-1); - } - } -#endif - if ( ( (WORD *)(((UBYTE *)(AT.WorkPointer)) + AM.MaxTer ) ) > AT.WorkTop ) { MesWork(); } @@ -1571,6 +1555,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) if ( AC.mparallelflag != PARALLELFLAG ) return(0); + PF_processing = 1; + if ( PF.me == MASTER ) { /* #[ Master: @@ -1681,13 +1667,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) sb->fill[0] = sb->full[0] = sb->buff[0]; sb->active = next; -#ifdef MPI2 - if ( PF_Put_origin(next) == 0 ) { - printf("PF_Put_origin error...\n"); - } -#else PF_ISendSbuf(next,PF_TERM_MSGTAG); -#endif + /* Initialize the next bucket. */ termsinbucket = 0; PACK_LONG(sb->fill[0], AN.ninterms); @@ -1754,6 +1735,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) #] Clean up & EndSort: #[ Collect (stats,prepro,...): */ + PF_PostEndSortBarrier(); + DBGOUT_NINTERMS(1, ("PF.me=%d AN.ninterms=%d ENDSORT\n", (int)PF.me, (int)AN.ninterms)); PF_CatchErrorMessagesForAll(); e->numdummies = 0; @@ -1835,9 +1818,6 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) AN.ninterms = 0; PF_linterms = 0; PF.parallel = 1; -#ifdef MPI2 - AR.infile->POfull = AR.infile->POfill = AR.infile->PObuffer = PF_shared_buff; -#endif { FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile; fi->POfull = fi->POfill = fi->PObuffer; @@ -1870,8 +1850,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) PolyFunClean(BHEAD term); } if ( Generator(BHEAD term,0) ) { - MesPrint("[%d] PF_Processor: Error in Generator",PF.me); - LowerSortLevel(); return(-1); + LowerSortLevel(); + Terminate(-1); } PF_linterms += dd; AN.ninterms += dd; } @@ -1907,6 +1887,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) #] Generator Loop & EndSort : #[ Collect (stats,prepro...) : */ + PF_PostEndSortBarrier(); + DBGOUT_NINTERMS(1, ("PF.me=%d AN.ninterms=%d PF_linterms=%d ENDSORT\n", (int)PF.me, (int)AN.ninterms, (int)PF_linterms)); PF_PrepareLongSinglePack(); cpu = TimeCPU(1); @@ -2047,15 +2029,34 @@ int PF_Init(int *argc, char ***argv) } /* #] PF_Init : - #[ PF_Terminate : + #[ PF_PreTerminate : +*/ + +/** + * Prepares for termination. + * Called by Terminate(). + * + * @param errorcode an error code. + */ +void PF_PreTerminate(int errorcode) +{ + if ( errorcode != 0 && PF_processing ) { + PF_processing = 0; + PF_RaiseRuntimeError(); + } +} + +/* + #] PF_PreTerminate : + #[ PF_Terminate : */ /** * Performs the finalization of ParFORM. * To be called by Terminate(). * - * @param error an error code. - * @return 0 if OK, nonzero on error. + * @param errorcode an error code. + * @return 0 if OK, nonzero on error. 
*/ int PF_Terminate(int errorcode) { @@ -2077,7 +2078,8 @@ LONG PF_GetSlaveTimes(void) { LONG slavetimes = 0; LONG t = PF.me == MASTER ? 0 : AM.SumTime + TimeCPU(1); - MPI_Reduce(&t, &slavetimes, 1, PF_LONG, MPI_SUM, MASTER, PF_COMM); + int ret = PF_Reduce(&t, &slavetimes, 1, PF_LONG, MPI_SUM, MASTER); + CHECK(ret == 0); return slavetimes; } @@ -3639,6 +3641,7 @@ int PF_InParallelProcessor(void) } } } + PF_processing = 1; if(PF.me == MASTER){ if ( PF.numtasks >= 3 ) { partodoexr = (WORD*)Malloc1(sizeof(WORD)*(PF.numtasks+1),"PF_InParallelProcessor"); @@ -3688,6 +3691,7 @@ int PF_InParallelProcessor(void) Expressions[i].partodo = 0; } } + PF_PostEndSortBarrier(); return(0); }/*if(PF.me == MASTER)*/ /*Slave:*/ @@ -3720,6 +3724,7 @@ int PF_InParallelProcessor(void) }/*if(tag == PF_DATA_MSGTAG)*/ }while(tag!=PF_EMPTY_MSGTAG); PF.exprtodo=-1; + PF_PostEndSortBarrier(); return(0); }/*PF_InParallelProcessor*/ @@ -3796,7 +3801,8 @@ static int PF_DoOneExpr(void)/*the processor*/ } position = AS.OldOnFile[i]; - if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) { + if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION + || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) { AR.GetFile = 2; fi = AR.hidefile; } else { @@ -3976,6 +3982,8 @@ static int PF_Slave2MasterIP(int src)/*both master and slave*/ /*partodoexr[src] is the number of expression.*/ e = Expressions +partodoexr[src]; /*Get metadata:*/ + i = PF_ANY_MSGTAG; + PF_CatchErrorMessages(&src, &i); if (PF_RawRecv(&src, &exprData,sizeof(bufIPstruct_t),&i)!= sizeof(bufIPstruct_t)) return(-1); /*Fill in the expression data:*/ @@ -4034,7 +4042,8 @@ static int PF_Master2SlaveIP(int dest, EXPRESSIONS e) } if(PF_RawSend(dest,&exprData,sizeof(bufIPstruct_t),PF_DATA_MSGTAG)) return(-1); - if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) + if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION + || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) fi = AR.hidefile; else fi = AR.infile; @@ -4068,18 +4077,21 @@ static int PF_ReadMaster(void)/*reads directly to its scratch!*/ LONG ll=0; int l; /*Get metadata:*/ + tag = PF_ANY_MSGTAG; + PF_CatchErrorMessages(&m, &tag); if (PF_RawRecv(&m, &exprData,sizeof(bufIPstruct_t),&tag)!= sizeof(bufIPstruct_t)) return(-1); if(tag == PF_EMPTY_MSGTAG)/*No data, no job*/ return(tag); - /*data expected, tag must be == PF_DATA_MSTAG!*/ + /*data expected, tag must be == PF_DATA_MSGTAG!*/ PF.exprtodo=exprData.i; e=Expressions + PF.exprtodo; /*Fill in the expression data:*/ /* memcpy(e, &(exprData.e), sizeof(struct ExPrEsSiOn)); */ - if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) + if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION + || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) fi = AR.hidefile; else fi = AR.infile; @@ -4340,7 +4352,7 @@ static Vector(UBYTE, logBuffer); /* (slaves) The buffer for AC.LogHandle. */ */ void PF_MLock(void) { - /* Only on slaves. */ + assert(PF.me != MASTER); if ( errorMessageLock++ > 0 ) return; VectorClear(stdoutBuffer); VectorClear(logBuffer); @@ -4356,7 +4368,7 @@ void PF_MLock(void) */ void PF_MUnlock(void) { - /* Only on slaves. 
*/ + assert(PF.me != MASTER); if ( --errorMessageLock > 0 ) return; if ( !VectorEmpty(stdoutBuffer) ) { PF_RawSend(MASTER, VectorPtr(stdoutBuffer), VectorSize(stdoutBuffer), PF_STDOUT_MSGTAG); @@ -4502,7 +4514,7 @@ void PF_FlushStdOutBuffer(void) */ static void PF_ReceiveErrorMessage(int src, int tag) { - /* Only on the master. */ + assert(PF.me == MASTER); int size; int ret = PF_RawProbe(&src, &tag, &size); CHECK(ret == 0); @@ -4533,22 +4545,26 @@ static void PF_ReceiveErrorMessage(int src, int tag) * Processes all incoming messages whose tag is PF_STDOUT_MSGTAG * or PF_LOG_MSGTAG. It ensures that the next PF_Receive(src, tag, ...) * will not receive the message with PF_STDOUT_MSGTAG or PF_LOG_MSGTAG. + * This function also handles PF_RUNTIME_ERROR_MSGTAG. * * @param[in,out] src the source process. * @param[in,out] tag the tag value. */ static void PF_CatchErrorMessages(int *src, int *tag) { - /* Only on the master. */ for (;;) { int asrc = *src; int atag = *tag; int ret = PF_RawProbe(&asrc, &atag, NULL); CHECK(ret == 0); if ( atag == PF_STDOUT_MSGTAG || atag == PF_LOG_MSGTAG ) { + assert(PF.me == MASTER); PF_ReceiveErrorMessage(asrc, atag); continue; } + if ( atag == PF_RUNTIME_ERROR_MSGTAG ) { + PF_ReceiveRuntimeError(); + } *src = asrc; *tag = atag; break; @@ -4566,7 +4582,7 @@ static void PF_CatchErrorMessages(int *src, int *tag) */ static void PF_CatchErrorMessagesForAll(void) { - /* Only on the master. */ + assert(PF.me == MASTER); int i; for ( i = 1; i < PF.numtasks; i++ ) { int src = i; @@ -4583,6 +4599,7 @@ static void PF_CatchErrorMessagesForAll(void) /** * Same as PF_Probe() except processing incoming messages with PF_STDOUT_MSGTAG * and PF_LOG_MSGTAG. + * This function also handles PF_RUNTIME_ERROR_MSGTAG. * * @param[in,out] src the source process. The output value is that of the actual found message. * @return the tag value of the next incoming message if found, @@ -4595,10 +4612,14 @@ static int PF_ProbeWithCatchingErrorMessages(int *src) int newsrc = *src; int tag = PF_Probe(&newsrc); if ( tag == PF_STDOUT_MSGTAG || tag == PF_LOG_MSGTAG ) { + assert(PF.me == MASTER); PF_ReceiveErrorMessage(newsrc, tag); continue; } - if ( tag > 0 ) *src = newsrc; + if ( tag == PF_RUNTIME_ERROR_MSGTAG ) { + PF_ReceiveRuntimeError(); + } + *src = newsrc; return tag; } } @@ -4622,4 +4643,200 @@ void PF_FreeErrorMessageBuffers(void) /* #] PF_FreeErrorMessageBuffers : #] Synchronised output : + #[ Handling runtime errors : + #[ PF_RaiseRuntimeError : +*/ + +/** + * Sends a runtime error message to the master if called on a slave, + * or broadcasts it to the slaves if called on the master. + * Called via PF_PreTerminate() when Terminate() is called with a nonzero error code. + */ +static void PF_RaiseRuntimeError(void) +{ + if ( PF.me == MASTER ) { + PF_BroadcastRuntimeError(); + } + else { + int ret, dummy; + ret = PF_RawSend(MASTER, &dummy, 0, PF_RUNTIME_ERROR_MSGTAG); + CHECK(ret == 0); + int src = MASTER; + int tag = PF_RUNTIME_ERROR_MSGTAG; + ret = PF_RawRecv(&src, &dummy, 0, &tag); + CHECK(ret == 0); + } +} + +/* + #] PF_RaiseRuntimeError : + #[ PF_BroadcastRuntimeError : +*/ + +/** + * Broadcasts a runtime error message from the master to all slaves + * and collects one reply from each slave. + */ +static void PF_BroadcastRuntimeError(void) +{ + assert(PF.me == MASTER); + + int ret, dummy; + MPI_Request requests[PF.numtasks - 1]; + + /* + * Notify all slaves of program termination by sending PF_RUNTIME_ERROR_MSGTAG. 
+ * This must be non-blocking to avoid deadlock if some slaves have already + * performed a blocking send. + */ + for ( int i = 1; i < PF.numtasks; i++ ) { + ret = PF_RawIsend(i, &dummy, 0, PF_BYTE, PF_RUNTIME_ERROR_MSGTAG, &requests[i - 1]); + CHECK(ret == 0); + } + + /* + * Receive exactly one PF_RUNTIME_SYNC_MSGTAG or PF_RUNTIME_ERROR_MSGTAG + * message from each slave. + */ + for ( int i = 1; i < PF.numtasks; i++ ) { +retry: + int asrc = PF_ANY_SOURCE; // blocking probe + int tag = PF_Probe(&asrc); + CHECK(tag >= 0); + assert(1 <= asrc && asrc < PF.numtasks); + switch ( tag ) { + case PF_STDOUT_MSGTAG: + case PF_LOG_MSGTAG: + PF_ReceiveErrorMessage(asrc, tag); + goto retry; + case PF_RUNTIME_ERROR_MSGTAG: + case PF_RUNTIME_SYNC_MSGTAG: + ret = PF_RawRecv(&asrc, &dummy, 0, &tag); + CHECK(ret == 0); + break; + default: + ret = PF_Discard(&asrc, &tag); + CHECK(ret == 0); + goto retry; + } + } + + ret = PF_RawWaitAll(PF.numtasks - 1, requests, MPI_STATUSES_IGNORE); + CHECK(ret == 0); +} + +/* + #] PF_BroadcastRuntimeError : + #[ PF_PostEndSortBarrier : +*/ + +/** + * Synchronization after EndSort(). + * Ensures that all processes have completed without runtime errors. + */ +static void PF_PostEndSortBarrier(void) +{ + assert(PF_processing); + PF_processing = 0; + + int ret, dummy; + + /* + * Each slave reports either PF_RUNTIME_SYNC_MSGTAG (completed without errors) + * or PF_RUNTIME_ERROR_MSGTAG (see PF_RaiseRuntimeError()) + * to the master. After collecting one message from each slave, the master sends + * PF_RUNTIME_SYNC_MSGTAG to all slaves on success, + * or PF_RUNTIME_ERROR_MSGTAG otherwise. + * + * This matches the behaviour of PF_RaiseRuntimeError() and + * PF_ReceiveRuntimeError(). In all cases, each slave sends exactly one + * PF_RUNTIME_SYNC_MSGTAG or PF_RUNTIME_ERROR_MSGTAG message to the master, + * and the master sends exactly one such message to each slave. + */ + if ( PF.me == MASTER ) { + int error = 0; + for ( int i = 1; i < PF.numtasks; i++ ) { +retry: + int asrc = PF_ANY_SOURCE; // blocking probe + int tag = PF_Probe(&asrc); + CHECK(tag >= 0); + assert(1 <= asrc && asrc < PF.numtasks); + switch ( tag ) { + case PF_STDOUT_MSGTAG: + case PF_LOG_MSGTAG: + PF_ReceiveErrorMessage(asrc, tag); + goto retry; + case PF_RUNTIME_ERROR_MSGTAG: + error = 1; + ret = PF_RawRecv(&asrc, &dummy, 0, &tag); + CHECK(ret == 0); + break; + case PF_RUNTIME_SYNC_MSGTAG: + ret = PF_RawRecv(&asrc, &dummy, 0, &tag); + CHECK(ret == 0); + break; + default: + MesPrint("!!!Unexpected MPI message src=%d tag=%d.", asrc, tag); + ret = PF_Discard(&asrc, &tag); + CHECK(ret == 0); + goto retry; + } + } + for ( int i = 1; i < PF.numtasks; i++ ) { + ret = PF_RawSend(i, &dummy, 0, error ? PF_RUNTIME_ERROR_MSGTAG : PF_RUNTIME_SYNC_MSGTAG); + CHECK(ret == 0); + } + } + else { + int tag; + ret = PF_RawSend(MASTER, &dummy, 0, PF_RUNTIME_SYNC_MSGTAG); + CHECK(ret == 0); + int src = MASTER; + ret = PF_RawRecv(&src, &dummy, 0, &tag); + CHECK(ret == 0); + switch ( tag ) { + case PF_RUNTIME_SYNC_MSGTAG: + break; + case PF_RUNTIME_ERROR_MSGTAG: + PF.notMyFault = 1; + Terminate(-1); + break; + default: + MesPrint("!!!Unexpected MPI message src=%d tag=%d.", src, tag); + break; + } + } +} + +/* + #] PF_PostEndSortBarrier : + #[ PF_ReceiveRuntimeError : +*/ + +/** + * Handles a runtime error message received from another process. + * It ultimately calls Terminate(-1). 
+ */ +void PF_ReceiveRuntimeError(void) +{ + PF_processing = 0; + PF.notMyFault = 1; + if ( PF.me == MASTER ) { + PF_BroadcastRuntimeError(); + } + else { + int ret, dummy; + int src = MASTER; + int tag = PF_RUNTIME_ERROR_MSGTAG; + ret = PF_RawRecv(&src, &dummy, 0, &tag); + CHECK(ret == 0); + ret = PF_RawSend(MASTER, &dummy, 0, PF_RUNTIME_ERROR_MSGTAG); + CHECK(ret == 0); + } + Terminate(-1); +} + +/* + #] PF_ReceiveRuntimeError : + #] Handling runtime errors : */ diff --git a/sources/parallel.h b/sources/parallel.h index dbd8b96f..f60564f8 100644 --- a/sources/parallel.h +++ b/sources/parallel.h @@ -36,11 +36,21 @@ /* #[ macros & definitions : */ + +/* + * Rank of the master process. + */ #define MASTER 0 -#define PF_RESET 0 -#define PF_TIME 1 +/* + * Selector constants for PF_RealTime(). + */ +#define PF_RESET 0 /* reset the timer */ +#define PF_TIME 1 /* get the elapsed time */ +/* + * Message tags for communication during parallel execution. + */ #define PF_TERM_MSGTAG 10 /* master -> slave: sending terms */ #define PF_ENDSORT_MSGTAG 11 /* master -> slave: no more terms to be distributed, slave -> master: after EndSort() */ #define PF_DOLLAR_MSGTAG 12 /* slave -> master: sending $-variables */ @@ -54,6 +64,8 @@ #define PF_OPT_MCTS_MSGTAG 70 /* master <-> slave: optimization */ #define PF_OPT_HORNER_MSGTAG 71 /* master <-> slave: optimization */ #define PF_OPT_COLLECT_MSGTAG 72 /* slave -> master: optimization */ +#define PF_RUNTIME_ERROR_MSGTAG 80 /* slave <-> master: runtime error */ +#define PF_RUNTIME_SYNC_MSGTAG 81 /* master <-> slave: sync after EndSort() */ #define PF_MISC_MSGTAG 100 /* @@ -179,6 +191,7 @@ typedef struct ParallelVars { int exprbufsize; /* buffer size in WORDs to be used for transferring expressions */ int exprtodo; /* >= 0: the expression to do in InParallel, -1: otherwise */ int log; /* flag for logging mode */ + int notMyFault; /* flag for termination due to another process's runtime error */ WORD numsbufs; /* number of cyclic send buffers (PF.sbuf->numbufs) */ WORD numrbufs; /* number of cyclic receive buffers (PF.rbufs[i]->numbufs, i=1,...numtasks-1) */ } PARALLELVARS; @@ -197,6 +210,7 @@ extern LONG PF_maxDollarChunkSize; /* mpi.c */ extern int PF_ISendSbuf(int,int); extern int PF_Bcast(void *buffer, int count); +extern int PF_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype type, MPI_Op op, int root); extern int PF_RawSend(int,void *,LONG,int); extern LONG PF_RawRecv(int *,void *,LONG,int *); @@ -237,6 +251,7 @@ extern int PF_EndSort(void); extern WORD PF_Deferred(WORD *,WORD); extern int PF_Processor(EXPRESSIONS,WORD,WORD); extern int PF_Init(int*,char ***); +extern void PF_PreTerminate(int errorcode); extern int PF_Terminate(int); extern LONG PF_GetSlaveTimes(void); extern LONG PF_BroadcastNumber(LONG); @@ -259,6 +274,7 @@ extern void PF_MLock(void); extern void PF_MUnlock(void); extern LONG PF_WriteFileToFile(int,UBYTE *,LONG); extern void PF_FlushStdOutBuffer(void); +extern void PF_ReceiveRuntimeError(void) NORETURN; /* #] Function prototypes : diff --git a/sources/setfile.c b/sources/setfile.c index 3768a973..33bd3dbe 100644 --- a/sources/setfile.c +++ b/sources/setfile.c @@ -553,7 +553,7 @@ int AllocSetups(void) sp = GetSetupPar((UBYTE *)"threadsortfilesynch"); AC.ThreadSortFileSynch = AM.gThreadSortFileSynch = AM.ggThreadSortFileSynch = sp->value; /* - The size for shared memory window for oneside MPI2 communications + The size for shared memory window for one-sided MPI2 communications (unused) */ sp = GetSetupPar((UBYTE 
*)"shmwinsize"); AM.shmWinSize = sp->value/sizeof(WORD); diff --git a/sources/startup.c b/sources/startup.c index 4f7adcde..ffa414aa 100644 --- a/sources/startup.c +++ b/sources/startup.c @@ -1921,7 +1921,11 @@ static int firstterminate = 1; void TerminateImpl(int errorcode, const char* file, int line, const char* function) { +#ifdef WITHMPI + if ( errorcode && firstterminate && !PF.notMyFault ) { +#else if ( errorcode && firstterminate ) { +#endif firstterminate = 0; MLOCK(ErrorMessageLock); @@ -2047,6 +2051,9 @@ backtrace_fallback: ; MUNLOCK(ErrorMessageLock); +#ifdef WITHMPI + PF_PreTerminate(errorcode); +#endif Crash(); } #ifdef TRAPSIGNALS
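
The new #include <assert.h> line in parallel.c, with its comment "must come after form3.h", relies on a property of standard C that is easy to miss: <assert.h> is the one header that may be included any number of times, and each inclusion redefines assert() according to whether NDEBUG is defined at that point. Assuming that form3.h, or a header it pulls in, decides NDEBUG from the build configuration (an assumption here, since that header is not part of this patch), including <assert.h> first would freeze the wrong definition of assert() for the new checks such as assert(PF.me != MASTER). A minimal standalone demonstration of the ordering effect, with invented function names:

#include <stdio.h>

/* First inclusion with NDEBUG defined: assert() expands to a no-op. */
#define NDEBUG
#include <assert.h>
static void compiled_out(void) { assert(0 && "never evaluated"); }

/* Re-inclusion with NDEBUG undefined: assert() is active again. */
#undef NDEBUG
#include <assert.h>
static void active(void) { assert(0 && "this call aborts"); }

int main(void)
{
    compiled_out();                           /* returns normally */
    printf("the first assert was disabled\n");
    active();                                 /* fails and aborts */
    return 0;
}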
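
The comment inside PF_BroadcastRuntimeError() carries the key reasoning: the master must notify the slaves without blocking, because a slave that is itself stuck in a blocking send towards the master would deadlock against a blocking MPI_Send on the master's side. The pattern (post non-blocking sends to every worker, drain exactly one reply per worker, and only then complete the send requests) can be reproduced in a few lines of plain MPI. This is an illustrative sketch, not FORM code: TAG_ERROR merely stands in for PF_RUNTIME_ERROR_MSGTAG, and PF_RawIsend()/PF_RawWaitAll() are assumed to be thin wrappers over MPI_Isend()/MPI_Waitall().

/* build: mpicc notify.c -o notify && mpirun -np 4 ./notify */
#include <mpi.h>
#include <stdio.h>

#define TAG_ERROR 80  /* stands in for PF_RUNTIME_ERROR_MSGTAG */

int main(int argc, char **argv)
{
    int rank, size, dummy = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if ( size < 2 ) { MPI_Finalize(); return 0; }

    if ( rank == 0 ) {
        /* Non-blocking notification: a blocking MPI_Send here could
           deadlock against a worker that is blocked sending to us. */
        MPI_Request reqs[size - 1];
        for ( int i = 1; i < size; i++ )
            MPI_Isend(&dummy, 0, MPI_BYTE, i, TAG_ERROR,
                      MPI_COMM_WORLD, &reqs[i - 1]);
        /* Drain exactly one acknowledgement per worker ... */
        for ( int i = 1; i < size; i++ ) {
            MPI_Status st;
            MPI_Recv(&dummy, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &st);
            printf("master: ack from %d (tag %d)\n", st.MPI_SOURCE, st.MPI_TAG);
        }
        /* ... and only then wait for our own sends to complete. */
        MPI_Waitall(size - 1, reqs, MPI_STATUSES_IGNORE);
    } else {
        /* Worker: receive the notification, then acknowledge it. */
        MPI_Recv(&dummy, 0, MPI_BYTE, 0, TAG_ERROR,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&dummy, 0, MPI_BYTE, 0, TAG_ERROR, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}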
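
PF_PostEndSortBarrier() is, in effect, an error-aware barrier: each slave votes PF_RUNTIME_SYNC_MSGTAG or PF_RUNTIME_ERROR_MSGTAG, the master folds the votes into a single verdict, and every slave receives that verdict before continuing. On a communicator in a clean state the same semantics could be had from a single MPI_Allreduce with MPI_LOR; the point-to-point formulation is what lets the master skim off pending stdout/log messages while collecting the votes. A reduced sketch of the round trip, with invented names (error_barrier, TAG_ERROR, TAG_SYNC) and without the message-draining details:

#include <mpi.h>

#define TAG_ERROR 80  /* stands in for PF_RUNTIME_ERROR_MSGTAG */
#define TAG_SYNC  81  /* stands in for PF_RUNTIME_SYNC_MSGTAG */

/*
 * Returns nonzero on every rank if any rank entered with a nonzero
 * local_error: one vote per worker flows to rank 0, one verdict per
 * worker flows back. Equivalent in effect to
 * MPI_Allreduce(&local_error, &error, 1, MPI_INT, MPI_LOR, comm).
 */
int error_barrier(MPI_Comm comm, int local_error)
{
    int rank, size, dummy = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    if ( rank == 0 ) {
        int error = local_error;
        for ( int i = 1; i < size; i++ ) {  /* collect one vote per worker */
            MPI_Status st;
            MPI_Recv(&dummy, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &st);
            if ( st.MPI_TAG == TAG_ERROR ) error = 1;
        }
        for ( int i = 1; i < size; i++ )    /* announce the verdict */
            MPI_Send(&dummy, 0, MPI_BYTE, i, error ? TAG_ERROR : TAG_SYNC, comm);
        return error;
    } else {
        MPI_Status st;
        MPI_Send(&dummy, 0, MPI_BYTE, 0, local_error ? TAG_ERROR : TAG_SYNC, comm);
        MPI_Recv(&dummy, 0, MPI_BYTE, 0, MPI_ANY_TAG, comm, &st);
        return st.MPI_TAG == TAG_ERROR;
    }
}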