diff --git a/check/features.frm b/check/features.frm index c7bd4488..aafc5384 100644 --- a/check/features.frm +++ b/check/features.frm @@ -307,9 +307,6 @@ assert succeeded? # assert anything. We only test whether this hits the test suite timeout limit, # or not. TODO in principle this can be considered to be a bug, to be looked at # in the future. -# For parform, there is no attempt to ensure the master process recieves SIGALRM -# and not the workers. -#pend_if mpi? *--#] TimeoutAfter_2 : *--#[ dedup : * Test deduplication @@ -2350,7 +2347,6 @@ Local F = mzv_(2,1); Evaluate mzv_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Error: Weight of Euler/MZV sum greater than 2.") assert runtime_error?("Please increase the maximum weight in #startfloat.") *--#] mzv_error_1 : @@ -2360,7 +2356,6 @@ Local F = mzv_(1,2); Evaluate mzv_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Divergent MZV in CalculateMZV") *--#] mzv_error_2 : *--#[ mzv_error_3: @@ -2369,7 +2364,6 @@ Local F = mzv_(-2,1); Evaluate mzv_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Illegal index[0] in CalculateMZV: -2") *--#] mzv_error_3 : *--#[ mzv_error_4: @@ -2378,7 +2372,6 @@ Local F = mzvhalf_(2,-1); Evaluate mzvhalf_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Illegal index[1] in CalculateMZVhalf: -1") *--#] mzv_error_4 : *--#[ mzv_error_5: @@ -2387,7 +2380,6 @@ Local F = euler_(1,-2); Evaluate euler_; .end #pend_if wordsize == 2 -#pend_if mpi? assert runtime_error?("Divergent Euler sum in CalculateEuler") *--#] mzv_error_5 : *--#[ humanstats : @@ -3227,7 +3219,6 @@ Model PHI3; Vertex phi,phi,phi:1; EndModel; .end -#pend_if mpi? assert runtime_error?('Invalid coupling constant in vertex statement.') *--#] diagrams_err_1 : *--#[ diagrams_err_2 : @@ -3236,7 +3227,6 @@ Model PHI3; Vertex phi,phi,phi:g^-1; EndModel; .end -#pend_if mpi? assert runtime_error?('Invalid negative power of coupling constant.') *--#] diagrams_err_2 : *--#[ diagrams_err_3 : @@ -3247,7 +3237,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{},{p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Insufficient external momenta in diagrams_') *--#] diagrams_err_3 : *--#[ diagrams_err_4 : @@ -3258,7 +3247,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q2},{},1,0); .end -#pend_if mpi? assert runtime_error?('Insufficient internal momenta in diagrams_') *--#] diagrams_err_4 : *--#[ diagrams_err_5 : @@ -3269,7 +3257,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,-q2},{p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Invalid negative external momentum in diagrams_: -q2') *--#] diagrams_err_5 : *--#[ diagrams_err_6 : @@ -3280,7 +3267,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q2},{-p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Invalid negative internal momentum in diagrams_: -p1') *--#] diagrams_err_6 : *--#[ diagrams_err_7 : @@ -3291,7 +3277,6 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q1},{p1,p2},1,0); .end -#pend_if mpi? assert runtime_error?('Invalid repeated momentum in diagrams_: q1') *--#] diagrams_err_7 : *--#[ diagrams_err_8 : @@ -3302,6 +3287,5 @@ Model PHI3; EndModel; Local test = diagrams_(PHI3,{phi},{phi},{q1,q2},{q1,p2},1,0); .end -#pend_if mpi? 
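
Reviewer note: every test in this batch ends in a deliberate runtime error. The `#pend_if mpi?` guards (and the SIGALRM caveat removed from TimeoutAfter_2) existed because, under ParFORM, an error on one rank used to leave the other ranks blocked in MPI receives until the suite timeout. This PR adds an error rendezvous (PF_RaiseRuntimeError in sources/parallel.c, at the end of this diff), which is why the guards can go. A minimal sketch of the worker side of that rendezvous in plain MPI; the tag value and function name here are stand-ins, not FORM code:

```c
#include <mpi.h>

enum { TAG_RUNTIME_ERROR = 42 };  /* stands in for PF_RUNTIME_ERROR_MSGTAG */

/* A worker that hits a fatal error notifies rank 0 (the master) and then
 * waits for the master's acknowledgement, so it does not exit while
 * messages are still in flight. Mirrors PF_RaiseRuntimeError below. */
void raise_runtime_error(MPI_Comm comm)
{
    int dummy = 0;
    MPI_Ssend(&dummy, 0, MPI_BYTE, 0, TAG_RUNTIME_ERROR, comm);
    MPI_Recv(&dummy, 0, MPI_BYTE, 0, TAG_RUNTIME_ERROR, comm,
             MPI_STATUS_IGNORE);
}
```
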
assert runtime_error?('Invalid repeated momentum in diagrams_: q1') *--#] diagrams_err_8 : diff --git a/check/fixes.frm b/check/fixes.frm index e3a22cd5..d7a2e655 100644 --- a/check/fixes.frm +++ b/check/fixes.frm @@ -389,8 +389,6 @@ PolyRatFun rat; L F = rat(a.a,1); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue39 : *--#[ Issue41 : @@ -1004,7 +1002,6 @@ L F6 = f(1000*g5_); L F7 = f(10000*g5_); .end # Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue94 : *--#[ Issue95 : @@ -1809,8 +1806,6 @@ L F = f(,...,); transform f,mulargs(1,last); * silent crash P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("Term too complex during normalization") *--#] Issue183 : *--#[ Issue185 : @@ -2144,8 +2139,6 @@ Print; # For now it fails because # "Currently Stage 4 sorts are not allowed for function arguments or $ variables." assert runtime_error? -# Runtime errors may freeze ParFORM. -#pend_if mpi? #assert succeeded? #assert result("test1") =~ expr("0") #assert result("test2") =~ expr("g(0)") @@ -2385,16 +2378,12 @@ assert result("F40") =~ expr("0") L F11 = div_(1,0); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_2 : *--#[ Issue261_3 : L F23 = rem_(0,0); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_3 : *--#[ Issue261_4 : @@ -2404,8 +2393,6 @@ S x; L F34 = inverse_($x,$z); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_4 : *--#[ Issue261_5 : @@ -2413,8 +2400,6 @@ assert runtime_error? L F16 = div_($z,$z); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_5 : *--#[ Issue261_6 : @@ -2423,8 +2408,6 @@ S x; L F27 = rem_($x,0); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_6 : *--#[ Issue261_7 : @@ -2432,8 +2415,6 @@ assert runtime_error? L F39 = inverse_(1,$z); P; .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error? *--#] Issue261_7 : *--#[ Issue267 : @@ -3068,7 +3049,7 @@ Identify f(x?) = prf(n-x,n+x); .end # Fails due to polynomial size on 32bit builds #require wordsize >= 4 -# Runtime errors may freeze ParFORM. +# This set of buffer sizes does not cause the error in ParFORM. #pend_if mpi? assert runtime_error?("Please increase SmallExtension setup parameter.") *--#] Issue512_1 : @@ -3097,8 +3078,6 @@ EndTerm; .end # Fails due to polynomial size on 32bit builds #require wordsize >= 4 -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("Please increase SubSmallExtension setup parameter.") *--#] Issue512_2 : *--#[ Issue512_3 : @@ -3709,8 +3688,6 @@ Vector v; PolyRatFun rat; Local F = rat(v,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_1 : *--#[ Issue567_2 : @@ -3719,8 +3696,6 @@ Index i; PolyRatFun rat; Local F = rat(i,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_2 : *--#[ Issue567_3a : @@ -3729,8 +3704,6 @@ Function f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? 
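
Reviewer note: the recurring "# Runtime errors may freeze ParFORM." pends removed throughout fixes.frm all trace back to the same defect: Terminate() on one rank did not inform the others. The fix is the PF_PreTerminate() hook added in sources/parallel.c below, documented there as being called by Terminate(). A hedged sketch of the assumed call chain; only PF_PreTerminate() and PF_Terminate() are from this diff, the surrounding body is illustrative:

```c
#include <stdlib.h>

extern void PF_PreTerminate(int errorcode);  /* new: tells the other ranks about the error */
extern int  PF_Terminate(int errorcode);     /* existing ParFORM finalization */

/* Illustrative only; the real Terminate() in FORM does far more cleanup.
 * The point is the ordering: notify the other ranks first, then shut down. */
void Terminate(int errorcode)
{
    PF_PreTerminate(errorcode);  /* no-op unless errorcode != 0 during parallel processing */
    PF_Terminate(errorcode);
    exit(errorcode < 0 ? 1 : errorcode);
}
```
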
assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3a : *--#[ Issue567_3b : @@ -3739,8 +3712,6 @@ CFunction f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3b : *--#[ Issue567_3c : @@ -3749,8 +3720,6 @@ Table f(1); PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3c : *--#[ Issue567_3d : @@ -3759,8 +3728,6 @@ CTable f(1); PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3d : *--#[ Issue567_3e : @@ -3769,8 +3736,6 @@ Tensor f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3e : *--#[ Issue567_3f : @@ -3779,8 +3744,6 @@ CTensor f; PolyRatFun rat; Local F = rat(f,1); .end -# Runtime errors may freeze ParFORM. -#pend_if mpi? assert runtime_error?("ERROR: polynomials and polyratfuns must contain symbols only") *--#] Issue567_3f : *--#[ Issue577_1 : @@ -3829,7 +3792,7 @@ Multiply 2; print; .end # ParFORM has valgrind errors with this. See discussion in PR 586. -#pend_if mpi? +#pend_if valgrind? && mpi? assert succeeded? assert result("test2") =~ expr("4") assert result("test3") =~ expr("6") @@ -3858,7 +3821,7 @@ Local test3 = 3; #endif .end # ParFORM has valgrind errors with this. See discussion in PR 586. -#pend_if mpi? +#pend_if valgrind? && mpi? assert runtime_error?("isnumerical: expression is not yet defined!") *--#] Issue577_2 : *--#[ Issue599 : @@ -4165,7 +4128,6 @@ ModuleOption inparallel; Print; .end -#pend_if mpi? assert succeeded? assert result("diff1") =~ expr("0") assert result("diff2") =~ expr("0") @@ -4474,7 +4436,7 @@ Local expr = 1; .sort $a = 1; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $a") *--#] PullReq649_1 : *--#[ PullReq649_2 : @@ -4483,7 +4445,7 @@ Local expr = 1; .sort $n1MdWu6rNU1d29yW3ukhzV7YuY = 1; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $n1MdWu6rNU1d29yW3ukhzV7YuY") *--#] PullReq649_2 : *--#[ PullReq649_3 : @@ -4501,7 +4463,7 @@ Local expr = 1; Symbol x; id x?$a = x; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $a") *--#] PullReq649_4 : *--#[ PullReq649_5 : @@ -4545,7 +4507,7 @@ Local expr = 1; $a = 1; moduleoption local $b; .end -#require threaded? +#require threaded? || mpi? assert warning?("This module is forced to run in sequential mode due to $-variable: $a") *--#] PullReq649_9 : *--#[ PullReq652 : diff --git a/check/polynomial.frm b/check/polynomial.frm index e5928dca..c551cac9 100644 --- a/check/polynomial.frm +++ b/check/polynomial.frm @@ -580,7 +580,7 @@ Drop; #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaik(gcd,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? 
|| warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("gcd: OK") *--#] polynomial_gcd_nvar_1 : @@ -596,7 +596,7 @@ assert stdout =~ exact_pattern("gcd: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaik(gcd,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("gcd: OK") *--#] polynomial_gcd_nvar_2 : @@ -612,7 +612,7 @@ assert stdout =~ exact_pattern("gcd: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaik(gcd,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("gcd: OK") *--#] polynomial_gcd_nvar_5 : @@ -628,7 +628,7 @@ assert stdout =~ exact_pattern("gcd: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testiaj(mul,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("mul: OK") *--#] polynomial_mul_nvar_1 : @@ -644,7 +644,7 @@ assert stdout =~ exact_pattern("mul: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testiaj(mul,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("mul: OK") *--#] polynomial_mul_nvar_2 : @@ -660,7 +660,7 @@ assert stdout =~ exact_pattern("mul: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testiaj(mul,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("mul: OK") *--#] polynomial_mul_nvar_5 : @@ -676,7 +676,7 @@ assert stdout =~ exact_pattern("mul: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaj(div,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("div: OK") *--#] polynomial_div_nvar_1 : @@ -692,7 +692,7 @@ assert stdout =~ exact_pattern("div: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaj(div,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("div: OK") *--#] polynomial_div_nvar_2 : @@ -708,7 +708,7 @@ assert stdout =~ exact_pattern("div: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijaj(div,2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("div: OK") *--#] polynomial_div_nvar_5 : @@ -724,7 +724,7 @@ assert stdout =~ exact_pattern("div: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijak(rem,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? 
|| warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("rem: OK") *--#] polynomial_rem_nvar_1 : @@ -740,7 +740,7 @@ assert stdout =~ exact_pattern("rem: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijak(rem,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("rem: OK") *--#] polynomial_rem_nvar_2 : @@ -756,7 +756,7 @@ assert stdout =~ exact_pattern("rem: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testijak(rem,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("rem: OK") *--#] polynomial_rem_nvar_5 : @@ -772,7 +772,7 @@ assert stdout =~ exact_pattern("rem: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -792,7 +792,7 @@ assert stdout =~ exact_pattern("factarg: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -812,7 +812,7 @@ assert stdout =~ exact_pattern("factarg: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. # This one is not valgrind clean for flint < 3.2.1 ! #pend_if valgrind? @@ -833,7 +833,7 @@ assert stdout =~ exact_pattern("factarg: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactdolij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -853,7 +853,7 @@ assert stdout =~ exact_pattern("factdollar: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactdolij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -873,7 +873,7 @@ assert stdout =~ exact_pattern("factdollar: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testfactdolij(2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 # This takes too long when running without FLINT under lcov. Skip if valgrind, and hence lcov. #pend_if valgrind? # This needs longer if running without flint. @@ -893,7 +893,7 @@ assert stdout =~ exact_pattern("factdollar: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(norm,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? 
+#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf norm: OK") *--#] polynomial_prf_norm_nvar_1 : @@ -909,7 +909,7 @@ assert stdout =~ exact_pattern("prf norm: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(norm,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf norm: OK") *--#] polynomial_prf_norm_nvar_2 : @@ -925,7 +925,7 @@ assert stdout =~ exact_pattern("prf norm: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(norm,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf norm: OK") *--#] polynomial_prf_norm_nvar_5 : @@ -941,7 +941,7 @@ assert stdout =~ exact_pattern("prf norm: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(add,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf add: OK") *--#] polynomial_prf_add_nvar_1 : @@ -957,7 +957,7 @@ assert stdout =~ exact_pattern("prf add: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(add,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf add: OK") *--#] polynomial_prf_add_nvar_2 : @@ -973,7 +973,7 @@ assert stdout =~ exact_pattern("prf add: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testprf(add,2,`NPOLYS',2,`NPOLYS',2,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("prf add: OK") *--#] polynomial_prf_add_nvar_5 : @@ -989,7 +989,7 @@ assert stdout =~ exact_pattern("prf add: OK") #call genpoly(`NPOLYS',`NVARS',`NEGPOW',`MAXPOW',`MAXCOEFF',`NTERMS') #call testinv(1,`NPOLYS',2,`NPOLYS') .end -#pend_if wordsize == 2 || mpi? +#pend_if wordsize == 2 assert succeeded? || warning?("FORM was not built with FLINT support.") assert stdout =~ exact_pattern("inv: OK") *--#] polynomial_inverse_nvar_1 : @@ -1031,7 +1031,6 @@ PolyRatFun prf; Print; .end -#pend_if mpi? assert succeeded? assert result("test1") =~ expr("prf(x*y + x*z + 1,w*x)") assert result("test2") =~ expr("prf(x^2 + x + 1,x^2)") @@ -1052,7 +1051,6 @@ CFunction f,prf; PolyRatFun prf; Local test = prf(f(1),1); .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: polynomials and polyratfuns must contain symbols only") *--#] polynomial_error_1 : @@ -1062,7 +1060,6 @@ PolyRatFun prf; * Fast notation version Local test = prf(f,1); .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: polynomials and polyratfuns must contain symbols only") *--#] polynomial_error_2 : @@ -1070,7 +1067,6 @@ assert stdout =~ exact_pattern("ERROR: polynomials and polyratfuns must contain Symbol x; Local test = inverse_(x+1,x+1); .end -#pend_if mpi? assert runtime_error? 
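
Reviewer note: in polynomial_error_3 above, inverse_(x+1,x+1) asks for the inverse of a = x+1 modulo m = x+1. Over the rationals such an inverse exists precisely when the two polynomials are coprime,

\[ \exists\,u:\ u\,a \equiv 1 \pmod{m} \iff \gcd(a,m)=1, \qquad \gcd(x+1,\,x+1)=x+1 \neq 1, \]

so the request must fail in any backend; that is why the assert just below accepts either the FLINT message or the built-in poly one.
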
assert stdout =~ exact_pattern("flint::inverse_poly error: inverse does not exist") || stdout =~ exact_pattern("ERROR: polynomial inverse does not exist") *--#] polynomial_error_3 : @@ -1079,7 +1075,6 @@ CFunction prf; PolyRatFun prf; Local test = prf; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: PolyRatFun cannot have zero arguments") *--#] polynomial_error_4 : @@ -1089,7 +1084,6 @@ PolyRatFun prf; Local test = prf(1,2,3); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: PolyRatFun cannot have more than two arguments") *--#] polynomial_error_5 : @@ -1100,7 +1094,6 @@ PolyRatFun prf; Local test = prf(x,y,3); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("ERROR: PolyRatFun cannot have more than two arguments") *--#] polynomial_error_6 : @@ -1115,7 +1108,6 @@ PolyRatFun prf; Local test = prf((x+1)^`N',(x+2)^`N')*prf((x^`N'-1)^`N',(x^`N'-2)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::ratfun_normalize: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_7 : @@ -1128,7 +1120,6 @@ PolyRatFun prf; Local test = prf((x+1)^`N',(x+2)^`N')*prf((x^`N'-1)^`N',(x^`N'-2)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::to_argument_poly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_8 : @@ -1142,7 +1133,6 @@ PolyRatFun prf; Local test = prf((x+1)^`N',(x+2)^`N')*prf((x^`N'-1)^`N',(x^`N'-2)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::to_argument_poly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_9 : @@ -1156,7 +1146,6 @@ PolyRatFun prf; Local test = prf((x+y)^`N',(x+2*y)^`N')*prf((x^`N'-y)^`N',(x^`N'-2*y)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::ratfun_normalize: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_10 : @@ -1169,7 +1158,6 @@ PolyRatFun prf; Local test = prf((x+y)^`N',(x+2*y)^`N')*prf((x^`N'-y)^`N',(x^`N'-2*y)^`N'); Print; .end -#pend_if mpi? assert runtime_error? assert stdout =~ exact_pattern("flint::to_argument_mpoly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_11 : @@ -1183,7 +1171,6 @@ PolyRatFun prf; Local test = prf((x+y+z)^`N',(x+2*y)^`N')*prf((x^`N'-y-z)^`N',(x^`N'-2*y)^`N'); Print; .end -#pend_if mpi? assert runtime_error? 
assert stdout =~ exact_pattern("flint::to_argument_mpoly: output exceeds MaxTermSize") || stdout =~ exact_pattern("ERROR: PolyRatFun doesn't fit in a term") *--#] polynomial_error_12 : diff --git a/sources/mpi.c b/sources/mpi.c index 2a8aa573..80cf5310 100644 --- a/sources/mpi.c +++ b/sources/mpi.c @@ -36,6 +36,11 @@ #[ Includes and variables : */ +/* +#define MPIDEBUGGING +#define MPIDEBUGGING_DELAY_US 10000 +*/ + #include #include "form3.h" @@ -338,7 +343,14 @@ int PF_RecvWbuf(WORD *b, LONG *s, int *src) { int i, r = 0; - r = MPI_Recv(b,(int)*s,PF_WORD,*src,PF_ANY_MSGTAG,PF_COMM,&PF_status); + for (;;) { + r = MPI_Probe(*src,PF_ANY_MSGTAG,PF_COMM,&PF_status); + if ( r != MPI_SUCCESS ) { if ( r > 0 ) r *= -1; return(r); } + if ( PF_status.MPI_TAG != PF_RUNTIME_ERROR_MSGTAG ) break; + PF_ReceiveRuntimeError(); + } + + r = MPI_Recv(b,(int)*s,PF_WORD,PF_status.MPI_SOURCE,PF_status.MPI_TAG,PF_COMM,&PF_status); if ( r != MPI_SUCCESS ) { if ( r > 0 ) r *= -1; return(r); } r = MPI_Get_count(&PF_status,PF_WORD,&i); @@ -446,6 +458,29 @@ int PF_Bcast(void *buffer, int count) /* #] PF_Bcast : + #[ PF_Reduce : +*/ + +/** + * Performs a reduce operation across all processes. + * + * @param[in] sendbuf the buffer containing the data to be reduced. + * @param[out] recvbuf the buffer to store the reduced result (only for the root process). + * @param count the number of elements in the buffers. + * @param type the datatype of the elements. + * @param op the operation to apply. + * @param root the root process number. + * @return 0 if OK, nonzero on error. + */ +int PF_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype type, MPI_Op op, int root) +{ + if ( MPI_Reduce(sendbuf,recvbuf,count,type,op,root,PF_COMM) != MPI_SUCCESS ) + return(-1); + return(0); +} + +/* + #] PF_Reduce : #[ PF_RawSend : */ @@ -459,7 +494,6 @@ int PF_Bcast(void *buffer, int count) * @param tag the message tag. * @return 0 if OK, nonzero on error. */ - int PF_RawSend(int dest, void *buf, LONG l, int tag) { int ret=MPI_Ssend(buf,(int)l,MPI_BYTE,dest,tag,PF_COMM); @@ -523,6 +557,85 @@ int PF_RawProbe(int *src, int *tag, int *bytesize) /* #] PF_RawProbe : + #[ PF_RawIsend : +*/ + +/** + * Performs a nonblocking send. + * + * @param dest the destination process number. + * @param[in] buf the send buffer. + * @param count the number of elements in the send buffer. + * @param type the datatype of the data in the send buffer. + * @param tag the message tag. + * @param[out] request the request handle for the nonblocking operation. + * @return 0 if OK, nonzero on error. + */ +int PF_RawIsend(int dest, const void *buf, int count, MPI_Datatype type, int tag, MPI_Request *request) +{ + int ret = MPI_Isend(buf, count, type, dest, tag, PF_COMM, request); + if ( ret != MPI_SUCCESS ) return(-1); + return(0); +} + +/* + #] PF_RawIsend : + #[ PF_RawWaitAll : +*/ + +/** + * Waits for all the given requests to complete. + * + * @param count the number of requests. + * @param[in,out] request the array of request handles. + * @param[out] status the array of status objects to store the status of each completed request. + * @return 0 if OK, nonzero on error. + */ +int PF_RawWaitAll(int count, MPI_Request *request, MPI_Status *status) +{ + int ret = MPI_Waitall(count, request, status); + if ( ret != MPI_SUCCESS ) return(-1); + return(0); +} + +/* + #] PF_RawWaitAll : + #[ PF_Discard : +*/ + +/** + * Discards an incoming message. + * + * @param[in,out] src the source process number. + * On output, that of the actual received message. 
+ * @param[in,out] tag the message tag.
+ *                    On output, that of the actual received message.
+ * @return 0 if OK, nonzero on error.
+ */
+int PF_Discard(int *src, int *tag)
+{
+	enum { DEFAULT_BUF_SIZE = 1024 };
+	MPI_Status stat;
+	int count;
+	void *buf;
+	char default_buf[DEFAULT_BUF_SIZE];
+	int srcval = src != NULL ? *src : PF_ANY_SOURCE;
+	int tagval = tag != NULL ? *tag : PF_ANY_MSGTAG;
+	int ret = MPI_Probe(srcval, tagval, PF_COMM, &stat);
+	if ( ret != MPI_SUCCESS ) return -1;
+	if ( src != NULL ) *src = stat.MPI_SOURCE;
+	if ( tag != NULL ) *tag = stat.MPI_TAG;
+	ret = MPI_Get_count(&stat, MPI_BYTE, &count);
+	if ( ret != MPI_SUCCESS ) return -1;
+	buf = count <= DEFAULT_BUF_SIZE ? default_buf : Malloc1(count, "PF_Discard");
+	ret = MPI_Recv(buf, count, MPI_BYTE, stat.MPI_SOURCE, stat.MPI_TAG, PF_COMM, MPI_STATUS_IGNORE);
+	if ( buf != default_buf ) M_free(buf, "PF_Discard");
+	if ( ret != MPI_SUCCESS ) return -1;
+	return 0;
+}
+
+/*
+ 	#] PF_Discard :
  	#[ The pack buffer :
  	 #[ Variables :
 */
@@ -620,7 +733,6 @@ int PF_PrintPackBuf(char *s, int size)
  *
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_PreparePack(void)
 {
 	return PF_InitPackBuf();
@@ -818,7 +930,6 @@ int PF_UnpackString(UBYTE *str)
  * @param tag the message tag.
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_Send(int to, int tag)
 {
 	int err;
@@ -1447,7 +1558,6 @@ static inline int PF_longMultiReset(int is_sender)
  *
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_PrepareLongSinglePack(void)
 {
 	return PF_longSingleReset(1);
@@ -1639,7 +1749,6 @@ int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
  *
  * @return 0 if OK, nonzero on error.
  */
-
 int PF_PrepareLongMultiPack(void)
 {
 	return PF_longMultiReset(1);
diff --git a/sources/mpidbg.h b/sources/mpidbg.h
index 924096e6..036bef56 100644
--- a/sources/mpidbg.h
+++ b/sources/mpidbg.h
@@ -42,6 +42,12 @@
 #include
 #include
 #include
+#if defined(MPIDEBUGGING_DELAY_US) && MPIDEBUGGING_DELAY_US > 0
+#include <unistd.h> // for usleep()
+#endif
+
+#define MPIDBG_LINESIZE 1024
+#define MPIDBG_BUFSIZE 128
 
 /*
  	#] Includes :
@@ -60,11 +66,11 @@ static inline int MPIDBG_Get_rank(void) {
 */
 static inline void MPIDBG_Out(const char *file, int line, const char *func, const char *fmt, ...)
 {
-	char buf[1024]; /* Enough. */
+	char buf[MPIDBG_LINESIZE];
 	va_list ap;
 	va_start(ap, fmt);
-	snprintf(buf,1024, "*** [%d] %10s %4d @ %-16s: ", MPIDBG_RANK, file, line, func);
-	vsnprintf(buf + strlen(buf),1024-strlen(buf), fmt, ap);
+	snprintf(buf,MPIDBG_LINESIZE, "*** [%d] %10s %4d @ %-16s: ", MPIDBG_RANK, file, line, func);
+	vsnprintf(buf + strlen(buf),MPIDBG_LINESIZE-strlen(buf), fmt, ap);
 	va_end(ap);
 	/* Assume fprintf with a line will work well even in multi-processes.
*/ fprintf(stderr, "%s\n", buf); @@ -73,6 +79,18 @@ static inline void MPIDBG_Out(const char *file, int line, const char *func, cons /* #] MPIDBG_Out : + #[ MPIDBG_insert_delay : +*/ + +static inline void MPIDBG_insert_delay(void) +{ +#if defined(MPIDEBUGGING_DELAY_US) && MPIDEBUGGING_DELAY_US > 0 + usleep(MPIDEBUGGING_DELAY_US); +#endif +} + +/* + #] MPIDBG_insert_delay : #[ MPIDBG_sprint_requests : */ @@ -139,6 +157,43 @@ static inline void MPIDBG_sprint_statuses(char *buf, int count, const MPI_Reques #define MPIDBG_EXTARG const char *file, int line, const char *func /* + #[ MPI_Init : +*/ + +static inline int MPIDBG_Init(int* argc, char*** argv, MPIDBG_EXTARG) +{ + int ret = MPI_Init(argc, argv); + if ( ret == MPI_SUCCESS ) { + MPIDBG_Out("MPI_Init: OK"); + } + else { + MPIDBG_Out("MPI_Init: Failed"); + } + return ret; +} +#define MPI_Init(...) MPIDBG_Init(__VA_ARGS__, __FILE__, __LINE__, __func__) + +/* + #] MPI_Init : + #[ MPI_Finalize : +*/ + +static inline int MPIDBG_Finalize(MPIDBG_EXTARG) +{ + MPIDBG_Out("MPI_Finalize"); + int ret = MPI_Finalize(); + if ( ret == MPI_SUCCESS ) { + MPIDBG_Out("MPI_Finalize: OK"); + } + else { + MPIDBG_Out("MPI_Finalize: Failed"); + } + return ret; +} +#define MPI_Finalize() MPIDBG_Finalize(__FILE__, __LINE__, __func__) + +/* + #] MPI_Finalize : #[ MPI_Send : */ @@ -147,10 +202,10 @@ static inline int MPIDBG_Send(void *buf, int count, MPI_Datatype datatype, int d MPIDBG_Out("MPI_Send: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Send(buf, count, datatype, dest, tag, comm); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Send: OK"); + MPIDBG_Out("MPI_Send: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Send: Failed"); + MPIDBG_Out("MPI_Send: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -163,6 +218,8 @@ static inline int MPIDBG_Send(void *buf, int count, MPI_Datatype datatype, int d static inline int MPIDBG_Recv(void* buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status, MPIDBG_EXTARG) { + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; MPIDBG_Out("MPI_Recv: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); int ret = MPI_Recv(buf, count, datatype, source, tag, comm, status); if ( ret == MPI_SUCCESS ) { @@ -171,7 +228,7 @@ static inline int MPIDBG_Recv(void* buf, int count, MPI_Datatype datatype, int s MPIDBG_Out("MPI_Recv: OK src=%d dest=%d tag=%d count=%d", status->MPI_SOURCE, MPIDBG_RANK, status->MPI_TAG, ret_count); } else { - MPIDBG_Out("MPI_Recv: Failed"); + MPIDBG_Out("MPI_Recv: Failed src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); } return ret; } @@ -187,10 +244,10 @@ static inline int MPIDBG_Bsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Bsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Bsend(buf, count, datatype, dest, tag, comm); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Bsend: OK"); + MPIDBG_Out("MPI_Bsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Bsend: Failed"); + MPIDBG_Out("MPI_Bsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -206,10 +263,10 @@ static inline int MPIDBG_Ssend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Ssend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Ssend(buf, count, datatype, dest, tag, comm); if ( ret == 
MPI_SUCCESS ) { - MPIDBG_Out("MPI_Ssend: OK"); + MPIDBG_Out("MPI_Ssend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Ssend: Failed"); + MPIDBG_Out("MPI_Ssend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -225,10 +282,10 @@ static inline int MPIDBG_Rsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Rsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Rsend(buf, count, datatype, dest, tag, comm); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Rsend: OK"); + MPIDBG_Out("MPI_Rsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Rsend: Failed"); + MPIDBG_Out("MPI_Rsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -239,15 +296,15 @@ static inline int MPIDBG_Rsend(void* buf, int count, MPI_Datatype datatype, int #[ MPI_Isend : */ -static inline int MPIDBG_Isend(void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, MPIDBG_EXTARG) +static inline int MPIDBG_Isend(const void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request, MPIDBG_EXTARG) { MPIDBG_Out("MPI_Isend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Isend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Isend: OK"); + MPIDBG_Out("MPI_Isend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Isend: Failed"); + MPIDBG_Out("MPI_Isend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -263,10 +320,10 @@ static inline int MPIDBG_Ibsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Ibsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Ibsend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Ibsend: OK"); + MPIDBG_Out("MPI_Ibsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Ibsend: Failed"); + MPIDBG_Out("MPI_Ibsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -282,10 +339,10 @@ static inline int MPIDBG_Issend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Issend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Issend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Issend: OK"); + MPIDBG_Out("MPI_Issend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Issend: Failed"); + MPIDBG_Out("MPI_Issend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -301,10 +358,10 @@ static inline int MPIDBG_Irsend(void* buf, int count, MPI_Datatype datatype, int MPIDBG_Out("MPI_Irsend: src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); int ret = MPI_Irsend(buf, count, datatype, dest, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Irsend: OK"); + MPIDBG_Out("MPI_Irsend: OK src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } else { - MPIDBG_Out("MPI_Irsend: Failed"); + MPIDBG_Out("MPI_Irsend: Failed src=%d dest=%d tag=%d count=%d", MPIDBG_RANK, dest, tag, count); } return ret; } @@ -320,10 +377,10 @@ static inline int MPIDBG_Irecv(void* buf, int count, 
MPI_Datatype datatype, int MPIDBG_Out("MPI_Irecv: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); int ret = MPI_Irecv(buf, count, datatype, source, tag, comm, request); if ( ret == MPI_SUCCESS ) { - MPIDBG_Out("MPI_Irecv: OK dest=%d", MPIDBG_RANK); + MPIDBG_Out("MPI_Irecv: OK src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); } else { - MPIDBG_Out("MPI_Irecv: Failed"); + MPIDBG_Out("MPI_Irecv: Failed src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); } return ret; } @@ -336,17 +393,19 @@ static inline int MPIDBG_Irecv(void* buf, int count, MPI_Datatype datatype, int static inline int MPIDBG_Wait(MPI_Request *request, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256 * 1]; /* Enough. */ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_request = *request; - MPIDBG_sprint_requests(buf, 1, request); - MPIDBG_Out("MPI_Wait: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, 1, request); + MPIDBG_Out("MPI_Wait: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Wait(request, status); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, 1, request, &old_request, status); - MPIDBG_Out("MPI_Wait: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, 1, request, &old_request, status); + MPIDBG_Out("MPI_Wait: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Wait: Failed"); + MPIDBG_Out("MPI_Wait: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -359,22 +418,24 @@ static inline int MPIDBG_Wait(MPI_Request *request, MPI_Status *status, MPIDBG_E static inline int MPIDBG_Test(MPI_Request *request, int *flag, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256 * 1]; /* Enough. */ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_request = *request; - MPIDBG_sprint_requests(buf, 1, request); - MPIDBG_Out("MPI_Test: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, 1, request); + MPIDBG_Out("MPI_Test: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Test(request, flag, status); if ( ret == MPI_SUCCESS ) { if ( *flag ) { - MPIDBG_sprint_statuses(buf, 1, request, &old_request, status); - MPIDBG_Out("MPI_Test: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, 1, request, &old_request, status); + MPIDBG_Out("MPI_Test: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Test: OK flag=false"); + MPIDBG_Out("MPI_Test: OK rank=%d request=%s flag=false", MPIDBG_RANK, buf1); } } else { - MPIDBG_Out("MPI_Test: Failed"); + MPIDBG_Out("MPI_Test: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -387,20 +448,22 @@ static inline int MPIDBG_Test(MPI_Request *request, int *flag, MPI_Status *statu static inline int MPIDBG_Waitany(int count, MPI_Request *array_of_requests, int *index, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256]; /* Enough. 
*/ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Waitany: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Waitany: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Waitany(count, array_of_requests, index, status); if ( ret == MPI_SUCCESS ) { MPI_Status statuses[count]; statuses[*index] = *status; - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, statuses); - MPIDBG_Out("MPI_Waitany: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, statuses); + MPIDBG_Out("MPI_Waitany: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Waitany: Failed"); + MPIDBG_Out("MPI_Waitany: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -413,25 +476,27 @@ static inline int MPIDBG_Waitany(int count, MPI_Request *array_of_requests, int static inline int MPIDBG_Testany(int count, MPI_Request *array_of_requests, int *index, int *flag, MPI_Status *status, MPIDBG_EXTARG) { - char buf[256]; /* Enough. */ + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; + char buf1[MPIDBG_BUFSIZE * 1], buf2[MPIDBG_BUFSIZE * 1]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Testany: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Testany: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Testany(count, array_of_requests, index, flag, status); if ( ret == MPI_SUCCESS ) { if ( *flag ) { MPI_Status statuses[count]; statuses[*index] = *status; - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, statuses); - MPIDBG_Out("MPI_Testany: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, statuses); + MPIDBG_Out("MPI_Testany: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Testany: OK flag=false"); + MPIDBG_Out("MPI_Testany: OK rank=%d request=%s flag=false", MPIDBG_RANK, buf1); } } else { - MPIDBG_Out("MPI_Testany: Failed"); + MPIDBG_Out("MPI_Testany: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -444,18 +509,20 @@ static inline int MPIDBG_Testany(int count, MPI_Request *array_of_requests, int static inline int MPIDBG_Waitall(int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * count]; /* Enough. 
*/ + MPI_Status st[count]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * count], buf2[MPIDBG_BUFSIZE * count]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Waitall: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Waitall: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Waitall(count, array_of_requests, array_of_statuses); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Waitall: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Waitall: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Waitall: Failed"); + MPIDBG_Out("MPI_Waitall: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -468,23 +535,25 @@ static inline int MPIDBG_Waitall(int count, MPI_Request *array_of_requests, MPI_ static inline int MPIDBG_Testall(int count, MPI_Request *array_of_requests, int *flag, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * count]; /* Enough. */ + MPI_Status st[count]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * count], buf2[MPIDBG_BUFSIZE * count]; MPI_Request old_requests[count]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * count); - MPIDBG_sprint_requests(buf, count, array_of_requests); - MPIDBG_Out("MPI_Testall: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, count, array_of_requests); + MPIDBG_Out("MPI_Testall: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Testall(count, array_of_requests, flag, array_of_statuses); if ( ret == MPI_SUCCESS ) { if ( *flag ) { - MPIDBG_sprint_statuses(buf, count, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Testall: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, count, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Testall: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Testall: OK flag=false"); + MPIDBG_Out("MPI_Testall: OK rank=%d request=%s flag=false", MPIDBG_RANK, buf1); } } else { - MPIDBG_Out("MPI_Testall: Failed"); + MPIDBG_Out("MPI_Testall: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -497,18 +566,20 @@ static inline int MPIDBG_Testall(int count, MPI_Request *array_of_requests, int static inline int MPIDBG_Waitsome(int incount, MPI_Request *array_of_requests, int *outcount, int *array_of_indices, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * incount]; /* Enough. 
*/ + MPI_Status st[incount]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * incount], buf2[MPIDBG_BUFSIZE * incount]; MPI_Request old_requests[incount]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * incount); - MPIDBG_sprint_requests(buf, incount, array_of_requests); - MPIDBG_Out("MPI_Waitsome: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, incount, array_of_requests); + MPIDBG_Out("MPI_Waitsome: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, incount, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Waitsome: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, incount, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Waitsome: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Waitsome: Failed"); + MPIDBG_Out("MPI_Waitsome: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -521,18 +592,20 @@ static inline int MPIDBG_Waitsome(int incount, MPI_Request *array_of_requests, i static inline int MPIDBG_Testsome(int incount, MPI_Request *array_of_requests, int *outcount, int *array_of_indices, MPI_Status *array_of_statuses, MPIDBG_EXTARG) { - char buf[256 * incount]; /* Enough. */ + MPI_Status st[incount]; + if ( array_of_statuses == MPI_STATUSES_IGNORE ) array_of_statuses = st; + char buf1[MPIDBG_BUFSIZE * incount], buf2[MPIDBG_BUFSIZE * incount]; MPI_Request old_requests[incount]; memcpy(old_requests, array_of_requests, sizeof(MPI_Request) * incount); - MPIDBG_sprint_requests(buf, incount, array_of_requests); - MPIDBG_Out("MPI_Testsome: rank=%d request=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_requests(buf1, incount, array_of_requests); + MPIDBG_Out("MPI_Testsome: rank=%d request=%s", MPIDBG_RANK, buf1); int ret = MPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses); if ( ret == MPI_SUCCESS ) { - MPIDBG_sprint_statuses(buf, incount, old_requests, array_of_requests, array_of_statuses); - MPIDBG_Out("MPI_Testsome: OK rank=%d result=%s", MPIDBG_RANK, buf); + MPIDBG_sprint_statuses(buf2, incount, old_requests, array_of_requests, array_of_statuses); + MPIDBG_Out("MPI_Testsome: OK rank=%d request=%s result=%s", MPIDBG_RANK, buf1, buf2); } else { - MPIDBG_Out("MPI_Testsome: Failed"); + MPIDBG_Out("MPI_Testsome: Failed rank=%d request=%s", MPIDBG_RANK, buf1); } return ret; } @@ -545,7 +618,10 @@ static inline int MPIDBG_Testsome(int incount, MPI_Request *array_of_requests, i static inline int MPIDBG_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status, MPIDBG_EXTARG) { + MPI_Status st; + if ( status == MPI_STATUS_IGNORE ) status = &st; MPIDBG_Out("MPI_Iprobe: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag); + MPIDBG_insert_delay(); int ret = MPI_Iprobe(source, tag, comm, flag, status); if ( ret == MPI_SUCCESS ) { if ( *flag ) { @@ -554,11 +630,11 @@ static inline int MPIDBG_Iprobe(int source, int tag, MPI_Comm comm, int *flag, M MPIDBG_Out("MPI_Iprobe: OK src=%d dest=%d tag=%d size=%d", status->MPI_SOURCE, MPIDBG_RANK, status->MPI_TAG, ret_size); } else { - MPIDBG_Out("MPI_Iprobe: OK flag=false"); + MPIDBG_Out("MPI_Iprobe: OK src=%d dest=%d tag=%d flag=false", source, MPIDBG_RANK, tag); } } else { - MPIDBG_Out("MPI_Iprobe: Failed"); + MPIDBG_Out("MPI_Iprobe: Failed src=%d dest=%d tag=%d", 
source, MPIDBG_RANK, tag);
 	}
 	return ret;
 }
@@ -571,6 +647,8 @@ static inline int MPIDBG_Iprobe(int source, int tag, MPI_Comm comm, int *flag, M
 
 static inline int MPIDBG_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status, MPIDBG_EXTARG)
 {
+	MPI_Status st;
+	if ( status == MPI_STATUS_IGNORE ) status = &st;
 	MPIDBG_Out("MPI_Probe: src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag);
 	int ret = MPI_Probe(source, tag, comm, status);
 	if ( ret == MPI_SUCCESS ) {
@@ -579,7 +657,7 @@ static inline int MPIDBG_Probe(int source, int tag, MPI_Comm comm, MPI_Status *s
 		MPIDBG_Out("MPI_Probe: OK src=%d dest=%d tag=%d size=%d", status->MPI_SOURCE, MPIDBG_RANK, status->MPI_TAG, ret_size);
 	}
 	else {
-		MPIDBG_Out("MPI_Probe: Failed");
+		MPIDBG_Out("MPI_Probe: Failed src=%d dest=%d tag=%d", source, MPIDBG_RANK, tag);
 	}
 	return ret;
 }
@@ -592,7 +670,7 @@ static inline int MPIDBG_Probe(int source, int tag, MPI_Comm comm, MPI_Status *s
 
 static inline int MPIDBG_Cancel(MPI_Request *request, MPIDBG_EXTARG)
 {
-	MPIDBG_Out("MPI_Cancel: rank=%d", MPIDBG_RANK);
+	MPIDBG_Out("MPI_Cancel");
 	int ret = MPI_Cancel(request);
 	if ( ret == MPI_SUCCESS ) {
 		MPIDBG_Out("MPI_Cancel: OK");
@@ -611,7 +689,7 @@ static inline int MPIDBG_Cancel(MPI_Request *request, MPIDBG_EXTARG)
 
 static inline int MPIDBG_Test_cancelled(MPI_Status *status, int *flag, MPIDBG_EXTARG)
 {
-	MPIDBG_Out("MPI_Test_cancelled: rank=%d", MPIDBG_RANK);
+	MPIDBG_Out("MPI_Test_cancelled");
 	int ret = MPI_Test_cancelled(status, flag);
 	if ( ret == MPI_SUCCESS ) {
 		if ( *flag ) {
@@ -635,7 +713,7 @@ static inline int MPIDBG_Test_cancelled(MPI_Status *status, int *flag, MPIDBG_EX
 
 static inline int MPIDBG_Barrier(MPI_Comm comm, MPIDBG_EXTARG)
 {
-	MPIDBG_Out("MPI_Barrier: rank=%d", MPIDBG_RANK);
+	MPIDBG_Out("MPI_Barrier");
 	int ret = MPI_Barrier(comm);
 	if ( ret == MPI_SUCCESS ) {
 		MPIDBG_Out("MPI_Barrier: OK");
@@ -657,10 +735,10 @@ static inline int MPIDBG_Bcast(void* buffer, int count, MPI_Datatype datatype, i
 	MPIDBG_Out("MPI_Bcast: root=%d count=%d", root, count);
 	int ret = MPI_Bcast(buffer, count, datatype, root, comm);
 	if ( ret == MPI_SUCCESS ) {
-		MPIDBG_Out("MPI_Bcast: OK");
+		MPIDBG_Out("MPI_Bcast: OK root=%d count=%d", root, count);
 	}
 	else {
-		MPIDBG_Out("MPI_Bcast: Failed");
+		MPIDBG_Out("MPI_Bcast: Failed root=%d count=%d", root, count);
 	}
 	return ret;
 }
@@ -668,6 +746,25 @@ static inline int MPIDBG_Bcast(void* buffer, int count, MPI_Datatype datatype, i
 
 /*
  	#] MPI_Bcast :
+ 	#[ MPI_Reduce :
+*/
+
+static inline int MPIDBG_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPIDBG_EXTARG)
+{
+	MPIDBG_Out("MPI_Reduce: root=%d count=%d", root, count);
+	int ret = MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+	if ( ret == MPI_SUCCESS ) {
+		MPIDBG_Out("MPI_Reduce: OK root=%d count=%d", root, count);
+	}
+	else {
+		MPIDBG_Out("MPI_Reduce: Failed root=%d count=%d", root, count);
+	}
+	return ret;
+}
+#define MPI_Reduce(...) 
MPIDBG_Reduce(__VA_ARGS__, __FILE__, __LINE__, __func__) + +/* + #] MPI_Reduce : #] MPI APIs : */ diff --git a/sources/parallel.c b/sources/parallel.c index 34c1e991..e2fd504b 100644 --- a/sources/parallel.c +++ b/sources/parallel.c @@ -40,6 +40,8 @@ #include "form3.h" #include "vector.h" +#include // must come after form3.h + /* #define PF_DEBUG_BCAST_LONG #define PF_DEBUG_BCAST_BUF @@ -63,6 +65,9 @@ int PF_WaitRbuf(PF_BUFFER *,int,LONG *); int PF_RawSend(int dest, void *buf, LONG l, int tag); LONG PF_RawRecv(int *src,void *buf,LONG thesize,int *tag); int PF_RawProbe(int *src, int *tag, int *bytesize); +int PF_RawIsend(int dest, const void *buf, int count, MPI_Datatype type, int tag, MPI_Request *request); +int PF_RawWaitAll(int count, MPI_Request *request, MPI_Status *status); +int PF_Discard(int *src, int *tag); /* Private functions */ @@ -85,13 +90,15 @@ static void PF_CatchErrorMessages(int *src, int *tag); static void PF_CatchErrorMessagesForAll(void); static int PF_ProbeWithCatchingErrorMessages(int *src); +static void PF_RaiseRuntimeError(void); +static void PF_BroadcastRuntimeError(void); +static void PF_PostEndSortBarrier(void); + /* Variables */ PARALLELVARS PF; -#ifdef MPI2 - WORD *PF_shared_buff; -#endif +static int PF_processing; /* Flag indicating that parallel processing of terms is in progress */ static LONG PF_goutterms; /* (master) Total out terms at PF_EndSort(), used in PF_Statistics(). */ static POSITION PF_exprsize; /* (master) The size of the expression at PF_EndSort(), used in PF_Processor(). */ @@ -277,7 +284,7 @@ typedef struct NoDe { static NODE *PF_root; /* root of tree of losers */ static WORD PF_loser; /* this is the last loser */ static WORD **PF_term; /* these point to the active terms */ -static WORD **PF_newcpos; /* new coeffs of merged terms */ +static WORD **PF_newcpos; /* new coefficients of merged terms */ static WORD *PF_newclen; /* length of new coefficients */ /* @@ -540,6 +547,9 @@ static WORD *PF_PutIn(int src) very first term from this src */ tag = PF_WaitRbuf(rbuf,a,&size); + if ( tag == PF_RUNTIME_ERROR_MSGTAG ) { + PF_ReceiveRuntimeError(); + } rbuf->full[a] += size; if ( tag == PF_ENDBUFFER_MSGTAG ) *rbuf->full[a]++ = 0; else if ( rbuf->numbufs > 1 ) { @@ -1005,13 +1015,12 @@ static WORD *PF_CurrentBracket; */ static WORD PF_GetTerm(WORD *term) { + assert(PF.me != MASTER); GETIDENTITY FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile; WORD i; WORD *next, *np, *last, *lp = 0, *nextstop, *tp=term; - /* Only on the slaves. 
*/ - AN.deferskipped = 0; if ( fi->POfill >= fi->POfull || fi->POfull == fi->PObuffer ) { ReceiveNew: @@ -1050,22 +1059,6 @@ static WORD PF_GetTerm(WORD *term) } size = fi->POstop - fi->PObuffer - 1; -#ifdef AbsolutelyExtra - PF_Receive(MASTER,PF_ANY_MSGTAG,&src,&tag); -#ifdef MPI2 - if ( tag == PF_TERM_MSGTAG ) { - PF_Unpack(&size, 1, PF_LONG); - if ( PF_Put_target(src) == 0 ) { - printf("PF_Put_target error ...\n"); - } - } - else { - PF_RecvWbuf(fi->PObuffer,&size,&src); - } -#else - PF_RecvWbuf(fi->PObuffer,&size,&src); -#endif -#endif tag=PF_RecvWbuf(fi->PObuffer,&size,&src); fi->POfill = fi->PObuffer; @@ -1549,15 +1542,6 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) int k, src, tag; FILEHANDLE *oldoutfile = AR.outfile; -#ifdef MPI2 - if ( PF_shared_buff == NULL ) { - if ( PF_SMWin_Init() == 0 ) { - MesPrint("PF_SMWin_Init error"); - exit(-1); - } - } -#endif - if ( ( (WORD *)(((UBYTE *)(AT.WorkPointer)) + AM.MaxTer ) ) > AT.WorkTop ) { MesWork(); } @@ -1571,6 +1555,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) if ( AC.mparallelflag != PARALLELFLAG ) return(0); + PF_processing = 1; + if ( PF.me == MASTER ) { /* #[ Master: @@ -1681,13 +1667,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) sb->fill[0] = sb->full[0] = sb->buff[0]; sb->active = next; -#ifdef MPI2 - if ( PF_Put_origin(next) == 0 ) { - printf("PF_Put_origin error...\n"); - } -#else PF_ISendSbuf(next,PF_TERM_MSGTAG); -#endif + /* Initialize the next bucket. */ termsinbucket = 0; PACK_LONG(sb->fill[0], AN.ninterms); @@ -1754,6 +1735,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) #] Clean up & EndSort: #[ Collect (stats,prepro,...): */ + PF_PostEndSortBarrier(); + DBGOUT_NINTERMS(1, ("PF.me=%d AN.ninterms=%d ENDSORT\n", (int)PF.me, (int)AN.ninterms)); PF_CatchErrorMessagesForAll(); e->numdummies = 0; @@ -1835,9 +1818,6 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) AN.ninterms = 0; PF_linterms = 0; PF.parallel = 1; -#ifdef MPI2 - AR.infile->POfull = AR.infile->POfill = AR.infile->PObuffer = PF_shared_buff; -#endif { FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile; fi->POfull = fi->POfill = fi->PObuffer; @@ -1870,8 +1850,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) PolyFunClean(BHEAD term); } if ( Generator(BHEAD term,0) ) { - MesPrint("[%d] PF_Processor: Error in Generator",PF.me); - LowerSortLevel(); return(-1); + LowerSortLevel(); + Terminate(-1); } PF_linterms += dd; AN.ninterms += dd; } @@ -1907,6 +1887,8 @@ int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression) #] Generator Loop & EndSort : #[ Collect (stats,prepro...) : */ + PF_PostEndSortBarrier(); + DBGOUT_NINTERMS(1, ("PF.me=%d AN.ninterms=%d PF_linterms=%d ENDSORT\n", (int)PF.me, (int)AN.ninterms, (int)PF_linterms)); PF_PrepareLongSinglePack(); cpu = TimeCPU(1); @@ -2047,15 +2029,34 @@ int PF_Init(int *argc, char ***argv) } /* #] PF_Init : - #[ PF_Terminate : + #[ PF_PreTerminate : +*/ + +/** + * Prepares for termination. + * Called by Terminate(). + * + * @param errorcode an error code. + */ +void PF_PreTerminate(int errorcode) +{ + if ( errorcode != 0 && PF_processing ) { + PF_processing = 0; + PF_RaiseRuntimeError(); + } +} + +/* + #] PF_PreTerminate : + #[ PF_Terminate : */ /** * Performs the finalization of ParFORM. * To be called by Terminate(). * - * @param error an error code. - * @return 0 if OK, nonzero on error. + * @param errorcode an error code. + * @return 0 if OK, nonzero on error. 
*/ int PF_Terminate(int errorcode) { @@ -2077,7 +2078,8 @@ LONG PF_GetSlaveTimes(void) { LONG slavetimes = 0; LONG t = PF.me == MASTER ? 0 : AM.SumTime + TimeCPU(1); - MPI_Reduce(&t, &slavetimes, 1, PF_LONG, MPI_SUM, MASTER, PF_COMM); + int ret = PF_Reduce(&t, &slavetimes, 1, PF_LONG, MPI_SUM, MASTER); + CHECK(ret == 0); return slavetimes; } @@ -3639,6 +3641,7 @@ int PF_InParallelProcessor(void) } } } + PF_processing = 1; if(PF.me == MASTER){ if ( PF.numtasks >= 3 ) { partodoexr = (WORD*)Malloc1(sizeof(WORD)*(PF.numtasks+1),"PF_InParallelProcessor"); @@ -3688,6 +3691,7 @@ int PF_InParallelProcessor(void) Expressions[i].partodo = 0; } } + PF_PostEndSortBarrier(); return(0); }/*if(PF.me == MASTER)*/ /*Slave:*/ @@ -3720,6 +3724,7 @@ int PF_InParallelProcessor(void) }/*if(tag == PF_DATA_MSGTAG)*/ }while(tag!=PF_EMPTY_MSGTAG); PF.exprtodo=-1; + PF_PostEndSortBarrier(); return(0); }/*PF_InParallelProcessor*/ @@ -3796,7 +3801,8 @@ static int PF_DoOneExpr(void)/*the processor*/ } position = AS.OldOnFile[i]; - if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) { + if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION + || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) { AR.GetFile = 2; fi = AR.hidefile; } else { @@ -3976,6 +3982,8 @@ static int PF_Slave2MasterIP(int src)/*both master and slave*/ /*partodoexr[src] is the number of expression.*/ e = Expressions +partodoexr[src]; /*Get metadata:*/ + i = PF_ANY_MSGTAG; + PF_CatchErrorMessages(&src, &i); if (PF_RawRecv(&src, &exprData,sizeof(bufIPstruct_t),&i)!= sizeof(bufIPstruct_t)) return(-1); /*Fill in the expression data:*/ @@ -4034,7 +4042,8 @@ static int PF_Master2SlaveIP(int dest, EXPRESSIONS e) } if(PF_RawSend(dest,&exprData,sizeof(bufIPstruct_t),PF_DATA_MSGTAG)) return(-1); - if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) + if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION + || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) fi = AR.hidefile; else fi = AR.infile; @@ -4068,18 +4077,21 @@ static int PF_ReadMaster(void)/*reads directly to its scratch!*/ LONG ll=0; int l; /*Get metadata:*/ + tag = PF_ANY_MSGTAG; + PF_CatchErrorMessages(&m, &tag); if (PF_RawRecv(&m, &exprData,sizeof(bufIPstruct_t),&tag)!= sizeof(bufIPstruct_t)) return(-1); if(tag == PF_EMPTY_MSGTAG)/*No data, no job*/ return(tag); - /*data expected, tag must be == PF_DATA_MSTAG!*/ + /*data expected, tag must be == PF_DATA_MSGTAG!*/ PF.exprtodo=exprData.i; e=Expressions + PF.exprtodo; /*Fill in the expression data:*/ /* memcpy(e, &(exprData.e), sizeof(struct ExPrEsSiOn)); */ - if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) + if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION + || e->status == UNHIDELEXPRESSION || e->status == UNHIDEGEXPRESSION ) fi = AR.hidefile; else fi = AR.infile; @@ -4340,7 +4352,7 @@ static Vector(UBYTE, logBuffer); /* (slaves) The buffer for AC.LogHandle. */ */ void PF_MLock(void) { - /* Only on slaves. */ + assert(PF.me != MASTER); if ( errorMessageLock++ > 0 ) return; VectorClear(stdoutBuffer); VectorClear(logBuffer); @@ -4356,7 +4368,7 @@ void PF_MLock(void) */ void PF_MUnlock(void) { - /* Only on slaves. 
*/ + assert(PF.me != MASTER); if ( --errorMessageLock > 0 ) return; if ( !VectorEmpty(stdoutBuffer) ) { PF_RawSend(MASTER, VectorPtr(stdoutBuffer), VectorSize(stdoutBuffer), PF_STDOUT_MSGTAG); @@ -4502,7 +4514,7 @@ void PF_FlushStdOutBuffer(void) */ static void PF_ReceiveErrorMessage(int src, int tag) { - /* Only on the master. */ + assert(PF.me == MASTER); int size; int ret = PF_RawProbe(&src, &tag, &size); CHECK(ret == 0); @@ -4533,22 +4545,26 @@ static void PF_ReceiveErrorMessage(int src, int tag) * Processes all incoming messages whose tag is PF_STDOUT_MSGTAG * or PF_LOG_MSGTAG. It ensures that the next PF_Receive(src, tag, ...) * will not receive the message with PF_STDOUT_MSGTAG or PF_LOG_MSGTAG. + * This function also handles PF_RUNTIME_ERROR_MSGTAG. * * @param[in,out] src the source process. * @param[in,out] tag the tag value. */ static void PF_CatchErrorMessages(int *src, int *tag) { - /* Only on the master. */ for (;;) { int asrc = *src; int atag = *tag; int ret = PF_RawProbe(&asrc, &atag, NULL); CHECK(ret == 0); if ( atag == PF_STDOUT_MSGTAG || atag == PF_LOG_MSGTAG ) { + assert(PF.me == MASTER); PF_ReceiveErrorMessage(asrc, atag); continue; } + if ( atag == PF_RUNTIME_ERROR_MSGTAG ) { + PF_ReceiveRuntimeError(); + } *src = asrc; *tag = atag; break; @@ -4566,7 +4582,7 @@ static void PF_CatchErrorMessages(int *src, int *tag) */ static void PF_CatchErrorMessagesForAll(void) { - /* Only on the master. */ + assert(PF.me == MASTER); int i; for ( i = 1; i < PF.numtasks; i++ ) { int src = i; @@ -4583,6 +4599,7 @@ static void PF_CatchErrorMessagesForAll(void) /** * Same as PF_Probe() except processing incoming messages with PF_STDOUT_MSGTAG * and PF_LOG_MSGTAG. + * This function also handles PF_RUNTIME_ERROR_MSGTAG. * * @param[in,out] src the source process. The output value is that of the actual found message. * @return the tag value of the next incoming message if found, @@ -4595,10 +4612,14 @@ static int PF_ProbeWithCatchingErrorMessages(int *src) int newsrc = *src; int tag = PF_Probe(&newsrc); if ( tag == PF_STDOUT_MSGTAG || tag == PF_LOG_MSGTAG ) { + assert(PF.me == MASTER); PF_ReceiveErrorMessage(newsrc, tag); continue; } - if ( tag > 0 ) *src = newsrc; + if ( tag == PF_RUNTIME_ERROR_MSGTAG ) { + PF_ReceiveRuntimeError(); + } + *src = newsrc; return tag; } } @@ -4622,4 +4643,200 @@ void PF_FreeErrorMessageBuffers(void) /* #] PF_FreeErrorMessageBuffers : #] Synchronised output : + #[ Handling runtime errors : + #[ PF_RaiseRuntimeError : +*/ + +/** + * Sends a runtime error message to the master if called on a slave, + * or broadcasts it to the slaves if called on the master. + * Called via PF_PreTerminate() when Terminate() is called with a nonzero error code. + */ +static void PF_RaiseRuntimeError(void) +{ + if ( PF.me == MASTER ) { + PF_BroadcastRuntimeError(); + } + else { + int ret, dummy; + ret = PF_RawSend(MASTER, &dummy, 0, PF_RUNTIME_ERROR_MSGTAG); + CHECK(ret == 0); + int src = MASTER; + int tag = PF_RUNTIME_ERROR_MSGTAG; + ret = PF_RawRecv(&src, &dummy, 0, &tag); + CHECK(ret == 0); + } +} + +/* + #] PF_RaiseRuntimeError : + #[ PF_BroadcastRuntimeError : +*/ + +/** + * Broadcasts a runtime error message from the master to all slaves + * and collects one reply from each slave. + */ +static void PF_BroadcastRuntimeError(void) +{ + assert(PF.me == MASTER); + + int ret, dummy; + MPI_Request requests[PF.numtasks - 1]; + + /* + * Notify all slaves of program termination by sending PF_RUNTIME_ERROR_MSGTAG. 
+ * This must be non-blocking to avoid deadlock if some slaves have already + * performed a blocking send. + */ + for ( int i = 1; i < PF.numtasks; i++ ) { + ret = PF_RawIsend(i, &dummy, 0, PF_BYTE, PF_RUNTIME_ERROR_MSGTAG, &requests[i - 1]); + CHECK(ret == 0); + } + + /* + * Receive exactly one PF_RUNTIME_SYNC_MSGTAG or PF_RUNTIME_ERROR_MSGTAG + * message from each slave. + */ + for ( int i = 1; i < PF.numtasks; i++ ) { +retry: + int asrc = PF_ANY_SOURCE; // blocking probe + int tag = PF_Probe(&asrc); + CHECK(tag >= 0); + assert(1 <= asrc && asrc < PF.numtasks); + switch ( tag ) { + case PF_STDOUT_MSGTAG: + case PF_LOG_MSGTAG: + PF_ReceiveErrorMessage(asrc, tag); + goto retry; + case PF_RUNTIME_ERROR_MSGTAG: + case PF_RUNTIME_SYNC_MSGTAG: + ret = PF_RawRecv(&asrc, &dummy, 0, &tag); + CHECK(ret == 0); + break; + default: + ret = PF_Discard(&asrc, &tag); + CHECK(ret == 0); + goto retry; + } + } + + ret = PF_RawWaitAll(PF.numtasks - 1, requests, MPI_STATUSES_IGNORE); + CHECK(ret == 0); +} + +/* + #] PF_BroadcastRuntimeError : + #[ PF_PostEndSortBarrier : +*/ + +/** + * Synchronization after EndSort(). + * Ensures that all processes have completed without runtime errors. + */ +static void PF_PostEndSortBarrier(void) +{ + assert(PF_processing); + PF_processing = 0; + + int ret, dummy; + + /* + * Each slave reports either PF_RUNTIME_SYNC_MSGTAG (completed without errors) + * or PF_RUNTIME_ERROR_MSGTAG (see PF_RaiseRuntimeError()) + * to the master. After collecting one message from each slave, the master sends + * PF_RUNTIME_SYNC_MSGTAG to all slaves on success, + * or PF_RUNTIME_ERROR_MSGTAG otherwise. + * + * This matches the behaviour of PF_RaiseRuntimeError() and + * PF_ReceiveRuntimeError(). In all cases, each slave sends exactly one + * PF_RUNTIME_SYNC_MSGTAG or PF_RUNTIME_ERROR_MSGTAG message to the master, + * and the master sends exactly one such message to each slave. + */ + if ( PF.me == MASTER ) { + int error = 0; + for ( int i = 1; i < PF.numtasks; i++ ) { +retry: + int asrc = PF_ANY_SOURCE; // blocking probe + int tag = PF_Probe(&asrc); + CHECK(tag >= 0); + assert(1 <= asrc && asrc < PF.numtasks); + switch ( tag ) { + case PF_STDOUT_MSGTAG: + case PF_LOG_MSGTAG: + PF_ReceiveErrorMessage(asrc, tag); + goto retry; + case PF_RUNTIME_ERROR_MSGTAG: + error = 1; + ret = PF_RawRecv(&asrc, &dummy, 0, &tag); + CHECK(ret == 0); + break; + case PF_RUNTIME_SYNC_MSGTAG: + ret = PF_RawRecv(&asrc, &dummy, 0, &tag); + CHECK(ret == 0); + break; + default: + MesPrint("!!!Unexpected MPI message src=%d tag=%d.", asrc, tag); + ret = PF_Discard(&asrc, &tag); + CHECK(ret == 0); + goto retry; + } + } + for ( int i = 1; i < PF.numtasks; i++ ) { + ret = PF_RawSend(i, &dummy, 0, error ? PF_RUNTIME_ERROR_MSGTAG : PF_RUNTIME_SYNC_MSGTAG); + CHECK(ret == 0); + } + } + else { + int tag; + ret = PF_RawSend(MASTER, &dummy, 0, PF_RUNTIME_SYNC_MSGTAG); + CHECK(ret == 0); + int src = MASTER; + ret = PF_RawRecv(&src, &dummy, 0, &tag); + CHECK(ret == 0); + switch ( tag ) { + case PF_RUNTIME_SYNC_MSGTAG: + break; + case PF_RUNTIME_ERROR_MSGTAG: + PF.notMyFault = 1; + Terminate(-1); + break; + default: + MesPrint("!!!Unexpected MPI message src=%d tag=%d.", src, tag); + break; + } + } +} + +/* + #] PF_PostEndSortBarrier : + #[ PF_ReceiveRuntimeError : +*/ + +/** + * Handles a runtime error message received from another process. + * It ultimately calls Terminate(-1). 
+ */ +void PF_ReceiveRuntimeError(void) +{ + PF_processing = 0; + PF.notMyFault = 1; + if ( PF.me == MASTER ) { + PF_BroadcastRuntimeError(); + } + else { + int ret, dummy; + int src = MASTER; + int tag = PF_RUNTIME_ERROR_MSGTAG; + ret = PF_RawRecv(&src, &dummy, 0, &tag); + CHECK(ret == 0); + ret = PF_RawSend(MASTER, &dummy, 0, PF_RUNTIME_ERROR_MSGTAG); + CHECK(ret == 0); + } + Terminate(-1); +} + +/* + #] PF_ReceiveRuntimeError : + #] Handling runtime errors : */ diff --git a/sources/parallel.h b/sources/parallel.h index dbd8b96f..f60564f8 100644 --- a/sources/parallel.h +++ b/sources/parallel.h @@ -36,11 +36,21 @@ /* #[ macros & definitions : */ + +/* + * Rank of the master process. + */ #define MASTER 0 -#define PF_RESET 0 -#define PF_TIME 1 +/* + * Selector constants for PF_RealTime(). + */ +#define PF_RESET 0 /* reset the timer */ +#define PF_TIME 1 /* get the elapsed time */ +/* + * Message tags for communication during parallel execution. + */ #define PF_TERM_MSGTAG 10 /* master -> slave: sending terms */ #define PF_ENDSORT_MSGTAG 11 /* master -> slave: no more terms to be distributed, slave -> master: after EndSort() */ #define PF_DOLLAR_MSGTAG 12 /* slave -> master: sending $-variables */ @@ -54,6 +64,8 @@ #define PF_OPT_MCTS_MSGTAG 70 /* master <-> slave: optimization */ #define PF_OPT_HORNER_MSGTAG 71 /* master <-> slave: optimization */ #define PF_OPT_COLLECT_MSGTAG 72 /* slave -> master: optimization */ +#define PF_RUNTIME_ERROR_MSGTAG 80 /* slave <-> master: runtime error */ +#define PF_RUNTIME_SYNC_MSGTAG 81 /* master <-> slave: sync after EndSort() */ #define PF_MISC_MSGTAG 100 /* @@ -179,6 +191,7 @@ typedef struct ParallelVars { int exprbufsize; /* buffer size in WORDs to be used for transferring expressions */ int exprtodo; /* >= 0: the expression to do in InParallel, -1: otherwise */ int log; /* flag for logging mode */ + int notMyFault; /* flag for termination due to another process's runtime error */ WORD numsbufs; /* number of cyclic send buffers (PF.sbuf->numbufs) */ WORD numrbufs; /* number of cyclic receive buffers (PF.rbufs[i]->numbufs, i=1,...numtasks-1) */ } PARALLELVARS; @@ -197,6 +210,7 @@ extern LONG PF_maxDollarChunkSize; /* mpi.c */ extern int PF_ISendSbuf(int,int); extern int PF_Bcast(void *buffer, int count); +extern int PF_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype type, MPI_Op op, int root); extern int PF_RawSend(int,void *,LONG,int); extern LONG PF_RawRecv(int *,void *,LONG,int *); @@ -237,6 +251,7 @@ extern int PF_EndSort(void); extern WORD PF_Deferred(WORD *,WORD); extern int PF_Processor(EXPRESSIONS,WORD,WORD); extern int PF_Init(int*,char ***); +extern void PF_PreTerminate(int errorcode); extern int PF_Terminate(int); extern LONG PF_GetSlaveTimes(void); extern LONG PF_BroadcastNumber(LONG); @@ -259,6 +274,7 @@ extern void PF_MLock(void); extern void PF_MUnlock(void); extern LONG PF_WriteFileToFile(int,UBYTE *,LONG); extern void PF_FlushStdOutBuffer(void); +extern void PF_ReceiveRuntimeError(void) NORETURN; /* #] Function prototypes : diff --git a/sources/setfile.c b/sources/setfile.c index 3768a973..33bd3dbe 100644 --- a/sources/setfile.c +++ b/sources/setfile.c @@ -553,7 +553,7 @@ int AllocSetups(void) sp = GetSetupPar((UBYTE *)"threadsortfilesynch"); AC.ThreadSortFileSynch = AM.gThreadSortFileSynch = AM.ggThreadSortFileSynch = sp->value; /* - The size for shared memory window for oneside MPI2 communications + The size for shared memory window for one-sided MPI2 communications (unused) */ sp = GetSetupPar((UBYTE 
*)"shmwinsize"); AM.shmWinSize = sp->value/sizeof(WORD); diff --git a/sources/startup.c b/sources/startup.c index 4f7adcde..ffa414aa 100644 --- a/sources/startup.c +++ b/sources/startup.c @@ -1921,7 +1921,11 @@ static int firstterminate = 1; void TerminateImpl(int errorcode, const char* file, int line, const char* function) { +#ifdef WITHMPI + if ( errorcode && firstterminate && !PF.notMyFault ) { +#else if ( errorcode && firstterminate ) { +#endif firstterminate = 0; MLOCK(ErrorMessageLock); @@ -2047,6 +2051,9 @@ backtrace_fallback: ; MUNLOCK(ErrorMessageLock); +#ifdef WITHMPI + PF_PreTerminate(errorcode); +#endif Crash(); } #ifdef TRAPSIGNALS
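
The new #include <assert.h> line in parallel.c, with its comment "must come after form3.h", relies on a property of standard C that is easy to miss: <assert.h> is the one header that may be included any number of times, and each inclusion redefines assert() according to whether NDEBUG is defined at that point. Assuming that form3.h, or a header it pulls in, decides NDEBUG from the build configuration (an assumption here, since that header is not part of this patch), including <assert.h> first would freeze the wrong definition of assert() for the new checks such as assert(PF.me != MASTER). A minimal standalone demonstration of the ordering effect, with invented function names:

#include <stdio.h>

/* First inclusion with NDEBUG defined: assert() expands to a no-op. */
#define NDEBUG
#include <assert.h>
static void compiled_out(void) { assert(0 && "never evaluated"); }

/* Re-inclusion with NDEBUG undefined: assert() is active again. */
#undef NDEBUG
#include <assert.h>
static void active(void) { assert(0 && "this call aborts"); }

int main(void)
{
    compiled_out();                           /* returns normally */
    printf("the first assert was disabled\n");
    active();                                 /* fails and aborts */
    return 0;
}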
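
The comment inside PF_BroadcastRuntimeError() carries the key reasoning: the master must notify the slaves without blocking, because a slave that is itself stuck in a blocking send towards the master would deadlock against a blocking MPI_Send on the master's side. The pattern (post non-blocking sends to every worker, drain exactly one reply per worker, and only then complete the send requests) can be reproduced in a few lines of plain MPI. This is an illustrative sketch, not FORM code: TAG_ERROR merely stands in for PF_RUNTIME_ERROR_MSGTAG, and PF_RawIsend()/PF_RawWaitAll() are assumed to be thin wrappers over MPI_Isend()/MPI_Waitall().

/* build: mpicc notify.c -o notify && mpirun -np 4 ./notify */
#include <mpi.h>
#include <stdio.h>

#define TAG_ERROR 80  /* stands in for PF_RUNTIME_ERROR_MSGTAG */

int main(int argc, char **argv)
{
    int rank, size, dummy = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if ( size < 2 ) { MPI_Finalize(); return 0; }

    if ( rank == 0 ) {
        /* Non-blocking notification: a blocking MPI_Send here could
           deadlock against a worker that is blocked sending to us. */
        MPI_Request reqs[size - 1];
        for ( int i = 1; i < size; i++ )
            MPI_Isend(&dummy, 0, MPI_BYTE, i, TAG_ERROR,
                      MPI_COMM_WORLD, &reqs[i - 1]);
        /* Drain exactly one acknowledgement per worker ... */
        for ( int i = 1; i < size; i++ ) {
            MPI_Status st;
            MPI_Recv(&dummy, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &st);
            printf("master: ack from %d (tag %d)\n", st.MPI_SOURCE, st.MPI_TAG);
        }
        /* ... and only then wait for our own sends to complete. */
        MPI_Waitall(size - 1, reqs, MPI_STATUSES_IGNORE);
    } else {
        /* Worker: receive the notification, then acknowledge it. */
        MPI_Recv(&dummy, 0, MPI_BYTE, 0, TAG_ERROR,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(&dummy, 0, MPI_BYTE, 0, TAG_ERROR, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}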
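
PF_PostEndSortBarrier() is, in effect, an error-aware barrier: each slave votes PF_RUNTIME_SYNC_MSGTAG or PF_RUNTIME_ERROR_MSGTAG, the master folds the votes into a single verdict, and every slave receives that verdict before continuing. On a communicator in a clean state the same semantics could be had from a single MPI_Allreduce with MPI_LOR; the point-to-point formulation is what lets the master skim off pending stdout/log messages while collecting the votes. A reduced sketch of the round trip, with invented names (error_barrier, TAG_ERROR, TAG_SYNC) and without the message-draining details:

#include <mpi.h>

#define TAG_ERROR 80  /* stands in for PF_RUNTIME_ERROR_MSGTAG */
#define TAG_SYNC  81  /* stands in for PF_RUNTIME_SYNC_MSGTAG */

/*
 * Returns nonzero on every rank if any rank entered with a nonzero
 * local_error: one vote per worker flows to rank 0, one verdict per
 * worker flows back. Equivalent in effect to
 * MPI_Allreduce(&local_error, &error, 1, MPI_INT, MPI_LOR, comm).
 */
int error_barrier(MPI_Comm comm, int local_error)
{
    int rank, size, dummy = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    if ( rank == 0 ) {
        int error = local_error;
        for ( int i = 1; i < size; i++ ) {  /* collect one vote per worker */
            MPI_Status st;
            MPI_Recv(&dummy, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &st);
            if ( st.MPI_TAG == TAG_ERROR ) error = 1;
        }
        for ( int i = 1; i < size; i++ )    /* announce the verdict */
            MPI_Send(&dummy, 0, MPI_BYTE, i, error ? TAG_ERROR : TAG_SYNC, comm);
        return error;
    } else {
        MPI_Status st;
        MPI_Send(&dummy, 0, MPI_BYTE, 0, local_error ? TAG_ERROR : TAG_SYNC, comm);
        MPI_Recv(&dummy, 0, MPI_BYTE, 0, MPI_ANY_TAG, comm, &st);
        return st.MPI_TAG == TAG_ERROR;
    }
}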