From e9a101967d0908212369843c68221e79fbe5503b Mon Sep 17 00:00:00 2001 From: Francois Farquet Date: Mon, 4 May 2026 17:22:16 +0200 Subject: [PATCH 1/5] Add GraalPy native-image PolyBench guest VM --- mx.graalpython/mx_graalpython_benchmark.py | 452 +++++++++++++++++++++ 1 file changed, 452 insertions(+) diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index 58a59c4934..871c2fee14 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -40,8 +40,10 @@ import mx import mx_benchmark import mx_polybench +import mx_sdk_benchmark from mx_benchmark import StdOutRule, java_vm_registry, OutputCapturingVm, GuestVm, VmBenchmarkSuite, AveragingBenchmarkMixin, bm_exec_context from mx_graalpython_bench_param import HARNESS_PATH +from mx_util import StageName # ---------------------------------------------------------------------------------------------------------------------- # @@ -400,6 +402,445 @@ def get_extra_polyglot_args(self): return ["--experimental-options", "--python.MaxNativeMemory=%s" % (2**34), *self._extra_polyglot_args] +class GraalPythonPolyBenchVm(GuestVm): + HOSTED_INSTANCE = "GraalPythonPolyBenchVm.hosted=" + STAGED_PROGRAM = "staged-benchmark.py" + JAVA_MODULE_ARGS_WITH_VALUE = ( + "--add-exports", + "--add-modules", + "--add-opens", + "--add-reads", + "--enable-native-access", + ) + LAUNCHER_ARG_PREFIXES = ( + "--python.", + "--engine.", + "--vm.", + "--inspect", + "--log.", + "--experimental-options", + ) + PYTHONPATH_LAUNCHER_ARG = "--python.PythonPath" + STAGING_INCOMPATIBLE_ARG_PREFIXES = ( + "--vm.", + "--inspect", + ) + + def __init__(self, config_name=CONFIGURATION_DEFAULT, host_vm=None, extra_launcher_args=None): + super().__init__(host_vm=host_vm) + self._config_name = self.canonical_config_name(config_name) + self._extra_launcher_args = list(extra_launcher_args or []) + self._guest_run_on_java_home_value = None + self._guest_run_on_java_home_set 
= False + + def name(self): + return VM_NAME_GRAALPYTHON + + def config_name(self): + return self._config_name + + @staticmethod + def canonical_config_name(config_name): + return config_name if config_name else CONFIGURATION_DEFAULT + + def hosting_registry(self): + return java_vm_registry + + def with_host_vm(self, host_vm): + hosted_name = ( + f"{self.HOSTED_INSTANCE}{self.name()}:{self.config_name()}@{host_vm.name()}:{host_vm.config_name()}" + ) + if not bm_exec_context().has(hosted_name): + hosted_instance = self.__class__( + self.config_name(), host_vm=host_vm, extra_launcher_args=self._extra_launcher_args + ) + bm_exec_context().add_context_value(hosted_name, mx_benchmark.ConstantContextValue(hosted_instance)) + hosted_instance = bm_exec_context().get(hosted_name) + if self._guest_run_on_java_home_set: + hosted_instance.set_run_on_java_home(self._guest_run_on_java_home_value) + return hosted_instance + + def set_run_on_java_home(self, value): + self._guest_run_on_java_home_value = value + self._guest_run_on_java_home_set = True + host = self.host_vm() + if host is not None and hasattr(host, "set_run_on_java_home"): + host.set_run_on_java_home(value) + + def run_on_java_home(self): + if self._guest_run_on_java_home_set: + return self._guest_run_on_java_home_value + host = self.host_vm() + if host is not None and hasattr(host, "run_on_java_home"): + return host.run_on_java_home() + return None + + def extract_vm_info(self, args=None): + self._host_vm_for_execution().extract_vm_info(args) + + def polybench_staging_run_args(self, run_args): + return [arg for arg in run_args if not self._is_staging_incompatible_arg(arg)] + + @classmethod + def _is_launcher_arg(cls, arg): + return arg.startswith(cls.LAUNCHER_ARG_PREFIXES) + + @classmethod + def _is_staging_incompatible_arg(cls, arg): + return arg.startswith(cls.STAGING_INCOMPATIBLE_ARG_PREFIXES) + + def _launcher_args(self): + run_args = bm_exec_context().get("bm_suite_args") + launcher_args = 
list(self._extra_launcher_args) + launcher_args += [arg for arg in self.bmSuite.runArgs(run_args) if self._is_launcher_arg(arg)] + return launcher_args + + def _host_vm_for_execution(self): + host = self.host_vm() + if host is None: + mx.abort("GraalPy PolyBench guest VM requires a host VM; none was provided.") + if not isinstance(host, mx_sdk_benchmark.NativeImageVM): + mx.abort(f"GraalPy PolyBench guest VM requires NativeImageVM host; got {host.__class__.__name__}.") + if host.pgo_sampler_only or host.pgo_use_perf: + mx.abort("GraalPy PolyBench native launcher staging supports instrumentation PGO, but not PGO sampling.") + return host + + def prepare_stages(self, bm_suite, bm_suite_args): + return self._host_vm_for_execution().prepare_stages(bm_suite, bm_suite_args) + + def run(self, cwd, args): + host = self._host_vm_for_execution() + self.extract_vm_info(args) + args = host.post_process_command_line_args(args) + host.bmSuite = self.bmSuite + host.stages_info = self.bmSuite.stages_info + out = mx.TeeOutputCapture(mx.OutputCapture()) + host.config = mx_sdk_benchmark.NativeImageBenchmarkConfig(host, self.bmSuite, args) + host.stages = mx_sdk_benchmark.StagesContext( + host, out, out, False, os.path.abspath(cwd if cwd else os.getcwd()) + ) + host.config.output_dir.mkdir(parents=True, exist_ok=True) + host.config.config_dir.mkdir(parents=True, exist_ok=True) + host.config.image_build_reports_directory.mkdir(parents=True, exist_ok=True) + code = self._run_single_stage(host) + output = out.underlying.data + return code, output, host.dimensions(cwd, args, code, output) + + def _run_single_stage(self, host): + stage = host.stages_info.current_stage.stage_name + if stage == StageName.INSTRUMENT_IMAGE: + return self._run_stage_instrument_image(host) + if stage == StageName.INSTRUMENT_RUN: + return self._run_stage_instrument_run(host) + if stage == StageName.IMAGE: + return self._run_stage_image(host) + if stage == StageName.RUN: + return self._run_stage_run(host) + 
mx.abort(f"Unknown stage {stage}") + + def _run_stage_instrument_image(self, host): + with host.get_stage_runner() as runner: + exit_code = self._build_standalone(host, runner, instrumented=True) + if exit_code == 0: + self._move_image_build_stats_file(host, StageName.INSTRUMENT_IMAGE) + return exit_code + + def _run_stage_image(self, host): + with host.get_stage_runner() as runner: + exit_code = self._build_standalone(host, runner, instrumented=False) + if exit_code == 0: + self._move_image_build_stats_file(host, StageName.IMAGE) + return exit_code + + def _run_stage_instrument_run(self, host): + launcher = self._launcher(host, instrumented=True) + profile_vm_args = [ + f"--vm.XX:ProfilesDumpFile={host.config.profile_path}", + f"--vm.XX:ProfilesLCOVFile={host.config.output_dir / 'graalpy.info'}", + ] + with host.get_stage_runner() as runner: + exit_code = self._stage_harness(host, runner) + if exit_code != 0: + return exit_code + with self._graal_python_vm_args(*profile_vm_args): + with self._graalpy_launcher_env(): + return runner.execute_command( + host, [str(launcher)] + self._profile_launcher_args(host) + [str(self._staged_program(host))] + ) + + def _run_stage_run(self, host): + with host.get_stage_runner() as runner: + exit_code = self._stage_harness(host, runner) + if exit_code != 0: + return exit_code + with self._graalpy_launcher_env(): + return runner.execute_command( + host, + [str(self._launcher(host, instrumented=False))] + + self._run_launcher_args(host) + + [str(self._staged_program(host))], + ) + + def _stage_harness(self, host, runner): + # Native stable PolyBench shares image stages across benchmarks, but the staged harness is benchmark-specific. 
+ staging_args = [ + "--stage-to-language", + "Python", + "--stage-to-file", + str(self._staged_program(host)), + "--log-staged-program", + "True", + ] + self._staged_program(host).parent.mkdir(parents=True, exist_ok=True) + image_run_args = self._polybench_staging_run_args(host, runner.config.image_run_args) + java_args = ( + runner.config.classpath_arguments + + runner.config.modulepath_arguments + + runner.config.system_properties + + self._java_staging_vm_args(runner.config.image_vm_args or []) + + runner.config.executable + + image_run_args + + staging_args + ) + return self._execute_setup_command(host, runner, host.generate_java_command(java_args)) + + @staticmethod + def _java_staging_vm_args(image_vm_args): + return [arg for arg in image_vm_args if not arg.startswith("-H")] + + @staticmethod + def _execute_setup_command(host, runner, command): + mx.log("Running: ") + mx.log(" ".join(command)) + write_output = False + runner.exit_code = mx.run( + command, + out=runner.stdout(write_output), + err=runner.stderr(write_output), + cwd=host.stages.cwd, + nonZeroIsFatal=False, + env=host.config.bm_suite.get_stage_env(), + ) + return runner.exit_code + + def _build_standalone(self, host, runner, instrumented): + if instrumented: + pgo_profile = "" + elif host.pgo_instrumentation: + pgo_profile = host.config.bm_suite.get_pgo_profile_for_image_build(host.config.profile_path) + else: + pgo_profile = None + if pgo_profile is not None: + pgo_profile = str(pgo_profile) + + env_pgo_profile = pgo_profile if pgo_profile is not None else "" + from mx_graalpython import ( + BUILD_NATIVE_IMAGE_WITH_ASSERTIONS, + GITHUB_CI, + _graalpy_launcher, + bytecode_dsl_build_args, + run_mx, + set_env, + ) + + enterprise = host.graalvm_edition == "ee" + standalone_dist = "GRAALPY_NATIVE_STANDALONE" + mx_args = ["-p", SUITE.dir, "--env", "native-ee" if enterprise else "native-ce"] + mx_args.append("--extra-image-builder-argument=-Ob" if GITHUB_CI else "--extra-image-builder-argument=-g") + 
if pgo_profile is not None: + if not enterprise: + mx.abort("PGO is only supported on enterprise NI") + if pgo_profile: + mx_args.append(f"--extra-image-builder-argument=--pgo={pgo_profile}") + mx_args.append("--extra-image-builder-argument=-H:+UnlockExperimentalVMOptions") + mx_args.append("--extra-image-builder-argument=-H:+PGOPrintProfileQuality") + else: + mx_args.append("--extra-image-builder-argument=--pgo-instrument") + mx_args.append("--extra-image-builder-argument=-H:+UnlockExperimentalVMOptions") + mx_args.append("--extra-image-builder-argument=-H:+ProfilingLCOV") + elif BUILD_NATIVE_IMAGE_WITH_ASSERTIONS: + mx_args.append("--extra-image-builder-argument=-ea") + mx_args.append("--extra-image-builder-argument=-J-ea") + mx_args += bytecode_dsl_build_args(prefix="--extra-image-builder-argument=") + for arg in self._mx_extra_image_builder_args(self._image_build_args(host)): + mx_args.append(f"--extra-image-builder-argument={arg}") + + with set_env(GRAALPY_PGO_PROFILE=env_pgo_profile): + write_output = host.stages_info.should_produce_datapoints() + exit_code = run_mx( + mx_args + ["build", "-f", "--only", f"libpythonvm,{standalone_dist}"], + nonZeroIsFatal=False, + out=runner.stdout(write_output), + err=runner.stderr(write_output), + ) + runner.exit_code = exit_code + if exit_code != 0: + return exit_code + + standalone_home = self._platform_mxbuild_output(standalone_dist) + libpythonvm_home = self._platform_mxbuild_output("libpythonvm") + debug_info = libpythonvm_home / "libpythonvm.so.debug" + if debug_info.exists(): + shutil.copy(debug_info, standalone_home / "lib") + destination = self._standalone_home(host, instrumented=instrumented) + if destination.exists(): + shutil.rmtree(destination) + shutil.copytree(standalone_home, destination, symlinks=True) + launcher = destination / "bin" / _graalpy_launcher() + if not launcher.is_file(): + mx.abort(f"GraalPy launcher was not built at expected path: {launcher}") + self._copy_native_image_artifact(host, 
destination, instrumented=instrumented) + return exit_code + + def _copy_native_image_artifact(self, host, standalone_home, instrumented): + libpythonvm = standalone_home / "lib" / mx.add_lib_suffix(mx.add_lib_prefix("pythonvm")) + if not libpythonvm.is_file(): + mx.abort(f"GraalPy native image artifact was not built at expected path: {libpythonvm}") + destination = host.config.instrumented_image_path if instrumented else host.config.image_path + if destination.exists(): + destination.unlink() + shutil.copy2(libpythonvm, destination) + + def _image_build_args(self, host): + stage_name = host.stages_info.current_stage.stage_name + args = [ + f"-H:BuildOutputJSONFile={host.config.get_build_output_json_file(stage_name)}", + "-H:+CollectImageBuildStatistics", + ] + args += host.config.system_properties + args += self._standalone_image_vm_args(host.config.image_vm_args or []) + if host.gc: + args += ["--gc=" + host.gc, "-H:+SpawnIsolates"] + if host.native_architecture: + args.append("-march=native") + if host.use_string_inlining: + args.append("-H:+UseStringInlining") + if host.future_defaults_all: + args.append("--future-defaults=all") + if host.optimization_level: + args.append("-" + host.optimization_level) + if host.is_quickbuild: + args.append("-Ob") + args += host.config.extra_image_build_arguments + return mx_sdk_benchmark.svm_experimental_options(args) + + @classmethod + def _mx_extra_image_builder_args(cls, args): + # GraalPy standalone builds receive these through mx --extra-image-builder-argument, whose suite-side + # filtering keeps option-like args and drops bare operands. Use the equivalent --option=value form for + # Java module options that NativeImageVM keeps as space-separated pairs. 
+ normalized_args = [] + i = 0 + while i < len(args): + arg = args[i] + if arg in cls.JAVA_MODULE_ARGS_WITH_VALUE: + if i + 1 >= len(args): + mx.abort(f"Missing value for native-image argument {arg}") + normalized_args.append(f"{arg}={args[i + 1]}") + i += 2 + else: + normalized_args.append(arg) + i += 1 + return normalized_args + + @classmethod + def _standalone_image_vm_args(cls, args): + # These module-access arguments come from the Java PolyBench launcher command. They are valid for building + # that launcher image, but not for the separate GraalPy standalone/libpythonvm image build. + standalone_args = [] + i = 0 + while i < len(args): + arg = args[i] + if arg in cls.JAVA_MODULE_ARGS_WITH_VALUE: + i += 2 + elif any(arg.startswith(f"{prefix}=") for prefix in cls.JAVA_MODULE_ARGS_WITH_VALUE): + i += 1 + else: + standalone_args.append(arg) + i += 1 + return standalone_args + + def _move_image_build_stats_file(self, host, stage): + stats_file = self._platform_mxbuild_output("libpythonvm") / "reports" / "image_build_statistics.json" + destination = host.config.get_image_build_stats_file(stage) + if not stats_file.is_file(): + # Stable PolyBench may revisit an image stage after the shared GraalPy image was already built. + if destination.is_file(): + mx.log(f"Reusing image build statistics file from earlier stage execution: {destination}") + return + mx.abort(f"Could not find image build statistics file at expected path: {stats_file}") + if destination.exists(): + destination.unlink() + mx.move(stats_file, destination) + + def _extra_native_run_args(self, _host, prefix): + vm_args = self.bmSuite.vmArgs(bm_exec_context().get("bm_suite_args")) + return mx_sdk_benchmark.parse_prefixed_args(prefix, vm_args) + + @staticmethod + def _platform_mxbuild_output(name): + # The native standalone may be removed from mx's active dependency graph after build; its artifacts still + # live in the platform-dependent mxbuild output directory. 
+ return Path(SUITE.dir) / "mxbuild" / f"{mx.get_os()}-{mx.get_arch()}" / name + + def _polybench_staging_run_args(self, host, image_run_args): + extra_run_args = self._extra_native_run_args(host, "-Dnative-image.benchmark.extra-run-arg=") + if extra_run_args and image_run_args[-len(extra_run_args):] == extra_run_args: + return image_run_args[:-len(extra_run_args)] + return image_run_args + + def _profile_launcher_args(self, host): + extra_profile_args = self._extra_native_run_args(host, "-Dnative-image.benchmark.extra-profile-run-arg=") + if not extra_profile_args: + extra_profile_args = self._extra_native_run_args(host, "-Dnative-image.benchmark.extra-run-arg=") + return self._launcher_args() + host.config.extra_jvm_args + extra_profile_args + + def _run_launcher_args(self, host): + return self._launcher_args() + host.config.extra_jvm_args + self._extra_native_run_args( + host, "-Dnative-image.benchmark.extra-run-arg=" + ) + + def _launcher_pythonpath(self): + prefix = f"{self.PYTHONPATH_LAUNCHER_ARG}=" + for arg in reversed(self._launcher_args()): + if arg.startswith(prefix): + return arg[len(prefix):] + return None + + @contextmanager + def _graalpy_launcher_env(self): + pythonpath = self._launcher_pythonpath() + if pythonpath is None: + yield + return + from mx_graalpython import set_env + + # GraalPy launcher reads PYTHONPATH after generic launcher option parsing, so keep it aligned + # with the explicit PolyBench launcher option while running the staged native standalone. 
+ with set_env(PYTHONPATH=pythonpath): + yield + + @contextmanager + def _graal_python_vm_args(self, *new_args): + from mx_graalpython import set_env + + existing = os.environ.get("GRAAL_PYTHON_VM_ARGS") + value = "\v".join(([existing] if existing else []) + list(new_args)) + with set_env(GRAAL_PYTHON_VM_ARGS=value): + yield + + def _staged_program(self, host): + return host.config.output_dir / "graalpython" / self.STAGED_PROGRAM + + def _standalone_home(self, host, instrumented): + return host.config.output_dir / ("graalpy-instrumented" if instrumented else "graalpy") + + def _launcher(self, host, instrumented): + from mx_graalpython import _graalpy_launcher + + return self._standalone_home(host, instrumented=instrumented) / "bin" / _graalpy_launcher() + + # ---------------------------------------------------------------------------------------------------------------------- # # the benchmark definition @@ -1088,6 +1529,17 @@ def add_graalpy_vm(name, *extra_polyglot_args): add_graalpy_vm(CONFIGURATION_NATIVE_MULTI, '--experimental-options', '-multi-context') add_graalpy_vm(CONFIGURATION_NATIVE_INTERPRETER_MULTI, '--experimental-options', '-multi-context', '--engine.Compilation=false') + # PolyBench selects VMs through the Java VM registry. Register the GraalPy entry here so it can be selected as a + # guest of native-image via `--jvm=native-image --guest --jvm=graalpython`. 
+ java_vm_registry.add_vm(GraalPythonPolyBenchVm(CONFIGURATION_DEFAULT), suite, 10) + java_vm_registry.add_vm( + GraalPythonPolyBenchVm( + CONFIGURATION_INTERPRETER, extra_launcher_args=['--experimental-options', '--engine.Compilation=false'] + ), + suite, + 10, + ) + # all of the graalpy vms, but with one compiler thread for name, extra_polyglot_args in graalpy_vms[:]: add_graalpy_vm(f'{name}-1-compiler-threads', *['--engine.CompilerThreads=1', *extra_polyglot_args]) From 8f199e38e9be847847961c0c6e858e0efdcd4e82 Mon Sep 17 00:00:00 2001 From: Francois Farquet Date: Fri, 22 May 2026 10:53:30 +0200 Subject: [PATCH 2/5] Use GraalPy product profiles for product configs --- mx.graalpython/mx_graalpython.py | 24 +++++++++++++++++----- mx.graalpython/mx_graalpython_benchmark.py | 15 ++++++++++---- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/mx.graalpython/mx_graalpython.py b/mx.graalpython/mx_graalpython.py index 8392bc9d86..3dad06811b 100644 --- a/mx.graalpython/mx_graalpython.py +++ b/mx.graalpython/mx_graalpython.py @@ -106,6 +106,8 @@ def get_boolean_env(name, default=False): os.environ["GRAALPY_VERSION"] = GRAAL_VERSION MAIN_BRANCH = 'master' +GRAALPY_PGO_PROFILE_ARTIFACT_GROUP = "graalpy" +GRAALPY_PGO_PROFILE_ARTIFACT_PREFIX = "pgo-" GRAALPYTHON_MAIN_CLASS = "com.oracle.graal.python.shell.GraalPythonMain" @@ -344,6 +346,7 @@ def libpythonvm_build_args(): build_args += ['-H:-ProtectionKeys'] profile = None + require_profile = get_boolean_env("GRAALPY_REQUIRE_PGO_PROFILE") if ( "GRAALPY_PGO_PROFILE" not in os.environ and mx.suite('graalpython-enterprise', fatalIfMissing=False) @@ -353,6 +356,7 @@ def libpythonvm_build_args(): vc = SUITE.vc commit = str(vc.tip(SUITE.dir)).strip() branch = str(vc.active_branch(SUITE.dir, abortOnError=False) or 'master').strip() + artifact_name = f"{GRAALPY_PGO_PROFILE_ARTIFACT_GROUP}/{GRAALPY_PGO_PROFILE_ARTIFACT_PREFIX}{commit}" if script := os.environ.get("ARTIFACT_DOWNLOAD_SCRIPT"): # This is always available in 
the GraalPy CI @@ -361,12 +365,12 @@ def libpythonvm_build_args(): [ sys.executable, script, - f"graalpy/pgo-{commit}", + artifact_name, profile, ], nonZeroIsFatal=False, ) - else: + elif not require_profile: # Locally, we try to get a reasonable profile get_profile = mx.command_function('python-get-latest-profile', fatalIfMissing=False) if get_profile: @@ -377,7 +381,17 @@ def libpythonvm_build_args(): except BaseException: pass - if CI and (not profile or not os.path.isfile(profile)): + profile_missing = not profile or not os.path.isfile(profile) + if require_profile and profile_missing: + mx.abort("\n".join([ + "GRAALPY_REQUIRE_PGO_PROFILE is set, but no CI generated GraalPy PGO profile was found.", + f"GraalPy commit: {commit}", + f"Expected artifact: {artifact_name}", + "The product profile configuration does not fall back to benchmark-local PGO.", + "Run the GraalPy CI job python-pgo-profile-post_merge-linux-amd64-jdk-latest for this commit, then retry the product-ee benchmark.", + ])) + + if CI and profile_missing: mx.log("No profile in CI job") # When running on a release branch or attempting to merge into # a release branch, make sure we can use a PGO profile, and @@ -487,8 +501,8 @@ def graalpy_native_pgo_build_and_test(args=None): sys.executable, script, iprof_gz_path, - f"pgo-{commit}", - "graalpy", + f"{GRAALPY_PGO_PROFILE_ARTIFACT_PREFIX}{commit}", + GRAALPY_PGO_PROFILE_ARTIFACT_GROUP, "--lifecycle", "cache", "--artifact-repo-key", diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index 871c2fee14..d5b38cf2ac 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -624,6 +624,8 @@ def _execute_setup_command(host, runner, command): return runner.exit_code def _build_standalone(self, host, runner, instrumented): + enterprise = host.graalvm_edition == "ee" + use_product_profile = getattr(host, "product_profile", False) and enterprise if instrumented: 
pgo_profile = "" elif host.pgo_instrumentation: @@ -633,7 +635,13 @@ def _build_standalone(self, host, runner, instrumented): if pgo_profile is not None: pgo_profile = str(pgo_profile) - env_pgo_profile = pgo_profile if pgo_profile is not None else "" + # GRAALPY_PGO_PROFILE is a GraalPy build switch: + # unset: let libpythonvm_build_args auto-select the CI generated PGO profile; + # "": suppress auto-selection and, for benchmark-local PGO, build an instrumented image instead; + # path: build the final image with that explicit profile. + # GRAALPY_REQUIRE_PGO_PROFILE is separate: product-ee sets it to fail if the CI profile cannot be downloaded. + env_pgo_profile = pgo_profile if pgo_profile is not None else (None if use_product_profile else "") + env_require_pgo_profile = "true" if use_product_profile else None from mx_graalpython import ( BUILD_NATIVE_IMAGE_WITH_ASSERTIONS, GITHUB_CI, @@ -643,7 +651,6 @@ def _build_standalone(self, host, runner, instrumented): set_env, ) - enterprise = host.graalvm_edition == "ee" standalone_dist = "GRAALPY_NATIVE_STANDALONE" mx_args = ["-p", SUITE.dir, "--env", "native-ee" if enterprise else "native-ce"] mx_args.append("--extra-image-builder-argument=-Ob" if GITHUB_CI else "--extra-image-builder-argument=-g") @@ -658,14 +665,14 @@ def _build_standalone(self, host, runner, instrumented): mx_args.append("--extra-image-builder-argument=--pgo-instrument") mx_args.append("--extra-image-builder-argument=-H:+UnlockExperimentalVMOptions") mx_args.append("--extra-image-builder-argument=-H:+ProfilingLCOV") - elif BUILD_NATIVE_IMAGE_WITH_ASSERTIONS: + elif BUILD_NATIVE_IMAGE_WITH_ASSERTIONS and not use_product_profile: mx_args.append("--extra-image-builder-argument=-ea") mx_args.append("--extra-image-builder-argument=-J-ea") mx_args += bytecode_dsl_build_args(prefix="--extra-image-builder-argument=") for arg in self._mx_extra_image_builder_args(self._image_build_args(host)): mx_args.append(f"--extra-image-builder-argument={arg}") - with 
set_env(GRAALPY_PGO_PROFILE=env_pgo_profile): + with set_env(GRAALPY_PGO_PROFILE=env_pgo_profile, GRAALPY_REQUIRE_PGO_PROFILE=env_require_pgo_profile): write_output = host.stages_info.should_produce_datapoints() exit_code = run_mx( mx_args + ["build", "-f", "--only", f"libpythonvm,{standalone_dist}"], From bffc2a5a611fe210078a5de56d08a4403162ff8d Mon Sep 17 00:00:00 2001 From: Francois Farquet Date: Fri, 22 May 2026 10:53:53 +0200 Subject: [PATCH 3/5] Copy staged GraalPy benchmark resources --- mx.graalpython/mx_graalpython_benchmark.py | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index d5b38cf2ac..57d6643897 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -405,6 +405,11 @@ def get_extra_polyglot_args(self): class GraalPythonPolyBenchVm(GuestVm): HOSTED_INSTANCE = "GraalPythonPolyBenchVm.hosted=" STAGED_PROGRAM = "staged-benchmark.py" + # Staging rewrites a benchmark into STAGED_PROGRAM. Benchmarks listed here also need data files copied from + # GRAALPYTHON_POLYBENCH_BENCHMARKS to keep their original sibling-file lookups working. + STAGED_SIBLING_RESOURCES = { + "warmup/pyflate-fast.py": ("graalpython-pyflate-benchmark-resource.tar.gz",), + } JAVA_MODULE_ARGS_WITH_VALUE = ( "--add-exports", "--add-modules", @@ -602,7 +607,25 @@ def _stage_harness(self, host, runner): + image_run_args + staging_args ) - return self._execute_setup_command(host, runner, host.generate_java_command(java_args)) + exit_code = self._execute_setup_command(host, runner, host.generate_java_command(java_args)) + if exit_code == 0: + self._copy_staged_sibling_resources(host) + return exit_code + + def _copy_staged_sibling_resources(self, host): + # A few staged Python benchmarks load data files relative to __file__. 
Keep this targeted so the staging + # directory only grows for benchmarks that are known to need sibling resources. + benchmark = bm_exec_context().get("benchmark") + resources = self.STAGED_SIBLING_RESOURCES.get(benchmark, ()) + if not resources: + return + source_dir = Path(mx.dependency("GRAALPYTHON_POLYBENCH_BENCHMARKS").get_output()) / Path(benchmark).parent + destination_dir = self._staged_program(host).parent + for resource in resources: + source = source_dir / resource + if not source.is_file(): + mx.abort(f"Required staged benchmark resource does not exist: {source}") + shutil.copy2(source, destination_dir / resource) @staticmethod def _java_staging_vm_args(image_vm_args): From b8693436a8d9365096e5cb69d8dfcfb9b4adc6dc Mon Sep 17 00:00:00 2001 From: Francois Farquet Date: Tue, 26 May 2026 11:09:08 +0200 Subject: [PATCH 4/5] Use target import for GraalPy product profiles --- mx.graalpython/mx_graalpython.py | 162 +++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 9 deletions(-) diff --git a/mx.graalpython/mx_graalpython.py b/mx.graalpython/mx_graalpython.py index 3dad06811b..df791dbe7a 100644 --- a/mx.graalpython/mx_graalpython.py +++ b/mx.graalpython/mx_graalpython.py @@ -24,6 +24,7 @@ from __future__ import print_function +import ast import contextlib import datetime import glob @@ -292,6 +293,98 @@ def _is_overridden_native_image_arg(prefix): return any(arg.startswith(prefix) for arg in extras) +def _normalize_branch_name(branch): + if not branch: + return "" + branch = branch.strip() + for prefix in ("refs/heads/", "origin/"): + if branch.startswith(prefix): + return branch[len(prefix):] + return branch + + +def _graalpython_target_import_commit(target_branch): + """Return the GraalPy commit imported by the target branch's VM suite. + + Product PGO profiles are produced by the GraalPy post-merge profile job and + keyed by GraalPy commit. 
In linked cross-repo PR gates, CI can auto-bump the + VM suite's GraalPy import to the GraalPython PR merge commit, which normally + has no post-merge product profile. For feature branches, product-ee therefore + uses the GraalPy import from the target VM branch instead, bypassing that + CI-generated import bump. + + Prefer origin/<target_branch>. Some CI workspaces only have the PR merge commit; in + that case HEAD^1 is the target-side parent and still contains the target + branch import. Local workspaces can fall back to the checked-out VM suite. + The suite file is a literal dict, so parse it instead of executing suite.py. + """ + vm_suite = mx.suite("vm", fatalIfMissing=False) + if not vm_suite or not vm_suite.vc: + mx.warn("Cannot resolve target-branch GraalPy import: mx suite 'vm' is not available") + return None + + target_branch = _normalize_branch_name(target_branch) + suite_path = "vm/mx.vm/suite.py" + + # Some CI checkouts do not keep origin/<target_branch>. In PR merge checkouts, the + # first parent is the target branch side of the merge and therefore still + # gives us the target branch's imported GraalPy commit without fetching. 
+ suite_text = None + refs = [f"origin/{target_branch}"] + parents = vm_suite.vc.git_command( + vm_suite.vc_dir, + ["rev-list", "--parents", "-n", "1", "HEAD"], + abortOnError=False, + ) + if parents and len(parents.split()) > 2: + refs.append("HEAD^1") + for ref in refs: + if ref: + suite_text = vm_suite.vc.git_command( + vm_suite.vc_dir, + ["show", f"{ref}:{suite_path}"], + abortOnError=False, + ) + if suite_text: + break + + if not suite_text: + suite_file = os.path.join(vm_suite.dir, f"mx.{vm_suite.name}", "suite.py") + try: + with open(suite_file, encoding="utf-8") as f: + suite_text = f.read() + except OSError: + mx.warn(f"Cannot read {suite_path} to resolve the GraalPy profile source") + return None + + try: + suite_node = next( + node.value + for node in ast.parse(suite_text, filename=suite_path).body + if isinstance(node, ast.Assign) + and any(isinstance(target, ast.Name) and target.id == "suite" for target in node.targets) + ) + suite = ast.literal_eval(suite_node) + imports = suite.get("imports", {}).get("suites", []) + matches = [] + for imported_suite in imports: + version = imported_suite.get("version") + if ( + imported_suite.get("name") == "graalpython" + and isinstance(version, str) + and re.match(r"^[0-9a-f]{40}$", version) + ): + matches.append(version) + except (StopIteration, SyntaxError, ValueError, TypeError) as e: + mx.warn(f"Cannot evaluate {suite_path} to resolve the GraalPy profile source: {e}") + return None + + if len(matches) == 1: + return matches[0] + mx.warn(f"Expected exactly one graalpython import in target vm suite, found {len(matches)}") + return None + + def github_ci_build_args(): # Determine memory and parallelism for GitHub CI builds # Use 90% of available memory up to 14GB, but at least 8GB @@ -354,9 +447,49 @@ def libpythonvm_build_args(): and not _is_overridden_native_image_arg("--pgo") ): vc = SUITE.vc - commit = str(vc.tip(SUITE.dir)).strip() - branch = str(vc.active_branch(SUITE.dir, abortOnError=False) or 
'master').strip() - artifact_name = f"{GRAALPY_PGO_PROFILE_ARTIFACT_GROUP}/{GRAALPY_PGO_PROFILE_ARTIFACT_PREFIX}{commit}" + source_commit = str(vc.tip(SUITE.dir)).strip() + source_branch = _normalize_branch_name( + os.environ.get("FROM_BRANCH") or vc.active_branch(SUITE.dir, abortOnError=False) or 'master' + ) + target_branch = _normalize_branch_name(os.environ.get("TO_BRANCH")) + profile_source_commit = source_commit + profile_source_branch = source_branch + profile_source_reason = "current GraalPy commit" + override = os.environ.get("GRAALPY_PGO_PROFILE_SOURCE_COMMIT") + if override: + profile_source_commit = override.strip() + if not re.match(r"^[0-9a-f]{40}$", profile_source_commit): + mx.abort(f"GRAALPY_PGO_PROFILE_SOURCE_COMMIT must be a 40-character lowercase git commit, got: {override}") + profile_source_reason = "GRAALPY_PGO_PROFILE_SOURCE_COMMIT" + elif ( + target_branch + and source_branch + and source_branch != target_branch + and not (source_branch == MAIN_BRANCH or source_branch.startswith(("release/", "cpu/"))) + ): + # Feature branch merge commits can import a fresh GraalPy revision that + # has no released-product profile yet. Use the target branch's imported + # GraalPy commit so product-ee consumes the profile that already belongs + # to the baseline product launcher. 
+ target_import_commit = _graalpython_target_import_commit(target_branch) + if target_import_commit: + profile_source_commit = target_import_commit + profile_source_branch = target_branch + profile_source_reason = f"target branch import from {target_branch}" + elif require_profile: + mx.abort("\n".join([ + "Could not resolve the GraalPy PGO profile source commit from the target branch import.", + f"GraalPy source commit: {source_commit}", + f"Source branch: {source_branch or ''}", + f"Target branch: {target_branch or ''}", + "Expected to read vm/mx.vm/suite.py from the target branch and find the graalpython import version.", + "Set GRAALPY_PGO_PROFILE_SOURCE_COMMIT=<40-character-commit> to override this lookup for debugging.", + ])) + else: + mx.warn("Falling back to the current GraalPy commit for PGO profile lookup because the target branch import could not be resolved") + artifact_name = f"{GRAALPY_PGO_PROFILE_ARTIFACT_GROUP}/{GRAALPY_PGO_PROFILE_ARTIFACT_PREFIX}{profile_source_commit}" + mx.log(f"GraalPy source commit for PGO profile lookup: {source_commit}") + mx.log(f"GraalPy PGO profile source commit: {profile_source_commit} ({profile_source_reason})") if script := os.environ.get("ARTIFACT_DOWNLOAD_SCRIPT"): # This is always available in the GraalPy CI @@ -374,7 +507,12 @@ def libpythonvm_build_args(): # Locally, we try to get a reasonable profile get_profile = mx.command_function('python-get-latest-profile', fatalIfMissing=False) if get_profile: - for b in [branch, "master"]: + seen_branches = set() + for b in [profile_source_branch, source_branch, MAIN_BRANCH]: + b = _normalize_branch_name(b) + if not b or b in seen_branches: + continue + seen_branches.add(b) if not profile: try: profile = get_profile(["--branch", b]) @@ -385,19 +523,25 @@ def libpythonvm_build_args(): if require_profile and profile_missing: mx.abort("\n".join([ "GRAALPY_REQUIRE_PGO_PROFILE is set, but no CI generated GraalPy PGO profile was found.", - f"GraalPy commit: {commit}", + 
f"GraalPy source commit: {source_commit}", + f"Source branch: {source_branch or ''}", + f"Target branch: {target_branch or ''}", + f"PGO profile source commit: {profile_source_commit} ({profile_source_reason})", f"Expected artifact: {artifact_name}", "The product profile configuration does not fall back to benchmark-local PGO.", - "Run the GraalPy CI job python-pgo-profile-post_merge-linux-amd64-jdk-latest for this commit, then retry the product-ee benchmark.", + "Run the GraalPy CI job python-pgo-profile-post_merge-linux-amd64-jdk-latest for the PGO profile source commit, then retry the product-ee benchmark.", ])) if CI and profile_missing: mx.log("No profile in CI job") # When running on a release branch or attempting to merge into - # a release branch, make sure we can use a PGO profile, and - # when running in the CI on a bench runner, ensure a PGO profile + # a release/CPU branch, make sure we can use a PGO profile, and + # when running in the CI on a bench runner, ensure a PGO profile. 
if ( - any(b.startswith("release/") for b in [branch, os.environ.get("TO_BRANCH", "")]) + any( + _normalize_branch_name(b).startswith(("release/", "cpu/")) + for b in [source_branch, target_branch] + ) or ("bench" in os.environ.get('BUILD_NAME', '')) ): mx.warn("PGO profile must exist for benchmarking and release, creating one now...") From 5f115a29f56ace118935e175118d0cbeb3922389 Mon Sep 17 00:00:00 2001 From: Francois Farquet Date: Tue, 26 May 2026 15:33:41 +0200 Subject: [PATCH 5/5] Fix PolyBench harness parsing on CPython (GR-75115) --- .../com.oracle.graal.python.benchmarks/python/harness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graalpython/com.oracle.graal.python.benchmarks/python/harness.py b/graalpython/com.oracle.graal.python.benchmarks/python/harness.py index bb6cfd75b8..b951cd548c 100644 --- a/graalpython/com.oracle.graal.python.benchmarks/python/harness.py +++ b/graalpython/com.oracle.graal.python.benchmarks/python/harness.py @@ -527,7 +527,7 @@ def run_benchmark(args): if GRAALPYTHON: print(f"### using bytecode DSL interpreter: {__graalpython__.is_bytecode_dsl_interpreter}") - print(f"### using forced uncached interpreter: {getattr(__graalpython__, "is_forced_uncached_interpreter", False)}") + print(f"### using forced uncached interpreter: {getattr(__graalpython__, 'is_forced_uncached_interpreter', False)}") BenchRunner(bench_file, bench_args=bench_args, iterations=iterations, warmup=warmup, warmup_runs=warmup_runs, startup=startup, live_results=live_results, self_measurement=self_measurement).run()