From f05e9f61e2aae7842a165374f1b175414809988b Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 17:13:06 -0700 Subject: [PATCH 01/12] Prototype of package.site.toml files --- Lib/site.py | 168 ++++++++++++++++++++- Lib/test/test_site.py | 337 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 500 insertions(+), 5 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index 30015b3f26b4b3..dd5602188e3731 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -79,6 +79,10 @@ import stat import errno +lazy import importlib +lazy import tomllib +lazy import traceback + # Prefixes for site-packages; add additional prefixes like /usr/local here PREFIXES = [sys.prefix, sys.exec_prefix] # Enable per user site-packages directory @@ -163,6 +167,130 @@ def _init_pathinfo(): return d +class _SiteTOMLData: + """Parsed data from a single .site.toml file.""" + __slots__ = ('filename', 'sitedir', 'metadata', 'dirs', 'init') + + def __init__(self, filename, sitedir, metadata, dirs, init): + self.filename = filename # str: basename e.g. "foo.site.toml" + self.sitedir = sitedir # str: absolute path to site-packages dir + self.metadata = metadata # dict: raw [metadata] table (may be empty) + self.dirs = dirs # list[str]: validated [paths].dirs (may be empty) + self.init = init # list[str]: validated [entrypoints].init (may be empty) + + +def _read_site_toml(sitedir, name): + """Parse a .site.toml file and return a _SiteTOMLData, or None on error.""" + fullname = os.path.join(sitedir, name) + + # Check that name.site.toml file exists and is not hidden. 
+ try: + st = os.lstat(fullname) + except OSError: + return None + if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or + (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): + _trace(f"Skipping hidden .site.toml file: {fullname!r}") + return None + + _trace(f"Processing .site.toml file: {fullname!r}") + + try: + with io.open_code(fullname) as f: + raw = f.read() + except OSError: + return None + + try: + data = tomllib.loads(raw.decode("utf-8")) + except Exception as exc: + _trace(f"Error parsing {fullname!r}: {exc}") + return None + + metadata = data.get("metadata", []) + + # Validate [paths].dirs + dirs = [] + if (paths_table := data.get("paths")) is not None: + if (raw_dirs := paths_table.get("dirs")) is not None: + if (isinstance(raw_dirs, list) and + all(isinstance(d, str) for d in raw_dirs)): + dirs = raw_dirs + else: + _trace(f"Invalid 'dirs' in {fullname!r}: " + f"expected list of strings") + + # Validate [entrypoints].init + init = [] + if (ep_table := data.get("entrypoints")) is not None: + if (raw_init := ep_table.get("init")) is not None: + if (isinstance(raw_init, list) and + all(isinstance(e, str) for e in raw_init)): + init = raw_init + else: + _trace(f"Invalid 'init' in {fullname!r}: " + f"expected list of strings") + + return _SiteTOMLData(name, sitedir, metadata, dirs, init) + + +def _process_site_toml_paths(toml_data_list, known_paths): + """Process [paths] from all parsed .site.toml data.""" + for td in toml_data_list: + for dir_entry in td.dirs: + try: + # The {sitedir} placeholder expands to the site directory where the pkg.site.toml + # file was found. When placed at the beginning of the path, this is the explicit + # way to name directories relative to sitedir. + dir_entry = dir_entry.replace("{sitedir}", td.sitedir) + # For backward compatibility with .pth files, relative directories are implicitly + # anchored to sitedir. 
+ if not os.path.isabs(dir_entry): + dir_entry = os.path.join(td.sitedir, dir_entry) + dir, dircase = makepath(dir_entry) + if dircase not in known_paths and os.path.exists(dir): + sys.path.append(dir) + known_paths.add(dircase) + except Exception as exc: + fullname = os.path.join(td.sitedir, td.filename) + print(f"Error processing path {dir_entry!r} " + f"from {fullname}:", + file=sys.stderr) + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' ' + line, file=sys.stderr) + + +def _process_site_toml_entrypoints(toml_data_list): + """Execute [entrypoints] from all parsed .site.toml data.""" + for td in toml_data_list: + for entry in td.init: + try: + # Parse "package.module:callable" format. When the optional :callable is not given, + # the entire string will end up in the last item, so swap things around. + modname, colon, funcname = entry.rpartition(':') + if colon != ':': + modname = funcname + funcname = None + + _trace(f"Executing entrypoint: {entry!r} " + f"from {td.filename!r}") + + mod = importlib.import_module(modname) + + # Call the callable if given. 
+ if funcname is not None: + func = getattr(mod, funcname) + func() + except Exception as exc: + fullname = os.path.join(td.sitedir, td.filename) + print(f"Error in entrypoint {entry!r} from {fullname}:", + file=sys.stderr) + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' ' + line, file=sys.stderr) + + def addpackage(sitedir, name, known_paths): """Process a .pth file within the site-packages directory: For each line in the file, either combine it with sitedir to a path @@ -230,8 +358,8 @@ def addpackage(sitedir, name, known_paths): def addsitedir(sitedir, known_paths=None): - """Add 'sitedir' argument to sys.path if missing and handle .pth files in - 'sitedir'""" + """Add 'sitedir' argument to sys.path if missing and handle .site.toml + and .pth files in 'sitedir'""" _trace(f"Adding directory: {sitedir!r}") if known_paths is None: known_paths = _init_pathinfo() @@ -246,10 +374,40 @@ def addsitedir(sitedir, known_paths=None): names = os.listdir(sitedir) except OSError: return - names = [name for name in names - if name.endswith(".pth") and not name.startswith(".")] - for name in sorted(names): + + # Phase 1: Discover and parse .site.toml files, sorted alphabetically. 
+ toml_names = sorted( + name for name in names + if name.endswith(".site.toml") and not name.startswith(".") + ) + + toml_data_list = [] + superseded_pth = set() + + for name in toml_names: + # "foo.site.toml" supersedes "foo.pth" + base = name.removesuffix(".site.toml") + pth_name = base + ".pth" + if pth_name in names: + superseded_pth.add(pth_name) + td = _read_site_toml(sitedir, name) + if td is not None: + toml_data_list.append(td) + + # Phase 2: Process all .site.toml data (paths first, then entrypoints) + if toml_data_list: + _process_site_toml_paths(toml_data_list, known_paths) + _process_site_toml_entrypoints(toml_data_list) + + # Phase 3: Process remaining .pth files + pth_names = sorted( + name for name in names + if name.endswith(".pth") and not name.startswith(".") + and name not in superseded_pth + ) + for name in pth_names: addpackage(sitedir, name, known_paths) + if reset: known_paths = None return known_paths diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index e7dc5e2611c2de..3ecab6f6f30174 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -908,5 +908,342 @@ def test_both_args(self): self.assertEqual(output, excepted_output) +class SiteTomlTests(unittest.TestCase): + """Tests for .site.toml file processing.""" + + def setUp(self): + self.sys_path = sys.path[:] + self.tmpdir = self.sitedir = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.tmpdir) + + def tearDown(self): + sys.path[:] = self.sys_path + + def _make_site_toml(self, content, name='testpkg'): + """Write a .site.toml and return its name.""" + basename = name + '.site.toml' + filepath = os.path.join(self.tmpdir, basename) + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + return basename + + def _make_pth(self, content, name='testpkg'): + """Write a .pth file and return its name.""" + basename = name + '.pth' + filepath = os.path.join(self.tmpdir, basename) + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + 
return basename + + # --- _read_site_toml tests --- + + def test_read_site_toml_basic(self): + # Valid .site.toml with all sections. + subdir = os.path.join(self.tmpdir, 'subdir') + os.mkdir(subdir) + name = self._make_site_toml("""\ +[metadata] +schema_version = 1 +package = "testpkg" + +[paths] +dirs = ["subdir"] + +[entrypoints] +init = ["os"] +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.filename, name) + self.assertEqual(tomldata.sitedir, self.sitedir) + self.assertEqual(tomldata.metadata, { + 'schema_version': 1, 'package': 'testpkg'}) + self.assertEqual(tomldata.dirs, ['subdir']) + self.assertEqual(tomldata.init, ['os']) + + def test_read_site_toml_parse_error(self): + # Invalid pkg.site.toml content is skipped. + name = self._make_site_toml("not valid [[[toml") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNone(tomldata) + + def test_read_site_toml_invalid_dirs_type(self): + # dirs must be a list of strings. + name = self._make_site_toml("""\ +[paths] +dirs = "not_a_list" +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertEqual(tomldata.dirs, []) + + def test_read_site_toml_invalid_init_type(self): + # init must be a list of strings + name = self._make_site_toml("""\ +[paths] +dirs = ["subdir"] + +[entrypoints] +init = 42 +""") + subdir = os.path.join(self.tmpdir, 'subdir') + os.mkdir(subdir) + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.dirs, ['subdir']) + self.assertEqual(tomldata.init, []) + + def test_read_site_toml_empty_file(self): + # Empty .site.toml is a no-op. + name = self._make_site_toml("") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertEqual(tomldata.metadata, []) + self.assertEqual(tomldata.dirs, []) + self.assertEqual(tomldata.init, []) + + def test_read_site_toml_unknown_tables_ignored(self): + # Unknown tables should not cause errors. 
+ name = self._make_site_toml("""\ +[metadata] +schema_version = 1 + +[unknown_section] +key = "value" + +[entrypoints] +init = ["os"] +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.metadata, {'schema_version': 1}) + self.assertEqual(tomldata.init, ['os']) + + def test_read_site_toml_nonexistent(self): + # Nonexistent file returns None. + tomldata = site._read_site_toml(self.tmpdir, 'nonexistent.site.toml') + self.assertIsNone(tomldata) + + # --- Path processing tests --- + + def test_process_paths_relative(self): + # Relative paths are joined with sitedir. + subdir = os.path.join(self.sitedir, 'mylib') + os.mkdir(subdir) + name = self._make_site_toml("""\ +[paths] +dirs = ["mylib"] +""") + known_paths = set() + tomldata = site._read_site_toml(self.sitedir, name) + site._process_site_toml_paths([tomldata], known_paths) + self.assertIn(subdir, sys.path) + + def test_process_paths_absolute(self): + # Absolute paths are preserved as-is. + absdir = os.path.join(self.sitedir, 'abslib') + os.mkdir(absdir) + name = self._make_site_toml( + f'[paths]\ndirs = ["{absdir}"]\n') + known_paths = set() + tomldata = site._read_site_toml(self.sitedir, name) + site._process_site_toml_paths([tomldata], known_paths) + self.assertIn(absdir, sys.path) + + def test_process_paths_sitedir_placeholder(self): + # The {sitedir} placeholder expands to the site-packages dir. + subdir = os.path.join(self.sitedir, 'extra') + os.mkdir(subdir) + name = self._make_site_toml("""\ +[paths] +dirs = ["{sitedir}/extra"] +""") + known_paths = set() + tomldata = site._read_site_toml(self.sitedir, name) + site._process_site_toml_paths([tomldata], known_paths) + self.assertIn(os.path.join(self.tmpdir, 'extra'), sys.path) + + def test_process_paths_deduplication(self): + # Same path from two different files are only added once. 
+ subdir = os.path.join(self.tmpdir, 'shared') + os.mkdir(subdir) + tomldata1 = site._SiteTOMLData( + 'a.site.toml', self.tmpdir, [], ['shared'], []) + tomldata2 = site._SiteTOMLData( + 'b.site.toml', self.tmpdir, [], ['shared'], []) + known_paths = set() + site._process_site_toml_paths([tomldata1, tomldata2], known_paths) + self.assertEqual(sys.path.count(subdir), 1) + + def test_process_paths_nonexistent(self): + # Nonexistent directories are not added. + tomldata = site._SiteTOMLData( + 'test.site.toml', self.tmpdir, [], ['nosuchdir'], []) + known_paths = set() + sys_path = sys.path[:] + site._process_site_toml_paths([tomldata], known_paths) + self.assertEqual(sys.path, sys_path) + + # --- Entrypoint tests --- + + def test_process_entrypoints_import_only(self): + # Import-only entrypoint (no callable). + mod_dir = os.path.join(self.sitedir, 'epmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write("""\ +called = False +def startup(): + global called + called = True +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'epmod', None) + tomldata = site._SiteTOMLData( + 'test.site.toml', self.sitedir, [], [self.sitedir], ['epmod']) + site._process_site_toml_entrypoints([tomldata]) + import epmod + self.assertFalse(epmod.called) + + def test_process_entrypoints_with_callable(self): + # Entrypoint with callable is invoked. + # + # Create a module with a function that sets a flag. 
+ mod_dir = os.path.join(self.sitedir, 'epmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write("""\ +called = False +def startup(): + global called + called = True +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'epmod', None) + tomldata = site._SiteTOMLData( + 'test.site.toml', self.sitedir, [], [self.sitedir], ['epmod:startup']) + site._process_site_toml_entrypoints([tomldata]) + import epmod + self.assertTrue(epmod.called) + + def test_process_entrypoints_import_error(self): + # Import error prints traceback but continues. + tomldata = site._SiteTOMLData( + 'test.site.toml', self.sitedir, [], self.sitedir, + ['nosuchmodule_xyz', 'os']) + with captured_stderr() as err: + site._process_site_toml_entrypoints([tomldata]) + self.assertIn('nosuchmodule_xyz', err.getvalue()) + self.assertIn('Traceback', err.getvalue()) + # 'os' should still have been processed (no exception for it) + + def test_process_entrypoints_callable_error(self): + # Callable that raises prints traceback but continues. + mod_dir = os.path.join(self.sitedir, 'badmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write(""" +def fail(): + raise RuntimeError("boom") +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'badmod') + tomldata = site._SiteTOMLData( + 'test.site.toml', self.tmpdir, None, None, + ['badmod:fail', 'os']) + with captured_stderr() as err: + site._process_site_toml_entrypoints([tomldata]) + self.assertIn('RuntimeError', err.getvalue()) + self.assertIn('boom', err.getvalue()) + + # --- addsitedir integration tests --- + + def test_addsitedir_toml_supersedes_pth(self): + # When both foo.site.toml and foo.pth exist, only .toml is used. + # + # Start by creating two directories which will be the paths that both the foo.site.toml and + # foo.site.pth files will try to add respectively. 
+ toml_dir = os.path.join(self.sitedir, 'tomlpath') + pth_dir = os.path.join(self.sitedir, 'pthpath') + os.mkdir(toml_dir) + os.mkdir(pth_dir) + + self._make_site_toml("""\ +[paths] +dirs = ["tomlpath"] +""", name='foo') + self._make_pth("pthpath\n", name='foo') + + site.addsitedir(self.sitedir, set()) + self.assertIn(toml_dir, sys.path) + self.assertNotIn(pth_dir, sys.path) + + def test_addsitedir_toml_and_pth_coexist(self): + # Different basenames: both .toml and .pth are processed. + toml_dir = os.path.join(self.sitedir, 'tomlpath') + pth_dir = os.path.join(self.sitedir, 'pthpath') + os.mkdir(toml_dir) + os.mkdir(pth_dir) + + self._make_site_toml("""\ +[paths] +dirs = ["tomlpath"] +""", name='foo') + self._make_pth("pthpath\n", name='bar') + + site.addsitedir(self.sitedir, set()) + self.assertIn(toml_dir, sys.path) + self.assertIn(pth_dir, sys.path) + + def test_addsitedir_paths_before_entrypoints(self): + # Paths from .site.toml are added before entrypoints execution. + # + # Create a module in a subdir that will only be importable if the path + # is added first. + mod_dir = os.path.join(self.sitedir, 'initlib') + os.mkdir(mod_dir) + mod_file = os.path.join(mod_dir, 'initmod.py') + with open(mod_file, 'w') as f: + f.write('loaded = True\n') + + self._make_site_toml("""\ +[paths] +dirs = ["initlib"] + +[entrypoints] +init = ["initmod"] +""") + + self.addCleanup(sys.modules.pop, 'initmod') + site.addsitedir(self.sitedir, set()) + import initmod + self.assertTrue(initmod.loaded) + + def test_addsitedir_alphabetical_order(self): + # Multiple .site.toml files are processed alphabetically. 
+ dir_a = os.path.join(self.tmpdir, 'aaa') + dir_b = os.path.join(self.tmpdir, 'bbb') + os.mkdir(dir_a) + os.mkdir(dir_b) + + # Create zzz.site.toml first, then aaa.site.toml + self._make_site_toml("""\ +[paths] +dirs = ['bbb'] +""", name='zzz') + self._make_site_toml("""\ +[paths] +dirs = ['aaa'] +""", name='aaa') + + site.addsitedir(self.sitedir, set()) + # Both should be in sys.path; aaa before bbb since aaa.site.toml is + # processed before zzz.site.toml + idx_a = sys.path.index(dir_a) + idx_b = sys.path.index(dir_b) + self.assertLess(idx_a, idx_b) + + if __name__ == "__main__": unittest.main() From 594f347f43dc4421ed1c04ded99b1bb0441f7200 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 17:29:12 -0700 Subject: [PATCH 02/12] Validate [metadata].schema_version It's okay to be missing, but if it's given it must be an expected version (i.e. currently, 1). --- Lib/site.py | 9 ++++++++- Lib/test/test_site.py | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index dd5602188e3731..326f04aecbc79b 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -207,7 +207,14 @@ def _read_site_toml(sitedir, name): _trace(f"Error parsing {fullname!r}: {exc}") return None - metadata = data.get("metadata", []) + metadata = data.get("metadata", {}) + # Validate the TOML schema version. PEP XXX defines schema_version == 1. Both the [metadata] + # section and [metadata].schema_version are optional, but if missing, future compatibility + # cannot be guaranteed. 
+ if (schema_version := metadata.get("schema_version")) is not None: + if schema_version != 1: + _trace(f"Unsupported [metadata].schema_version: {schema_version}") + return None # Validate [paths].dirs dirs = [] diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 3ecab6f6f30174..8c7c095113830f 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -961,6 +961,27 @@ def test_read_site_toml_basic(self): self.assertEqual(tomldata.dirs, ['subdir']) self.assertEqual(tomldata.init, ['os']) + def test_missing_schema_version_is_okay(self): + # It's okay for the schema_version to be missing, or even the [metadata] section entirely + # (which is tested below). A missing schema_version just means that no future compatibility + # can be guaranteed. + name = self._make_site_toml("""\ +[metadata] +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNotNone(tomldata) + self.assertEqual(tomldata.metadata, {}) + + def test_unexpected_schema_version_is_not_okay(self): + # If [metadata].schema_version exists, but isn't a supported number, then the entire TOML + # file is invalid and ignored. + name = self._make_site_toml("""\ +[metadata] +schema_version = 801 +""") + tomldata = site._read_site_toml(self.sitedir, name) + self.assertIsNone(tomldata) + def test_read_site_toml_parse_error(self): # Invalid pkg.site.toml content is skipped. name = self._make_site_toml("not valid [[[toml") @@ -996,7 +1017,7 @@ def test_read_site_toml_empty_file(self): # Empty .site.toml is a no-op. 
name = self._make_site_toml("") tomldata = site._read_site_toml(self.sitedir, name) - self.assertEqual(tomldata.metadata, []) + self.assertEqual(tomldata.metadata, {}) self.assertEqual(tomldata.dirs, []) self.assertEqual(tomldata.init, []) From ca6e4edca34bdd708b253d82d3aba1aeb1713079 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 18:57:01 -0700 Subject: [PATCH 03/12] Added PEP 829 draft --- pep-0829.rst | 546 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 546 insertions(+) create mode 100644 pep-0829.rst diff --git a/pep-0829.rst b/pep-0829.rst new file mode 100644 index 00000000000000..5d14ae971c1a5d --- /dev/null +++ b/pep-0829.rst @@ -0,0 +1,546 @@ +PEP: 829 +Title: Structured Startup Configuration via .site.toml Files +Author: Barry Warsaw +Status: Draft +Type: Standards Track +Topic: Packaging +Created: 31-Mar-2026 +Python-Version: 3.15 +Post-History: + + +Abstract +======== + +This PEP proposes a TOML-based configuration file format to replace +the ``.pth`` file mechanism used by ``site.py`` during interpreter +startup. The new format, using files named ``<package>.site.toml``, +provides structured configuration for extending ``sys.path`` and +executing package initialization code, replacing the current ad-hoc +``.pth`` format that conflates path configuration with arbitrary code +execution. + + +Motivation +========== + +Python's ``.pth`` files (processed by ``Lib/site.py`` at startup) +support two functions: + +#. **Extending** ``sys.path`` -- Lines in this file (excluding + comments and lines that start with ``import``) name directories to + be appended to ``sys.path``. Relative paths are implicitly + anchored at the site-packages directory. + +#. **Executing code** -- lines starting with ``import`` (or + ``import\\t``) are executed immediately by passing the source string + to ``exec()``. + +This design has several problems: + +#. Code execution is a side effect of the implementation. 
Lines that + start with ``import`` can be extended by separating multiple + statements with a semicolon. As long as all the code to be + executed appears on the same line, it all gets executed when the + ``.pth`` file is processed. + +#. ``.pth`` files are essentially unstructured, leading to contents + which are difficult to reason about or verify, and often even + difficult to read. The format mixes two potentially useful features with + different security constraints, and offers no way to separate out these + concerns. + +#. The lack of ``.pth`` file structure also means there's no way to + express metadata, no future-proofing of the format, and no defined + execution or processing order of the contents. + +#. Using ``exec()`` on the file contents during interpreter startup is + a broad attack surface. + +#. There is no explicit concept of an entry point, which is an + established pattern in Python packaging. Packages that require + code execution and initialization at startup abuse ``import`` lines + rather than explicitly declaring entry points. + + +Specification +============= + +This PEP defines a new file format called ``<package>.site.toml`` +which addresses all of the stated problems with ``.pth`` files. Like +``.pth`` files, ``.site.toml`` files are processed at Python +startup time by the ``site.py`` module, which means that the ``-S`` +option, which disables ``site.py``, also disables +``.site.toml`` files. + +The standard library ``tomllib`` package is used to read and process +``.site.toml`` files. + +Any parsing errors cause the entire ``.site.toml`` file to be +ignored and not processed (but it still supersedes any parallel +``.pth`` file). Any errors that occur when importing entry +point modules or calling entry point functions are reported but do not +abort the Python executable. 
+ + +File Naming and Discovery +------------------------- + +* Packages may optionally install a single + ``<package>.site.toml`` file, mirroring the current ``<package>.pth`` file + convention. + +* The naming convention is ``<package>.site.toml``. The ``.site`` + marker distinguishes these from other TOML files that might exist in + site-packages and describes the file's purpose (processed by + ``site.py``). + +* ``.site.toml`` files live in the same site-packages directories + where ``.pth`` files are found today. + +* The discovery rules for ``.site.toml`` files are the same as for + ``.pth`` files today. File names that start with a single ``.`` + (e.g. ``.site.toml``) and files with OS-level hidden attributes (``UF_HIDDEN``, + ``FILE_ATTRIBUTE_HIDDEN``) are excluded. + +* The processing order is alphabetical by filename, matching ``.pth`` + behavior. + +* If both ``<package>.site.toml`` and ``<package>.pth`` exist in the same + directory, only the ``<package>.site.toml`` file is processed. + + +Processing Model +---------------- + +All ``.site.toml`` files in a given site-packages directory +are read and parsed into an intermediate data structure before any +processing (i.e. path extension or entry point execution) occurs. +This two-phase approach (read then process) enables: + +* A future **policy mechanism** that can inspect and modify the collected data + before execution (e.g., disabling entry points for specific packages or + enforcing path restrictions). **NOTE**: Such a policy framework is + explicitly out-of-scope for this PEP. + +* Future finer-grained control over the processing of path extensions + and entry point execution. For example, one could imagine special + ``-X`` options, environment variables, or other types of + configuration that allow path extensions only, or can explicitly + manage allow or deny lists of entry points. **NOTE**: Such + configuration options are explicitly out-of-scope for this PEP. + +* Better error reporting. 
All parsing, format, and data type errors + can be surfaced before any processing occurs. + +Within each site-packages directory, the processing order is: + +#. Discover and parse all ``.site.toml`` files (alphabetically). +#. Process all ``[paths]`` entries from the parsed data. +#. Execute all ``[entrypoints]`` entries from the parsed data. +#. Process any remaining ``.pth`` files that are not superseded by a + ``.site.toml`` file. + +This ensures that path extensions are in place before any entry point code +runs, and that ``.site.toml``-declared paths are available to both +entry point imports and ``.pth`` import lines. + + +<package>.site.toml file schema +------------------------------- + +A ``.site.toml`` file is defined to have three sections, all of which +are optional: + +.. code-block:: toml + + [metadata] + schema_version = 1 + package = "foo" + version = "2.3.4" + author = "A Person <aperson@example.com>" + + [paths] + dirs = ["../lib", "/opt/mylib", "{sitedir}/extra"] + + [entrypoints] + init = ["foo.startup:initialize", "foo.plugins"] + + +``[metadata]`` +'''''''''''''' + +This section contains package and/or file metadata. There are no required +keys, and no semantics are assigned to any keys in this section *except* for +the optional ``schema_version`` key (see below). Any additional keys are +permitted and preserved. + +Defined keys: + +``schema_version`` (integer, recommended) + The TOML file schema version number. Must be the integer ``1`` for this + specification. If present, Python guarantees forward-compatible handling: + future versions will either process the file according to the declared + schema or skip it with a clear diagnostic. It is an error if + ``schema_version`` is present but has an unsupported value; in that case + the entire file is skipped. If ``schema_version`` is omitted, the file is processed + on a best-effort basis with no forward-compatibility guarantees. + +Recommended keys: + +``package`` (string) + The package name. 
+ +``version`` (string) + The package version. + +``author`` (string) + The package author. Should be + ``email.utils.parseaddr()``-compatible, e.g., + ``"A person <aperson@example.com>"`` or + ``"aperson@example.com"``. + + +``[paths]`` +''''''''''' + +Defined keys: + +``dirs`` + A list of strings specifying directories to append to ``sys.path``. + +Path entries use a hybrid resolution scheme: + +* **Relative paths** are anchored at the site-packages directory (sitedir), + matching current ``.pth`` behavior. For example, ``../lib`` in a file under + ``/usr/lib/python3.15/site-packages/`` resolves to + ``/usr/lib/python3.15/lib``. + +* **Absolute paths** are preserved as-is. For example, ``/opt/mylib`` is used + exactly as written. + +* **Placeholder variables** are supported using ``{name}`` syntax. The + placeholder ``{sitedir}`` expands to the site-packages directory where the + ``.site.toml`` file was found. Thus ``{sitedir}/relpath`` and + ``relpath`` resolve to the same path; the former is the explicit + spelling of the latter. + +While only ``{sitedir}`` is defined in this PEP, additional +placeholder variables (e.g., ``{prefix}``, ``{exec_prefix}``, +``{userbase}``) may be defined in future PEPs. + +If ``dirs`` is not a list of strings, a warning is emitted (visible +with ``-v``) and the section is skipped. + +Directories that do not exist on the filesystem are silently skipped, +matching ``.pth`` behavior. Duplicate paths are +de-duplicated, also matching ``.pth`` behavior. + + +``[entrypoints]`` +''''''''''''''''' + +``init`` -- a list of strings specifying entry point references to +execute at startup. Each item uses the standard Python entry point +syntax: ``package.module:callable``. + +* The ``:callable`` portion is optional. If omitted (e.g., + ``package.module``), the module is imported via + ``importlib.import_module()`` but nothing is called. This covers the common + ``.pth`` pattern of ``import foo`` for side effects. 
+ +* Callables are invoked with no arguments. + +* Entries are executed in the listed order. + +* The ``[extras]`` syntax from the packaging entry point spec is not + supported; it is installer metadata and has no meaning at + interpreter startup. + + +General Schema Rules +'''''''''''''''''''' + +* All three sections are optional. An empty ``.site.toml`` + file is a valid no-op. + +* Unknown tables are silently ignored, providing forward compatibility for + future extensions. + +* ``[paths]`` is always processed before ``[entrypoints]``, regardless of the + order in which the sections appear in the TOML file. + + +Error Handling +-------------- + +Errors are handled differently depending on the phase: + +Phase 1: Reading and Parsing + If a ``.site.toml`` file cannot be opened, decoded, or parsed as + valid TOML, it is skipped and processing continues to the next file. + Errors are reported only when ``-v`` (verbose) is given. + +Phase 2: Execution + If a path entry or entry point raises an exception during processing, the + traceback is printed to ``sys.stderr``, the failing entry is skipped, and + processing continues with the remaining entries in that file and + subsequent files. + +This is a deliberate improvement over ``.pth`` behavior, which aborts +processing the remainder of a file on the first error. + + +Rationale +========= + +TOML as the configuration format + TOML is already used by ``pyproject.toml`` and is familiar to the Python + packaging ecosystem. It is an easily human readable and writable format + that aids in validation and auditing. TOML files are structured and + typed, and can be easily reasoned about. TOML files allow for easy + future extensibility. The ``tomllib`` module has been available in the standard + library since Python 3.11. + +The ``.site.toml`` naming convention + A double extension clearly communicates purpose: the ``.site`` marker + indicates this is a site-startup configuration file, while ``.toml`` + indicates the format. 
This avoids ambiguity with other TOML files that + might exist in site-packages now or in the future. The package name + prefix preserves the current ``.pth`` convention of a single + startup file per package. + +Hybrid path resolution + Implicit relative path joining (matching ``.pth`` behavior) + provides a smooth migration path, while ``{sitedir}`` and future + placeholder variables offer explicit, extensible alternatives. As with + ``.pth`` files, absolute paths are preserved and used verbatim. + +``importlib.import_module()`` instead of ``exec()`` + Using the standard import machinery is more predictable and auditable than + ``exec()``. It integrates with the import system's hooks and logging, and + the ``package.module:callable`` syntax is already well-established in the + Python packaging ecosystem (e.g., ``console_scripts``). Allowing for + optional ``:callable`` syntax preserves the import-side-effect + functionality of ``.pth`` files, making migration easier. + +Two-phase processing + Reading all configuration before executing any of it provides a natural + extension point for future policy mechanisms and makes error reporting + more predictable. + +Alphabetical ordering with no priority mechanism + Packages are installed independently, and there is no external arbiter of + priority. Alphabetical ordering matches ``.pth`` behavior and is + simple to reason about. Priority could be addressed by a future site-wide + policy configuration. + +``schema_version`` as recommended, not required + Requiring ``schema_version`` would make the simplest valid file more + verbose. Making it recommended strikes a balance: files that include it + get forward-compatibility guarantees, while simple files that omit it + still work on a best-effort basis. + +Continue on error rather than abort + The ``.pth`` behavior of aborting the rest of a file on the first + error is unnecessarily harsh. 
If a package declares three entry points + and one fails, the other two should still run. + + +Backwards Compatibility +======================= + +* ``.pth`` file processing is **not** removed. Both + ``.pth`` and ``.site.toml`` files are discovered + in parallel within each site-packages directory. This preserves + backward compatibility for all existing (pre-migration) packages. + Deprecation of ``.pth`` files is out-of-scope for this PEP. + +* When ``.site.toml`` exists alongside ``.pth``, the + ``.site.toml`` takes precedence and the ``.pth`` file is + skipped, providing for a natural migration path and easy compatibility with + older versions of Python which are unaware of ``.site.toml`` files. + +* Within a site-packages directory, all ``.site.toml`` files + are fully processed (paths and entry points) before any remaining + ``.pth`` files. + +* The ``site.addsitedir()`` public API retains its existing signature + and continues to accept ``known_paths``. + + +Security Implications +===================== + +This PEP improves the security posture of interpreter startup: + +* ``.site.toml`` files replace ``exec()`` with + ``importlib.import_module()`` and explicit ``getattr()`` calls, + which are more constrained and auditable. + +* ``io.open_code()`` is used to read ``.site.toml`` files, ensuring + that audit hooks (:pep:`578`) can monitor file access. + +* The two-phase processing model creates a natural point where a future policy + mechanism could inspect and restrict what gets executed. + +* The ``package.module:callable`` syntax limits execution to + importable modules and their attributes, unlike ``exec()`` which can + run arbitrary code. + +The overall attack surface is not eliminated -- a malicious +``.site.toml`` file can still cause arbitrary code execution via +``init`` entrypoints, but the mechanism proposed in this PEP is more +structured, auditable, and amenable to policy controls. 
+ + +How to Teach This +================= + +For package authors +------------------- + +If your package currently ships a ``.pth`` file, you can migrate to a +``.site.toml`` file. The equivalent of a ``.pth`` file +containing a directory name is: + +.. _code-block: toml + + [paths] + dirs = ["my_directory"] + +The equivalent of a ``.pth`` file containing ``import my_package`` +is: + +.. _code-block: toml + + [entrypoints] + init = ["my_package"] + +If your ``.pth`` file calls a specific function, use the +``module:callable`` syntax: + +.. _code-block: toml + + [entrypoints] + init = ["my_package.startup:initialize"] + +If your ``.pth`` file includes arbitrary code, put that code in a +start up function and use the ``module:callable`` syntax. + +Both ``.pth`` and ``.site.toml`` can coexist during +migration. If both exist for the same package, only the ``.site.toml`` is +processed. Thus, it is recommended that packages compatible with older +Pythons ship both files. + +For tool authors +---------------- + + Build backends and installers should generate ``.site.toml`` files + alongside or instead of ``.pth`` files, depending on the package's + Python support matrix. The TOML format is easy to generate programmatically + using ``tomllib`` (for reading) or string formatting (for writing, since the + schema is simple). + + +Reference Implementation +========================= + +A reference implementation is provided as modifications to ``Lib/site.py``, +adding the following: + +* ``_SiteTOMLData`` -- a ``__slots__`` class holding parsed data from + a single ``.site.toml`` file (metadata, dirs, init). + +* ``_read_site_toml(sitedir, name)`` -- reads and parses a single + ``.site.toml`` file, validates types, and returns a + ``_SiteTOMLData`` instance or ``None`` on error. + +* ``_process_site_toml_paths(toml_data_list, known_paths)`` -- + processes ``[paths].dirs`` from all parsed files, expanding + placeholders and adding directories to ``sys.path`` as appropriate. 
+ +* ``_process_site_toml_entrypoints(toml_data_list)`` -- executes + ``[entrypoints].init`` from all parsed files. + +* Modified ``addsitedir()`` -- orchestrates the three-phase flow: + discover and parse ``.site.toml`` files, process paths and + entry points, then process remaining ``.pth`` files. + +Tests are provided in ``Lib/test/test_site.py`` in the +``SiteTomlTests`` class. + + +Rejected Ideas +============== + +Single configuration file instead of per-package files + A single site-wide configuration file was considered but rejected + because it would require coordination between independently + installed packages and would not mirror the ``.pth`` + convention that tools already understand. + +JSON instead of TOML + JSON lacks comments and is less human-friendly. TOML is already + the standard configuration format in the Python ecosystem via + ``pyproject.toml``. + +YAML instead of TOML + YAML is not in the standard library and has well-documented + parsing pitfalls. + +Python instead of TOML + Python is imperative, TOML is declarative. Thus TOML files are + much more readily validated and reasoned about. + +``$schema`` URL reference + Unlike JSON, TOML has no standard ``$schema`` convention. A + simple integer ``schema_version`` is sufficient and + self-contained. + +Required ``schema_version`` + Requiring ``schema_version`` would make the simplest valid file + more verbose without significant benefit. The recommended-but- + optional approach balances simplicity with future-proofing. + +Separate ``load`` and ``execute`` keys in ``[entrypoints]`` + Splitting import-only and callable entry points into separate lists + was considered but rejected because it complicates execution + ordering. A single ``init`` list with both forms keeps ordering + explicit. + +Priority or weight field for processing order + Since packages are installed independently, there is no arbiter of + priority. Alphabetical ordering matches ``.pth`` + behavior. 
Priority could be addressed by a future site-wide + policy configuration file, not per-package metadata. + +Passing arguments to callables + Callables are invoked with no arguments for simplicity and parity + with existing ``.pth`` import behavior. Future PEPs may + define an optional context argument (e.g., the parsed TOML data or + a site info object). + + +Open Issues +=========== + +* Should a warning be emitted when both ``.pth`` and + ``.site.toml`` coexist? + +* Should future ``-X`` options provide fine-grained control over + error reporting, unknown table warnings, and entry point execution? + +* Should callables receive context (e.g., the path to the + ``.site.toml`` file, the parsed TOML data, or a site info object)? + +* What additional placeholder variables should be supported beyond + ``{sitedir}``? Candidates include ``{prefix}``, ``{exec_prefix}``, and + ``{userbase}``. + + +Copyright +========= + +This document is placed in the public domain or under the +CC0-1.0-Universal license, whichever is more permissive. From 508f4936a0dce1a5ef15de1c31450867386ce984 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 31 Mar 2026 19:12:27 -0700 Subject: [PATCH 04/12] Update and refine PEP 829 --- Lib/site.py | 2 +- pep-0829.rst | 30 +++++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index 326f04aecbc79b..ad46dc6943efd0 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -208,7 +208,7 @@ def _read_site_toml(sitedir, name): return None metadata = data.get("metadata", {}) - # Validate the TOML schema version. PEP XXX defines schema_version == 1. Both the [metadata] + # Validate the TOML schema version. PEP 829 defines schema_version == 1. Both the [metadata] # section and [metadata].schema_version are optional, but if missing, future compatibility # cannot be guaranteed. 
if (schema_version := metadata.get("schema_version")) is not None: diff --git a/pep-0829.rst b/pep-0829.rst index 5d14ae971c1a5d..d329f42a7f9171 100644 --- a/pep-0829.rst +++ b/pep-0829.rst @@ -33,7 +33,7 @@ support two functions: anchored at the site-packages directory. #. **Executing code** -- lines starting with ``import`` (or - ``import\\t``) are executed immediate by passing the source string + ``import\\t``) are executed immediately by passing the source string to ``exec()``. This design has several problems: @@ -107,7 +107,7 @@ File Naming and Discovery behavior. * If both ``.site.toml`` and ``.pth`` exist in the same - directory, only the ``.site.toml`` file is processed. + directory, only the ``.site.toml`` file is processed. Processing Model @@ -152,7 +152,7 @@ entry point imports and ``.pth`` import lines. A ``.site.toml`` file is defined to have three sections, all of which are optional: -.. _code-block: toml +.. code-block:: toml [metadata] schema_version = 1 @@ -181,9 +181,9 @@ Defined keys: The TOML file schema version number. Must be the integer ``1`` for this specification. If present, Python guarantees forward-compatible handling: future versions will either process the file according to the declared - schema or skip it with a clear diagnostic. It is an error if the - ``schema_version`` is present but has an unsupported value, the entire - file is skipped. If ``schema_version`` is omitted, the file is processed + schema or skip it with a clear diagnostic. If the + ``schema_version`` is present but has an unsupported value, the + entire file is skipped. If ``schema_version`` is omitted, the file is processed on a best-effort basis with no forward-compatibility guarantees. Recommended keys: @@ -279,7 +279,15 @@ Errors are handled differently depending on the phase: Phase 1: Reading and Parsing If a ``.site.toml`` file cannot be opened, decoded, or parsed as valid TOML, it is skipped and processing continues to the next file. 
- Errors are reported only when ``-v`` (verbose) is given. + Errors are reported only when ``-v`` (verbose) is given. Importantly, + a ``.site.toml`` file that fails to parse **still supersedes** + its corresponding ``.pth`` file. The existence of the + ``.site.toml`` file is sufficient to suppress + ``.pth`` processing, regardless of whether the TOML file + parses successfully. This prevents confusing dual-execution + scenarios and ensures that a broken ``.site.toml`` is + noticed rather than silently masked by fallback to the + ``.pth`` file. Phase 2: Execution If a path entry or entry point raises an exception during processing, the @@ -404,7 +412,7 @@ If your package currently ships a ``.pth`` file, you can migrate to a ``.site.toml`` file. The equivalent of a ``.pth`` file containing a directory name is: -.. _code-block: toml +.. code-block:: toml [paths] dirs = ["my_directory"] @@ -412,7 +420,7 @@ containing a directory name is: The equivalent of a ``.pth`` file containing ``import my_package`` is: -.. _code-block: toml +.. code-block:: toml [entrypoints] init = ["my_package"] @@ -420,7 +428,7 @@ is: If your ``.pth`` file calls a specific function, use the ``module:callable`` syntax: -.. _code-block: toml +.. code-block:: toml [entrypoints] init = ["my_package.startup:initialize"] @@ -429,7 +437,7 @@ If your ``.pth`` file includes arbitrary code, put that code in a start up function and use the ``module:callable`` syntax. Both ``.pth`` and ``.site.toml`` can coexist during -migration. If both exist for the same package, only the ``.site.toml`` is +migration. If both exist for the same package, only the ``.site.toml`` is processed. Thus, it is recommended that packages compatible with older Pythons ship both files. 
From 390de9f06461dea94e6da5ce5f79eb7db5d64f2c Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 11:30:48 -0700 Subject: [PATCH 05/12] Checkpointing reference implementation updates * Add _print_error() helper function for printing exceptions to sys.stderr. These are not hidden behind -v * `warnings` can be lazy imported. * Since the site.toml file is no longer proposed, rewrite how .pth and .start files are parsed and processed. * addpackage() -- which was never documented -- is deprecated; there are a few usages in the wild so we can't get rid of it, but it's generally unnecessary now, has different semantics. * Update tests --- Lib/site.py | 385 ++++++++++++++++++------------------ Lib/test/test_site.py | 440 ++++++++++++++---------------------------- 2 files changed, 335 insertions(+), 490 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index ad46dc6943efd0..46a5ca5cdd5c12 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -79,9 +79,10 @@ import stat import errno -lazy import importlib -lazy import tomllib +lazy import locale +lazy import pkgutil lazy import traceback +lazy import warnings # Prefixes for site-packages; add additional prefixes like /usr/local here PREFIXES = [sys.prefix, sys.exec_prefix] @@ -101,9 +102,16 @@ def _trace(message): print(message, file=sys.stderr) -def _warn(*args, **kwargs): - import warnings +def _print_error(message, exc=None): + """Print an error message to stderr, optionally with a formatted traceback.""" + print(message, file=sys.stderr) + if exc is not None: + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' ' + line, file=sys.stderr) + +def _warn(*args, **kwargs): warnings.warn(*args, **kwargs) @@ -167,206 +175,198 @@ def _init_pathinfo(): return d -class _SiteTOMLData: - """Parsed data from a single .site.toml file.""" - __slots__ = ('filename', 'sitedir', 'metadata', 'dirs', 'init') +# Accumulated entry points from .start files across all site-packages +# 
directories. Execution is deferred until all paths in .pth files have been +# appended to sys.path. Map the .pth/.start file the data is found in to the +# data. +_pending_entrypoints = {} +_pending_syspaths = {} +_pending_importexecs = {} - def __init__(self, filename, sitedir, metadata, dirs, init): - self.filename = filename # str: basename e.g. "foo.site.toml" - self.sitedir = sitedir # str: absolute path to site-packages dir - self.metadata = metadata # dict: raw [metadata] table (may be empty) - self.dirs = dirs # list[str]: validated [paths].dirs (may be empty) - self.init = init # list[str]: validated [entrypoints].init (may be empty) +def _read_pthstart_file(sitedir, name, suffix): + """Parse a .start or .pth file and return (lines, filename). -def _read_site_toml(sitedir, name): - """Parse a .site.toml file and return a _SiteTOMLData, or None on error.""" - fullname = os.path.join(sitedir, name) + Always returns a 2-tuple. On failure (hidden, unreadable, etc.), + returns ([], filename) so callers can proceed without checking. + """ + content = "" + filename = os.path.join(sitedir, name) + _trace(f"Reading startup configuration file: {filename}") - # Check that name.site.toml file exists and is not hidden. 
try: - st = os.lstat(fullname) - except OSError: - return None + st = os.lstat(filename) + except OSError as exc: + _print_error(f"Cannot stat {filename!r}", exc) + return [], filename + if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): - _trace(f"Skipping hidden .site.toml file: {fullname!r}") - return None - - _trace(f"Processing .site.toml file: {fullname!r}") + _trace(f"Skipping hidden {suffix} file: {filename!r}") + return [], filename + _trace(f"Processing {suffix} file: {filename!r}") try: - with io.open_code(fullname) as f: - raw = f.read() - except OSError: - return None + with io.open_code(filename) as f: + raw_content = f.read() + except OSError as exc: + _print_error(f"Cannot read {filename!r}", exc) + return [], filename try: - data = tomllib.loads(raw.decode("utf-8")) - except Exception as exc: - _trace(f"Error parsing {fullname!r}: {exc}") - return None + # Accept BOM markers in .start and .pth files as we do in source files (Windows PowerShell + # 5.1 makes it hard to emit UTF-8 files without a BOM). + content = raw_content.decode("utf-8-sig") + except UnicodeDecodeError: + # Fallback to locale encoding for backward compatibility. We will deprecate this fallback + # in the future. + content = raw_content.decode(locale.getencoding()) + _trace(f"Cannot read {filename!r} as UTF-8. " + f"Using fallback encoding {locale.getencoding()!r}") - metadata = data.get("metadata", {}) - # Validate the TOML schema version. PEP 829 defines schema_version == 1. Both the [metadata] - # section and [metadata].schema_version are optional, but if missing, future compatibility - # cannot be guaranteed. 
- if (schema_version := metadata.get("schema_version")) is not None: - if schema_version != 1: - _trace(f"Unsupported [metadata].schema_version: {schema_version}") - return None + return content.splitlines(), filename - # Validate [paths].dirs - dirs = [] - if (paths_table := data.get("paths")) is not None: - if (raw_dirs := paths_table.get("dirs")) is not None: - if (isinstance(raw_dirs, list) and - all(isinstance(d, str) for d in raw_dirs)): - dirs = raw_dirs - else: - _trace(f"Invalid 'dirs' in {fullname!r}: " - f"expected list of strings") - - # Validate [entrypoints].init - init = [] - if (ep_table := data.get("entrypoints")) is not None: - if (raw_init := ep_table.get("init")) is not None: - if (isinstance(raw_init, list) and - all(isinstance(e, str) for e in raw_init)): - init = raw_init - else: - _trace(f"Invalid 'init' in {fullname!r}: " - f"expected list of strings") - return _SiteTOMLData(name, sitedir, metadata, dirs, init) +def _read_pth_file(sitedir, name, known_paths): + """Parse a .pth file, accumulating sys.path extensions and import lines. + + Errors on individual lines do not abort processing of the rest of the + file (PEP 829). 
+ """ + lines, filename = _read_pthstart_file(sitedir, name, ".pth") + + for n, line in enumerate(lines, 1): + line = line.strip() + if len(line) == 0 or line.startswith("#"): + continue + + if line.startswith("import ") or line.startswith("import\t"): + _pending_importexecs.setdefault(filename, []).append(line) + continue + + try: + dir, dircase = makepath(sitedir, line) + except Exception as exc: + _print_error( + f"Error in {filename!r}, line {n:d}: {line!r}", exc) + continue + + if dircase in known_paths: + _trace(f"In {filename!r}, line {n:d}: " + f"skipping duplicate sys.path entry: {dir}") + else: + _pending_syspaths.setdefault(filename, []).append(dir) + known_paths.add(dircase) + + +def _read_start_file(sitedir, name): + """Parse a .start file and return a list of entry point strings.""" + lines, filename = _read_pthstart_file(sitedir, name, ".start") + + for n, line in enumerate(lines, 1): + line = line.strip() + if len(line) == 0 or line.startswith("#"): + continue + + # Validate mandatory colon-form: pkg.mod:callable. + if ':' not in line: + _trace(f"In {filename!r}, line {n:d}: " + f"skipping invalid entry point: {line}") + continue + + _pending_entrypoints.setdefault(filename, []).append(line) -def _process_site_toml_paths(toml_data_list, known_paths): - """Process [paths] from all parsed .site.toml data.""" - for td in toml_data_list: - for dir_entry in td.dirs: +def _extend_syspath(): + # We've already filtered out duplicates, either in the existing sys.path + # or in all the .pth files we've seen. We've also abspath/normpath'd all + # the entries, so all that's left to do is to ensure that the path exists. 
+ for filename, dirs in _pending_syspaths.items(): + for dir in dirs: + if os.path.exists(dir): + _trace(f"Extending sys.path with {dir} from {filename}") + sys.path.append(dir) + else: + _print_error( + f"In {filename}: {dir} does not exist; " + f"skipping sys.path append") + + +def _exec_imports(): + # For all the `import` lines we've seen in .pth files, exec() them in + # order. However, if they come from a file with a matching .start, then + # we ignore these import lines. For the ones we do process, print a + # warning but only when -v was given. + for filename, imports in _pending_importexecs.items(): + name, dot, pth = filename.rpartition(".") + assert dot == "." and pth == "pth", f"Bad startup filename: {filename}" + + if f"{name}.start" in _pending_entrypoints: + # Skip import lines in favor of entry points. + continue + + _trace( + f"import lines in {filename} are deprecated, " + f"use entry points in a {name}.start file instead." + ) + + for line in imports: try: - # The {sitedir} placeholder expands to the site directory where the pkg.site.toml - # file was found. When placed at the beginning of the path, this is the explicit - # way to name directories relative to sitedir. - dir_entry = dir_entry.replace("{sitedir}", td.sitedir) - # For backward compatibility with .pth files, relative directories are implicitly - # anchored to sitedir. 
- if not os.path.isabs(dir_entry): - dir_entry = os.path.join(td.sitedir, dir_entry) - dir, dircase = makepath(dir_entry) - if dircase not in known_paths and os.path.exists(dir): - sys.path.append(dir) - known_paths.add(dircase) + _trace(f"Exec'ing from {filename}: {line}") + exec(line) except Exception as exc: - fullname = os.path.join(td.sitedir, td.filename) - print(f"Error processing path {dir_entry!r} " - f"from {fullname}:", - file=sys.stderr) - for record in traceback.format_exception(exc): - for line in record.splitlines(): - print(' ' + line, file=sys.stderr) - - -def _process_site_toml_entrypoints(toml_data_list): - """Execute [entrypoints] from all parsed .site.toml data.""" - for td in toml_data_list: - for entry in td.init: + _print_error( + f"Error in import line from {filename}: {line}", exc) + + +def _execute_start_entrypoints(): + """Execute all accumulated .start file entry points. + + Called after all site-packages directories have been processed so that + sys.path is fully populated before any entry point code runs. Uses + pkgutil.resolve_name() for resolution. + """ + for filename, entrypoints in _pending_entrypoints.items(): + for entrypoint in entrypoints: try: - # Parse "package.module:callable" format. When the optional :callable is not given, - # the entire string will end up in the last item, so swap things around. - modname, colon, funcname = entry.rpartition(':') - if colon != ':': - modname = funcname - funcname = None - - _trace(f"Executing entrypoint: {entry!r} " - f"from {td.filename!r}") - - mod = importlib.import_module(modname) - - # Call the callable if given. 
- if funcname is not None: - func = getattr(mod, funcname) - func() + _trace(f"Executing entry point: {entrypoint} from {filename}") + callable_ = pkgutil.resolve_name(entrypoint) + callable_() except Exception as exc: - fullname = os.path.join(td.sitedir, td.filename) - print(f"Error in entrypoint {entry!r} from {fullname}:", - file=sys.stderr) - for record in traceback.format_exception(exc): - for line in record.splitlines(): - print(' ' + line, file=sys.stderr) + _print_error( + f"Error in entry point {entrypoint} from {filename}", + exc) def addpackage(sitedir, name, known_paths): - """Process a .pth file within the site-packages directory: - For each line in the file, either combine it with sitedir to a path - and add that to known_paths, or execute it if it starts with 'import '. + """Process a .pth file within the site-packages directory. + + .. deprecated:: 3.15 + Use :func:`addsitedir` instead. """ + _warn( + "site.addpackage() is deprecated, use site.addsitedir() instead.", + DeprecationWarning, + stacklevel=2, + ) if known_paths is None: known_paths = _init_pathinfo() reset = True else: reset = False - fullname = os.path.join(sitedir, name) - try: - st = os.lstat(fullname) - except OSError: - return - if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or - (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): - _trace(f"Skipping hidden .pth file: {fullname!r}") - return - _trace(f"Processing .pth file: {fullname!r}") - try: - with io.open_code(fullname) as f: - pth_content = f.read() - except OSError: - return - - try: - # Accept BOM markers in .pth files as we do in source files - # (Windows PowerShell 5.1 makes it hard to emit UTF-8 files without a BOM) - pth_content = pth_content.decode("utf-8-sig") - except UnicodeDecodeError: - # Fallback to locale encoding for backward compatibility. - # We will deprecate this fallback in the future. 
- import locale - pth_content = pth_content.decode(locale.getencoding()) - _trace(f"Cannot read {fullname!r} as UTF-8. " - f"Using fallback encoding {locale.getencoding()!r}") - - for n, line in enumerate(pth_content.splitlines(), 1): - if line.startswith("#"): - continue - if line.strip() == "": - continue - try: - if line.startswith(("import ", "import\t")): - exec(line) - continue - line = line.rstrip() - dir, dircase = makepath(sitedir, line) - if dircase not in known_paths and os.path.exists(dir): - sys.path.append(dir) - known_paths.add(dircase) - except Exception as exc: - print(f"Error processing line {n:d} of {fullname}:\n", - file=sys.stderr) - import traceback - for record in traceback.format_exception(exc): - for line in record.splitlines(): - print(' '+line, file=sys.stderr) - print("\nRemainder of file ignored", file=sys.stderr) - break + _read_pth_file(sitedir, name, known_paths) + _extend_syspath() + _exec_imports() + _pending_syspaths.clear() + _pending_importexecs.clear() if reset: known_paths = None return known_paths def addsitedir(sitedir, known_paths=None): - """Add 'sitedir' argument to sys.path if missing and handle .site.toml - and .pth files in 'sitedir'""" + """Add 'sitedir' argument to sys.path if missing and handle startup + files.""" _trace(f"Adding directory: {sitedir!r}") if known_paths is None: known_paths = _init_pathinfo() @@ -382,41 +382,33 @@ def addsitedir(sitedir, known_paths=None): except OSError: return - # Phase 1: Discover and parse .site.toml files, sorted alphabetically. - toml_names = sorted( + # Phase 1: Discover .start files and accumulate their entry points. 
+ start_names = sorted( name for name in names - if name.endswith(".site.toml") and not name.startswith(".") + if name.endswith(".start") and not name.startswith(".") ) + for name in start_names: + _read_start_file(sitedir, name) - toml_data_list = [] - superseded_pth = set() - - for name in toml_names: - # "foo.site.toml" supersedes "foo.pth" - base = name.removesuffix(".site.toml") - pth_name = base + ".pth" - if pth_name in names: - superseded_pth.add(pth_name) - td = _read_site_toml(sitedir, name) - if td is not None: - toml_data_list.append(td) - - # Phase 2: Process all .site.toml data (paths first, then entrypoints) - if toml_data_list: - _process_site_toml_paths(toml_data_list, known_paths) - _process_site_toml_entrypoints(toml_data_list) - - # Phase 3: Process remaining .pth files + # Phase 2: Read .pth files, accumulating paths and import lines. pth_names = sorted( name for name in names if name.endswith(".pth") and not name.startswith(".") - and name not in superseded_pth ) for name in pth_names: - addpackage(sitedir, name, known_paths) + _read_pth_file(sitedir, name, known_paths) + # If standalone call (not from main()), flush immediately + # so the caller sees the effect. if reset: + _extend_syspath() + _exec_imports() + _execute_start_entrypoints() + _pending_syspaths.clear() + _pending_importexecs.clear() + _pending_entrypoints.clear() known_paths = None + return known_paths @@ -872,6 +864,13 @@ def main(): ENABLE_USER_SITE = check_enableusersite() known_paths = addusersitepackages(known_paths) known_paths = addsitepackages(known_paths) + # PEP 829: flush accumulated data from all .pth and .start files. + # Paths are extended first, then deprecated import lines are exec'd, + # and finally .start entry points are executed — ensuring sys.path is + # fully populated before any startup code runs. 
+ _extend_syspath() + _exec_imports() + _execute_start_entrypoints() setquit() setcopyright() sethelper() diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 8c7c095113830f..87c3957b66b2ba 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -68,6 +68,16 @@ class HelperFunctionsTests(unittest.TestCase): """Tests for helper functions. """ + @classmethod + def setUpClass(cls): + cls._addpackage_token = support.ignore_deprecations_from( + "site", like=r".*addpackage.*" + ) + + @classmethod + def tearDownClass(cls): + support.clear_ignored_deprecations(cls._addpackage_token) + def setUp(self): """Save a copy of sys.path""" self.sys_path = sys.path[:] @@ -147,12 +157,6 @@ def test_addpackage_import_bad_syntax(self): pth_dir, pth_fn = self.make_pth("import bad-syntax\n") with captured_stderr() as err_out: site.addpackage(pth_dir, pth_fn, set()) - self.assertRegex(err_out.getvalue(), "line 1") - self.assertRegex(err_out.getvalue(), - re.escape(os.path.join(pth_dir, pth_fn))) - # XXX: the previous two should be independent checks so that the - # order doesn't matter. The next three could be a single check - # but my regex foo isn't good enough to write it. self.assertRegex(err_out.getvalue(), 'Traceback') self.assertRegex(err_out.getvalue(), r'import bad-syntax') self.assertRegex(err_out.getvalue(), 'SyntaxError') @@ -162,10 +166,6 @@ def test_addpackage_import_bad_exec(self): pth_dir, pth_fn = self.make_pth("randompath\nimport nosuchmodule\n") with captured_stderr() as err_out: site.addpackage(pth_dir, pth_fn, set()) - self.assertRegex(err_out.getvalue(), "line 2") - self.assertRegex(err_out.getvalue(), - re.escape(os.path.join(pth_dir, pth_fn))) - # XXX: ditto previous XXX comment. 
self.assertRegex(err_out.getvalue(), 'Traceback') self.assertRegex(err_out.getvalue(), 'ModuleNotFoundError') @@ -188,7 +188,7 @@ def test_addpackage_import_bad_pth_file(self): def test_addsitedir(self): # Same tests for test_addpackage since addsitedir() essentially just - # calls addpackage() for every .pth file in the directory + # calls _read_pth_file() for every .pth file in the directory pth_file = PthFile() pth_file.cleanup(prep=True) # Make sure that nothing is pre-existing # that is tested for @@ -400,6 +400,20 @@ def test_trace(self): self.assertEqual(sys.stderr.getvalue(), out) +class AddpackageDeprecationTests(unittest.TestCase): + """Test that site.addpackage() is deprecated (PEP 829).""" + + def test_addpackage_emits_deprecation_warning(self): + with os_helper.temp_dir() as tmpdir: + pth_fn = os.path.join(tmpdir, 'test.pth') + with open(pth_fn, 'w', encoding='utf-8') as f: + f.write('\n') + with self.assertWarns(DeprecationWarning) as cm: + site.addpackage(tmpdir, 'test.pth', set()) + self.assertIn('addpackage', str(cm.warning)) + self.assertIn('addsitedir', str(cm.warning)) + + class PthFile(object): """Helper class for handling testing of .pth files""" @@ -908,227 +922,86 @@ def test_both_args(self): self.assertEqual(output, excepted_output) -class SiteTomlTests(unittest.TestCase): - """Tests for .site.toml file processing.""" +class StartFileTests(unittest.TestCase): + """Tests for .start file processing (PEP 829).""" def setUp(self): self.sys_path = sys.path[:] - self.tmpdir = self.sitedir = tempfile.mkdtemp() - self.addCleanup(shutil.rmtree, self.tmpdir) + self.tmpdir = self.sitedir = self.enterContext(os_helper.temp_dir()) + self.saved_pending = site._pending_entrypoints.copy() + site._pending_entrypoints.clear() def tearDown(self): sys.path[:] = self.sys_path + site._pending_entrypoints.clear() + site._pending_entrypoints.update(self.saved_pending) - def _make_site_toml(self, content, name='testpkg'): - """Write a .site.toml and return its 
name.""" - basename = name + '.site.toml' + def _make_start(self, content, name='testpkg'): + """Write a .start file and return its basename.""" + basename = name + '.start' filepath = os.path.join(self.tmpdir, basename) with open(filepath, 'w', encoding='utf-8') as f: f.write(content) return basename def _make_pth(self, content, name='testpkg'): - """Write a .pth file and return its name.""" + """Write a .pth file and return its basename.""" basename = name + '.pth' filepath = os.path.join(self.tmpdir, basename) with open(filepath, 'w', encoding='utf-8') as f: f.write(content) return basename - # --- _read_site_toml tests --- + # --- _read_start_file tests --- - def test_read_site_toml_basic(self): - # Valid .site.toml with all sections. - subdir = os.path.join(self.tmpdir, 'subdir') - os.mkdir(subdir) - name = self._make_site_toml("""\ -[metadata] -schema_version = 1 -package = "testpkg" + def test_read_start_file_basic(self): + name = self._make_start("os.path:join\n") + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, ['os.path:join']) -[paths] -dirs = ["subdir"] + def test_read_start_file_multiple_entries(self): + name = self._make_start("os.path:join\nos.path:exists\n") + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, ['os.path:join', 'os.path:exists']) -[entrypoints] -init = ["os"] -""") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertIsNotNone(tomldata) - self.assertEqual(tomldata.filename, name) - self.assertEqual(tomldata.sitedir, self.sitedir) - self.assertEqual(tomldata.metadata, { - 'schema_version': 1, 'package': 'testpkg'}) - self.assertEqual(tomldata.dirs, ['subdir']) - self.assertEqual(tomldata.init, ['os']) - - def test_missing_schema_version_is_okay(self): - # It's okay for the schema_version to be missing, or even the [metadata] section entirely - # (which is tested below). A missing schema_version just means that no future compatibility - # can be guaranteed. 
- name = self._make_site_toml("""\ -[metadata] -""") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertIsNotNone(tomldata) - self.assertEqual(tomldata.metadata, {}) - - def test_unexpected_schema_version_is_not_okay(self): - # If [metadata].schema_version exists, but isn't a supported number, then the entire TOML - # file is invalid and ignored. - name = self._make_site_toml("""\ -[metadata] -schema_version = 801 -""") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertIsNone(tomldata) - - def test_read_site_toml_parse_error(self): - # Invalid pkg.site.toml content is skipped. - name = self._make_site_toml("not valid [[[toml") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertIsNone(tomldata) - - def test_read_site_toml_invalid_dirs_type(self): - # dirs must be a list of strings. - name = self._make_site_toml("""\ -[paths] -dirs = "not_a_list" -""") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertEqual(tomldata.dirs, []) + def test_read_start_file_comments_and_blanks(self): + name = self._make_start("# a comment\n\nos.path:join\n \n") + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, ['os.path:join']) - def test_read_site_toml_invalid_init_type(self): - # init must be a list of strings - name = self._make_site_toml("""\ -[paths] -dirs = ["subdir"] + def test_read_start_file_missing_colon_skipped(self): + # Entry points without the mandatory colon are skipped. + name = self._make_start("os.path\nos.path:join\n") + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, ['os.path:join']) -[entrypoints] -init = 42 -""") - subdir = os.path.join(self.tmpdir, 'subdir') - os.mkdir(subdir) - tomldata = site._read_site_toml(self.sitedir, name) - self.assertIsNotNone(tomldata) - self.assertEqual(tomldata.dirs, ['subdir']) - self.assertEqual(tomldata.init, []) - - def test_read_site_toml_empty_file(self): - # Empty .site.toml is a no-op. 
- name = self._make_site_toml("") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertEqual(tomldata.metadata, {}) - self.assertEqual(tomldata.dirs, []) - self.assertEqual(tomldata.init, []) - - def test_read_site_toml_unknown_tables_ignored(self): - # Unknown tables should not cause errors. - name = self._make_site_toml("""\ -[metadata] -schema_version = 1 - -[unknown_section] -key = "value" - -[entrypoints] -init = ["os"] -""") - tomldata = site._read_site_toml(self.sitedir, name) - self.assertIsNotNone(tomldata) - self.assertEqual(tomldata.metadata, {'schema_version': 1}) - self.assertEqual(tomldata.init, ['os']) + def test_read_start_file_empty(self): + name = self._make_start("") + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, []) - def test_read_site_toml_nonexistent(self): - # Nonexistent file returns None. - tomldata = site._read_site_toml(self.tmpdir, 'nonexistent.site.toml') - self.assertIsNone(tomldata) + def test_read_start_file_comments_only(self): + name = self._make_start("# just a comment\n# another\n") + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, []) - # --- Path processing tests --- + def test_read_start_file_nonexistent(self): + entries = site._read_start_file(self.tmpdir, 'nonexistent.start') + self.assertEqual(entries, []) - def test_process_paths_relative(self): - # Relative paths are joined with sitedir. 
- subdir = os.path.join(self.sitedir, 'mylib') - os.mkdir(subdir) - name = self._make_site_toml("""\ -[paths] -dirs = ["mylib"] -""") - known_paths = set() - tomldata = site._read_site_toml(self.sitedir, name) - site._process_site_toml_paths([tomldata], known_paths) - self.assertIn(subdir, sys.path) + @unittest.skipUnless(hasattr(os, 'chflags'), 'test needs os.chflags()') + def test_read_start_file_hidden_flags(self): + name = self._make_start("os.path:join\n") + filepath = os.path.join(self.tmpdir, name) + st = os.stat(filepath) + os.chflags(filepath, st.st_flags | stat.UF_HIDDEN) + entries = site._read_start_file(self.sitedir, name) + self.assertEqual(entries, []) - def test_process_paths_absolute(self): - # Absolute paths are preserved as-is. - absdir = os.path.join(self.sitedir, 'abslib') - os.mkdir(absdir) - name = self._make_site_toml( - f'[paths]\ndirs = ["{absdir}"]\n') - known_paths = set() - tomldata = site._read_site_toml(self.sitedir, name) - site._process_site_toml_paths([tomldata], known_paths) - self.assertIn(absdir, sys.path) - - def test_process_paths_sitedir_placeholder(self): - # The {sitedir} placeholder expands to the site-packages dir. - subdir = os.path.join(self.sitedir, 'extra') - os.mkdir(subdir) - name = self._make_site_toml("""\ -[paths] -dirs = ["{sitedir}/extra"] -""") - known_paths = set() - tomldata = site._read_site_toml(self.sitedir, name) - site._process_site_toml_paths([tomldata], known_paths) - self.assertIn(os.path.join(self.tmpdir, 'extra'), sys.path) - - def test_process_paths_deduplication(self): - # Same path from two different files are only added once. 
- subdir = os.path.join(self.tmpdir, 'shared') - os.mkdir(subdir) - tomldata1 = site._SiteTOMLData( - 'a.site.toml', self.tmpdir, [], ['shared'], []) - tomldata2 = site._SiteTOMLData( - 'b.site.toml', self.tmpdir, [], ['shared'], []) - known_paths = set() - site._process_site_toml_paths([tomldata1, tomldata2], known_paths) - self.assertEqual(sys.path.count(subdir), 1) - - def test_process_paths_nonexistent(self): - # Nonexistent directories are not added. - tomldata = site._SiteTOMLData( - 'test.site.toml', self.tmpdir, [], ['nosuchdir'], []) - known_paths = set() - sys_path = sys.path[:] - site._process_site_toml_paths([tomldata], known_paths) - self.assertEqual(sys.path, sys_path) - - # --- Entrypoint tests --- - - def test_process_entrypoints_import_only(self): - # Import-only entrypoint (no callable). - mod_dir = os.path.join(self.sitedir, 'epmod') - os.mkdir(mod_dir) - init_file = os.path.join(mod_dir, '__init__.py') - with open(init_file, 'w') as f: - f.write("""\ -called = False -def startup(): - global called - called = True -""") - sys.path.insert(0, self.sitedir) - self.addCleanup(sys.modules.pop, 'epmod', None) - tomldata = site._SiteTOMLData( - 'test.site.toml', self.sitedir, [], [self.sitedir], ['epmod']) - site._process_site_toml_entrypoints([tomldata]) - import epmod - self.assertFalse(epmod.called) + # --- _execute_start_entrypoints tests --- - def test_process_entrypoints_with_callable(self): + def test_execute_entrypoints_with_callable(self): # Entrypoint with callable is invoked. - # - # Create a module with a function that sets a flag. 
mod_dir = os.path.join(self.sitedir, 'epmod') os.mkdir(mod_dir) init_file = os.path.join(mod_dir, '__init__.py') @@ -1141,129 +1014,102 @@ def startup(): """) sys.path.insert(0, self.sitedir) self.addCleanup(sys.modules.pop, 'epmod', None) - tomldata = site._SiteTOMLData( - 'test.site.toml', self.sitedir, [], [self.sitedir], ['epmod:startup']) - site._process_site_toml_entrypoints([tomldata]) + fullname = os.path.join(self.sitedir, 'epmod.start') + site._pending_entrypoints.append((fullname, 'epmod:startup')) + site._execute_start_entrypoints() import epmod self.assertTrue(epmod.called) - def test_process_entrypoints_import_error(self): + def test_execute_entrypoints_import_error(self): # Import error prints traceback but continues. - tomldata = site._SiteTOMLData( - 'test.site.toml', self.sitedir, [], self.sitedir, - ['nosuchmodule_xyz', 'os']) + fullname = os.path.join(self.sitedir, 'bad.start') + site._pending_entrypoints.append( + (fullname, 'nosuchmodule_xyz:func')) + site._pending_entrypoints.append( + (fullname, 'os.path:join')) with captured_stderr() as err: - site._process_site_toml_entrypoints([tomldata]) + site._execute_start_entrypoints() self.assertIn('nosuchmodule_xyz', err.getvalue()) self.assertIn('Traceback', err.getvalue()) - # 'os' should still have been processed (no exception for it) + # os.path:join should still have been called (no exception for it) - def test_process_entrypoints_callable_error(self): + def test_execute_entrypoints_callable_error(self): # Callable that raises prints traceback but continues. 
mod_dir = os.path.join(self.sitedir, 'badmod') os.mkdir(mod_dir) init_file = os.path.join(mod_dir, '__init__.py') with open(init_file, 'w') as f: - f.write(""" + f.write("""\ def fail(): raise RuntimeError("boom") """) sys.path.insert(0, self.sitedir) - self.addCleanup(sys.modules.pop, 'badmod') - tomldata = site._SiteTOMLData( - 'test.site.toml', self.tmpdir, None, None, - ['badmod:fail', 'os']) + self.addCleanup(sys.modules.pop, 'badmod', None) + fullname = os.path.join(self.sitedir, 'badmod.start') + site._pending_entrypoints.append((fullname, 'badmod:fail')) with captured_stderr() as err: - site._process_site_toml_entrypoints([tomldata]) + site._execute_start_entrypoints() self.assertIn('RuntimeError', err.getvalue()) self.assertIn('boom', err.getvalue()) # --- addsitedir integration tests --- - def test_addsitedir_toml_supersedes_pth(self): - # When both foo.site.toml and foo.pth exist, only .toml is used. - # - # Start by creating two directories which will be the paths that both the foo.site.toml and - # foo.site.pth files will try to add respectively. - toml_dir = os.path.join(self.sitedir, 'tomlpath') - pth_dir = os.path.join(self.sitedir, 'pthpath') - os.mkdir(toml_dir) - os.mkdir(pth_dir) - - self._make_site_toml("""\ -[paths] -dirs = ["tomlpath"] -""", name='foo') - self._make_pth("pthpath\n", name='foo') - + def test_addsitedir_discovers_start_files(self): + # addsitedir() should discover .start files and accumulate entries. + self._make_start("os.path:join\n", name='foo') site.addsitedir(self.sitedir, set()) - self.assertIn(toml_dir, sys.path) - self.assertNotIn(pth_dir, sys.path) - - def test_addsitedir_toml_and_pth_coexist(self): - # Different basenames: both .toml and .pth are processed. 
- toml_dir = os.path.join(self.sitedir, 'tomlpath') - pth_dir = os.path.join(self.sitedir, 'pthpath') - os.mkdir(toml_dir) - os.mkdir(pth_dir) - - self._make_site_toml("""\ -[paths] -dirs = ["tomlpath"] -""", name='foo') - self._make_pth("pthpath\n", name='bar') - + fullname = os.path.join(self.sitedir, 'foo.start') + self.assertIn((fullname, 'os.path:join'), + site._pending_entrypoints) + + def test_addsitedir_start_suppresses_pth_imports(self): + # When foo.start exists, import lines in foo.pth are silently skipped. + self._make_start("os.path:join\n", name='foo') + self._make_pth("import sys\n", name='foo') + # No DeprecationWarning should be emitted + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + site.addsitedir(self.sitedir, set()) + + def test_addsitedir_pth_import_warns_without_start(self): + # Without a matching .start file, import lines emit DeprecationWarning. + self._make_pth("import sys\n", name='foo') + with self.assertWarns(DeprecationWarning): + site.addsitedir(self.sitedir, set()) + + def test_addsitedir_pth_paths_still_work_with_start(self): + # Path lines in .pth files still work even when a .start file exists. + subdir = os.path.join(self.sitedir, 'mylib') + os.mkdir(subdir) + self._make_start("os.path:join\n", name='foo') + self._make_pth("mylib\n", name='foo') site.addsitedir(self.sitedir, set()) - self.assertIn(toml_dir, sys.path) - self.assertIn(pth_dir, sys.path) - - def test_addsitedir_paths_before_entrypoints(self): - # Paths from .site.toml are added before entrypoints execution. - # - # Create a module in a subdir that will only be importable if the path - # is added first. 
- mod_dir = os.path.join(self.sitedir, 'initlib') - os.mkdir(mod_dir) - mod_file = os.path.join(mod_dir, 'initmod.py') - with open(mod_file, 'w') as f: - f.write('loaded = True\n') - - self._make_site_toml("""\ -[paths] -dirs = ["initlib"] - -[entrypoints] -init = ["initmod"] -""") + self.assertIn(subdir, sys.path) - self.addCleanup(sys.modules.pop, 'initmod') + def test_addsitedir_start_and_pth_different_names(self): + # Different basenames: .start doesn't suppress .pth import warnings. + self._make_start("os.path:join\n", name='foo') + self._make_pth("import sys\n", name='bar') + with self.assertWarns(DeprecationWarning): + site.addsitedir(self.sitedir, set()) + + def test_addsitedir_start_alphabetical_order(self): + # Multiple .start files are discovered alphabetically. + self._make_start("os.path:join\n", name='zzz') + self._make_start("os.path:exists\n", name='aaa') site.addsitedir(self.sitedir, set()) - import initmod - self.assertTrue(initmod.loaded) - - def test_addsitedir_alphabetical_order(self): - # Multiple .site.toml files are processed alphabetically. - dir_a = os.path.join(self.tmpdir, 'aaa') - dir_b = os.path.join(self.tmpdir, 'bbb') - os.mkdir(dir_a) - os.mkdir(dir_b) - - # Create zzz.site.toml first, then aaa.site.toml - self._make_site_toml("""\ -[paths] -dirs = ['bbb'] -""", name='zzz') - self._make_site_toml("""\ -[paths] -dirs = ['aaa'] -""", name='aaa') - + # aaa.start is processed before zzz.start + entries = [entry for _, entry in site._pending_entrypoints] + idx_a = entries.index('os.path:exists') + idx_z = entries.index('os.path:join') + self.assertLess(idx_a, idx_z) + + def test_addsitedir_dotfile_start_ignored(self): + # .start files starting with '.' are skipped. 
+ self._make_start("os.path:join\n", name='.hidden') site.addsitedir(self.sitedir, set()) - # Both should be in sys.path; aaa before bbb since aaa.site.toml is - # processed before zzz.site.toml - idx_a = sys.path.index(dir_a) - idx_b = sys.path.index(dir_b) - self.assertLess(idx_a, idx_b) + self.assertEqual(site._pending_entrypoints, []) if __name__ == "__main__": From 845471632674d82c80d4750ded0554bac6527094 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 11:39:16 -0700 Subject: [PATCH 06/12] Fix phase ordering --- Lib/site.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index 46a5ca5cdd5c12..d84b557be98da6 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -382,15 +382,8 @@ def addsitedir(sitedir, known_paths=None): except OSError: return - # Phase 1: Discover .start files and accumulate their entry points. - start_names = sorted( - name for name in names - if name.endswith(".start") and not name.startswith(".") - ) - for name in start_names: - _read_start_file(sitedir, name) - - # Phase 2: Read .pth files, accumulating paths and import lines. + # The following phases are defined by PEP 829. + # Phases 1-3: Read .pth files, accumulating paths and import lines. pth_names = sorted( name for name in names if name.endswith(".pth") and not name.startswith(".") @@ -398,6 +391,16 @@ def addsitedir(sitedir, known_paths=None): for name in pth_names: _read_pth_file(sitedir, name, known_paths) + # Phases 6-7: Discover .start files and accumulate their entry points. + # Import lines from .pth files with a matching .start file are discarded + # at flush time by _exec_imports(). + start_names = sorted( + name for name in names + if name.endswith(".start") and not name.startswith(".") + ) + for name in start_names: + _read_start_file(sitedir, name) + # If standalone call (not from main()), flush immediately # so the caller sees the effect. 
if reset: From 3e99f25cdc8e235ab8638eee94625189620c19c9 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 16:39:16 -0700 Subject: [PATCH 07/12] Fix docs and tests for PEP 829 --- Doc/library/site.rst | 68 +++++++++- Lib/test/test_site.py | 280 +++++++++++++++++++++++++++++++++--------- 2 files changed, 286 insertions(+), 62 deletions(-) diff --git a/Doc/library/site.rst b/Doc/library/site.rst index 04895ae4ec524b..c6ee449d8cfe45 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -96,6 +96,18 @@ with ``import`` (followed by space or tab) are executed. The :file:`.pth` files are now decoded by UTF-8 at first and then by the :term:`locale encoding` if it fails. +.. versionchanged:: 3.15 + Lines starting with ``import`` are deprecated. During the deprecation + period, such lines are still executed, but a diagnostic message is + emitted when the :option:`-v` flag is given. If a :file:`{name}.start` + file with the same base name exists, ``import`` lines are silently + ignored. See :ref:`site-start-files` and :pep:`829`. + +.. versionchanged:: 3.15 + Errors on individual lines no longer abort processing of the rest of + the file. Each error is reported and the remaining lines continue to + be processed. + .. index:: single: package triple: path; configuration; file @@ -131,6 +143,47 @@ directory precedes the :file:`foo` directory because :file:`bar.pth` comes alphabetically before :file:`foo.pth`; and :file:`spam` is omitted because it is not mentioned in either path configuration file. +.. _site-start-files: + +Startup entry points (:file:`.start` files) +-------------------------------------------- + +.. versionadded:: 3.15 + +A startup entry point file is a file whose name has the form +:file:`{name}.start` and exists in one of the site-packages directories +described above. Each file specifies entry points to be called during +interpreter startup, using the ``pkg.mod:callable`` syntax understood by +:func:`pkgutil.resolve_name`. 
+ +Each non-blank line that does not begin with ``#`` must contain an entry +point reference in the form ``pkg.mod:callable``. The colon and callable +portion are mandatory. Each callable is invoked with no arguments, and +any return value is discarded. + +:file:`.start` files are processed after all :file:`.pth` path extensions +have been applied to :data:`sys.path`, ensuring that paths are available +before any startup code runs. Within each site-packages directory, files +are sorted alphabetically by filename. + +Unlike :data:`sys.path` extensions from :file:`.pth` files, duplicate entry +points are **not** deduplicated --- if an entry point appears more than once, +it will be called more than once. + +If an exception occurs during resolution or invocation of an entry point, +a traceback is printed to :data:`sys.stderr` and processing continues with +the remaining entry points. + +See :pep:`829` for the full specification. + +.. note:: + + If a :file:`{name}.start` file exists alongside a :file:`{name}.pth` + file with the same base name, any ``import`` lines in the :file:`.pth` + file are ignored in favour of the entry points in the :file:`.start` + file. + + :mod:`!sitecustomize` --------------------- @@ -238,8 +291,19 @@ Module contents .. function:: addsitedir(sitedir, known_paths=None) - Add a directory to sys.path and process its :file:`.pth` files. Typically - used in :mod:`sitecustomize` or :mod:`usercustomize` (see above). + Add a directory to sys.path and process its :file:`.pth` and + :file:`.start` files. Typically used in :mod:`sitecustomize` or + :mod:`usercustomize` (see above). + + The *known_paths* argument is an optional set of case-normalized paths + used to prevent duplicate :data:`sys.path` entries. When ``None`` (the + default), the set is built from the current :data:`sys.path`. + + .. versionchanged:: 3.15 + Also processes :file:`.start` files. See :ref:`site-start-files`. 
+ All :file:`.pth` and :file:`.start` files are now read and + accumulated before any path extensions, ``import`` line execution, + or entry point invocations take place. .. function:: getsitepackages() diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 87c3957b66b2ba..d45eb067eaed78 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -926,15 +926,23 @@ class StartFileTests(unittest.TestCase): """Tests for .start file processing (PEP 829).""" def setUp(self): - self.sys_path = sys.path[:] + self.enterContext(import_helper.DirsOnSysPath()) self.tmpdir = self.sitedir = self.enterContext(os_helper.temp_dir()) - self.saved_pending = site._pending_entrypoints.copy() + # Save and clear all pending dicts. + self.saved_entrypoints = site._pending_entrypoints.copy() + self.saved_syspaths = site._pending_syspaths.copy() + self.saved_importexecs = site._pending_importexecs.copy() site._pending_entrypoints.clear() + site._pending_syspaths.clear() + site._pending_importexecs.clear() def tearDown(self): - sys.path[:] = self.sys_path site._pending_entrypoints.clear() - site._pending_entrypoints.update(self.saved_pending) + site._pending_entrypoints.update(self.saved_entrypoints) + site._pending_syspaths.clear() + site._pending_syspaths.update(self.saved_syspaths) + site._pending_importexecs.clear() + site._pending_importexecs.update(self.saved_importexecs) def _make_start(self, content, name='testpkg'): """Write a .start file and return its basename.""" @@ -952,51 +960,118 @@ def _make_pth(self, content, name='testpkg'): f.write(content) return basename + def _all_entrypoints(self): + """Flatten _pending_entrypoints dict into a list of (filename, entry) tuples.""" + result = [] + for filename, entries in site._pending_entrypoints.items(): + for entry in entries: + result.append((filename, entry)) + return result + # --- _read_start_file tests --- def test_read_start_file_basic(self): - name = self._make_start("os.path:join\n") - entries = 
site._read_start_file(self.sitedir, name) - self.assertEqual(entries, ['os.path:join']) + self._make_start("os.path:join\n", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + fullname = os.path.join(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints[fullname], ['os.path:join']) def test_read_start_file_multiple_entries(self): - name = self._make_start("os.path:join\nos.path:exists\n") - entries = site._read_start_file(self.sitedir, name) - self.assertEqual(entries, ['os.path:join', 'os.path:exists']) + self._make_start("os.path:join\nos.path:exists\n", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + fullname = os.path.join(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints[fullname], + ['os.path:join', 'os.path:exists']) def test_read_start_file_comments_and_blanks(self): - name = self._make_start("# a comment\n\nos.path:join\n \n") - entries = site._read_start_file(self.sitedir, name) - self.assertEqual(entries, ['os.path:join']) + self._make_start("# a comment\n\nos.path:join\n \n", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + fullname = os.path.join(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints[fullname], ['os.path:join']) def test_read_start_file_missing_colon_skipped(self): # Entry points without the mandatory colon are skipped. 
- name = self._make_start("os.path\nos.path:join\n") - entries = site._read_start_file(self.sitedir, name) - self.assertEqual(entries, ['os.path:join']) + self._make_start("os.path\nos.path:join\n", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + fullname = os.path.join(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints[fullname], ['os.path:join']) def test_read_start_file_empty(self): - name = self._make_start("") - entries = site._read_start_file(self.sitedir, name) - self.assertEqual(entries, []) + self._make_start("", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints, {}) def test_read_start_file_comments_only(self): - name = self._make_start("# just a comment\n# another\n") - entries = site._read_start_file(self.sitedir, name) - self.assertEqual(entries, []) + self._make_start("# just a comment\n# another\n", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints, {}) def test_read_start_file_nonexistent(self): - entries = site._read_start_file(self.tmpdir, 'nonexistent.start') - self.assertEqual(entries, []) + site._read_start_file(self.tmpdir, 'nonexistent.start') + self.assertEqual(site._pending_entrypoints, {}) @unittest.skipUnless(hasattr(os, 'chflags'), 'test needs os.chflags()') def test_read_start_file_hidden_flags(self): - name = self._make_start("os.path:join\n") - filepath = os.path.join(self.tmpdir, name) + self._make_start("os.path:join\n", name='foo') + filepath = os.path.join(self.tmpdir, 'foo.start') st = os.stat(filepath) os.chflags(filepath, st.st_flags | stat.UF_HIDDEN) - entries = site._read_start_file(self.sitedir, name) - self.assertEqual(entries, []) + site._read_start_file(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints, {}) + + def test_read_start_file_duplicates_not_deduplicated(self): + # PEP 829: duplicate entry points are NOT deduplicated. 
+ self._make_start("os.path:join\nos.path:join\n", name='foo') + site._read_start_file(self.sitedir, 'foo.start') + fullname = os.path.join(self.sitedir, 'foo.start') + self.assertEqual(site._pending_entrypoints[fullname], + ['os.path:join', 'os.path:join']) + + # --- _read_pth_file tests --- + + def test_read_pth_file_paths(self): + subdir = os.path.join(self.sitedir, 'mylib') + os.mkdir(subdir) + self._make_pth("mylib\n", name='foo') + site._read_pth_file(self.sitedir, 'foo.pth', set()) + fullname = os.path.join(self.sitedir, 'foo.pth') + self.assertIn(subdir, site._pending_syspaths[fullname]) + + def test_read_pth_file_imports_collected(self): + self._make_pth("import sys\n", name='foo') + site._read_pth_file(self.sitedir, 'foo.pth', set()) + fullname = os.path.join(self.sitedir, 'foo.pth') + self.assertEqual(site._pending_importexecs[fullname], ['import sys']) + + def test_read_pth_file_comments_and_blanks(self): + self._make_pth("# comment\n\n \n", name='foo') + site._read_pth_file(self.sitedir, 'foo.pth', set()) + self.assertEqual(site._pending_syspaths, {}) + self.assertEqual(site._pending_importexecs, {}) + + def test_read_pth_file_deduplication(self): + subdir = os.path.join(self.sitedir, 'mylib') + os.mkdir(subdir) + known_paths = set() + self._make_pth("mylib\n", name='a') + self._make_pth("mylib\n", name='b') + site._read_pth_file(self.sitedir, 'a.pth', known_paths) + site._read_pth_file(self.sitedir, 'b.pth', known_paths) + # Only one entry across both files. + all_dirs = [] + for dirs in site._pending_syspaths.values(): + all_dirs.extend(dirs) + self.assertEqual(all_dirs.count(subdir), 1) + + def test_read_pth_file_bad_line_continues(self): + # PEP 829: errors on individual lines don't abort the file. 
+ subdir = os.path.join(self.sitedir, 'goodpath') + os.mkdir(subdir) + self._make_pth("abc\x00def\ngoodpath\n", name='foo') + with captured_stderr(): + site._read_pth_file(self.sitedir, 'foo.pth', set()) + fullname = os.path.join(self.sitedir, 'foo.pth') + self.assertIn(subdir, site._pending_syspaths.get(fullname, [])) # --- _execute_start_entrypoints tests --- @@ -1015,7 +1090,7 @@ def startup(): sys.path.insert(0, self.sitedir) self.addCleanup(sys.modules.pop, 'epmod', None) fullname = os.path.join(self.sitedir, 'epmod.start') - site._pending_entrypoints.append((fullname, 'epmod:startup')) + site._pending_entrypoints[fullname] = ['epmod:startup'] site._execute_start_entrypoints() import epmod self.assertTrue(epmod.called) @@ -1023,14 +1098,11 @@ def startup(): def test_execute_entrypoints_import_error(self): # Import error prints traceback but continues. fullname = os.path.join(self.sitedir, 'bad.start') - site._pending_entrypoints.append( - (fullname, 'nosuchmodule_xyz:func')) - site._pending_entrypoints.append( - (fullname, 'os.path:join')) + site._pending_entrypoints[fullname] = [ + 'nosuchmodule_xyz:func', 'os.path:join'] with captured_stderr() as err: site._execute_start_entrypoints() self.assertIn('nosuchmodule_xyz', err.getvalue()) - self.assertIn('Traceback', err.getvalue()) # os.path:join should still have been called (no exception for it) def test_execute_entrypoints_callable_error(self): @@ -1046,12 +1118,70 @@ def fail(): sys.path.insert(0, self.sitedir) self.addCleanup(sys.modules.pop, 'badmod', None) fullname = os.path.join(self.sitedir, 'badmod.start') - site._pending_entrypoints.append((fullname, 'badmod:fail')) + site._pending_entrypoints[fullname] = ['badmod:fail'] with captured_stderr() as err: site._execute_start_entrypoints() self.assertIn('RuntimeError', err.getvalue()) self.assertIn('boom', err.getvalue()) + def test_execute_entrypoints_duplicates_called_twice(self): + # PEP 829: duplicate entry points execute multiple times. 
+ mod_dir = os.path.join(self.sitedir, 'countmod') + os.mkdir(mod_dir) + init_file = os.path.join(mod_dir, '__init__.py') + with open(init_file, 'w') as f: + f.write("""\ +call_count = 0 +def bump(): + global call_count + call_count += 1 +""") + sys.path.insert(0, self.sitedir) + self.addCleanup(sys.modules.pop, 'countmod', None) + fullname = os.path.join(self.sitedir, 'countmod.start') + site._pending_entrypoints[fullname] = [ + 'countmod:bump', 'countmod:bump'] + site._execute_start_entrypoints() + import countmod + self.assertEqual(countmod.call_count, 2) + + # --- _exec_imports tests --- + + def test_exec_imports_suppressed_by_matching_start(self): + # Import lines from foo.pth are suppressed when foo.start exists. + pth_fullname = os.path.join(self.sitedir, 'foo.pth') + start_fullname = os.path.join(self.sitedir, 'foo.start') + site._pending_importexecs[pth_fullname] = ['import sys'] + site._pending_entrypoints[start_fullname] = ['os.path:join'] + # Should not exec the import line; no error expected. + site._exec_imports() + + def test_exec_imports_not_suppressed_by_different_start(self): + # Import lines from foo.pth are NOT suppressed by bar.start. + pth_fullname = os.path.join(self.sitedir, 'foo.pth') + start_fullname = os.path.join(self.sitedir, 'bar.start') + site._pending_importexecs[pth_fullname] = ['import sys'] + site._pending_entrypoints[start_fullname] = ['os.path:join'] + # Should execute the import line without error. 
+ site._exec_imports() + + # --- _extend_syspath tests --- + + def test_extend_syspath_existing_dir(self): + subdir = os.path.join(self.sitedir, 'extlib') + os.mkdir(subdir) + site._pending_syspaths['test.pth'] = [subdir] + site._extend_syspath() + self.assertIn(subdir, sys.path) + + def test_extend_syspath_nonexistent_dir(self): + nosuch = os.path.join(self.sitedir, 'nosuchdir') + site._pending_syspaths['test.pth'] = [nosuch] + with captured_stderr() as err: + site._extend_syspath() + self.assertNotIn(nosuch, sys.path) + self.assertIn('does not exist', err.getvalue()) + # --- addsitedir integration tests --- def test_addsitedir_discovers_start_files(self): @@ -1059,24 +1189,21 @@ def test_addsitedir_discovers_start_files(self): self._make_start("os.path:join\n", name='foo') site.addsitedir(self.sitedir, set()) fullname = os.path.join(self.sitedir, 'foo.start') - self.assertIn((fullname, 'os.path:join'), - site._pending_entrypoints) + self.assertIn('os.path:join', site._pending_entrypoints[fullname]) def test_addsitedir_start_suppresses_pth_imports(self): - # When foo.start exists, import lines in foo.pth are silently skipped. + # When foo.start exists, import lines in foo.pth are skipped + # at flush time by _exec_imports(). self._make_start("os.path:join\n", name='foo') self._make_pth("import sys\n", name='foo') - # No DeprecationWarning should be emitted - import warnings - with warnings.catch_warnings(): - warnings.simplefilter("error", DeprecationWarning) - site.addsitedir(self.sitedir, set()) - - def test_addsitedir_pth_import_warns_without_start(self): - # Without a matching .start file, import lines emit DeprecationWarning. - self._make_pth("import sys\n", name='foo') - with self.assertWarns(DeprecationWarning): - site.addsitedir(self.sitedir, set()) + site.addsitedir(self.sitedir, set()) + pth_fullname = os.path.join(self.sitedir, 'foo.pth') + start_fullname = os.path.join(self.sitedir, 'foo.start') + # Import line was collected... 
+ self.assertIn('import sys', + site._pending_importexecs.get(pth_fullname, [])) + # ...but _exec_imports() will skip it because foo.start exists. + site._exec_imports() def test_addsitedir_pth_paths_still_work_with_start(self): # Path lines in .pth files still work even when a .start file exists. @@ -1085,31 +1212,64 @@ def test_addsitedir_pth_paths_still_work_with_start(self): self._make_start("os.path:join\n", name='foo') self._make_pth("mylib\n", name='foo') site.addsitedir(self.sitedir, set()) - self.assertIn(subdir, sys.path) - - def test_addsitedir_start_and_pth_different_names(self): - # Different basenames: .start doesn't suppress .pth import warnings. - self._make_start("os.path:join\n", name='foo') - self._make_pth("import sys\n", name='bar') - with self.assertWarns(DeprecationWarning): - site.addsitedir(self.sitedir, set()) + fullname = os.path.join(self.sitedir, 'foo.pth') + self.assertIn(subdir, site._pending_syspaths.get(fullname, [])) def test_addsitedir_start_alphabetical_order(self): # Multiple .start files are discovered alphabetically. self._make_start("os.path:join\n", name='zzz') self._make_start("os.path:exists\n", name='aaa') site.addsitedir(self.sitedir, set()) - # aaa.start is processed before zzz.start - entries = [entry for _, entry in site._pending_entrypoints] + all_entries = self._all_entrypoints() + entries = [entry for _, entry in all_entries] idx_a = entries.index('os.path:exists') idx_z = entries.index('os.path:join') self.assertLess(idx_a, idx_z) + def test_addsitedir_pth_before_start(self): + # PEP 829: .pth files are scanned before .start files. + # Create a .pth and .start with the same basename; verify + # the .pth data is collected before .start data. + subdir = os.path.join(self.sitedir, 'mylib') + os.mkdir(subdir) + self._make_pth("mylib\n", name='foo') + self._make_start("os.path:join\n", name='foo') + site.addsitedir(self.sitedir, set()) + # Both should be collected. 
+ pth_fullname = os.path.join(self.sitedir, 'foo.pth') + start_fullname = os.path.join(self.sitedir, 'foo.start') + self.assertIn(subdir, site._pending_syspaths.get(pth_fullname, [])) + self.assertIn('os.path:join', + site._pending_entrypoints.get(start_fullname, [])) + def test_addsitedir_dotfile_start_ignored(self): # .start files starting with '.' are skipped. self._make_start("os.path:join\n", name='.hidden') site.addsitedir(self.sitedir, set()) - self.assertEqual(site._pending_entrypoints, []) + self.assertEqual(site._pending_entrypoints, {}) + + def test_addsitedir_standalone_flushes(self): + # When called with known_paths=None (standalone), addsitedir + # flushes immediately so the caller sees the effect. + subdir = os.path.join(self.sitedir, 'flushlib') + os.mkdir(subdir) + self._make_pth("flushlib\n", name='foo') + site.addsitedir(self.sitedir) # known_paths=None + self.assertIn(subdir, sys.path) + # Pending dicts should be cleared after flush. + self.assertEqual(site._pending_syspaths, {}) + + def test_addsitedir_internal_does_not_flush(self): + # When called with a known_paths set, addsitedir accumulates + # but does not flush. + subdir = os.path.join(self.sitedir, 'acclib') + os.mkdir(subdir) + self._make_pth("acclib\n", name='foo') + site.addsitedir(self.sitedir, set()) + # Path is pending, not yet on sys.path. 
+ self.assertNotIn(subdir, sys.path) + fullname = os.path.join(self.sitedir, 'foo.pth') + self.assertIn(subdir, site._pending_syspaths.get(fullname, [])) if __name__ == "__main__": From e1eaffb0a2e70c91437e48c7c7b999838206513a Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 16:40:48 -0700 Subject: [PATCH 08/12] Trim the out-of-date site.py docstring --- Lib/site.py | 70 +++++++++++++++-------------------------------------- 1 file changed, 19 insertions(+), 51 deletions(-) diff --git a/Lib/site.py b/Lib/site.py index d84b557be98da6..df8db33e30343b 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -18,57 +18,25 @@ it is also checked for site-packages (sys.base_prefix and sys.base_exec_prefix will always be the "real" prefixes of the Python installation). If "pyvenv.cfg" (a bootstrap configuration file) contains -the key "include-system-site-packages" is set to "true" -(case-insensitive), the system-level prefixes will still also be -searched for site-packages; otherwise they won't. If the system-level -prefixes are not included then the user site prefixes are also implicitly -not searched for site-packages. - -All of the resulting site-specific directories, if they exist, are -appended to sys.path, and also inspected for path configuration -files. - -A path configuration file is a file whose name has the form -.pth; its contents are additional directories (one per line) -to be added to sys.path. Non-existing directories (or -non-directories) are never added to sys.path; no directory is added to -sys.path more than once. Blank lines and lines beginning with -'#' are skipped. Lines starting with 'import' are executed. - -For example, suppose sys.prefix and sys.exec_prefix are set to -/usr/local and there is a directory /usr/local/lib/python2.5/site-packages -with three subdirectories, foo, bar and spam, and two path -configuration files, foo.pth and bar.pth. 
Assume foo.pth contains the -following: - - # foo package configuration - foo - bar - bletch - -and bar.pth contains: - - # bar package configuration - bar - -Then the following directories are added to sys.path, in this order: - - /usr/local/lib/python2.5/site-packages/bar - /usr/local/lib/python2.5/site-packages/foo - -Note that bletch is omitted because it doesn't exist; bar precedes foo -because bar.pth comes alphabetically before foo.pth; and spam is -omitted because it is not mentioned in either path configuration file. - -The readline module is also automatically configured to enable -completion for systems that support it. This can be overridden in -sitecustomize, usercustomize or PYTHONSTARTUP. Starting Python in -isolated mode (-I) disables automatic readline configuration. - -After these operations, an attempt is made to import a module -named sitecustomize, which can perform arbitrary additional -site-specific customizations. If this import fails with an -ImportError exception, it is silently ignored. +the key "include-system-site-packages" set to "true" (case-insensitive), +the system-level prefixes will still also be searched for site-packages; +otherwise they won't. + +Two kinds of configuration files are processed in each site-packages +directory: + +- .pth files extend sys.path with additional directories (one per + line). Lines starting with "import" are deprecated (see PEP 829). + +- .start files specify startup entry points using the + pkg.mod:callable syntax. These are resolved via pkgutil.resolve_name() + and called with no arguments. + +All .pth path extensions are applied before any .start entry points are +executed, ensuring that paths are available before startup code runs. 
+ +See the documentation for the site module for full details: +https://docs.python.org/3/library/site.html """ import sys From caab85cd66d8600536cd14fd0ca5d8e9c649113a Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 16:46:03 -0700 Subject: [PATCH 09/12] Fix some tests --- Lib/test/test_site.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index d45eb067eaed78..a8c83405794297 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -177,24 +177,22 @@ def test_addpackage_empty_lines(self): def test_addpackage_import_bad_pth_file(self): # Issue 5258 + # A .pth line with null bytes should not add anything to sys.path. pth_dir, pth_fn = self.make_pth("abc\x00def\n") - with captured_stderr() as err_out: - self.assertFalse(site.addpackage(pth_dir, pth_fn, set())) - self.maxDiff = None - self.assertEqual(err_out.getvalue(), "") + site.addpackage(pth_dir, pth_fn, set()) for path in sys.path: if isinstance(path, str): self.assertNotIn("abc\x00def", path) def test_addsitedir(self): - # Same tests for test_addpackage since addsitedir() essentially just - # calls _read_pth_file() for every .pth file in the directory + # addsitedir() reads .pth files and, when called standalone + # (known_paths=None), flushes paths and import lines immediately. 
pth_file = PthFile() pth_file.cleanup(prep=True) # Make sure that nothing is pre-existing # that is tested for try: pth_file.create() - site.addsitedir(pth_file.base_dir, set()) + site.addsitedir(pth_file.base_dir) self.pth_file_tests(pth_file) finally: pth_file.cleanup() From e393af01920576ca289cea328e2f144c2d879c27 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 16:49:16 -0700 Subject: [PATCH 10/12] A couple more improvements --- Doc/library/site.rst | 1 + Lib/test/test_site.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/site.rst b/Doc/library/site.rst index c6ee449d8cfe45..ff232807da2aa2 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -372,5 +372,6 @@ value greater than 2 if there is an error. .. seealso:: * :pep:`370` -- Per user site-packages directory + * :pep:`829` -- Startup entry points and the deprecation of import lines in ``.pth`` files * :ref:`sys-path-init` -- The initialization of :data:`sys.path`. diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index a8c83405794297..2c6ac4ca0c26cb 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -1005,7 +1005,8 @@ def test_read_start_file_comments_only(self): self.assertEqual(site._pending_entrypoints, {}) def test_read_start_file_nonexistent(self): - site._read_start_file(self.tmpdir, 'nonexistent.start') + with captured_stderr(): + site._read_start_file(self.tmpdir, 'nonexistent.start') self.assertEqual(site._pending_entrypoints, {}) @unittest.skipUnless(hasattr(os, 'chflags'), 'test needs os.chflags()') From 9b0b9773966019023cebb2d3a66a2cd56e17d6df Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 21:11:01 -0700 Subject: [PATCH 11/12] Remove unused import --- Lib/test/test_site.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index 2c6ac4ca0c26cb..5d8df4cf486307 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -19,7 +19,6 @@ import 
glob import io import os -import re import shutil import stat import subprocess From 1804f68138e7ca63ae42cfd7bbe16886e6b4f88f Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 15 Apr 2026 21:47:41 -0700 Subject: [PATCH 12/12] Blurb It --- .../Library/2026-04-15-21-46-52.gh-issue-148641.-aoFyC.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-04-15-21-46-52.gh-issue-148641.-aoFyC.rst diff --git a/Misc/NEWS.d/next/Library/2026-04-15-21-46-52.gh-issue-148641.-aoFyC.rst b/Misc/NEWS.d/next/Library/2026-04-15-21-46-52.gh-issue-148641.-aoFyC.rst new file mode 100644 index 00000000000000..53779676fd100c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-15-21-46-52.gh-issue-148641.-aoFyC.rst @@ -0,0 +1,3 @@ +:pep:`829` (package startup configuration files) implements a new format +``.start`` parallel to ``.pth`` files, to replace ``import`` +lines in the latter.