From 175764e62cfa00329b3f1914451c87d3ad227a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Tue, 2 Jun 2026 08:04:41 -0700 Subject: [PATCH 1/5] gh-150821: Skip URL parsing in mimetypes.guess_type() for file paths guess_type() parsed every argument as a URL before checking the extension, even for plain file paths that have no scheme. Detect the no-scheme case and go straight to extension lookup, avoiding urlparse() and its lazy import. Real URLs keep the full parsing path; results are unchanged. --- Lib/mimetypes.py | 6 +++++- .../Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 15e8c0a437bfd93..3382dda34bfb2fb 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -123,10 +123,14 @@ def guess_type(self, url, strict=True): """ # Lazy import to improve module import time import os - import urllib.parse # TODO: Deprecate accepting file paths (in particular path-like objects). url = os.fspath(url) + # A URL scheme requires a ':'; a plain file path (the common case) has + # none, so skip the relatively expensive urlparse() for it. + if isinstance(url, str) and ':' not in url: + return self.guess_file_type(url, strict=strict) + import urllib.parse p = urllib.parse.urlparse(url) if p.scheme and len(p.scheme) > 1: scheme = p.scheme diff --git a/Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst b/Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst new file mode 100644 index 000000000000000..fc40f8304fe5dc4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst @@ -0,0 +1,2 @@ +Speed up :func:`mimetypes.guess_type` for plain file paths by skipping URL +parsing when the argument has no scheme. Patch by Bernát Gábor. From 2ac8c38abf298b276faf538aaa9e43bdb55b6e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 07:05:03 -0700 Subject: [PATCH 2/5] Reword comment, tidy blank line, test colon-path branch Address review: frame the fast path as 'no colon means it cannot be a URL' (a file path may legitimately contain ':' on POSIX), add the blank line before the lazy import, and cover a ':'-containing argument that is not a URL (single-letter drive, colon in the name) reaching the file path branch. --- Lib/mimetypes.py | 5 +++-- Lib/test/test_mimetypes.py | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 3382dda34bfb2fb..8c4e78f53697f76 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -126,10 +126,11 @@ def guess_type(self, url, strict=True): # TODO: Deprecate accepting file paths (in particular path-like objects). url = os.fspath(url) - # A URL scheme requires a ':'; a plain file path (the common case) has - # none, so skip the relatively expensive urlparse() for it. + # Without a ':' the argument cannot carry a URL scheme, so it cannot + # be a URL; skip the relatively expensive urlparse() in that case. if isinstance(url, str) and ':' not in url: return self.guess_file_type(url, strict=strict) + import urllib.parse p = urllib.parse.urlparse(url) if p.scheme and len(p.scheme) > 1: diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 1a3b49b87b121f2..4ed82fa164a379b 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -375,6 +375,15 @@ def test_url(self): result = self.db.guess_type('http://example.com/host.html?q=x.tar') self.assertSequenceEqual(result, ('text/html', None)) + def test_path_with_colon_but_no_url_scheme(self): + # A ':' that does not introduce a real URL scheme -- a single-letter + # Windows drive, or a colon elsewhere in the name -- is treated as a + # file path rather than a URL. + eq = self.assertSequenceEqual + eq(self.db.guess_type("c:fake.html"), ("text/html", None)) + eq(self.db.guess_type(r"c:\dir\fake.html"), ("text/html", None)) + eq(self.db.guess_type("note 12:30.txt"), ("text/plain", None)) + def test_guess_all_types(self): # First try strict. Use a set here for testing the results because if # test_urllib2 is run before test_mimetypes, global state is modified From eb9d878260e908b571b3986809fa23285f7fe4f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 08:07:10 -0700 Subject: [PATCH 3/5] Use a module-level lazy import for urllib.parse Replace the in-function import with a top-level lazy import, keeping urllib.parse off the module import path while declaring it with the other imports. --- Lib/mimetypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 8c4e78f53697f76..3165ed8ccaa5091 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -33,6 +33,8 @@ except ImportError: _winreg = None +lazy import urllib.parse + __all__ = [ "knownfiles", "inited", "MimeTypes", "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension", @@ -131,7 +133,6 @@ def guess_type(self, url, strict=True): if isinstance(url, str) and ':' not in url: return self.guess_file_type(url, strict=strict) - import urllib.parse p = urllib.parse.urlparse(url) if p.scheme and len(p.scheme) > 1: scheme = p.scheme From a05f1d19905c4513e7b59f96bdc38eb080095261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 08:46:05 -0700 Subject: [PATCH 4/5] Re-run CI (flaky test_ssl on macOS) From e4aad498b1d12211d06d480db95df0dd6af29f36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 3 Jun 2026 12:44:25 -0700 Subject: [PATCH 5/5] Hoist os, posixpath and urllib.parse to module-level lazy imports Per review: place the lazy imports at the top of the module with the other imports, above the platform try/except blocks, instead of inside the functions, and drop the now-redundant 'Lazy import to improve module import time' comments. --- Lib/mimetypes.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 3165ed8ccaa5091..a3b6ca25743ff1f 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -23,6 +23,10 @@ read_mime_types(file) -- parse one file, return a dictionary or None """ +lazy import os +lazy import posixpath +lazy import urllib.parse + try: from _winapi import _mimetypes_read_windows_registry except ImportError: @@ -33,8 +37,6 @@ except ImportError: _winreg = None -lazy import urllib.parse - __all__ = [ "knownfiles", "inited", "MimeTypes", "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension", @@ -123,9 +125,6 @@ def guess_type(self, url, strict=True): Optional 'strict' argument when False adds a bunch of commonly found, but non-standard types. """ - # Lazy import to improve module import time - import os - # TODO: Deprecate accepting file paths (in particular path-like objects). url = os.fspath(url) # Without a ':' the argument cannot carry a URL scheme, so it cannot @@ -159,9 +158,6 @@ def guess_type(self, url, strict=True): type = 'text/plain' return type, None # never compressed, so encoding is None - # Lazy import to improve module import time - import posixpath - return self._guess_file_type(url, strict, posixpath.splitext) def guess_file_type(self, path, *, strict=True): @@ -169,9 +165,6 @@ def guess_file_type(self, path, *, strict=True): Similar to guess_type(), but takes file path instead of URL. """ - # Lazy import to improve module import time - import os - path = os.fsdecode(path) path = os.path.splitdrive(path)[1] return self._guess_file_type(path, strict, os.path.splitext) @@ -418,9 +411,6 @@ def init(files=None): else: db = _db - # Lazy import to improve module import time - import os - for file in files: if os.path.isfile(file): db.read(file)