diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 15e8c0a437bfd9..a3b6ca25743ff1 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -23,6 +23,10 @@ read_mime_types(file) -- parse one file, return a dictionary or None """ +lazy import os +lazy import posixpath +lazy import urllib.parse + try: from _winapi import _mimetypes_read_windows_registry except ImportError: @@ -121,12 +125,13 @@ def guess_type(self, url, strict=True): Optional 'strict' argument when False adds a bunch of commonly found, but non-standard types. """ - # Lazy import to improve module import time - import os - import urllib.parse - # TODO: Deprecate accepting file paths (in particular path-like objects). url = os.fspath(url) + # Without a ':' the argument cannot carry a URL scheme, so it cannot + # be a URL; skip the relatively expensive urlparse() in that case. + if isinstance(url, str) and ':' not in url: + return self.guess_file_type(url, strict=strict) + p = urllib.parse.urlparse(url) if p.scheme and len(p.scheme) > 1: scheme = p.scheme @@ -153,9 +158,6 @@ def guess_type(self, url, strict=True): type = 'text/plain' return type, None # never compressed, so encoding is None - # Lazy import to improve module import time - import posixpath - return self._guess_file_type(url, strict, posixpath.splitext) def guess_file_type(self, path, *, strict=True): @@ -163,9 +165,6 @@ def guess_file_type(self, path, *, strict=True): Similar to guess_type(), but takes file path instead of URL. 
""" - # Lazy import to improve module import time - import os - path = os.fsdecode(path) path = os.path.splitdrive(path)[1] return self._guess_file_type(path, strict, os.path.splitext) @@ -412,9 +411,6 @@ def init(files=None): else: db = _db - # Lazy import to improve module import time - import os - for file in files: if os.path.isfile(file): db.read(file) diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 1a3b49b87b121f..4ed82fa164a379 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -375,6 +375,15 @@ def test_url(self): result = self.db.guess_type('http://example.com/host.html?q=x.tar') self.assertSequenceEqual(result, ('text/html', None)) + def test_path_with_colon_but_no_url_scheme(self): + # A ':' that does not introduce a real URL scheme -- a single-letter + # Windows drive, or a colon elsewhere in the name -- is treated as a + # file path rather than a URL. + eq = self.assertSequenceEqual + eq(self.db.guess_type("c:fake.html"), ("text/html", None)) + eq(self.db.guess_type(r"c:\dir\fake.html"), ("text/html", None)) + eq(self.db.guess_type("note 12:30.txt"), ("text/plain", None)) + def test_guess_all_types(self): # First try strict. Use a set here for testing the results because if # test_urllib2 is run before test_mimetypes, global state is modified diff --git a/Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst b/Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst new file mode 100644 index 00000000000000..fc40f8304fe5dc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-02-15-45-02.gh-issue-150821.yKimWm.rst @@ -0,0 +1,2 @@ +Speed up :func:`mimetypes.guess_type` for plain file paths: URL parsing is +skipped when a string argument contains no ``:``. Patch by Bernát Gábor.