From 41bde7aad26fcce500709d4c04abb81a74e5bd72 Mon Sep 17 00:00:00 2001 From: Shashi Kumar Reddy Date: Sun, 14 Jun 2026 00:40:30 +0530 Subject: [PATCH 1/2] fix(files): fall back to text/plain for unsupported text mime types (#744) --- google/genai/_extra_utils.py | 53 +++++++- .../tests/files/test_upload_mime_type.py | 126 ++++++++++++++++++ 2 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 google/genai/tests/files/test_upload_mime_type.py diff --git a/google/genai/_extra_utils.py b/google/genai/_extra_utils.py index da91c56c0..9e5d243aa 100644 --- a/google/genai/_extra_utils.py +++ b/google/genai/_extra_utils.py @@ -603,6 +603,57 @@ def append_chunk_contents( return contents +# application/* MIME types the Gemini API accepts even for textual content, so +# they should not be downgraded to text/plain. +_TEXT_COMPATIBLE_APPLICATION_MIME_TYPES = frozenset({ + 'application/json', + 'application/xml', + 'application/rtf', +}) + + +def _is_utf8_text_file(fs_path: str, sample_size: int = 8192) -> bool: + """Returns True if the file starts with UTF-8 text and no NUL bytes.""" + try: + with open(fs_path, 'rb') as f: + sample = f.read(sample_size) + except OSError: + return False + if b'\x00' in sample: + return False + try: + sample.decode('utf-8') + except UnicodeDecodeError as e: + # Tolerate a multi-byte character split across the sample boundary. + return e.start >= len(sample) - 3 + return True + + +def _resolve_upload_mime_type(fs_path: str) -> Optional[str]: + """Guesses the upload MIME type, falling back to text/plain for text files. + + mimetypes maps many source extensions to types the API rejects (e.g. .cu -> + application/cu-seeme); see github.com/googleapis/python-genai/issues/744. + """ + mime_type, _ = mimetypes.guess_type(fs_path) + if mime_type is not None: + main_type, _, sub_type = mime_type.partition('/') + if main_type in ('image', 'audio', 'video'): + return mime_type + if main_type == 'text' and not ( + sub_type.startswith('x-') or sub_type.startswith('vnd.') + ): + return mime_type + if main_type == 'application' and ( + mime_type in _TEXT_COMPATIBLE_APPLICATION_MIME_TYPES + or not _is_utf8_text_file(fs_path) + ): + return mime_type + if _is_utf8_text_file(fs_path): + return 'text/plain' + return mime_type + + def prepare_resumable_upload( file: Union[str, os.PathLike[str], io.IOBase], user_http_options: Optional[types.HttpOptionsOrDict] = None, @@ -639,7 +690,7 @@ def prepare_resumable_upload( raise FileNotFoundError(f'{file} is not a valid file path.') size_bytes = os.path.getsize(fs_path) if mime_type is None: - mime_type, _ = mimetypes.guess_type(fs_path) + mime_type = _resolve_upload_mime_type(fs_path) if mime_type is None: raise ValueError( 'Unknown mime type: Could not determine the mimetype for your' diff --git a/google/genai/tests/files/test_upload_mime_type.py b/google/genai/tests/files/test_upload_mime_type.py new file mode 100644 index 000000000..35b13045a --- /dev/null +++ b/google/genai/tests/files/test_upload_mime_type.py @@ -0,0 +1,126 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +"""Tests for upload MIME type resolution (issue #744).""" + + +import pytest + +from ... import _extra_utils + + +# PNG file signature followed by some bytes, used to exercise the binary path. +_PNG_HEADER = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR' + + +def _write(tmp_path, name, data): + path = tmp_path / name + path.write_bytes(data) + return str(path) + + +@pytest.mark.parametrize( + 'guessed', + [ + 'application/cu-seeme', # .cu (the case reported in #744) + 'text/x-python', # .py + 'text/vnd.trolltech.linguist', # .ts + None, # unknown extension + ], +) +def test_textual_file_with_unsupported_guess_falls_back_to_text_plain( + tmp_path, monkeypatch, guessed +): + path = _write(tmp_path, 'source.bin', b'int main() { return 0; }\n') + monkeypatch.setattr( + _extra_utils.mimetypes, 'guess_type', lambda *a, **k: (guessed, None) + ) + + assert _extra_utils._resolve_upload_mime_type(path) == 'text/plain' + + +def test_standard_text_type_is_preserved(tmp_path, monkeypatch): + path = _write(tmp_path, 'doc.md', b'# title\n') + monkeypatch.setattr( + _extra_utils.mimetypes, 'guess_type', lambda *a, **k: ('text/markdown', None) + ) + + assert _extra_utils._resolve_upload_mime_type(path) == 'text/markdown' + + +def test_text_compatible_application_type_is_preserved(tmp_path, monkeypatch): + path = _write(tmp_path, 'data.json', b'{"a": 1}\n') + monkeypatch.setattr( + _extra_utils.mimetypes, 'guess_type', lambda *a, **k: ('application/json', None) + ) + + assert _extra_utils._resolve_upload_mime_type(path) == 'application/json' + + +def test_binary_file_keeps_guessed_type(tmp_path, monkeypatch): + path = _write(tmp_path, 'image.png', _PNG_HEADER) + monkeypatch.setattr( + _extra_utils.mimetypes, 'guess_type', lambda *a, **k: ('image/png', None) + ) + + assert _extra_utils._resolve_upload_mime_type(path) == 'image/png' + + +def test_binary_file_with_unknown_guess_is_left_unset(tmp_path, monkeypatch): + # A binary file whose type cannot be guessed must NOT be coerced to text. + path = _write(tmp_path, 'blob.bin', _PNG_HEADER + b'\x00\x01\x02') + monkeypatch.setattr( + _extra_utils.mimetypes, 'guess_type', lambda *a, **k: (None, None) + ) + + assert _extra_utils._resolve_upload_mime_type(path) is None + + +def test_prepare_resumable_upload_uses_text_plain_for_unsupported_text( + tmp_path, monkeypatch +): + path = _write(tmp_path, 'kernel.cu', b'__global__ void k() {}\n') + monkeypatch.setattr( + _extra_utils.mimetypes, + 'guess_type', + lambda *a, **k: ('application/cu-seeme', None), + ) + + http_options, size_bytes, mime_type = _extra_utils.prepare_resumable_upload( + path + ) + + assert mime_type == 'text/plain' + assert size_bytes > 0 + assert ( + http_options.headers['X-Goog-Upload-Header-Content-Type'] == 'text/plain' + ) + + +def test_user_provided_mime_type_takes_precedence(tmp_path, monkeypatch): + path = _write(tmp_path, 'kernel.cu', b'__global__ void k() {}\n') + # Should never be consulted when the user passes mime_type explicitly. + monkeypatch.setattr( + _extra_utils.mimetypes, + 'guess_type', + lambda *a, **k: pytest.fail('guess_type should not be called'), + ) + + _, _, mime_type = _extra_utils.prepare_resumable_upload( + path, user_mime_type='text/x-cuda' + ) + + assert mime_type == 'text/x-cuda' From 992c29b9d1a91f53a2591c4c312fb16abeb271a1 Mon Sep 17 00:00:00 2001 From: Shashi Kumar Reddy Date: Sun, 14 Jun 2026 00:52:36 +0530 Subject: [PATCH 2/2] chore: re-trigger CI