Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion google/genai/_extra_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,57 @@ def append_chunk_contents(
return contents


# Allow-list of application/* MIME types that are kept as guessed even when the
# file content is textual: the Gemini API accepts them directly, so they must
# not be rewritten to text/plain.
_TEXT_COMPATIBLE_APPLICATION_MIME_TYPES = frozenset(
    (
        'application/json',
        'application/xml',
        'application/rtf',
    )
)


def _is_utf8_text_file(fs_path: str, sample_size: int = 8192) -> bool:
  """Returns True if the file starts with UTF-8 text and no NUL bytes.

  Only the leading `sample_size` bytes are inspected. A multi-byte character
  that is cut in half by the sample boundary is tolerated, but only when the
  sample really was truncated (the read filled the whole sample). Bytes that
  can never be part of valid UTF-8, or a character left incomplete at the
  actual end of the file, make the file non-text.

  Args:
    fs_path: Path of the file to sniff.
    sample_size: Maximum number of leading bytes to read from the file.

  Returns:
    True if the sampled bytes contain no NUL byte and decode as UTF-8; False
    otherwise, including when the file cannot be opened or read.
  """
  import codecs  # Function-scope import keeps this helper self-contained.

  try:
    with open(fs_path, 'rb') as f:
      sample = f.read(sample_size)
  except OSError:
    return False
  if b'\x00' in sample:
    return False
  # A short (or unbounded) read means the sample ends at end-of-file, so an
  # incomplete trailing sequence there is a real error, not a boundary split.
  reached_eof = (
      sample_size is None or sample_size < 0 or len(sample) < sample_size
  )
  # With final=False the incremental decoder buffers an incomplete trailing
  # sequence instead of raising, while still rejecting invalid bytes anywhere
  # in the sample (unlike a "last three bytes" position check).
  decoder = codecs.getincrementaldecoder('utf-8')()
  try:
    decoder.decode(sample, final=reached_eof)
  except UnicodeDecodeError:
    return False
  return True


def _resolve_upload_mime_type(fs_path: str) -> Optional[str]:
  """Guesses the upload MIME type, falling back to text/plain for text files.

  mimetypes maps many source extensions to types the API rejects (e.g. .cu ->
  application/cu-seeme); see github.com/googleapis/python-genai/issues/744.

  The extension-based guess is returned unchanged when it is an image/audio/
  video type, a text/* type whose subtype is not an `x-` or `vnd.` one, or one
  of the allow-listed application/* types. For every other guess (including no
  guess at all) the file content is sniffed once: UTF-8 text becomes
  'text/plain', anything else keeps the original guess.

  Args:
    fs_path: Path of the file about to be uploaded.

  Returns:
    The MIME type to upload with, or None when the extension is unknown and
    the content does not look like UTF-8 text.
  """
  mime_type, _ = mimetypes.guess_type(fs_path)
  if mime_type is not None:
    main_type, _, sub_type = mime_type.partition('/')
    if main_type in ('image', 'audio', 'video'):
      return mime_type
    if main_type == 'text' and not sub_type.startswith(('x-', 'vnd.')):
      return mime_type
    if mime_type in _TEXT_COMPATIBLE_APPLICATION_MIME_TYPES:
      return mime_type
  # Every remaining case is decided by the content, so read the file at most
  # once here instead of sniffing it separately for application/* guesses.
  if _is_utf8_text_file(fs_path):
    return 'text/plain'
  return mime_type


def prepare_resumable_upload(
file: Union[str, os.PathLike[str], io.IOBase],
user_http_options: Optional[types.HttpOptionsOrDict] = None,
Expand Down Expand Up @@ -639,7 +690,7 @@ def prepare_resumable_upload(
raise FileNotFoundError(f'{file} is not a valid file path.')
size_bytes = os.path.getsize(fs_path)
if mime_type is None:
mime_type, _ = mimetypes.guess_type(fs_path)
mime_type = _resolve_upload_mime_type(fs_path)
if mime_type is None:
raise ValueError(
'Unknown mime type: Could not determine the mimetype for your'
Expand Down
126 changes: 126 additions & 0 deletions google/genai/tests/files/test_upload_mime_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


"""Tests for upload MIME type resolution (issue #744)."""


import pytest

from ... import _extra_utils


# Binary fixture: the 8-byte PNG file signature followed by the first bytes of
# an IHDR chunk. It contains NUL bytes, so it exercises the binary (non-text)
# path.
_PNG_HEADER = b'\x89PNG\r\n\x1a\n' + b'\x00\x00\x00\rIHDR'


def _write(tmp_path, name, data):
  """Writes `data` (bytes) to `tmp_path / name` and returns the path as str."""
  target = tmp_path / name
  target.write_bytes(data)
  return str(target)


@pytest.mark.parametrize(
    'guessed',
    [
        'application/cu-seeme',  # .cu (the case reported in #744)
        'text/x-python',  # .py
        'text/vnd.trolltech.linguist',  # .ts
        None,  # unknown extension
    ],
)
def test_textual_file_with_unsupported_guess_falls_back_to_text_plain(
    tmp_path, monkeypatch, guessed
):
  """A UTF-8 source file resolves to text/plain for each unsupported guess."""
  source_path = _write(tmp_path, 'source.bin', b'int main() { return 0; }\n')

  def fake_guess_type(*args, **kwargs):
    # Stand-in for mimetypes.guess_type that always yields the parametrized
    # guess, independent of the file name.
    return (guessed, None)

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fake_guess_type)

  resolved = _extra_utils._resolve_upload_mime_type(source_path)

  assert resolved == 'text/plain'


def test_standard_text_type_is_preserved(tmp_path, monkeypatch):
  """A text/* guess without an x- or vnd. subtype is returned unchanged."""
  markdown_path = _write(tmp_path, 'doc.md', b'# title\n')

  def fake_guess_type(*args, **kwargs):
    return ('text/markdown', None)

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fake_guess_type)

  resolved = _extra_utils._resolve_upload_mime_type(markdown_path)

  assert resolved == 'text/markdown'


def test_text_compatible_application_type_is_preserved(tmp_path, monkeypatch):
  """An allow-listed application/* guess survives even for textual content."""
  json_path = _write(tmp_path, 'data.json', b'{"a": 1}\n')

  def fake_guess_type(*args, **kwargs):
    return ('application/json', None)

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fake_guess_type)

  resolved = _extra_utils._resolve_upload_mime_type(json_path)

  assert resolved == 'application/json'


def test_binary_file_keeps_guessed_type(tmp_path, monkeypatch):
  """An image/* guess for a binary file is passed through untouched."""
  png_path = _write(tmp_path, 'image.png', _PNG_HEADER)

  def fake_guess_type(*args, **kwargs):
    return ('image/png', None)

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fake_guess_type)

  resolved = _extra_utils._resolve_upload_mime_type(png_path)

  assert resolved == 'image/png'


def test_binary_file_with_unknown_guess_is_left_unset(tmp_path, monkeypatch):
  """Binary content with no guessable type resolves to None."""
  # Binary data with an unguessable type must NOT be coerced to text/plain.
  blob_path = _write(tmp_path, 'blob.bin', _PNG_HEADER + b'\x00\x01\x02')

  def fake_guess_type(*args, **kwargs):
    return (None, None)

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fake_guess_type)

  resolved = _extra_utils._resolve_upload_mime_type(blob_path)

  assert resolved is None


def test_prepare_resumable_upload_uses_text_plain_for_unsupported_text(
    tmp_path, monkeypatch
):
  """prepare_resumable_upload reports text/plain for an unsupported text guess.

  Checks both the returned MIME type and the upload content-type header.
  """
  cuda_path = _write(tmp_path, 'kernel.cu', b'__global__ void k() {}\n')

  def fake_guess_type(*args, **kwargs):
    return ('application/cu-seeme', None)

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fake_guess_type)

  prepared = _extra_utils.prepare_resumable_upload(cuda_path)
  http_options, size_bytes, mime_type = prepared

  assert mime_type == 'text/plain'
  assert size_bytes > 0
  content_type = http_options.headers['X-Goog-Upload-Header-Content-Type']
  assert content_type == 'text/plain'


def test_user_provided_mime_type_takes_precedence(tmp_path, monkeypatch):
  """An explicit user_mime_type is returned without any MIME type guessing."""
  cuda_path = _write(tmp_path, 'kernel.cu', b'__global__ void k() {}\n')

  def fail_if_called(*args, **kwargs):
    # Guessing must be skipped entirely when the caller supplies mime_type.
    return pytest.fail('guess_type should not be called')

  monkeypatch.setattr(_extra_utils.mimetypes, 'guess_type', fail_if_called)

  prepared = _extra_utils.prepare_resumable_upload(
      cuda_path, user_mime_type='text/x-cuda'
  )
  mime_type = prepared[2]

  assert mime_type == 'text/x-cuda'
Loading