From 75e047a4ca9f96e1faf5bc67b5a9df1d96f3f830 Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 21:59:15 -0400 Subject: [PATCH 1/7] update auth token argument Signed-off-by: Andrea Fasoli --- fms_mo/dq.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fms_mo/dq.py b/fms_mo/dq.py index eb49bc30..e7d644a2 100644 --- a/fms_mo/dq.py +++ b/fms_mo/dq.py @@ -88,8 +88,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, - "torchscript": True, + "token": True if model_args.use_auth_token else None, "attn_implementation": attn_implementation, } config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) @@ -97,7 +96,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args): "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": True if model_args.use_auth_token else None, } tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, **tokenizer_kwargs @@ -121,7 +120,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args): config=config, cache_dir=model_args.cache_dir, revision="main", - use_auth_token=True if model_args.use_auth_token else None, + token=True if model_args.use_auth_token else None, torch_dtype=torch_dtype, device_map=model_args.device_map, low_cpu_mem_usage=bool(model_args.device_map), From 2aa5f84b090ff002d62c1becac6958dee53da2f9 Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 21:59:57 -0400 Subject: [PATCH 2/7] update torchscript arg to BertModel Signed-off-by: Andrea Fasoli --- tests/models/conftest.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/models/conftest.py b/tests/models/conftest.py index 0a831cc8..e88bd6bc 100644 --- a/tests/models/conftest.py +++ b/tests/models/conftest.py @@ -39,6 +39,7 @@ import pytest import torch import torch.nn.functional as F +import transformers # Local # fms_mo imports @@ -1302,6 +1303,10 @@ def model_bert(): Returns: transformers.models.bert.modeling_bert.BertModel: BERT model """ + # torchscript parameter removed in transformers 5.0 + transformers_version = tuple(int(x) for x in transformers.__version__.split(".")[:2]) + if transformers_version >= (5, 0): + return BertModel.from_pretrained("google-bert/bert-base-uncased") return BertModel.from_pretrained("google-bert/bert-base-uncased", torchscript=True) @@ -1313,6 +1318,12 @@ def model_bert_eager(): Returns: transformers.models.bert.modeling_bert.BertModel: BERT model """ + # torchscript parameter removed in transformers 5.0 + transformers_version = tuple(int(x) for x in transformers.__version__.split(".")[:2]) + if transformers_version >= (5, 0): + return BertModel.from_pretrained( + "google-bert/bert-base-uncased", attn_implementation="eager" + ) return BertModel.from_pretrained( "google-bert/bert-base-uncased", torchscript=True, attn_implementation="eager" ) From e432635b0d52db9653f6e6e2dd53d21c95c1cb5f Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 22:02:14 -0400 Subject: [PATCH 3/7] upgrade requirements for transformers and fix tests Signed-off-by: Andrea Fasoli --- pyproject.toml | 3 ++- tox.ini | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8381b40c..77b12876 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dynamic = ["version"] dependencies = [ "numpy>=1.26.4,<2.3.0", "accelerate>=0.20.3,!=0.34,<1.11", -"transformers>=4.45,<4.58", +"transformers>4.45,<5.9", "torch>=2.2.0,<2.11.0", "tqdm>=4.66.2,<5.0", "datasets>=3.0.0,<5.0", @@ -36,6 +36,7 @@ dependencies = [ [project.optional-dependencies] examples = ["ninja>=1.11.1.1,<2.0", "evaluate", "huggingface_hub"] fp8 = ["llmcompressor", "torchao==0.11"] # FP8 matmul on CPU needs a fix before advancing torchao > 0.11 +fp8-infer = ["torchao==0.11"] gptq = ["Cython", "gptqmodel>=1.7.3"] mx = ["microxcaling>=1.1"] opt = ["fms-model-optimizer[fp8, gptq, mx]"] diff --git a/tox.ini b/tox.ini index f01bd4c5..53a917c2 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,7 @@ description = run tests (unit, unitcov) extras = dev test + torchvision package = wheel wheel_build_env = pkg deps = From f5b7225446e01caab1b3c7095034b5c8418b2e75 Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 22:07:27 -0400 Subject: [PATCH 4/7] revert torchvision requirement in testing Signed-off-by: Andrea Fasoli --- tests/models/test_qmodelprep.py | 8 ++++++++ tox.ini | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/models/test_qmodelprep.py b/tests/models/test_qmodelprep.py index e6a89b55..476aa404 100644 --- a/tests/models/test_qmodelprep.py +++ b/tests/models/test_qmodelprep.py @@ -272,6 +272,10 @@ def test_vit_dynamo( qmodule_error(model_vit, 2, 36) +@pytest.mark.skipif( + not available_packages["torchvision"], + reason="Requires torchvision", +) def test_resnet18( model_resnet18, batch_resnet18, @@ -290,6 +294,10 @@ def test_resnet18( qmodule_error(model_resnet18, 4, 17) +@pytest.mark.skipif( + not available_packages["torchvision"], + reason="Requires torchvision", +) def test_vit_base( model_vit_base, batch_vit_base, diff --git a/tox.ini b/tox.ini index 53a917c2..f01bd4c5 100644 --- a/tox.ini +++ b/tox.ini @@ -7,7 +7,6 @@ description = run tests (unit, unitcov) extras = dev test - torchvision package = wheel wheel_build_env = pkg deps = From 7512c5505371d4c346af0226c2b8937b0e4207d4 Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 22:18:58 -0400 Subject: [PATCH 5/7] fix tokenizer file detection at test time Signed-off-by: Andrea Fasoli --- tests/build/test_launch_script.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/build/test_launch_script.py b/tests/build/test_launch_script.py index 32b0ae80..8fc8af23 100644 --- a/tests/build/test_launch_script.py +++ b/tests/build/test_launch_script.py @@ -22,6 +22,7 @@ # Third Party import pytest import torch +import transformers # First Party from build.accelerate_launch import main @@ -248,7 +249,12 @@ def _validate_quantization_output(base_dir, quant_method): """Check whether the tokenizer and quantized model artifacts exists""" # Check tokenizer files exist assert os.path.exists(os.path.join(base_dir, "tokenizer.json")) is True - assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True + + # special_tokens_map.json is optional in transformers 5.0+ for some tokenizers + transformers_version = tuple(int(x) for x in transformers.__version__.split(".")[:2]) + if transformers_version < (5, 0): + assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True + assert os.path.exists(os.path.join(base_dir, "tokenizer_config.json")) is True # assert os.path.exists(os.path.join(base_dir, "tokenizer.model")) is True From 471902d11b4aeb9213a42f79a53e7fddecf96b24 Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 22:21:44 -0400 Subject: [PATCH 6/7] clean up Signed-off-by: Andrea Fasoli --- tests/build/test_launch_script.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/build/test_launch_script.py b/tests/build/test_launch_script.py index 8fc8af23..f3e27c9c 100644 --- a/tests/build/test_launch_script.py +++ b/tests/build/test_launch_script.py @@ -242,7 +242,6 @@ def _validate_termination_files_when_quantization_succeeds(base_dir): """Check whether the termination log and .complete files exists""" assert os.path.exists(os.path.join(base_dir, "/termination-log")) is False assert os.path.exists(os.path.join(base_dir, ".complete")) is True - # assert os.path.exists(os.path.join(base_dir, training_logs_filename)) is True def _validate_quantization_output(base_dir, quant_method): @@ -256,7 +255,6 @@ def _validate_quantization_output(base_dir, quant_method): assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True assert os.path.exists(os.path.join(base_dir, "tokenizer_config.json")) is True - # assert os.path.exists(os.path.join(base_dir, "tokenizer.model")) is True # Check quantized model files exist if quant_method == "gptq": From d9f67cbb6b97b988365487ddfb687c597a9c61f4 Mon Sep 17 00:00:00 2001 From: Andrea Fasoli Date: Mon, 11 May 2026 22:32:36 -0400 Subject: [PATCH 7/7] ruff Signed-off-by: Andrea Fasoli --- tests/build/test_launch_script.py | 4 +++- tests/models/conftest.py | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/build/test_launch_script.py b/tests/build/test_launch_script.py index f3e27c9c..6f632ca9 100644 --- a/tests/build/test_launch_script.py +++ b/tests/build/test_launch_script.py @@ -250,7 +250,9 @@ def _validate_quantization_output(base_dir, quant_method): assert os.path.exists(os.path.join(base_dir, "tokenizer.json")) is True # special_tokens_map.json is optional in transformers 5.0+ for some tokenizers - transformers_version = tuple(int(x) for x in transformers.__version__.split(".")[:2]) + transformers_version = tuple( + int(x) for x in transformers.__version__.split(".")[:2] + ) if transformers_version < (5, 0): assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True diff --git a/tests/models/conftest.py b/tests/models/conftest.py index e88bd6bc..5594bff7 100644 --- a/tests/models/conftest.py +++ b/tests/models/conftest.py @@ -1304,7 +1304,9 @@ def model_bert(): transformers.models.bert.modeling_bert.BertModel: BERT model """ # torchscript parameter removed in transformers 5.0 - transformers_version = tuple(int(x) for x in transformers.__version__.split(".")[:2]) + transformers_version = tuple( + int(x) for x in transformers.__version__.split(".")[:2] + ) if transformers_version >= (5, 0): return BertModel.from_pretrained("google-bert/bert-base-uncased") return BertModel.from_pretrained("google-bert/bert-base-uncased", torchscript=True) @@ -1319,7 +1321,9 @@ def model_bert_eager(): transformers.models.bert.modeling_bert.BertModel: BERT model """ # torchscript parameter removed in transformers 5.0 - transformers_version = tuple(int(x) for x in transformers.__version__.split(".")[:2]) + transformers_version = tuple( + int(x) for x in transformers.__version__.split(".")[:2] + ) if transformers_version >= (5, 0): return BertModel.from_pretrained( "google-bert/bert-base-uncased", attn_implementation="eager"