diff --git a/fms_mo/dq.py b/fms_mo/dq.py index eb49bc30..e7d644a2 100644 --- a/fms_mo/dq.py +++ b/fms_mo/dq.py @@ -88,8 +88,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, - "torchscript": True, + "token": True if model_args.use_auth_token else None, "attn_implementation": attn_implementation, } config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) @@ -97,7 +96,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args): "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": True if model_args.use_auth_token else None, } tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, **tokenizer_kwargs @@ -121,7 +120,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args): config=config, cache_dir=model_args.cache_dir, revision="main", - use_auth_token=True if model_args.use_auth_token else None, + token=True if model_args.use_auth_token else None, torch_dtype=torch_dtype, device_map=model_args.device_map, low_cpu_mem_usage=bool(model_args.device_map), diff --git a/pyproject.toml b/pyproject.toml index 8381b40c..77b12876 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dynamic = ["version"] dependencies = [ "numpy>=1.26.4,<2.3.0", "accelerate>=0.20.3,!=0.34,<1.11", -"transformers>=4.45,<4.58", +"transformers>4.45,<5.9", "torch>=2.2.0,<2.11.0", "tqdm>=4.66.2,<5.0", "datasets>=3.0.0,<5.0", @@ -36,6 +36,7 @@ dependencies = [ [project.optional-dependencies] examples = ["ninja>=1.11.1.1,<2.0", "evaluate", "huggingface_hub"] fp8 = ["llmcompressor", "torchao==0.11"] # FP8 matmul on CPU needs a fix before advancing torchao > 0.11 +fp8-infer = ["torchao==0.11"] gptq = ["Cython", "gptqmodel>=1.7.3"] mx = ["microxcaling>=1.1"] opt = ["fms-model-optimizer[fp8, gptq, mx]"] diff --git a/tests/build/test_launch_script.py b/tests/build/test_launch_script.py index 32b0ae80..6f632ca9 100644 --- a/tests/build/test_launch_script.py +++ b/tests/build/test_launch_script.py @@ -22,6 +22,7 @@ # Third Party import pytest import torch +import transformers # First Party from build.accelerate_launch import main @@ -241,16 +242,21 @@ def _validate_termination_files_when_quantization_succeeds(base_dir): """Check whether the termination log and .complete files exists""" assert os.path.exists(os.path.join(base_dir, "/termination-log")) is False assert os.path.exists(os.path.join(base_dir, ".complete")) is True - # assert os.path.exists(os.path.join(base_dir, training_logs_filename)) is True def _validate_quantization_output(base_dir, quant_method): """Check whether the tokenizer and quantized model artifacts exists""" # Check tokenizer files exist assert os.path.exists(os.path.join(base_dir, "tokenizer.json")) is True - assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True + + # special_tokens_map.json is optional in transformers 5.0+ for some tokenizers + transformers_version = tuple( + int(x) for x in transformers.__version__.split(".")[:2] + ) + if transformers_version < (5, 0): + assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True + assert os.path.exists(os.path.join(base_dir, "tokenizer_config.json")) is True - # assert os.path.exists(os.path.join(base_dir, "tokenizer.model")) is True # Check quantized model files exist if quant_method == "gptq": diff --git a/tests/models/conftest.py b/tests/models/conftest.py index 0a831cc8..5594bff7 100644 --- a/tests/models/conftest.py +++ b/tests/models/conftest.py @@ -39,6 +39,7 @@ import pytest import torch import torch.nn.functional as F +import transformers # Local # fms_mo imports @@ -1302,6 +1303,12 @@ def model_bert(): Returns: transformers.models.bert.modeling_bert.BertModel: BERT model """ + # torchscript parameter removed in transformers 5.0 + transformers_version = tuple( + int(x) for x in transformers.__version__.split(".")[:2] + ) + if transformers_version >= (5, 0): + return BertModel.from_pretrained("google-bert/bert-base-uncased") return BertModel.from_pretrained("google-bert/bert-base-uncased", torchscript=True) @@ -1313,6 +1320,14 @@ def model_bert_eager(): Returns: transformers.models.bert.modeling_bert.BertModel: BERT model """ + # torchscript parameter removed in transformers 5.0 + transformers_version = tuple( + int(x) for x in transformers.__version__.split(".")[:2] + ) + if transformers_version >= (5, 0): + return BertModel.from_pretrained( + "google-bert/bert-base-uncased", attn_implementation="eager" + ) return BertModel.from_pretrained( "google-bert/bert-base-uncased", torchscript=True, attn_implementation="eager" ) diff --git a/tests/models/test_qmodelprep.py b/tests/models/test_qmodelprep.py index e6a89b55..476aa404 100644 --- a/tests/models/test_qmodelprep.py +++ b/tests/models/test_qmodelprep.py @@ -272,6 +272,10 @@ def test_vit_dynamo( qmodule_error(model_vit, 2, 36) +@pytest.mark.skipif( + not available_packages["torchvision"], + reason="Requires torchvision", +) def test_resnet18( model_resnet18, batch_resnet18, @@ -290,6 +294,10 @@ def test_resnet18( qmodule_error(model_resnet18, 4, 17) +@pytest.mark.skipif( + not available_packages["torchvision"], + reason="Requires torchvision", +) def test_vit_base( model_vit_base, batch_vit_base,