|
| 1 | +import twinkle |
| 2 | +from peft import LoraConfig |
| 3 | + |
| 4 | +from twinkle import DeviceMesh, get_device_placement, get_logger |
| 5 | +from twinkle.dataloader import DataLoader |
| 6 | +from twinkle.dataset import Dataset, DatasetMeta |
| 7 | +from twinkle.model import MegatronModel |
| 8 | +from twinkle.preprocessor import SelfCognitionProcessor |
| 9 | + |
# Model / data configuration for the NPU smoke run.
MODEL_ID = 'ms://Qwen/Qwen3-4B'
DATASET_ID = 'ms://swift/self-cognition'
DATASET_SLICE = range(256)  # only the first 256 samples — this is a smoke test, not a full train
BATCH_SIZE = 8
MAX_STEPS = 10  # number of steps actually executed before the loop breaks

# Keep the same 8-card TP/PP/DP layout as the GPU reference script, but run it
# through the NPU backend to validate Megatron + MindSpeed integration.
device_mesh = DeviceMesh.from_sizes(dp_size=2, tp_size=2, pp_size=2, device_type='npu')
twinkle.initialize(mode='local', global_device_mesh=device_mesh)

logger = get_logger()
| 22 | + |
| 23 | + |
def build_dataset():
    """Load the self-cognition slice, attach the text template, and encode it.

    Returns the encoded twinkle ``Dataset`` ready to be wrapped in a DataLoader.
    """
    ds = Dataset(dataset_meta=DatasetMeta(DATASET_ID, data_slice=DATASET_SLICE))
    # Qwen3-4B is a text-only model, so use the base template instead of the VL template.
    ds.set_template('Template', model_id=MODEL_ID, max_length=512)
    ds.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
    ds.encode()
    return ds
| 31 | + |
| 32 | + |
def build_model(total_steps: int, *, lr: float = 1e-4, lora_r: int = 8,
                lora_alpha: int = 32, warmup_steps: int = 2):
    """Create a LoRA-adapted Megatron model with optimizer and LR schedule.

    The hyperparameters were previously hard-coded; they are now keyword-only
    parameters whose defaults reproduce the original behavior exactly, so the
    existing ``build_model(len(dataloader))`` call site is unaffected.

    Args:
        total_steps: Total optimizer steps; used as the LR decay horizon.
        lr: Peak learning rate for the adapter parameters.
        lora_r: LoRA rank.
        lora_alpha: LoRA scaling factor.
        warmup_steps: Linear warmup steps before decay begins.

    Returns:
        The configured ``MegatronModel`` (project type).
    """
    model = MegatronModel(model_id=MODEL_ID)
    lora_config = LoraConfig(r=lora_r, lora_alpha=lora_alpha, target_modules='all-linear')
    model.add_adapter_to_model('default', lora_config)
    model.set_optimizer(optimizer_cls='default', lr=lr)
    model.set_lr_scheduler(scheduler_cls='default', lr_warmup_steps=warmup_steps,
                           lr_decay_steps=total_steps)
    return model
| 40 | + |
| 41 | + |
def train():
    """Run a short Megatron-on-NPU smoke train: a few LoRA steps, metrics logged."""
    dataset = build_dataset()
    loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, num_workers=0)
    model = build_model(len(loader))

    # Surface the placement and training config up front for debugging.
    logger.info(get_device_placement())
    logger.info(model.get_train_configs())
    logger.info(f'Total steps: {len(loader)}, validating {MAX_STEPS} steps')

    done = 0
    for batch in loader:
        if done == MAX_STEPS:
            break
        model.forward_backward(inputs=batch)
        model.clip_grad_and_step()
        metric = model.calculate_metric(is_training=True)
        done += 1
        logger.info(f'[NPU smoke] step {done}/{MAX_STEPS}, metric: {metric}')
| 58 | + |
| 59 | + |
# Script entry point: run the smoke-test training loop.
if __name__ == '__main__':
    train()
0 commit comments