
Commit 436b58c

update
1 parent 2fc2e86 commit 436b58c

2 files changed: +6 -6 lines changed

3.test_cases/torchtune/slurm/tutorials/e2e-llama3-70b-development/configs/full_finetune_distributed.yaml (+5 -5)

@@ -25,10 +25,9 @@ tokenizer:
 
 # Dataset
 dataset:
-  _component_: torchtune.datasets.wiki_text
-  train_on_input: True
+  _component_: torchtune.datasets.wikitext_dataset
 seed: null
-shuffle: True
+shuffle: False
 
 # Model Arguments
 model:
@@ -75,8 +74,8 @@ checkpointer:
 resume_from_checkpoint: False
 
 # Fine-tuning arguments
-batch_size: 2
-epochs: 3
+batch_size: 1
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
@@ -95,6 +94,7 @@ device: cuda
 # Memory management
 enable_activation_checkpointing: True
 memory_efficient_fsdp_wrap: True
+fsdp_cpu_offload: True
 
 # Reduced precision
 dtype: bf16
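
The dataset change above swaps out torchtune.datasets.wiki_text, which torchtune does not appear to ship, for the actual torchtune.datasets.wikitext_dataset builder, and drops train_on_input, presumably because a plain text-completion dataset has no prompt/response split to mask. As a rough illustration of how a _component_: dotted path in the YAML resolves to a Python callable (this sketches the general mechanism, not torchtune's exact config code; load_component is a hypothetical helper):

import importlib
from typing import Any, Callable

def load_component(dotted_path: str) -> Callable[..., Any]:
    """Resolve 'pkg.module.attr' via a module import plus an attribute lookup."""
    module_name, _, attr_name = dotted_path.rpartition(".")
    module = importlib.import_module(module_name)
    return getattr(module, attr_name)

# With the updated config, the dataset entry resolves roughly like:
#   builder = load_component("torchtune.datasets.wikitext_dataset")
#   dataset = builder(tokenizer=tokenizer)  # remaining YAML keys become kwargs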
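The new fsdp_cpu_offload: True flag plausibly maps onto PyTorch FSDP's parameter CPU offload, trading host-to-device traffic for GPU memory headroom, which matters for a 70B model even at batch_size: 1. A minimal sketch under that assumption (the recipe's actual wiring may differ; the Linear layer stands in for the real model):

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.distributed.fsdp import CPUOffload, FullyShardedDataParallel as FSDP

# Assumes launch via torchrun so the rank/world-size env vars are present.
dist.init_process_group(backend="nccl")
torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

model = nn.Linear(4096, 4096)  # placeholder for the Llama 3 70B model
# Keep parameters in host memory, streaming shards to the GPU as needed:
sharded = FSDP(model, cpu_offload=CPUOffload(offload_params=True))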

3.test_cases/torchtune/slurm/tutorials/e2e-llama3-70b-development/full_finetune_distributed.sbatch (+1 -1)

@@ -77,7 +77,7 @@ declare -a TORCHRUN_ARGS=(
     --rdzv_endpoint=$(hostname)
 )
 declare -a TRAIN_ARGS=(
-    --config ${PWD}/tutorials/e2e-llama3-70b-development/configs/lora_finetune_distributed.yaml
+    --config ${PWD}/tutorials/e2e-llama3-70b-development/configs/full_finetune_distributed.yaml
     tokenizer.path=${MODEL_PATH}/${HF_MODEL}/original/tokenizer.model
     checkpointer.checkpoint_dir=${MODEL_PATH}/${HF_MODEL}
     checkpointer.output_dir=${MODEL_PATH}/${HF_MODEL}-tuned
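
Every entry after --config in TRAIN_ARGS is a dotted key=value override applied on top of the YAML file, so the same config can be reused across model paths. A sketch of those merge semantics using OmegaConf's dotlist API, which is assumed here to mirror what torchtune's recipe parser does (details may differ; the path below is a placeholder for ${MODEL_PATH}/${HF_MODEL}/original/tokenizer.model):

from omegaconf import OmegaConf

# Base config named by the sbatch script's --config flag.
base = OmegaConf.load("configs/full_finetune_distributed.yaml")

# Dotted CLI-style overrides, as passed in TRAIN_ARGS.
overrides = OmegaConf.from_dotlist([
    "tokenizer.path=/path/to/model/original/tokenizer.model",
])

cfg = OmegaConf.merge(base, overrides)
print(cfg.tokenizer.path)  # the override wins over the YAML value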
