Skip to content

Commit

Permalink
Merge pull request #192 from srowen/bf16flag
Browse files (browse the repository at this point in the history)
Set bf16 flags correctly for a10/a100
  • Loading branch information
srowen authored Jun 7, 2023
2 parents: fd1a733 + 6609c85 (commit a357706)
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 3 deletions.
5 changes: 4 additions & 1 deletion config/a100_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
+ "fp16": {
+ "enabled": false
+ },
"bf16": {
- "enabled": "auto"
+ "enabled": true
},
"optimizer": {
"type": "AdamW",
Expand Down
5 changes: 4 additions & 1 deletion config/a10_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
+ "fp16": {
+ "enabled": false
+ },
"bf16": {
- "enabled": "auto"
+ "enabled": true
},
"optimizer": {
"type": "AdamW",
Expand Down
3 changes: 3 additions & 0 deletions config/v100_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"fp16": {
"enabled": true
},
+ "bf16": {
+ "enabled": false
+ },
"optimizer": {
"type": "AdamW",
"params": {
Expand Down
8 changes: 7 additions & 1 deletion train_dolly.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,11 @@
num_gpus = int(num_gpus)
num_gpus_flag = f"--num_gpus={num_gpus}"

+ if gpu_family == "v100":
+     bf16_flag = "--bf16 false"
+ else:
+     bf16_flag = "--bf16 true"
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# COMMAND ----------
Expand All @@ -184,7 +189,8 @@
--eval-steps 50 \
--warmup-steps 50 \
--test-size 200 \
- --lr 5e-6
+ --lr 5e-6 \
+ {bf16_flag}

# COMMAND ----------

Expand Down

0 comments on commit a357706

Please sign in to comment.