Skip to content

Commit

Permalink
gguf-py, convert-hf : conversion support for FLAN-T5 model family
Browse files (browse the repository at this point in the history)
  • Loading branch information
sszymczy committed Jun 23, 2024
1 parent da4f661 commit 47a0a0c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
11 changes: 11 additions & 0 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2834,6 +2834,17 @@ def set_gguf_parameters(self):
self.gguf_writer.add_decoder_start_token_id(self.hparams["decoder_start_token_id"])
self.gguf_writer.add_file_type(self.ftype)

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
    """Translate one source tensor into the (name, tensor) pairs to be written.

    Args:
        data_torch: The tensor payload; it is passed through untouched.
        name: The tensor name as it appears in the source checkpoint.
        bid: Block index; not used by this implementation.

    Returns:
        A one-element list pairing the mapped tensor name with ``data_torch``,
        or an empty list for ``"decoder.embed_tokens.weight"``, which is
        deliberately dropped.
    """
    del bid  # unused

    # Common path: every tensor except the duplicated decoder embedding is
    # renamed through the tensor-name map and forwarded as-is.
    if name != "decoder.embed_tokens.weight":
        mapped_name = self.map_tensor_name(name)
        return [(mapped_name, data_torch)]

    # flan-t5-xxl ships "decoder.embed_tokens.weight", which is the same tensor
    # as "shared.weight". It is dropped here so that an unnecessary unmapped
    # tensor does not cause conversion errors.
    logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
    return []


###### CONVERSION LOGIC ######

Expand Down
7 changes: 7 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ class MODEL_TENSOR(IntEnum):
DEC_CROSS_ATTN_OUT = auto()
DEC_CROSS_ATTN_REL_B = auto()
DEC_FFN_NORM = auto()
DEC_FFN_GATE = auto()
DEC_FFN_DOWN = auto()
DEC_FFN_UP = auto()
DEC_OUTPUT_NORM = auto()
Expand All @@ -228,6 +229,7 @@ class MODEL_TENSOR(IntEnum):
ENC_ATTN_OUT = auto()
ENC_ATTN_REL_B = auto()
ENC_FFN_NORM = auto()
ENC_FFN_GATE = auto()
ENC_FFN_DOWN = auto()
ENC_FFN_UP = auto()
ENC_OUTPUT_NORM = auto()
Expand Down Expand Up @@ -333,6 +335,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
Expand All @@ -343,6 +346,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
Expand Down Expand Up @@ -868,6 +872,7 @@ class MODEL_TENSOR(IntEnum):
],
MODEL_ARCH.T5: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.DEC_ATTN_NORM,
MODEL_TENSOR.DEC_ATTN_Q,
MODEL_TENSOR.DEC_ATTN_K,
Expand All @@ -881,6 +886,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.DEC_CROSS_ATTN_OUT,
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B,
MODEL_TENSOR.DEC_FFN_NORM,
MODEL_TENSOR.DEC_FFN_GATE,
MODEL_TENSOR.DEC_FFN_DOWN,
MODEL_TENSOR.DEC_FFN_UP,
MODEL_TENSOR.DEC_OUTPUT_NORM,
Expand All @@ -891,6 +897,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.ENC_ATTN_OUT,
MODEL_TENSOR.ENC_ATTN_REL_B,
MODEL_TENSOR.ENC_FFN_NORM,
MODEL_TENSOR.ENC_FFN_GATE,
MODEL_TENSOR.ENC_FFN_DOWN,
MODEL_TENSOR.ENC_FFN_UP,
MODEL_TENSOR.ENC_OUTPUT_NORM,
Expand Down
14 changes: 12 additions & 2 deletions gguf-py/gguf/tensor_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,13 @@ class TensorNameMap:
"decoder.block.{bid}.layer.2.layer_norm", # t5
),

MODEL_TENSOR.DEC_FFN_GATE: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
),

MODEL_TENSOR.DEC_FFN_UP: (
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
),

MODEL_TENSOR.DEC_FFN_DOWN: (
Expand Down Expand Up @@ -507,8 +512,13 @@ class TensorNameMap:
"encoder.block.{bid}.layer.1.layer_norm", # t5
),

MODEL_TENSOR.ENC_FFN_GATE: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
),

MODEL_TENSOR.ENC_FFN_UP: (
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
),

MODEL_TENSOR.ENC_FFN_DOWN: (
Expand Down

0 comments on commit 47a0a0c

Please sign in to comment.