latest
7.4GB
PHOGPT-4B-CHAT: SUPERVISED FINE-TUNING VIETNAMESE CHAT MODEL _ w/8K Context size
194 Pulls Updated 6 months ago
a48c3891e2dc · 7.4GB
-
general.architecturempt
-
mpt.attention.head_count24
-
mpt.attention.layer_norm_epsilon1e-05
-
mpt.attention.max_alibi_bias8
-
mpt.block_count32
-
mpt.context_length8192
-
mpt.embedding_length3072
-
mpt.feed_forward_length12288
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id2
-
tokenizer.ggml.merges[á » á º Ġ t n g Ġ c ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id3
-
tokenizer.ggml.token_type[3 3 3 3 1 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <pad> ! ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightF16[3072 20480]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.attn_norm.biasF32[3072]
-
blk.0.attn_qkv.weightF16[3072 9216]
-
blk.0.attn_qkv.biasF32[9216]
-
blk.0.attn_output.weightF16[3072 3072]
-
blk.0.attn_output.biasF32[3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.ffn_norm.biasF32[3072]
-
blk.0.ffn_up.weightF16[3072 12288]
-
blk.0.ffn_up.biasF32[12288]
-
blk.0.ffn_down.weightF16[12288 3072]
-
blk.0.ffn_down.biasF32[3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.attn_norm.biasF32[3072]
-
blk.1.attn_qkv.weightF16[3072 9216]
-
blk.1.attn_qkv.biasF32[9216]
-
blk.1.attn_output.weightF16[3072 3072]
-
blk.1.attn_output.biasF32[3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.ffn_norm.biasF32[3072]
-
blk.1.ffn_up.weightF16[3072 12288]
-
blk.1.ffn_up.biasF32[12288]
-
blk.1.ffn_down.weightF16[12288 3072]
-
blk.1.ffn_down.biasF32[3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.attn_norm.biasF32[3072]
-
blk.2.attn_qkv.weightF16[3072 9216]
-
blk.2.attn_qkv.biasF32[9216]
-
blk.2.attn_output.weightF16[3072 3072]
-
blk.2.attn_output.biasF32[3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.ffn_norm.biasF32[3072]
-
blk.2.ffn_up.weightF16[3072 12288]
-
blk.2.ffn_up.biasF32[12288]
-
blk.2.ffn_down.weightF16[12288 3072]
-
blk.2.ffn_down.biasF32[3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.attn_norm.biasF32[3072]
-
blk.3.attn_qkv.weightF16[3072 9216]
-
blk.3.attn_qkv.biasF32[9216]
-
blk.3.attn_output.weightF16[3072 3072]
-
blk.3.attn_output.biasF32[3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.ffn_norm.biasF32[3072]
-
blk.3.ffn_up.weightF16[3072 12288]
-
blk.3.ffn_up.biasF32[12288]
-
blk.3.ffn_down.weightF16[12288 3072]
-
blk.3.ffn_down.biasF32[3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.attn_norm.biasF32[3072]
-
blk.4.attn_qkv.weightF16[3072 9216]
-
blk.4.attn_qkv.biasF32[9216]
-
blk.4.attn_output.weightF16[3072 3072]
-
blk.4.attn_output.biasF32[3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.ffn_norm.biasF32[3072]
-
blk.4.ffn_up.weightF16[3072 12288]
-
blk.4.ffn_up.biasF32[12288]
-
blk.4.ffn_down.weightF16[12288 3072]
-
blk.4.ffn_down.biasF32[3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.attn_norm.biasF32[3072]
-
blk.5.attn_qkv.weightF16[3072 9216]
-
blk.5.attn_qkv.biasF32[9216]
-
blk.5.attn_output.weightF16[3072 3072]
-
blk.5.attn_output.biasF32[3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.ffn_norm.biasF32[3072]
-
blk.5.ffn_up.weightF16[3072 12288]
-
blk.5.ffn_up.biasF32[12288]
-
blk.5.ffn_down.weightF16[12288 3072]
-
blk.5.ffn_down.biasF32[3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.attn_norm.biasF32[3072]
-
blk.6.attn_qkv.weightF16[3072 9216]
-
blk.6.attn_qkv.biasF32[9216]
-
blk.6.attn_output.weightF16[3072 3072]
-
blk.6.attn_output.biasF32[3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.ffn_norm.biasF32[3072]
-
blk.6.ffn_up.weightF16[3072 12288]
-
blk.6.ffn_up.biasF32[12288]
-
blk.6.ffn_down.weightF16[12288 3072]
-
blk.6.ffn_down.biasF32[3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.attn_norm.biasF32[3072]
-
blk.7.attn_qkv.weightF16[3072 9216]
-
blk.7.attn_qkv.biasF32[9216]
-
blk.7.attn_output.weightF16[3072 3072]
-
blk.7.attn_output.biasF32[3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.ffn_norm.biasF32[3072]
-
blk.7.ffn_up.weightF16[3072 12288]
-
blk.7.ffn_up.biasF32[12288]
-
blk.7.ffn_down.weightF16[12288 3072]
-
blk.7.ffn_down.biasF32[3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.attn_norm.biasF32[3072]
-
blk.8.attn_qkv.weightF16[3072 9216]
-
blk.8.attn_qkv.biasF32[9216]
-
blk.8.attn_output.weightF16[3072 3072]
-
blk.8.attn_output.biasF32[3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.ffn_norm.biasF32[3072]
-
blk.8.ffn_up.weightF16[3072 12288]
-
blk.8.ffn_up.biasF32[12288]
-
blk.8.ffn_down.weightF16[12288 3072]
-
blk.8.ffn_down.biasF32[3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.attn_norm.biasF32[3072]
-
blk.9.attn_qkv.weightF16[3072 9216]
-
blk.9.attn_qkv.biasF32[9216]
-
blk.9.attn_output.weightF16[3072 3072]
-
blk.9.attn_output.biasF32[3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.ffn_norm.biasF32[3072]
-
blk.9.ffn_up.weightF16[3072 12288]
-
blk.9.ffn_up.biasF32[12288]
-
blk.9.ffn_down.weightF16[12288 3072]
-
blk.9.ffn_down.biasF32[3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.attn_norm.biasF32[3072]
-
blk.10.attn_qkv.weightF16[3072 9216]
-
blk.10.attn_qkv.biasF32[9216]
-
blk.10.attn_output.weightF16[3072 3072]
-
blk.10.attn_output.biasF32[3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.ffn_norm.biasF32[3072]
-
blk.10.ffn_up.weightF16[3072 12288]
-
blk.10.ffn_up.biasF32[12288]
-
blk.10.ffn_down.weightF16[12288 3072]
-
blk.10.ffn_down.biasF32[3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.attn_norm.biasF32[3072]
-
blk.11.attn_qkv.weightF16[3072 9216]
-
blk.11.attn_qkv.biasF32[9216]
-
blk.11.attn_output.weightF16[3072 3072]
-
blk.11.attn_output.biasF32[3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.ffn_norm.biasF32[3072]
-
blk.11.ffn_up.weightF16[3072 12288]
-
blk.11.ffn_up.biasF32[12288]
-
blk.11.ffn_down.weightF16[12288 3072]
-
blk.11.ffn_down.biasF32[3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.attn_norm.biasF32[3072]
-
blk.12.attn_qkv.weightF16[3072 9216]
-
blk.12.attn_qkv.biasF32[9216]
-
blk.12.attn_output.weightF16[3072 3072]
-
blk.12.attn_output.biasF32[3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.ffn_norm.biasF32[3072]
-
blk.12.ffn_up.weightF16[3072 12288]
-
blk.12.ffn_up.biasF32[12288]
-
blk.12.ffn_down.weightF16[12288 3072]
-
blk.12.ffn_down.biasF32[3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.attn_norm.biasF32[3072]
-
blk.13.attn_qkv.weightF16[3072 9216]
-
blk.13.attn_qkv.biasF32[9216]
-
blk.13.attn_output.weightF16[3072 3072]
-
blk.13.attn_output.biasF32[3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.ffn_norm.biasF32[3072]
-
blk.13.ffn_up.weightF16[3072 12288]
-
blk.13.ffn_up.biasF32[12288]
-
blk.13.ffn_down.weightF16[12288 3072]
-
blk.13.ffn_down.biasF32[3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.attn_norm.biasF32[3072]
-
blk.14.attn_qkv.weightF16[3072 9216]
-
blk.14.attn_qkv.biasF32[9216]
-
blk.14.attn_output.weightF16[3072 3072]
-
blk.14.attn_output.biasF32[3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.ffn_norm.biasF32[3072]
-
blk.14.ffn_up.weightF16[3072 12288]
-
blk.14.ffn_up.biasF32[12288]
-
blk.14.ffn_down.weightF16[12288 3072]
-
blk.14.ffn_down.biasF32[3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.attn_norm.biasF32[3072]
-
blk.15.attn_qkv.weightF16[3072 9216]
-
blk.15.attn_qkv.biasF32[9216]
-
blk.15.attn_output.weightF16[3072 3072]
-
blk.15.attn_output.biasF32[3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.ffn_norm.biasF32[3072]
-
blk.15.ffn_up.weightF16[3072 12288]
-
blk.15.ffn_up.biasF32[12288]
-
blk.15.ffn_down.weightF16[12288 3072]
-
blk.15.ffn_down.biasF32[3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.attn_norm.biasF32[3072]
-
blk.16.attn_qkv.weightF16[3072 9216]
-
blk.16.attn_qkv.biasF32[9216]
-
blk.16.attn_output.weightF16[3072 3072]
-
blk.16.attn_output.biasF32[3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.ffn_norm.biasF32[3072]
-
blk.16.ffn_up.weightF16[3072 12288]
-
blk.16.ffn_up.biasF32[12288]
-
blk.16.ffn_down.weightF16[12288 3072]
-
blk.16.ffn_down.biasF32[3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.attn_norm.biasF32[3072]
-
blk.17.attn_qkv.weightF16[3072 9216]
-
blk.17.attn_qkv.biasF32[9216]
-
blk.17.attn_output.weightF16[3072 3072]
-
blk.17.attn_output.biasF32[3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.ffn_norm.biasF32[3072]
-
blk.17.ffn_up.weightF16[3072 12288]
-
blk.17.ffn_up.biasF32[12288]
-
blk.17.ffn_down.weightF16[12288 3072]
-
blk.17.ffn_down.biasF32[3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.attn_norm.biasF32[3072]
-
blk.18.attn_qkv.weightF16[3072 9216]
-
blk.18.attn_qkv.biasF32[9216]
-
blk.18.attn_output.weightF16[3072 3072]
-
blk.18.attn_output.biasF32[3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.ffn_norm.biasF32[3072]
-
blk.18.ffn_up.weightF16[3072 12288]
-
blk.18.ffn_up.biasF32[12288]
-
blk.18.ffn_down.weightF16[12288 3072]
-
blk.18.ffn_down.biasF32[3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.attn_norm.biasF32[3072]
-
blk.19.attn_qkv.weightF16[3072 9216]
-
blk.19.attn_qkv.biasF32[9216]
-
blk.19.attn_output.weightF16[3072 3072]
-
blk.19.attn_output.biasF32[3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.ffn_norm.biasF32[3072]
-
blk.19.ffn_up.weightF16[3072 12288]
-
blk.19.ffn_up.biasF32[12288]
-
blk.19.ffn_down.weightF16[12288 3072]
-
blk.19.ffn_down.biasF32[3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.attn_norm.biasF32[3072]
-
blk.20.attn_qkv.weightF16[3072 9216]
-
blk.20.attn_qkv.biasF32[9216]
-
blk.20.attn_output.weightF16[3072 3072]
-
blk.20.attn_output.biasF32[3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.ffn_norm.biasF32[3072]
-
blk.20.ffn_up.weightF16[3072 12288]
-
blk.20.ffn_up.biasF32[12288]
-
blk.20.ffn_down.weightF16[12288 3072]
-
blk.20.ffn_down.biasF32[3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.attn_norm.biasF32[3072]
-
blk.21.attn_qkv.weightF16[3072 9216]
-
blk.21.attn_qkv.biasF32[9216]
-
blk.21.attn_output.weightF16[3072 3072]
-
blk.21.attn_output.biasF32[3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.ffn_norm.biasF32[3072]
-
blk.21.ffn_up.weightF16[3072 12288]
-
blk.21.ffn_up.biasF32[12288]
-
blk.21.ffn_down.weightF16[12288 3072]
-
blk.21.ffn_down.biasF32[3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.attn_norm.biasF32[3072]
-
blk.22.attn_qkv.weightF16[3072 9216]
-
blk.22.attn_qkv.biasF32[9216]
-
blk.22.attn_output.weightF16[3072 3072]
-
blk.22.attn_output.biasF32[3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.ffn_norm.biasF32[3072]
-
blk.22.ffn_up.weightF16[3072 12288]
-
blk.22.ffn_up.biasF32[12288]
-
blk.22.ffn_down.weightF16[12288 3072]
-
blk.22.ffn_down.biasF32[3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.attn_norm.biasF32[3072]
-
blk.23.attn_qkv.weightF16[3072 9216]
-
blk.23.attn_qkv.biasF32[9216]
-
blk.23.attn_output.weightF16[3072 3072]
-
blk.23.attn_output.biasF32[3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.ffn_norm.biasF32[3072]
-
blk.23.ffn_up.weightF16[3072 12288]
-
blk.23.ffn_up.biasF32[12288]
-
blk.23.ffn_down.weightF16[12288 3072]
-
blk.23.ffn_down.biasF32[3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.attn_norm.biasF32[3072]
-
blk.24.attn_qkv.weightF16[3072 9216]
-
blk.24.attn_qkv.biasF32[9216]
-
blk.24.attn_output.weightF16[3072 3072]
-
blk.24.attn_output.biasF32[3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.ffn_norm.biasF32[3072]
-
blk.24.ffn_up.weightF16[3072 12288]
-
blk.24.ffn_up.biasF32[12288]
-
blk.24.ffn_down.weightF16[12288 3072]
-
blk.24.ffn_down.biasF32[3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.attn_norm.biasF32[3072]
-
blk.25.attn_qkv.weightF16[3072 9216]
-
blk.25.attn_qkv.biasF32[9216]
-
blk.25.attn_output.weightF16[3072 3072]
-
blk.25.attn_output.biasF32[3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.ffn_norm.biasF32[3072]
-
blk.25.ffn_up.weightF16[3072 12288]
-
blk.25.ffn_up.biasF32[12288]
-
blk.25.ffn_down.weightF16[12288 3072]
-
blk.25.ffn_down.biasF32[3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.attn_norm.biasF32[3072]
-
blk.26.attn_qkv.weightF16[3072 9216]
-
blk.26.attn_qkv.biasF32[9216]
-
blk.26.attn_output.weightF16[3072 3072]
-
blk.26.attn_output.biasF32[3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.ffn_norm.biasF32[3072]
-
blk.26.ffn_up.weightF16[3072 12288]
-
blk.26.ffn_up.biasF32[12288]
-
blk.26.ffn_down.weightF16[12288 3072]
-
blk.26.ffn_down.biasF32[3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.attn_norm.biasF32[3072]
-
blk.27.attn_qkv.weightF16[3072 9216]
-
blk.27.attn_qkv.biasF32[9216]
-
blk.27.attn_output.weightF16[3072 3072]
-
blk.27.attn_output.biasF32[3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.ffn_norm.biasF32[3072]
-
blk.27.ffn_up.weightF16[3072 12288]
-
blk.27.ffn_up.biasF32[12288]
-
blk.27.ffn_down.weightF16[12288 3072]
-
blk.27.ffn_down.biasF32[3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.attn_norm.biasF32[3072]
-
blk.28.attn_qkv.weightF16[3072 9216]
-
blk.28.attn_qkv.biasF32[9216]
-
blk.28.attn_output.weightF16[3072 3072]
-
blk.28.attn_output.biasF32[3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.ffn_norm.biasF32[3072]
-
blk.28.ffn_up.weightF16[3072 12288]
-
blk.28.ffn_up.biasF32[12288]
-
blk.28.ffn_down.weightF16[12288 3072]
-
blk.28.ffn_down.biasF32[3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.attn_norm.biasF32[3072]
-
blk.29.attn_qkv.weightF16[3072 9216]
-
blk.29.attn_qkv.biasF32[9216]
-
blk.29.attn_output.weightF16[3072 3072]
-
blk.29.attn_output.biasF32[3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.ffn_norm.biasF32[3072]
-
blk.29.ffn_up.weightF16[3072 12288]
-
blk.29.ffn_up.biasF32[12288]
-
blk.29.ffn_down.weightF16[12288 3072]
-
blk.29.ffn_down.biasF32[3072]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.attn_norm.biasF32[3072]
-
blk.30.attn_qkv.weightF16[3072 9216]
-
blk.30.attn_qkv.biasF32[9216]
-
blk.30.attn_output.weightF16[3072 3072]
-
blk.30.attn_output.biasF32[3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.ffn_norm.biasF32[3072]
-
blk.30.ffn_up.weightF16[3072 12288]
-
blk.30.ffn_up.biasF32[12288]
-
blk.30.ffn_down.weightF16[12288 3072]
-
blk.30.ffn_down.biasF32[3072]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.attn_norm.biasF32[3072]
-
blk.31.attn_qkv.weightF16[3072 9216]
-
blk.31.attn_qkv.biasF32[9216]
-
blk.31.attn_output.weightF16[3072 3072]
-
blk.31.attn_output.biasF32[3072]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.ffn_norm.biasF32[3072]
-
blk.31.ffn_up.weightF16[3072 12288]
-
blk.31.ffn_up.biasF32[12288]
-
blk.31.ffn_down.weightF16[12288 3072]
-
blk.31.ffn_down.biasF32[3072]
-
output_norm.weightF32[3072]
-
output_norm.biasF32[3072]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31