1,368 Pulls Updated 6 months ago
ebc06d986445 · 28GB
-
general.architecturebaichuan
-
baichuan.attention.head_count40
-
baichuan.attention.head_count_kv40
-
baichuan.attention.layer_norm_rms_epsilon1e-06
-
baichuan.block_count40
-
baichuan.context_length4096
-
baichuan.embedding_length5120
-
baichuan.feed_forward_length13696
-
baichuan.rope.dimension_count128
-
baichuan.tensor_data_layoutMeta AI original pth
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id2
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.scores[0 0 0 0 0 ...]
-
tokenizer.ggml.token_type[2 3 3 1 1 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <SEP> <CLS> ...]
-
NameTypeShape
-
token_embd.weightF16[5120 125696]
-
blk.0.attn_output.weightF16[5120 5120]
-
blk.0.ffn_gate.weightF16[5120 13696]
-
blk.0.ffn_down.weightF16[13696 5120]
-
blk.0.ffn_up.weightF16[5120 13696]
-
blk.0.attn_norm.weightF32[5120]
-
blk.0.ffn_norm.weightF32[5120]
-
blk.0.attn_q.weightF16[5120 5120]
-
blk.0.attn_k.weightF16[5120 5120]
-
blk.0.attn_v.weightF16[5120 5120]
-
blk.1.attn_output.weightF16[5120 5120]
-
blk.1.ffn_gate.weightF16[5120 13696]
-
blk.1.ffn_down.weightF16[13696 5120]
-
blk.1.ffn_up.weightF16[5120 13696]
-
blk.1.attn_norm.weightF32[5120]
-
blk.1.ffn_norm.weightF32[5120]
-
blk.1.attn_q.weightF16[5120 5120]
-
blk.1.attn_k.weightF16[5120 5120]
-
blk.1.attn_v.weightF16[5120 5120]
-
blk.2.attn_output.weightF16[5120 5120]
-
blk.2.ffn_gate.weightF16[5120 13696]
-
blk.2.ffn_down.weightF16[13696 5120]
-
blk.2.ffn_up.weightF16[5120 13696]
-
blk.2.attn_norm.weightF32[5120]
-
blk.2.ffn_norm.weightF32[5120]
-
blk.2.attn_q.weightF16[5120 5120]
-
blk.2.attn_k.weightF16[5120 5120]
-
blk.2.attn_v.weightF16[5120 5120]
-
blk.3.attn_output.weightF16[5120 5120]
-
blk.3.ffn_gate.weightF16[5120 13696]
-
blk.3.ffn_down.weightF16[13696 5120]
-
blk.3.ffn_up.weightF16[5120 13696]
-
blk.3.attn_norm.weightF32[5120]
-
blk.3.ffn_norm.weightF32[5120]
-
blk.3.attn_q.weightF16[5120 5120]
-
blk.3.attn_k.weightF16[5120 5120]
-
blk.3.attn_v.weightF16[5120 5120]
-
blk.4.attn_output.weightF16[5120 5120]
-
blk.4.ffn_gate.weightF16[5120 13696]
-
blk.4.ffn_down.weightF16[13696 5120]
-
blk.4.ffn_up.weightF16[5120 13696]
-
blk.4.attn_norm.weightF32[5120]
-
blk.4.ffn_norm.weightF32[5120]
-
blk.4.attn_q.weightF16[5120 5120]
-
blk.4.attn_k.weightF16[5120 5120]
-
blk.4.attn_v.weightF16[5120 5120]
-
blk.5.attn_output.weightF16[5120 5120]
-
blk.5.ffn_gate.weightF16[5120 13696]
-
blk.5.ffn_down.weightF16[13696 5120]
-
blk.5.ffn_up.weightF16[5120 13696]
-
blk.5.attn_norm.weightF32[5120]
-
blk.5.ffn_norm.weightF32[5120]
-
blk.5.attn_q.weightF16[5120 5120]
-
blk.5.attn_k.weightF16[5120 5120]
-
blk.5.attn_v.weightF16[5120 5120]
-
blk.6.attn_output.weightF16[5120 5120]
-
blk.6.ffn_gate.weightF16[5120 13696]
-
blk.6.ffn_down.weightF16[13696 5120]
-
blk.6.ffn_up.weightF16[5120 13696]
-
blk.6.attn_norm.weightF32[5120]
-
blk.6.ffn_norm.weightF32[5120]
-
blk.6.attn_q.weightF16[5120 5120]
-
blk.6.attn_k.weightF16[5120 5120]
-
blk.6.attn_v.weightF16[5120 5120]
-
blk.7.attn_output.weightF16[5120 5120]
-
blk.7.ffn_gate.weightF16[5120 13696]
-
blk.7.ffn_down.weightF16[13696 5120]
-
blk.7.ffn_up.weightF16[5120 13696]
-
blk.7.attn_norm.weightF32[5120]
-
blk.7.ffn_norm.weightF32[5120]
-
blk.7.attn_q.weightF16[5120 5120]
-
blk.7.attn_k.weightF16[5120 5120]
-
blk.7.attn_v.weightF16[5120 5120]
-
blk.8.attn_output.weightF16[5120 5120]
-
blk.8.ffn_gate.weightF16[5120 13696]
-
blk.8.ffn_down.weightF16[13696 5120]
-
blk.8.ffn_up.weightF16[5120 13696]
-
blk.8.attn_norm.weightF32[5120]
-
blk.8.ffn_norm.weightF32[5120]
-
blk.8.attn_q.weightF16[5120 5120]
-
blk.8.attn_k.weightF16[5120 5120]
-
blk.8.attn_v.weightF16[5120 5120]
-
blk.9.attn_output.weightF16[5120 5120]
-
blk.9.ffn_gate.weightF16[5120 13696]
-
blk.9.ffn_down.weightF16[13696 5120]
-
blk.9.ffn_up.weightF16[5120 13696]
-
blk.9.attn_norm.weightF32[5120]
-
blk.9.ffn_norm.weightF32[5120]
-
blk.9.attn_q.weightF16[5120 5120]
-
blk.9.attn_k.weightF16[5120 5120]
-
blk.9.attn_v.weightF16[5120 5120]
-
blk.10.attn_output.weightF16[5120 5120]
-
blk.10.ffn_gate.weightF16[5120 13696]
-
blk.10.ffn_down.weightF16[13696 5120]
-
blk.10.ffn_up.weightF16[5120 13696]
-
blk.10.attn_norm.weightF32[5120]
-
blk.10.ffn_norm.weightF32[5120]
-
blk.10.attn_q.weightF16[5120 5120]
-
blk.10.attn_k.weightF16[5120 5120]
-
blk.10.attn_v.weightF16[5120 5120]
-
blk.11.attn_output.weightF16[5120 5120]
-
blk.11.ffn_gate.weightF16[5120 13696]
-
blk.11.ffn_down.weightF16[13696 5120]
-
blk.11.ffn_up.weightF16[5120 13696]
-
blk.11.attn_norm.weightF32[5120]
-
blk.11.ffn_norm.weightF32[5120]
-
blk.11.attn_q.weightF16[5120 5120]
-
blk.11.attn_k.weightF16[5120 5120]
-
blk.11.attn_v.weightF16[5120 5120]
-
blk.12.attn_output.weightF16[5120 5120]
-
blk.12.ffn_gate.weightF16[5120 13696]
-
blk.12.ffn_down.weightF16[13696 5120]
-
blk.12.ffn_up.weightF16[5120 13696]
-
blk.12.attn_norm.weightF32[5120]
-
blk.12.ffn_norm.weightF32[5120]
-
blk.12.attn_q.weightF16[5120 5120]
-
blk.12.attn_k.weightF16[5120 5120]
-
blk.12.attn_v.weightF16[5120 5120]
-
blk.13.attn_output.weightF16[5120 5120]
-
blk.13.ffn_gate.weightF16[5120 13696]
-
blk.13.ffn_down.weightF16[13696 5120]
-
blk.13.attn_q.weightF16[5120 5120]
-
blk.13.attn_k.weightF16[5120 5120]
-
blk.13.attn_v.weightF16[5120 5120]
-
blk.13.ffn_up.weightF16[5120 13696]
-
blk.13.attn_norm.weightF32[5120]
-
blk.13.ffn_norm.weightF32[5120]
-
blk.14.attn_output.weightF16[5120 5120]
-
blk.14.ffn_gate.weightF16[5120 13696]
-
blk.14.ffn_down.weightF16[13696 5120]
-
blk.14.ffn_up.weightF16[5120 13696]
-
blk.14.attn_norm.weightF32[5120]
-
blk.14.ffn_norm.weightF32[5120]
-
blk.14.attn_q.weightF16[5120 5120]
-
blk.14.attn_k.weightF16[5120 5120]
-
blk.14.attn_v.weightF16[5120 5120]
-
blk.15.attn_output.weightF16[5120 5120]
-
blk.15.ffn_gate.weightF16[5120 13696]
-
blk.15.ffn_down.weightF16[13696 5120]
-
blk.15.ffn_up.weightF16[5120 13696]
-
blk.15.attn_norm.weightF32[5120]
-
blk.15.ffn_norm.weightF32[5120]
-
blk.15.attn_q.weightF16[5120 5120]
-
blk.15.attn_k.weightF16[5120 5120]
-
blk.15.attn_v.weightF16[5120 5120]
-
blk.16.attn_output.weightF16[5120 5120]
-
blk.16.ffn_gate.weightF16[5120 13696]
-
blk.16.ffn_down.weightF16[13696 5120]
-
blk.16.ffn_up.weightF16[5120 13696]
-
blk.16.attn_norm.weightF32[5120]
-
blk.16.ffn_norm.weightF32[5120]
-
blk.16.attn_q.weightF16[5120 5120]
-
blk.16.attn_k.weightF16[5120 5120]
-
blk.16.attn_v.weightF16[5120 5120]
-
blk.17.attn_output.weightF16[5120 5120]
-
blk.17.ffn_gate.weightF16[5120 13696]
-
blk.17.ffn_down.weightF16[13696 5120]
-
blk.17.ffn_up.weightF16[5120 13696]
-
blk.17.attn_norm.weightF32[5120]
-
blk.17.ffn_norm.weightF32[5120]
-
blk.17.attn_q.weightF16[5120 5120]
-
blk.17.attn_k.weightF16[5120 5120]
-
blk.17.attn_v.weightF16[5120 5120]
-
blk.18.attn_output.weightF16[5120 5120]
-
blk.18.ffn_gate.weightF16[5120 13696]
-
blk.18.ffn_down.weightF16[13696 5120]
-
blk.18.ffn_up.weightF16[5120 13696]
-
blk.18.attn_norm.weightF32[5120]
-
blk.18.ffn_norm.weightF32[5120]
-
blk.18.attn_q.weightF16[5120 5120]
-
blk.18.attn_k.weightF16[5120 5120]
-
blk.18.attn_v.weightF16[5120 5120]
-
blk.19.attn_output.weightF16[5120 5120]
-
blk.19.ffn_gate.weightF16[5120 13696]
-
blk.19.ffn_down.weightF16[13696 5120]
-
blk.19.ffn_up.weightF16[5120 13696]
-
blk.19.attn_norm.weightF32[5120]
-
blk.19.ffn_norm.weightF32[5120]
-
blk.19.attn_q.weightF16[5120 5120]
-
blk.19.attn_k.weightF16[5120 5120]
-
blk.19.attn_v.weightF16[5120 5120]
-
blk.20.attn_output.weightF16[5120 5120]
-
blk.20.ffn_gate.weightF16[5120 13696]
-
blk.20.ffn_down.weightF16[13696 5120]
-
blk.20.ffn_up.weightF16[5120 13696]
-
blk.20.attn_norm.weightF32[5120]
-
blk.20.ffn_norm.weightF32[5120]
-
blk.20.attn_q.weightF16[5120 5120]
-
blk.20.attn_k.weightF16[5120 5120]
-
blk.20.attn_v.weightF16[5120 5120]
-
blk.21.attn_output.weightF16[5120 5120]
-
blk.21.ffn_gate.weightF16[5120 13696]
-
blk.21.ffn_down.weightF16[13696 5120]
-
blk.21.ffn_up.weightF16[5120 13696]
-
blk.21.attn_norm.weightF32[5120]
-
blk.21.ffn_norm.weightF32[5120]
-
blk.21.attn_q.weightF16[5120 5120]
-
blk.21.attn_k.weightF16[5120 5120]
-
blk.21.attn_v.weightF16[5120 5120]
-
blk.22.attn_output.weightF16[5120 5120]
-
blk.22.ffn_gate.weightF16[5120 13696]
-
blk.22.ffn_down.weightF16[13696 5120]
-
blk.22.ffn_up.weightF16[5120 13696]
-
blk.22.attn_norm.weightF32[5120]
-
blk.22.ffn_norm.weightF32[5120]
-
blk.22.attn_q.weightF16[5120 5120]
-
blk.22.attn_k.weightF16[5120 5120]
-
blk.22.attn_v.weightF16[5120 5120]
-
blk.23.attn_output.weightF16[5120 5120]
-
blk.23.ffn_gate.weightF16[5120 13696]
-
blk.23.ffn_down.weightF16[13696 5120]
-
blk.23.ffn_up.weightF16[5120 13696]
-
blk.23.attn_norm.weightF32[5120]
-
blk.23.ffn_norm.weightF32[5120]
-
blk.23.attn_q.weightF16[5120 5120]
-
blk.23.attn_k.weightF16[5120 5120]
-
blk.23.attn_v.weightF16[5120 5120]
-
blk.24.attn_output.weightF16[5120 5120]
-
blk.24.ffn_gate.weightF16[5120 13696]
-
blk.24.ffn_down.weightF16[13696 5120]
-
blk.24.ffn_up.weightF16[5120 13696]
-
blk.24.attn_norm.weightF32[5120]
-
blk.24.ffn_norm.weightF32[5120]
-
blk.24.attn_q.weightF16[5120 5120]
-
blk.24.attn_k.weightF16[5120 5120]
-
blk.24.attn_v.weightF16[5120 5120]
-
blk.25.attn_output.weightF16[5120 5120]
-
blk.25.ffn_gate.weightF16[5120 13696]
-
blk.25.ffn_down.weightF16[13696 5120]
-
blk.25.ffn_up.weightF16[5120 13696]
-
blk.25.attn_norm.weightF32[5120]
-
blk.25.ffn_norm.weightF32[5120]
-
blk.25.attn_q.weightF16[5120 5120]
-
blk.25.attn_k.weightF16[5120 5120]
-
blk.25.attn_v.weightF16[5120 5120]
-
blk.26.attn_output.weightF16[5120 5120]
-
blk.26.ffn_gate.weightF16[5120 13696]
-
blk.26.ffn_down.weightF16[13696 5120]
-
blk.26.ffn_up.weightF16[5120 13696]
-
blk.26.attn_norm.weightF32[5120]
-
blk.26.ffn_norm.weightF32[5120]
-
blk.26.attn_q.weightF16[5120 5120]
-
blk.26.attn_k.weightF16[5120 5120]
-
blk.26.attn_v.weightF16[5120 5120]
-
blk.27.attn_output.weightF16[5120 5120]
-
blk.27.ffn_gate.weightF16[5120 13696]
-
blk.27.ffn_down.weightF16[13696 5120]
-
blk.27.ffn_up.weightF16[5120 13696]
-
blk.27.attn_norm.weightF32[5120]
-
blk.27.ffn_norm.weightF32[5120]
-
blk.27.attn_q.weightF16[5120 5120]
-
blk.27.attn_k.weightF16[5120 5120]
-
blk.27.attn_v.weightF16[5120 5120]
-
blk.28.attn_output.weightF16[5120 5120]
-
blk.28.ffn_gate.weightF16[5120 13696]
-
blk.28.ffn_down.weightF16[13696 5120]
-
blk.28.ffn_up.weightF16[5120 13696]
-
blk.28.attn_norm.weightF32[5120]
-
blk.28.ffn_norm.weightF32[5120]
-
blk.28.attn_q.weightF16[5120 5120]
-
blk.28.attn_k.weightF16[5120 5120]
-
blk.28.attn_v.weightF16[5120 5120]
-
blk.29.attn_output.weightF16[5120 5120]
-
blk.29.ffn_gate.weightF16[5120 13696]
-
blk.29.attn_q.weightF16[5120 5120]
-
blk.29.attn_k.weightF16[5120 5120]
-
blk.29.attn_v.weightF16[5120 5120]
-
blk.29.ffn_down.weightF16[13696 5120]
-
blk.29.ffn_up.weightF16[5120 13696]
-
blk.29.attn_norm.weightF32[5120]
-
blk.29.ffn_norm.weightF32[5120]
-
blk.30.attn_output.weightF16[5120 5120]
-
blk.30.ffn_gate.weightF16[5120 13696]
-
blk.30.ffn_down.weightF16[13696 5120]
-
blk.30.ffn_up.weightF16[5120 13696]
-
blk.30.attn_norm.weightF32[5120]
-
blk.30.ffn_norm.weightF32[5120]
-
blk.30.attn_q.weightF16[5120 5120]
-
blk.30.attn_k.weightF16[5120 5120]
-
blk.30.attn_v.weightF16[5120 5120]
-
blk.31.attn_output.weightF16[5120 5120]
-
blk.31.ffn_gate.weightF16[5120 13696]
-
blk.31.ffn_down.weightF16[13696 5120]
-
blk.31.ffn_up.weightF16[5120 13696]
-
blk.31.attn_norm.weightF32[5120]
-
blk.31.ffn_norm.weightF32[5120]
-
blk.31.attn_q.weightF16[5120 5120]
-
blk.31.attn_k.weightF16[5120 5120]
-
blk.31.attn_v.weightF16[5120 5120]
-
blk.32.attn_output.weightF16[5120 5120]
-
blk.32.ffn_gate.weightF16[5120 13696]
-
blk.32.ffn_down.weightF16[13696 5120]
-
blk.32.ffn_up.weightF16[5120 13696]
-
blk.32.attn_norm.weightF32[5120]
-
blk.32.ffn_norm.weightF32[5120]
-
blk.32.attn_q.weightF16[5120 5120]
-
blk.32.attn_k.weightF16[5120 5120]
-
blk.32.attn_v.weightF16[5120 5120]
-
blk.33.attn_output.weightF16[5120 5120]
-
blk.33.ffn_gate.weightF16[5120 13696]
-
blk.33.ffn_down.weightF16[13696 5120]
-
blk.33.ffn_up.weightF16[5120 13696]
-
blk.33.attn_norm.weightF32[5120]
-
blk.33.ffn_norm.weightF32[5120]
-
blk.33.attn_q.weightF16[5120 5120]
-
blk.33.attn_k.weightF16[5120 5120]
-
blk.33.attn_v.weightF16[5120 5120]
-
blk.34.attn_output.weightF16[5120 5120]
-
blk.34.ffn_gate.weightF16[5120 13696]
-
blk.34.ffn_down.weightF16[13696 5120]
-
blk.34.ffn_up.weightF16[5120 13696]
-
blk.34.attn_norm.weightF32[5120]
-
blk.34.ffn_norm.weightF32[5120]
-
blk.34.attn_q.weightF16[5120 5120]
-
blk.34.attn_k.weightF16[5120 5120]
-
blk.34.attn_v.weightF16[5120 5120]
-
blk.35.attn_output.weightF16[5120 5120]
-
blk.35.ffn_gate.weightF16[5120 13696]
-
blk.35.ffn_down.weightF16[13696 5120]
-
blk.35.ffn_up.weightF16[5120 13696]
-
blk.35.attn_norm.weightF32[5120]
-
blk.35.ffn_norm.weightF32[5120]
-
blk.35.attn_q.weightF16[5120 5120]
-
blk.35.attn_k.weightF16[5120 5120]
-
blk.35.attn_v.weightF16[5120 5120]
-
blk.36.attn_output.weightF16[5120 5120]
-
blk.36.ffn_gate.weightF16[5120 13696]
-
blk.36.ffn_down.weightF16[13696 5120]
-
blk.36.ffn_up.weightF16[5120 13696]
-
blk.36.attn_norm.weightF32[5120]
-
blk.36.ffn_norm.weightF32[5120]
-
blk.36.attn_q.weightF16[5120 5120]
-
blk.36.attn_k.weightF16[5120 5120]
-
blk.36.attn_v.weightF16[5120 5120]
-
blk.37.attn_output.weightF16[5120 5120]
-
blk.37.ffn_gate.weightF16[5120 13696]
-
blk.37.ffn_down.weightF16[13696 5120]
-
blk.37.ffn_up.weightF16[5120 13696]
-
blk.37.attn_norm.weightF32[5120]
-
blk.37.ffn_norm.weightF32[5120]
-
blk.37.attn_q.weightF16[5120 5120]
-
blk.37.attn_k.weightF16[5120 5120]
-
blk.37.attn_v.weightF16[5120 5120]
-
blk.38.attn_output.weightF16[5120 5120]
-
blk.38.ffn_gate.weightF16[5120 13696]
-
blk.38.ffn_down.weightF16[13696 5120]
-
blk.38.ffn_up.weightF16[5120 13696]
-
blk.38.attn_norm.weightF32[5120]
-
blk.38.ffn_norm.weightF32[5120]
-
blk.38.attn_q.weightF16[5120 5120]
-
blk.38.attn_k.weightF16[5120 5120]
-
blk.38.attn_v.weightF16[5120 5120]
-
blk.39.attn_output.weightF16[5120 5120]
-
blk.39.ffn_gate.weightF16[5120 13696]
-
blk.39.ffn_down.weightF16[13696 5120]
-
blk.39.ffn_up.weightF16[5120 13696]
-
blk.39.attn_norm.weightF32[5120]
-
blk.39.ffn_norm.weightF32[5120]
-
blk.39.attn_q.weightF16[5120 5120]
-
blk.39.attn_k.weightF16[5120 5120]
-
blk.39.attn_v.weightF16[5120 5120]
-
output_norm.weightF32[5120]
-
output.weightF16[5120 125696]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39