Made from Gemma 2 9B SPPO iter3 and SimPO
9B
97 Pulls Updated 2 weeks ago
d124698aee67 · 5.2GB
-
quantize.imatrix.chunks_count128
-
quantize.imatrix.dataset/training_dir/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.file/models_out/Gemma-2-Ataraxy-9B-GGUF/Gemma-2-Ataraxy-9B.imatrix
-
general.architecturegemma2
-
general.file_typeIQ1_M
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000 -1000 -1000 -1000 -1000 ...]
-
tokenizer.ggml.token_type[3 3 3 3 3 ...]
-
tokenizer.ggml.tokens[<pad> <eos> <bos> <unk> <mask> ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584 256000]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.0.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.0.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.0.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.0.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.0.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.1.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.1.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.1.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.1.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.1.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.2.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.2.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.2.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.2.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.2.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.3.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.3.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.3.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.3.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.3.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.4.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.4.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.4.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.4.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.4.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.5.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.5.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.5.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.5.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.5.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.6.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.6.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.6.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.6.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.6.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.7.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.7.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.7.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.7.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.7.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.8.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.8.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.8.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.8.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.8.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.9.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.9.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.9.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.9.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.9.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.10.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.10.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.10.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.10.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.10.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.11.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.11.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.11.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.11.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.11.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.12.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.12.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.12.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.12.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.12.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.13.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.13.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.13.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.13.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.13.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.14.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.14.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.14.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.14.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.14.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.15.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.15.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.15.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.15.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.15.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.16.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.16.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.16.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.16.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.16.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.17.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.17.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.17.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.17.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.17.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.18.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.18.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.18.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.18.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.18.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.19.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.19.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.19.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.19.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.19.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.20.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.20.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.20.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.20.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.20.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.21.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.21.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.21.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.21.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.21.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.22.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.22.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.22.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.22.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.22.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.23.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.23.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.23.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.23.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.23.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.24.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.24.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.24.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.24.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.24.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.25.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.25.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.25.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.25.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.25.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.26.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.26.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.26.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.26.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.26.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.27.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.27.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.27.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.27.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.27.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.28.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.28.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.28.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.28.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.28.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.29.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.29.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.29.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.29.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.29.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.30.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.30.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.30.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.30.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.30.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.31.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.31.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.31.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.31.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.31.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.32.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.32.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.32.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.32.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.32.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.33.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.33.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.33.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.33.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.33.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.34.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.34.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.34.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.34.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.34.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.35.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.35.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.35.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.35.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.35.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.36.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.36.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.36.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.36.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.36.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.37.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.37.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.37.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.37.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.37.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.38.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.38.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.38.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.38.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.38.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.39.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.39.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.39.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.39.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.39.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.40.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.40.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.40.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.40.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.40.attn_v.weight(!unknown_type 23!)[3584 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.ffn_down.weight(!unknown_type 23!)[14336 3584]
-
blk.41.ffn_gate.weight(!unknown_type 23!)[3584 14336]
-
blk.41.ffn_up.weight(!unknown_type 23!)[3584 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 23!)[3584 2048]
-
blk.41.attn_output.weight(!unknown_type 23!)[4096 3584]
-
blk.41.attn_q.weight(!unknown_type 23!)[3584 4096]
-
blk.41.attn_v.weight(!unknown_type 23!)[3584 2048]
-
output_norm.weightF32[3584]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41