Fine-tuned google/gemma-2-9b-it on princeton-nlp/gemma2-ultrafeedback-armorm with the SimPO objective.
1,021 Pulls Updated 3 months ago
dbeb81e40b84 · 5.4GB
-
gemma2.attention.head_count16
-
gemma2.attention.head_count_kv8
-
gemma2.attention.key_length256
-
gemma2.attention.layer_norm_rms_epsilon1e-06
-
gemma2.attention.sliding_window4096
-
gemma2.attention.value_length256
-
gemma2.attn_logit_softcapping50
-
gemma2.block_count42
-
gemma2.context_length8192
-
gemma2.embedding_length3584
-
gemma2.feed_forward_length14336
-
gemma2.final_logit_softcapping30
-
general.architecturegemma2
-
general.base_model.0.nameGemma 2 9b It
-
general.base_model.0.organizationGoogle
-
general.base_model.0.repo_urlhttps://huggingface.co/google/gemma-2-9b-it
-
general.base_model.count1
-
general.basenamegemma-2
-
general.datasets[princeton-nlp/gemma2-ultrafeedback-armorm]
-
general.file_type25
-
general.finetuneit-SimPO
-
general.licensemit
-
general.nameGemma 2 9b It SimPO
-
general.organizationPrinceton Nlp
-
general.quantization_version2
-
general.size_label9B
-
general.tags[alignment-handbook, generated_from_trainer]
-
general.typemodel
-
quantize.imatrix.chunks_count64
-
quantize.imatrix.dataset/shared/opt/work_models/_imatrix/calibration_datav3.txt
-
quantize.imatrix.entries_count294
-
quantize.imatrix.fileimatrix.dat
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.add_space_prefixfalse
-
tokenizer.ggml.bos_token_id2
-
tokenizer.ggml.eos_token_id1
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id0
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[-1000, -1000, -1000, -1000, -1000, ...]
-
tokenizer.ggml.token_type[3, 3, 3, 3, 3, ...]
-
tokenizer.ggml.tokens[<pad>, <eos>, <bos>, <unk>, <mask>, ...]
-
tokenizer.ggml.unknown_token_id3
-
NameTypeShape
-
token_embd.weightQ6_K[3584, 256000]
-
blk.0.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.attn_norm.weightF32[3584]
-
blk.0.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.0.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.0.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.0.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.0.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.ffn_norm.weightF32[3584]
-
blk.0.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.0.post_attention_norm.weightF32[3584]
-
blk.0.post_ffw_norm.weightF32[3584]
-
blk.1.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.attn_norm.weightF32[3584]
-
blk.1.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.1.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.1.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.1.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.1.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.ffn_norm.weightF32[3584]
-
blk.1.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.1.post_attention_norm.weightF32[3584]
-
blk.1.post_ffw_norm.weightF32[3584]
-
blk.2.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.attn_norm.weightF32[3584]
-
blk.2.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.2.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.2.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.2.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.2.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.ffn_norm.weightF32[3584]
-
blk.2.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.2.post_attention_norm.weightF32[3584]
-
blk.2.post_ffw_norm.weightF32[3584]
-
blk.3.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.attn_norm.weightF32[3584]
-
blk.3.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.3.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.3.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.3.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.3.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.ffn_norm.weightF32[3584]
-
blk.3.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.3.post_attention_norm.weightF32[3584]
-
blk.3.post_ffw_norm.weightF32[3584]
-
blk.4.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.attn_norm.weightF32[3584]
-
blk.4.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.4.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.4.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.4.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.4.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.ffn_norm.weightF32[3584]
-
blk.4.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.4.post_attention_norm.weightF32[3584]
-
blk.4.post_ffw_norm.weightF32[3584]
-
blk.5.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.attn_norm.weightF32[3584]
-
blk.5.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.5.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.5.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.5.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.5.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.ffn_norm.weightF32[3584]
-
blk.5.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.5.post_attention_norm.weightF32[3584]
-
blk.5.post_ffw_norm.weightF32[3584]
-
blk.6.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.attn_norm.weightF32[3584]
-
blk.6.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.6.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.6.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.6.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.6.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.ffn_norm.weightF32[3584]
-
blk.6.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.6.post_attention_norm.weightF32[3584]
-
blk.6.post_ffw_norm.weightF32[3584]
-
blk.7.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.attn_norm.weightF32[3584]
-
blk.7.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.7.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.7.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.7.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.7.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.ffn_norm.weightF32[3584]
-
blk.7.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.7.post_attention_norm.weightF32[3584]
-
blk.7.post_ffw_norm.weightF32[3584]
-
blk.8.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.attn_norm.weightF32[3584]
-
blk.8.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.8.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.8.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.8.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.8.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.ffn_norm.weightF32[3584]
-
blk.8.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.8.post_attention_norm.weightF32[3584]
-
blk.8.post_ffw_norm.weightF32[3584]
-
blk.9.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.attn_norm.weightF32[3584]
-
blk.9.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.9.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.9.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.9.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.9.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.ffn_norm.weightF32[3584]
-
blk.9.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.9.post_attention_norm.weightF32[3584]
-
blk.9.post_ffw_norm.weightF32[3584]
-
blk.10.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.attn_norm.weightF32[3584]
-
blk.10.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.10.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.10.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.10.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.10.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.ffn_norm.weightF32[3584]
-
blk.10.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.10.post_attention_norm.weightF32[3584]
-
blk.10.post_ffw_norm.weightF32[3584]
-
blk.11.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.attn_norm.weightF32[3584]
-
blk.11.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.11.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.11.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.11.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.11.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.ffn_norm.weightF32[3584]
-
blk.11.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.11.post_attention_norm.weightF32[3584]
-
blk.11.post_ffw_norm.weightF32[3584]
-
blk.12.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.attn_norm.weightF32[3584]
-
blk.12.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.12.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.12.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.12.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.12.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.ffn_norm.weightF32[3584]
-
blk.12.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.12.post_attention_norm.weightF32[3584]
-
blk.12.post_ffw_norm.weightF32[3584]
-
blk.13.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.attn_norm.weightF32[3584]
-
blk.13.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.13.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.13.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.13.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.13.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.ffn_norm.weightF32[3584]
-
blk.13.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.13.post_attention_norm.weightF32[3584]
-
blk.13.post_ffw_norm.weightF32[3584]
-
blk.14.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.attn_norm.weightF32[3584]
-
blk.14.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.14.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.14.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.14.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.14.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.ffn_norm.weightF32[3584]
-
blk.14.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.14.post_attention_norm.weightF32[3584]
-
blk.14.post_ffw_norm.weightF32[3584]
-
blk.15.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.attn_norm.weightF32[3584]
-
blk.15.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.15.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.15.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.15.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.15.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.ffn_norm.weightF32[3584]
-
blk.15.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.15.post_attention_norm.weightF32[3584]
-
blk.15.post_ffw_norm.weightF32[3584]
-
blk.16.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.attn_norm.weightF32[3584]
-
blk.16.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.16.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.16.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.16.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.16.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.ffn_norm.weightF32[3584]
-
blk.16.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.16.post_attention_norm.weightF32[3584]
-
blk.16.post_ffw_norm.weightF32[3584]
-
blk.17.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.attn_norm.weightF32[3584]
-
blk.17.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.17.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.17.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.17.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.17.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.ffn_norm.weightF32[3584]
-
blk.17.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.17.post_attention_norm.weightF32[3584]
-
blk.17.post_ffw_norm.weightF32[3584]
-
blk.18.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.attn_norm.weightF32[3584]
-
blk.18.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.18.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.18.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.18.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.18.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.ffn_norm.weightF32[3584]
-
blk.18.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.18.post_attention_norm.weightF32[3584]
-
blk.18.post_ffw_norm.weightF32[3584]
-
blk.19.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.attn_norm.weightF32[3584]
-
blk.19.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.19.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.19.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.19.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.19.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.ffn_norm.weightF32[3584]
-
blk.19.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.19.post_attention_norm.weightF32[3584]
-
blk.19.post_ffw_norm.weightF32[3584]
-
blk.20.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.attn_norm.weightF32[3584]
-
blk.20.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.20.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.20.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.20.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.20.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.ffn_norm.weightF32[3584]
-
blk.20.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.20.post_attention_norm.weightF32[3584]
-
blk.20.post_ffw_norm.weightF32[3584]
-
blk.21.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.attn_norm.weightF32[3584]
-
blk.21.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.21.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.21.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.21.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.21.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.ffn_norm.weightF32[3584]
-
blk.21.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.21.post_attention_norm.weightF32[3584]
-
blk.21.post_ffw_norm.weightF32[3584]
-
blk.22.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.attn_norm.weightF32[3584]
-
blk.22.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.22.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.22.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.22.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.22.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.ffn_norm.weightF32[3584]
-
blk.22.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.22.post_attention_norm.weightF32[3584]
-
blk.22.post_ffw_norm.weightF32[3584]
-
blk.23.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.attn_norm.weightF32[3584]
-
blk.23.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.23.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.23.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.23.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.23.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.ffn_norm.weightF32[3584]
-
blk.23.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.23.post_attention_norm.weightF32[3584]
-
blk.23.post_ffw_norm.weightF32[3584]
-
blk.24.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.attn_norm.weightF32[3584]
-
blk.24.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.24.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.24.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.24.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.24.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.ffn_norm.weightF32[3584]
-
blk.24.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.24.post_attention_norm.weightF32[3584]
-
blk.24.post_ffw_norm.weightF32[3584]
-
blk.25.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.attn_norm.weightF32[3584]
-
blk.25.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.25.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.25.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.25.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.25.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.ffn_norm.weightF32[3584]
-
blk.25.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.25.post_attention_norm.weightF32[3584]
-
blk.25.post_ffw_norm.weightF32[3584]
-
blk.26.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.attn_norm.weightF32[3584]
-
blk.26.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.26.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.26.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.26.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.26.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.ffn_norm.weightF32[3584]
-
blk.26.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.26.post_attention_norm.weightF32[3584]
-
blk.26.post_ffw_norm.weightF32[3584]
-
blk.27.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.attn_norm.weightF32[3584]
-
blk.27.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.27.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.27.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.27.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.27.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.ffn_norm.weightF32[3584]
-
blk.27.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.27.post_attention_norm.weightF32[3584]
-
blk.27.post_ffw_norm.weightF32[3584]
-
blk.28.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.attn_norm.weightF32[3584]
-
blk.28.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.28.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.28.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.28.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.28.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.ffn_norm.weightF32[3584]
-
blk.28.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.28.post_attention_norm.weightF32[3584]
-
blk.28.post_ffw_norm.weightF32[3584]
-
blk.29.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.attn_norm.weightF32[3584]
-
blk.29.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.29.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.29.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.29.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.29.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.ffn_norm.weightF32[3584]
-
blk.29.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.29.post_attention_norm.weightF32[3584]
-
blk.29.post_ffw_norm.weightF32[3584]
-
blk.30.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.attn_norm.weightF32[3584]
-
blk.30.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.30.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.30.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.30.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.30.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.ffn_norm.weightF32[3584]
-
blk.30.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.30.post_attention_norm.weightF32[3584]
-
blk.30.post_ffw_norm.weightF32[3584]
-
blk.31.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.attn_norm.weightF32[3584]
-
blk.31.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.31.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.31.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.31.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.31.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.ffn_norm.weightF32[3584]
-
blk.31.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.31.post_attention_norm.weightF32[3584]
-
blk.31.post_ffw_norm.weightF32[3584]
-
blk.32.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.attn_norm.weightF32[3584]
-
blk.32.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.32.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.32.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.32.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.32.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.ffn_norm.weightF32[3584]
-
blk.32.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.32.post_attention_norm.weightF32[3584]
-
blk.32.post_ffw_norm.weightF32[3584]
-
blk.33.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.attn_norm.weightF32[3584]
-
blk.33.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.33.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.33.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.33.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.33.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.ffn_norm.weightF32[3584]
-
blk.33.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.33.post_attention_norm.weightF32[3584]
-
blk.33.post_ffw_norm.weightF32[3584]
-
blk.34.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.attn_norm.weightF32[3584]
-
blk.34.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.34.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.34.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.34.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.34.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.ffn_norm.weightF32[3584]
-
blk.34.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.34.post_attention_norm.weightF32[3584]
-
blk.34.post_ffw_norm.weightF32[3584]
-
blk.35.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.attn_norm.weightF32[3584]
-
blk.35.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.35.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.35.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.35.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.35.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.ffn_norm.weightF32[3584]
-
blk.35.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.35.post_attention_norm.weightF32[3584]
-
blk.35.post_ffw_norm.weightF32[3584]
-
blk.36.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.attn_norm.weightF32[3584]
-
blk.36.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.36.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.36.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.36.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.36.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.ffn_norm.weightF32[3584]
-
blk.36.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.36.post_attention_norm.weightF32[3584]
-
blk.36.post_ffw_norm.weightF32[3584]
-
blk.37.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.attn_norm.weightF32[3584]
-
blk.37.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.37.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.37.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.37.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.37.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.ffn_norm.weightF32[3584]
-
blk.37.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.37.post_attention_norm.weightF32[3584]
-
blk.37.post_ffw_norm.weightF32[3584]
-
blk.38.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.attn_norm.weightF32[3584]
-
blk.38.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.38.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.38.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.38.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.38.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.ffn_norm.weightF32[3584]
-
blk.38.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.38.post_attention_norm.weightF32[3584]
-
blk.38.post_ffw_norm.weightF32[3584]
-
blk.39.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.attn_norm.weightF32[3584]
-
blk.39.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.39.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.39.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.39.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.39.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.ffn_norm.weightF32[3584]
-
blk.39.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.39.post_attention_norm.weightF32[3584]
-
blk.39.post_ffw_norm.weightF32[3584]
-
blk.40.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.attn_norm.weightF32[3584]
-
blk.40.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.40.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.40.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.40.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.40.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.ffn_norm.weightF32[3584]
-
blk.40.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.40.post_attention_norm.weightF32[3584]
-
blk.40.post_ffw_norm.weightF32[3584]
-
blk.41.attn_k.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.attn_norm.weightF32[3584]
-
blk.41.attn_output.weight(!unknown_type 20!)[4096, 3584]
-
blk.41.attn_q.weight(!unknown_type 20!)[3584, 4096]
-
blk.41.attn_v.weight(!unknown_type 20!)[3584, 2048]
-
blk.41.ffn_down.weight(!unknown_type 20!)[14336, 3584]
-
blk.41.ffn_gate.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.ffn_norm.weightF32[3584]
-
blk.41.ffn_up.weight(!unknown_type 20!)[3584, 14336]
-
blk.41.post_attention_norm.weightF32[3584]
-
blk.41.post_ffw_norm.weightF32[3584]
-
output_norm.weightF32[3584]
Metadata
Tensor
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41