236B
96 Pulls Updated 2 months ago
1396c520778b · 47GB
-
mradermacher.convert_typehf
-
mradermacher.quantize_version2
-
mradermacher.quantized_at2024-06-20T05:00:15+02:00
-
mradermacher.quantized_bymradermacher
-
mradermacher.quantized_ondb2
-
quantize.imatrix.chunks_count291
-
quantize.imatrix.datasetimatrix-training.txt
-
quantize.imatrix.entries_count716
-
quantize.imatrix.fileDeepSeek-Coder-V2-Instruct-i1-GGUF/imatrix.dat
-
general.architecturedeepseek2
-
general.file_typeIQ1_S
-
deepseek2.attention.head_count128
-
deepseek2.attention.head_count_kv128
-
deepseek2.attention.key_length192
-
deepseek2.attention.kv_lora_rank512
-
deepseek2.attention.layer_norm_rms_epsilon1e-06
-
deepseek2.attention.q_lora_rank1536
-
deepseek2.attention.value_length128
-
deepseek2.block_count60
-
deepseek2.context_length163840
-
deepseek2.embedding_length5120
-
deepseek2.expert_count160
-
deepseek2.expert_feed_forward_length1536
-
deepseek2.expert_shared_count2
-
deepseek2.expert_used_count6
-
deepseek2.expert_weights_scale16
-
deepseek2.feed_forward_length12288
-
deepseek2.leading_dense_block_count1
-
deepseek2.rope.dimension_count64
-
deepseek2.rope.freq_base10000
-
deepseek2.rope.scaling.factor40
-
deepseek2.rope.scaling.original_context_length4096
-
deepseek2.rope.scaling.typeyarn
-
deepseek2.rope.scaling.yarn_log_multiplier0.1
-
deepseek2.vocab_size102400
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id100000
-
tokenizer.ggml.eos_token_id100001
-
tokenizer.ggml.merges[Ġ Ġ Ġ t Ġ a i n h e ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.padding_token_id100001
-
tokenizer.ggml.predeepseek-llm
-
tokenizer.ggml.token_type[1 1 1 1 1 ...]
-
tokenizer.ggml.tokens[! " # $ % ...]
-
NameTypeShape
-
token_embd.weightQ2_K[5120 102400]
-
blk.0.attn_norm.weightF32[5120]
-
blk.0.ffn_down.weightQ2_K[12288 5120]
-
blk.0.ffn_gate.weightCOUNT[5120 12288]
-
blk.0.ffn_up.weightCOUNT[5120 12288]
-
blk.0.ffn_norm.weightF32[5120]
-
blk.0.attn_kv_a_norm.weightF32[512]
-
blk.0.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.0.attn_kv_b.weightCOUNT[512 32768]
-
blk.0.attn_output.weightI8[16384 5120]
-
blk.0.attn_q_a_norm.weightF32[1536]
-
blk.0.attn_q_a.weightCOUNT[5120 1536]
-
blk.0.attn_q_b.weightCOUNT[1536 24576]
-
blk.1.ffn_gate_inp.weightF32[5120 160]
-
blk.1.ffn_down_shexp.weightQ2_K[3072 5120]
-
blk.1.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.1.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.1.attn_kv_a_norm.weightF32[512]
-
blk.1.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.1.attn_kv_b.weightCOUNT[512 32768]
-
blk.1.attn_output.weightI8[16384 5120]
-
blk.1.attn_q_a_norm.weightF32[1536]
-
blk.1.attn_q_a.weightCOUNT[5120 1536]
-
blk.1.attn_q_b.weightCOUNT[1536 24576]
-
blk.1.attn_norm.weightF32[5120]
-
blk.1.ffn_down_exps.weightQ2_K[1536 5120 160]
-
blk.1.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.1.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.1.ffn_norm.weightF32[5120]
-
blk.2.ffn_gate_inp.weightF32[5120 160]
-
blk.2.ffn_down_shexp.weightQ2_K[3072 5120]
-
blk.2.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.2.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.2.attn_kv_a_norm.weightF32[512]
-
blk.2.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.2.attn_kv_b.weightCOUNT[512 32768]
-
blk.2.attn_output.weightI8[16384 5120]
-
blk.2.attn_q_a_norm.weightF32[1536]
-
blk.2.attn_q_a.weightCOUNT[5120 1536]
-
blk.2.attn_q_b.weightCOUNT[1536 24576]
-
blk.2.attn_norm.weightF32[5120]
-
blk.2.ffn_down_exps.weightQ2_K[1536 5120 160]
-
blk.2.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.2.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.2.ffn_norm.weightF32[5120]
-
blk.3.ffn_gate_inp.weightF32[5120 160]
-
blk.3.ffn_down_shexp.weightQ2_K[3072 5120]
-
blk.3.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.3.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.3.attn_kv_a_norm.weightF32[512]
-
blk.3.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.3.attn_kv_b.weightCOUNT[512 32768]
-
blk.3.attn_output.weightI8[16384 5120]
-
blk.3.attn_q_a_norm.weightF32[1536]
-
blk.3.attn_q_a.weightCOUNT[5120 1536]
-
blk.3.attn_q_b.weightCOUNT[1536 24576]
-
blk.3.attn_norm.weightF32[5120]
-
blk.3.ffn_down_exps.weightQ2_K[1536 5120 160]
-
blk.3.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.3.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.3.ffn_norm.weightF32[5120]
-
blk.4.ffn_gate_inp.weightF32[5120 160]
-
blk.4.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.4.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.4.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.4.attn_kv_a_norm.weightF32[512]
-
blk.4.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.4.attn_kv_b.weightCOUNT[512 32768]
-
blk.4.attn_output.weightI8[16384 5120]
-
blk.4.attn_q_a_norm.weightF32[1536]
-
blk.4.attn_q_a.weightCOUNT[5120 1536]
-
blk.4.attn_q_b.weightCOUNT[1536 24576]
-
blk.4.attn_norm.weightF32[5120]
-
blk.4.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.4.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.4.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.4.ffn_norm.weightF32[5120]
-
blk.5.attn_norm.weightF32[5120]
-
blk.5.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.5.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.5.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.5.ffn_gate_inp.weightF32[5120 160]
-
blk.5.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.5.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.5.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.5.ffn_norm.weightF32[5120]
-
blk.5.attn_kv_a_norm.weightF32[512]
-
blk.5.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.5.attn_kv_b.weightCOUNT[512 32768]
-
blk.5.attn_output.weightI8[16384 5120]
-
blk.5.attn_q_a_norm.weightF32[1536]
-
blk.5.attn_q_a.weightCOUNT[5120 1536]
-
blk.5.attn_q_b.weightCOUNT[1536 24576]
-
blk.6.ffn_gate_inp.weightF32[5120 160]
-
blk.6.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.6.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.6.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.6.attn_kv_a_norm.weightF32[512]
-
blk.6.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.6.attn_kv_b.weightCOUNT[512 32768]
-
blk.6.attn_output.weightI8[16384 5120]
-
blk.6.attn_q_a_norm.weightF32[1536]
-
blk.6.attn_q_a.weightCOUNT[5120 1536]
-
blk.6.attn_q_b.weightCOUNT[1536 24576]
-
blk.6.attn_norm.weightF32[5120]
-
blk.6.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.6.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.6.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.6.ffn_norm.weightF32[5120]
-
blk.7.ffn_gate_inp.weightF32[5120 160]
-
blk.7.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.7.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.7.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.7.attn_kv_a_norm.weightF32[512]
-
blk.7.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.7.attn_kv_b.weightCOUNT[512 32768]
-
blk.7.attn_output.weightI8[16384 5120]
-
blk.7.attn_q_a_norm.weightF32[1536]
-
blk.7.attn_q_a.weightCOUNT[5120 1536]
-
blk.7.attn_q_b.weightCOUNT[1536 24576]
-
blk.7.attn_norm.weightF32[5120]
-
blk.7.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.7.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.7.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.7.ffn_norm.weightF32[5120]
-
blk.8.ffn_gate_inp.weightF32[5120 160]
-
blk.8.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.8.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.8.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.8.attn_kv_a_norm.weightF32[512]
-
blk.8.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.8.attn_kv_b.weightCOUNT[512 32768]
-
blk.8.attn_output.weightI8[16384 5120]
-
blk.8.attn_q_a_norm.weightF32[1536]
-
blk.8.attn_q_a.weightCOUNT[5120 1536]
-
blk.8.attn_q_b.weightCOUNT[1536 24576]
-
blk.8.attn_norm.weightF32[5120]
-
blk.8.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.8.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.8.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.8.ffn_norm.weightF32[5120]
-
blk.9.ffn_gate_inp.weightF32[5120 160]
-
blk.9.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.9.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.9.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.9.attn_kv_a_norm.weightF32[512]
-
blk.9.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.9.attn_kv_b.weightCOUNT[512 32768]
-
blk.9.attn_output.weightI8[16384 5120]
-
blk.9.attn_q_a_norm.weightF32[1536]
-
blk.9.attn_q_a.weightCOUNT[5120 1536]
-
blk.9.attn_q_b.weightCOUNT[1536 24576]
-
blk.9.attn_norm.weightF32[5120]
-
blk.9.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.9.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.9.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.9.ffn_norm.weightF32[5120]
-
blk.10.ffn_gate_inp.weightF32[5120 160]
-
blk.10.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.10.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.10.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.10.attn_kv_a_norm.weightF32[512]
-
blk.10.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.10.attn_kv_b.weightCOUNT[512 32768]
-
blk.10.attn_output.weightI8[16384 5120]
-
blk.10.attn_q_a_norm.weightF32[1536]
-
blk.10.attn_q_a.weightCOUNT[5120 1536]
-
blk.10.attn_q_b.weightCOUNT[1536 24576]
-
blk.10.attn_norm.weightF32[5120]
-
blk.10.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.10.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.10.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.10.ffn_norm.weightF32[5120]
-
blk.11.ffn_gate_inp.weightF32[5120 160]
-
blk.11.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.11.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.11.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.11.attn_kv_a_norm.weightF32[512]
-
blk.11.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.11.attn_kv_b.weightCOUNT[512 32768]
-
blk.11.attn_output.weightI8[16384 5120]
-
blk.11.attn_q_a_norm.weightF32[1536]
-
blk.11.attn_q_a.weightCOUNT[5120 1536]
-
blk.11.attn_q_b.weightCOUNT[1536 24576]
-
blk.11.attn_norm.weightF32[5120]
-
blk.11.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.11.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.11.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.11.ffn_norm.weightF32[5120]
-
blk.12.ffn_gate_inp.weightF32[5120 160]
-
blk.12.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.12.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.12.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.12.attn_kv_a_norm.weightF32[512]
-
blk.12.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.12.attn_kv_b.weightCOUNT[512 32768]
-
blk.12.attn_output.weightI8[16384 5120]
-
blk.12.attn_q_a_norm.weightF32[1536]
-
blk.12.attn_q_a.weightCOUNT[5120 1536]
-
blk.12.attn_q_b.weightCOUNT[1536 24576]
-
blk.12.attn_norm.weightF32[5120]
-
blk.12.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.12.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.12.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.12.ffn_norm.weightF32[5120]
-
blk.13.ffn_gate_inp.weightF32[5120 160]
-
blk.13.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.13.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.13.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.13.attn_kv_a_norm.weightF32[512]
-
blk.13.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.13.attn_kv_b.weightCOUNT[512 32768]
-
blk.13.attn_output.weightI8[16384 5120]
-
blk.13.attn_q_a_norm.weightF32[1536]
-
blk.13.attn_q_a.weightCOUNT[5120 1536]
-
blk.13.attn_q_b.weightCOUNT[1536 24576]
-
blk.13.attn_norm.weightF32[5120]
-
blk.13.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.13.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.13.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.13.ffn_norm.weightF32[5120]
-
blk.14.ffn_gate_inp.weightF32[5120 160]
-
blk.14.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.14.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.14.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.14.attn_kv_a_norm.weightF32[512]
-
blk.14.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.14.attn_kv_b.weightCOUNT[512 32768]
-
blk.14.attn_output.weightI8[16384 5120]
-
blk.14.attn_q_a_norm.weightF32[1536]
-
blk.14.attn_q_a.weightCOUNT[5120 1536]
-
blk.14.attn_q_b.weightCOUNT[1536 24576]
-
blk.14.attn_norm.weightF32[5120]
-
blk.14.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.14.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.14.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.14.ffn_norm.weightF32[5120]
-
blk.15.ffn_gate_inp.weightF32[5120 160]
-
blk.15.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.15.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.15.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.15.attn_kv_a_norm.weightF32[512]
-
blk.15.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.15.attn_kv_b.weightCOUNT[512 32768]
-
blk.15.attn_output.weightI8[16384 5120]
-
blk.15.attn_q_a_norm.weightF32[1536]
-
blk.15.attn_q_a.weightCOUNT[5120 1536]
-
blk.15.attn_q_b.weightCOUNT[1536 24576]
-
blk.15.attn_norm.weightF32[5120]
-
blk.15.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.15.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.15.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.15.ffn_norm.weightF32[5120]
-
blk.16.ffn_gate_inp.weightF32[5120 160]
-
blk.16.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.16.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.16.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.16.attn_kv_a_norm.weightF32[512]
-
blk.16.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.16.attn_kv_b.weightCOUNT[512 32768]
-
blk.16.attn_output.weightI8[16384 5120]
-
blk.16.attn_q_a_norm.weightF32[1536]
-
blk.16.attn_q_a.weightCOUNT[5120 1536]
-
blk.16.attn_q_b.weightCOUNT[1536 24576]
-
blk.16.attn_norm.weightF32[5120]
-
blk.16.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.16.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.16.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.16.ffn_norm.weightF32[5120]
-
blk.17.ffn_gate_inp.weightF32[5120 160]
-
blk.17.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.17.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.17.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.17.attn_kv_a_norm.weightF32[512]
-
blk.17.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.17.attn_kv_b.weightCOUNT[512 32768]
-
blk.17.attn_output.weightI8[16384 5120]
-
blk.17.attn_q_a_norm.weightF32[1536]
-
blk.17.attn_q_a.weightCOUNT[5120 1536]
-
blk.17.attn_q_b.weightCOUNT[1536 24576]
-
blk.17.attn_norm.weightF32[5120]
-
blk.17.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.17.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.17.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.17.ffn_norm.weightF32[5120]
-
blk.18.attn_norm.weightF32[5120]
-
blk.18.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.18.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.18.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.18.ffn_gate_inp.weightF32[5120 160]
-
blk.18.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.18.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.18.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.18.ffn_norm.weightF32[5120]
-
blk.18.attn_kv_a_norm.weightF32[512]
-
blk.18.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.18.attn_kv_b.weightCOUNT[512 32768]
-
blk.18.attn_output.weightI8[16384 5120]
-
blk.18.attn_q_a_norm.weightF32[1536]
-
blk.18.attn_q_a.weightCOUNT[5120 1536]
-
blk.18.attn_q_b.weightCOUNT[1536 24576]
-
blk.19.ffn_gate_inp.weightF32[5120 160]
-
blk.19.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.19.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.19.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.19.attn_kv_a_norm.weightF32[512]
-
blk.19.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.19.attn_kv_b.weightCOUNT[512 32768]
-
blk.19.attn_output.weightI8[16384 5120]
-
blk.19.attn_q_a_norm.weightF32[1536]
-
blk.19.attn_q_a.weightCOUNT[5120 1536]
-
blk.19.attn_q_b.weightCOUNT[1536 24576]
-
blk.19.attn_norm.weightF32[5120]
-
blk.19.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.19.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.19.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.19.ffn_norm.weightF32[5120]
-
blk.20.ffn_gate_inp.weightF32[5120 160]
-
blk.20.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.20.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.20.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.20.attn_kv_a_norm.weightF32[512]
-
blk.20.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.20.attn_kv_b.weightCOUNT[512 32768]
-
blk.20.attn_output.weightI8[16384 5120]
-
blk.20.attn_q_a_norm.weightF32[1536]
-
blk.20.attn_q_a.weightCOUNT[5120 1536]
-
blk.20.attn_q_b.weightCOUNT[1536 24576]
-
blk.20.attn_norm.weightF32[5120]
-
blk.20.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.20.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.20.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.20.ffn_norm.weightF32[5120]
-
blk.21.ffn_gate_inp.weightF32[5120 160]
-
blk.21.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.21.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.21.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.21.attn_kv_a_norm.weightF32[512]
-
blk.21.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.21.attn_kv_b.weightCOUNT[512 32768]
-
blk.21.attn_output.weightI8[16384 5120]
-
blk.21.attn_q_a_norm.weightF32[1536]
-
blk.21.attn_q_a.weightCOUNT[5120 1536]
-
blk.21.attn_q_b.weightCOUNT[1536 24576]
-
blk.21.attn_norm.weightF32[5120]
-
blk.21.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.21.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.21.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.21.ffn_norm.weightF32[5120]
-
blk.22.ffn_gate_inp.weightF32[5120 160]
-
blk.22.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.22.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.22.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.22.attn_kv_a_norm.weightF32[512]
-
blk.22.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.22.attn_kv_b.weightCOUNT[512 32768]
-
blk.22.attn_output.weightI8[16384 5120]
-
blk.22.attn_q_a_norm.weightF32[1536]
-
blk.22.attn_q_a.weightCOUNT[5120 1536]
-
blk.22.attn_q_b.weightCOUNT[1536 24576]
-
blk.22.attn_norm.weightF32[5120]
-
blk.22.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.22.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.22.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.22.ffn_norm.weightF32[5120]
-
blk.23.ffn_gate_inp.weightF32[5120 160]
-
blk.23.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.23.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.23.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.23.attn_kv_a_norm.weightF32[512]
-
blk.23.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.23.attn_kv_b.weightCOUNT[512 32768]
-
blk.23.attn_output.weightI8[16384 5120]
-
blk.23.attn_q_a_norm.weightF32[1536]
-
blk.23.attn_q_a.weightCOUNT[5120 1536]
-
blk.23.attn_q_b.weightCOUNT[1536 24576]
-
blk.23.attn_norm.weightF32[5120]
-
blk.23.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.23.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.23.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.23.ffn_norm.weightF32[5120]
-
blk.24.ffn_gate_inp.weightF32[5120 160]
-
blk.24.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.24.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.24.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.24.attn_kv_a_norm.weightF32[512]
-
blk.24.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.24.attn_kv_b.weightCOUNT[512 32768]
-
blk.24.attn_output.weightI8[16384 5120]
-
blk.24.attn_q_a_norm.weightF32[1536]
-
blk.24.attn_q_a.weightCOUNT[5120 1536]
-
blk.24.attn_q_b.weightCOUNT[1536 24576]
-
blk.24.attn_norm.weightF32[5120]
-
blk.24.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.24.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.24.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.24.ffn_norm.weightF32[5120]
-
blk.25.ffn_gate_inp.weightF32[5120 160]
-
blk.25.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.25.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.25.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.25.attn_kv_a_norm.weightF32[512]
-
blk.25.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.25.attn_kv_b.weightCOUNT[512 32768]
-
blk.25.attn_output.weightI8[16384 5120]
-
blk.25.attn_q_a_norm.weightF32[1536]
-
blk.25.attn_q_a.weightCOUNT[5120 1536]
-
blk.25.attn_q_b.weightCOUNT[1536 24576]
-
blk.25.attn_norm.weightF32[5120]
-
blk.25.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.25.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.25.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.25.ffn_norm.weightF32[5120]
-
blk.26.ffn_gate_inp.weightF32[5120 160]
-
blk.26.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.26.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.26.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.26.attn_kv_a_norm.weightF32[512]
-
blk.26.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.26.attn_kv_b.weightCOUNT[512 32768]
-
blk.26.attn_output.weightI8[16384 5120]
-
blk.26.attn_q_a_norm.weightF32[1536]
-
blk.26.attn_q_a.weightCOUNT[5120 1536]
-
blk.26.attn_q_b.weightCOUNT[1536 24576]
-
blk.26.attn_norm.weightF32[5120]
-
blk.26.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.26.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.26.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.26.ffn_norm.weightF32[5120]
-
blk.27.ffn_gate_inp.weightF32[5120 160]
-
blk.27.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.27.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.27.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.27.attn_kv_a_norm.weightF32[512]
-
blk.27.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.27.attn_kv_b.weightCOUNT[512 32768]
-
blk.27.attn_output.weightI8[16384 5120]
-
blk.27.attn_q_a_norm.weightF32[1536]
-
blk.27.attn_q_a.weightCOUNT[5120 1536]
-
blk.27.attn_q_b.weightCOUNT[1536 24576]
-
blk.27.attn_norm.weightF32[5120]
-
blk.27.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.27.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.27.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.27.ffn_norm.weightF32[5120]
-
blk.28.ffn_gate_inp.weightF32[5120 160]
-
blk.28.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.28.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.28.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.28.attn_kv_a_norm.weightF32[512]
-
blk.28.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.28.attn_kv_b.weightCOUNT[512 32768]
-
blk.28.attn_output.weightI8[16384 5120]
-
blk.28.attn_q_a_norm.weightF32[1536]
-
blk.28.attn_q_a.weightCOUNT[5120 1536]
-
blk.28.attn_q_b.weightCOUNT[1536 24576]
-
blk.28.attn_norm.weightF32[5120]
-
blk.28.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.28.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.28.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.28.ffn_norm.weightF32[5120]
-
blk.29.ffn_gate_inp.weightF32[5120 160]
-
blk.29.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.29.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.29.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.29.attn_kv_a_norm.weightF32[512]
-
blk.29.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.29.attn_kv_b.weightCOUNT[512 32768]
-
blk.29.attn_output.weightI8[16384 5120]
-
blk.29.attn_q_a_norm.weightF32[1536]
-
blk.29.attn_q_a.weightCOUNT[5120 1536]
-
blk.29.attn_q_b.weightCOUNT[1536 24576]
-
blk.29.attn_norm.weightF32[5120]
-
blk.29.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.29.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.29.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.29.ffn_norm.weightF32[5120]
-
blk.30.ffn_gate_inp.weightF32[5120 160]
-
blk.30.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.30.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.30.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.30.attn_kv_a_norm.weightF32[512]
-
blk.30.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.30.attn_kv_b.weightCOUNT[512 32768]
-
blk.30.attn_output.weightI8[16384 5120]
-
blk.30.attn_q_a_norm.weightF32[1536]
-
blk.30.attn_q_a.weightCOUNT[5120 1536]
-
blk.30.attn_q_b.weightCOUNT[1536 24576]
-
blk.30.attn_norm.weightF32[5120]
-
blk.30.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.30.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.30.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.30.ffn_norm.weightF32[5120]
-
blk.31.attn_norm.weightF32[5120]
-
blk.31.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.31.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.31.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.31.ffn_gate_inp.weightF32[5120 160]
-
blk.31.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.31.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.31.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.31.ffn_norm.weightF32[5120]
-
blk.31.attn_kv_a_norm.weightF32[512]
-
blk.31.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.31.attn_kv_b.weightCOUNT[512 32768]
-
blk.31.attn_output.weightI8[16384 5120]
-
blk.31.attn_q_a_norm.weightF32[1536]
-
blk.31.attn_q_a.weightCOUNT[5120 1536]
-
blk.31.attn_q_b.weightCOUNT[1536 24576]
-
blk.32.ffn_gate_inp.weightF32[5120 160]
-
blk.32.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.32.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.32.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.32.attn_kv_a_norm.weightF32[512]
-
blk.32.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.32.attn_kv_b.weightCOUNT[512 32768]
-
blk.32.attn_output.weightI8[16384 5120]
-
blk.32.attn_q_a_norm.weightF32[1536]
-
blk.32.attn_q_a.weightCOUNT[5120 1536]
-
blk.32.attn_q_b.weightCOUNT[1536 24576]
-
blk.32.attn_norm.weightF32[5120]
-
blk.32.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.32.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.32.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.32.ffn_norm.weightF32[5120]
-
blk.33.ffn_gate_inp.weightF32[5120 160]
-
blk.33.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.33.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.33.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.33.attn_kv_a_norm.weightF32[512]
-
blk.33.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.33.attn_kv_b.weightCOUNT[512 32768]
-
blk.33.attn_output.weightI8[16384 5120]
-
blk.33.attn_q_a_norm.weightF32[1536]
-
blk.33.attn_q_a.weightCOUNT[5120 1536]
-
blk.33.attn_q_b.weightCOUNT[1536 24576]
-
blk.33.attn_norm.weightF32[5120]
-
blk.33.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.33.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.33.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.33.ffn_norm.weightF32[5120]
-
blk.34.ffn_gate_inp.weightF32[5120 160]
-
blk.34.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.34.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.34.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.34.attn_kv_a_norm.weightF32[512]
-
blk.34.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.34.attn_kv_b.weightCOUNT[512 32768]
-
blk.34.attn_output.weightI8[16384 5120]
-
blk.34.attn_q_a_norm.weightF32[1536]
-
blk.34.attn_q_a.weightCOUNT[5120 1536]
-
blk.34.attn_q_b.weightCOUNT[1536 24576]
-
blk.34.attn_norm.weightF32[5120]
-
blk.34.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.34.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.34.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.34.ffn_norm.weightF32[5120]
-
blk.35.ffn_gate_inp.weightF32[5120 160]
-
blk.35.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.35.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.35.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.35.attn_kv_a_norm.weightF32[512]
-
blk.35.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.35.attn_kv_b.weightCOUNT[512 32768]
-
blk.35.attn_output.weightI8[16384 5120]
-
blk.35.attn_q_a_norm.weightF32[1536]
-
blk.35.attn_q_a.weightCOUNT[5120 1536]
-
blk.35.attn_q_b.weightCOUNT[1536 24576]
-
blk.35.attn_norm.weightF32[5120]
-
blk.35.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.35.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.35.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.35.ffn_norm.weightF32[5120]
-
blk.36.ffn_gate_inp.weightF32[5120 160]
-
blk.36.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.36.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.36.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.36.attn_kv_a_norm.weightF32[512]
-
blk.36.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.36.attn_kv_b.weightCOUNT[512 32768]
-
blk.36.attn_output.weightI8[16384 5120]
-
blk.36.attn_q_a_norm.weightF32[1536]
-
blk.36.attn_q_a.weightCOUNT[5120 1536]
-
blk.36.attn_q_b.weightCOUNT[1536 24576]
-
blk.36.attn_norm.weightF32[5120]
-
blk.36.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.36.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.36.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.36.ffn_norm.weightF32[5120]
-
blk.37.ffn_gate_inp.weightF32[5120 160]
-
blk.37.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.37.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.37.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.37.attn_kv_a_norm.weightF32[512]
-
blk.37.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.37.attn_kv_b.weightCOUNT[512 32768]
-
blk.37.attn_output.weightI8[16384 5120]
-
blk.37.attn_q_a_norm.weightF32[1536]
-
blk.37.attn_q_a.weightCOUNT[5120 1536]
-
blk.37.attn_q_b.weightCOUNT[1536 24576]
-
blk.37.attn_norm.weightF32[5120]
-
blk.37.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.37.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.37.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.37.ffn_norm.weightF32[5120]
-
blk.38.ffn_gate_inp.weightF32[5120 160]
-
blk.38.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.38.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.38.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.38.attn_kv_a_norm.weightF32[512]
-
blk.38.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.38.attn_kv_b.weightCOUNT[512 32768]
-
blk.38.attn_output.weightI8[16384 5120]
-
blk.38.attn_q_a_norm.weightF32[1536]
-
blk.38.attn_q_a.weightCOUNT[5120 1536]
-
blk.38.attn_q_b.weightCOUNT[1536 24576]
-
blk.38.attn_norm.weightF32[5120]
-
blk.38.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.38.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.38.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.38.ffn_norm.weightF32[5120]
-
blk.39.ffn_gate_inp.weightF32[5120 160]
-
blk.39.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.39.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.39.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.39.attn_kv_a_norm.weightF32[512]
-
blk.39.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.39.attn_kv_b.weightCOUNT[512 32768]
-
blk.39.attn_output.weightI8[16384 5120]
-
blk.39.attn_q_a_norm.weightF32[1536]
-
blk.39.attn_q_a.weightCOUNT[5120 1536]
-
blk.39.attn_q_b.weightCOUNT[1536 24576]
-
blk.39.attn_norm.weightF32[5120]
-
blk.39.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.39.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.39.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.39.ffn_norm.weightF32[5120]
-
blk.40.ffn_gate_inp.weightF32[5120 160]
-
blk.40.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.40.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.40.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.40.attn_kv_a_norm.weightF32[512]
-
blk.40.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.40.attn_kv_b.weightCOUNT[512 32768]
-
blk.40.attn_output.weightI8[16384 5120]
-
blk.40.attn_q_a_norm.weightF32[1536]
-
blk.40.attn_q_a.weightCOUNT[5120 1536]
-
blk.40.attn_q_b.weightCOUNT[1536 24576]
-
blk.40.attn_norm.weightF32[5120]
-
blk.40.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.40.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.40.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.40.ffn_norm.weightF32[5120]
-
blk.41.ffn_gate_inp.weightF32[5120 160]
-
blk.41.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.41.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.41.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.41.attn_kv_a_norm.weightF32[512]
-
blk.41.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.41.attn_kv_b.weightCOUNT[512 32768]
-
blk.41.attn_output.weightI8[16384 5120]
-
blk.41.attn_q_a_norm.weightF32[1536]
-
blk.41.attn_q_a.weightCOUNT[5120 1536]
-
blk.41.attn_q_b.weightCOUNT[1536 24576]
-
blk.41.attn_norm.weightF32[5120]
-
blk.41.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.41.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.41.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.41.ffn_norm.weightF32[5120]
-
blk.42.ffn_gate_inp.weightF32[5120 160]
-
blk.42.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.42.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.42.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.42.attn_kv_a_norm.weightF32[512]
-
blk.42.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.42.attn_kv_b.weightCOUNT[512 32768]
-
blk.42.attn_output.weightI8[16384 5120]
-
blk.42.attn_q_a_norm.weightF32[1536]
-
blk.42.attn_q_a.weightCOUNT[5120 1536]
-
blk.42.attn_q_b.weightCOUNT[1536 24576]
-
blk.42.attn_norm.weightF32[5120]
-
blk.42.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.42.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.42.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.42.ffn_norm.weightF32[5120]
-
blk.43.ffn_gate_inp.weightF32[5120 160]
-
blk.43.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.43.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.43.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.43.attn_kv_a_norm.weightF32[512]
-
blk.43.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.43.attn_kv_b.weightCOUNT[512 32768]
-
blk.43.attn_output.weightI8[16384 5120]
-
blk.43.attn_q_a_norm.weightF32[1536]
-
blk.43.attn_q_a.weightCOUNT[5120 1536]
-
blk.43.attn_q_b.weightCOUNT[1536 24576]
-
blk.43.attn_norm.weightF32[5120]
-
blk.43.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.43.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.43.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.43.ffn_norm.weightF32[5120]
-
blk.44.attn_norm.weightF32[5120]
-
blk.44.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.44.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.44.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.44.ffn_gate_inp.weightF32[5120 160]
-
blk.44.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.44.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.44.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.44.ffn_norm.weightF32[5120]
-
blk.44.attn_kv_a_norm.weightF32[512]
-
blk.44.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.44.attn_kv_b.weightCOUNT[512 32768]
-
blk.44.attn_output.weightI8[16384 5120]
-
blk.44.attn_q_a_norm.weightF32[1536]
-
blk.44.attn_q_a.weightCOUNT[5120 1536]
-
blk.44.attn_q_b.weightCOUNT[1536 24576]
-
blk.45.ffn_gate_inp.weightF32[5120 160]
-
blk.45.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.45.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.45.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.45.attn_kv_a_norm.weightF32[512]
-
blk.45.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.45.attn_kv_b.weightCOUNT[512 32768]
-
blk.45.attn_output.weightI8[16384 5120]
-
blk.45.attn_q_a_norm.weightF32[1536]
-
blk.45.attn_q_a.weightCOUNT[5120 1536]
-
blk.45.attn_q_b.weightCOUNT[1536 24576]
-
blk.45.attn_norm.weightF32[5120]
-
blk.45.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.45.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.45.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.45.ffn_norm.weightF32[5120]
-
blk.46.ffn_gate_inp.weightF32[5120 160]
-
blk.46.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.46.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.46.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.46.attn_kv_a_norm.weightF32[512]
-
blk.46.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.46.attn_kv_b.weightCOUNT[512 32768]
-
blk.46.attn_output.weightI8[16384 5120]
-
blk.46.attn_q_a_norm.weightF32[1536]
-
blk.46.attn_q_a.weightCOUNT[5120 1536]
-
blk.46.attn_q_b.weightCOUNT[1536 24576]
-
blk.46.attn_norm.weightF32[5120]
-
blk.46.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.46.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.46.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.46.ffn_norm.weightF32[5120]
-
blk.47.ffn_gate_inp.weightF32[5120 160]
-
blk.47.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.47.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.47.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.47.attn_kv_a_norm.weightF32[512]
-
blk.47.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.47.attn_kv_b.weightCOUNT[512 32768]
-
blk.47.attn_output.weightI8[16384 5120]
-
blk.47.attn_q_a_norm.weightF32[1536]
-
blk.47.attn_q_a.weightCOUNT[5120 1536]
-
blk.47.attn_q_b.weightCOUNT[1536 24576]
-
blk.47.attn_norm.weightF32[5120]
-
blk.47.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.47.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.47.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.47.ffn_norm.weightF32[5120]
-
blk.48.ffn_gate_inp.weightF32[5120 160]
-
blk.48.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.48.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.48.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.48.attn_kv_a_norm.weightF32[512]
-
blk.48.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.48.attn_kv_b.weightCOUNT[512 32768]
-
blk.48.attn_output.weightI8[16384 5120]
-
blk.48.attn_q_a_norm.weightF32[1536]
-
blk.48.attn_q_a.weightCOUNT[5120 1536]
-
blk.48.attn_q_b.weightCOUNT[1536 24576]
-
blk.48.attn_norm.weightF32[5120]
-
blk.48.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.48.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.48.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.48.ffn_norm.weightF32[5120]
-
blk.49.ffn_gate_inp.weightF32[5120 160]
-
blk.49.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.49.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.49.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.49.attn_kv_a_norm.weightF32[512]
-
blk.49.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.49.attn_kv_b.weightCOUNT[512 32768]
-
blk.49.attn_output.weightI8[16384 5120]
-
blk.49.attn_q_a_norm.weightF32[1536]
-
blk.49.attn_q_a.weightCOUNT[5120 1536]
-
blk.49.attn_q_b.weightCOUNT[1536 24576]
-
blk.49.attn_norm.weightF32[5120]
-
blk.49.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.49.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.49.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.49.ffn_norm.weightF32[5120]
-
blk.50.ffn_gate_inp.weightF32[5120 160]
-
blk.50.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.50.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.50.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.50.attn_kv_a_norm.weightF32[512]
-
blk.50.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.50.attn_kv_b.weightCOUNT[512 32768]
-
blk.50.attn_output.weightI8[16384 5120]
-
blk.50.attn_q_a_norm.weightF32[1536]
-
blk.50.attn_q_a.weightCOUNT[5120 1536]
-
blk.50.attn_q_b.weightCOUNT[1536 24576]
-
blk.50.attn_norm.weightF32[5120]
-
blk.50.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.50.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.50.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.50.ffn_norm.weightF32[5120]
-
blk.51.ffn_gate_inp.weightF32[5120 160]
-
blk.51.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.51.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.51.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.51.attn_kv_a_norm.weightF32[512]
-
blk.51.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.51.attn_kv_b.weightCOUNT[512 32768]
-
blk.51.attn_output.weightI8[16384 5120]
-
blk.51.attn_q_a_norm.weightF32[1536]
-
blk.51.attn_q_a.weightCOUNT[5120 1536]
-
blk.51.attn_q_b.weightCOUNT[1536 24576]
-
blk.51.attn_norm.weightF32[5120]
-
blk.51.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.51.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.51.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.51.ffn_norm.weightF32[5120]
-
blk.52.ffn_gate_inp.weightF32[5120 160]
-
blk.52.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.52.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.52.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.52.attn_kv_a_norm.weightF32[512]
-
blk.52.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.52.attn_kv_b.weightCOUNT[512 32768]
-
blk.52.attn_output.weightI8[16384 5120]
-
blk.52.attn_q_a_norm.weightF32[1536]
-
blk.52.attn_q_a.weightCOUNT[5120 1536]
-
blk.52.attn_q_b.weightCOUNT[1536 24576]
-
blk.52.attn_norm.weightF32[5120]
-
blk.52.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.52.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.52.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.52.ffn_norm.weightF32[5120]
-
blk.53.ffn_gate_inp.weightF32[5120 160]
-
blk.53.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.53.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.53.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.53.attn_kv_a_norm.weightF32[512]
-
blk.53.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.53.attn_kv_b.weightCOUNT[512 32768]
-
blk.53.attn_output.weightI8[16384 5120]
-
blk.53.attn_q_a_norm.weightF32[1536]
-
blk.53.attn_q_a.weightCOUNT[5120 1536]
-
blk.53.attn_q_b.weightCOUNT[1536 24576]
-
blk.53.attn_norm.weightF32[5120]
-
blk.53.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.53.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.53.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.53.ffn_norm.weightF32[5120]
-
blk.54.ffn_gate_inp.weightF32[5120 160]
-
blk.54.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.54.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.54.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.54.attn_kv_a_norm.weightF32[512]
-
blk.54.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.54.attn_kv_b.weightCOUNT[512 32768]
-
blk.54.attn_output.weightI8[16384 5120]
-
blk.54.attn_q_a_norm.weightF32[1536]
-
blk.54.attn_q_a.weightCOUNT[5120 1536]
-
blk.54.attn_q_b.weightCOUNT[1536 24576]
-
blk.54.attn_norm.weightF32[5120]
-
blk.54.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.54.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.54.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.54.ffn_norm.weightF32[5120]
-
blk.55.ffn_gate_inp.weightF32[5120 160]
-
blk.55.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.55.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.55.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.55.attn_kv_a_norm.weightF32[512]
-
blk.55.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.55.attn_kv_b.weightCOUNT[512 32768]
-
blk.55.attn_output.weightI8[16384 5120]
-
blk.55.attn_q_a_norm.weightF32[1536]
-
blk.55.attn_q_a.weightCOUNT[5120 1536]
-
blk.55.attn_q_b.weightCOUNT[1536 24576]
-
blk.55.attn_norm.weightF32[5120]
-
blk.55.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.55.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.55.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.55.ffn_norm.weightF32[5120]
-
blk.56.ffn_gate_inp.weightF32[5120 160]
-
blk.56.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.56.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.56.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.56.attn_kv_a_norm.weightF32[512]
-
blk.56.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.56.attn_kv_b.weightCOUNT[512 32768]
-
blk.56.attn_output.weightI8[16384 5120]
-
blk.56.attn_q_a_norm.weightF32[1536]
-
blk.56.attn_q_a.weightCOUNT[5120 1536]
-
blk.56.attn_q_b.weightCOUNT[1536 24576]
-
blk.56.attn_norm.weightF32[5120]
-
blk.56.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.56.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.56.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.56.ffn_norm.weightF32[5120]
-
blk.57.attn_norm.weightF32[5120]
-
blk.57.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.57.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.57.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.57.ffn_gate_inp.weightF32[5120 160]
-
blk.57.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.57.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.57.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.57.ffn_norm.weightF32[5120]
-
blk.57.attn_kv_a_norm.weightF32[512]
-
blk.57.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.57.attn_kv_b.weightCOUNT[512 32768]
-
blk.57.attn_output.weightI8[16384 5120]
-
blk.57.attn_q_a_norm.weightF32[1536]
-
blk.57.attn_q_a.weightCOUNT[5120 1536]
-
blk.57.attn_q_b.weightCOUNT[1536 24576]
-
blk.58.ffn_gate_inp.weightF32[5120 160]
-
blk.58.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.58.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.58.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.58.attn_kv_a_norm.weightF32[512]
-
blk.58.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.58.attn_kv_b.weightCOUNT[512 32768]
-
blk.58.attn_output.weightI8[16384 5120]
-
blk.58.attn_q_a_norm.weightF32[1536]
-
blk.58.attn_q_a.weightCOUNT[5120 1536]
-
blk.58.attn_q_b.weightCOUNT[1536 24576]
-
blk.58.attn_norm.weightF32[5120]
-
blk.58.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.58.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.58.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.58.ffn_norm.weightF32[5120]
-
blk.59.ffn_gate_inp.weightF32[5120 160]
-
blk.59.ffn_down_shexp.weightCOUNT[3072 5120]
-
blk.59.ffn_gate_shexp.weightCOUNT[5120 3072]
-
blk.59.ffn_up_shexp.weightCOUNT[5120 3072]
-
blk.59.attn_kv_a_norm.weightF32[512]
-
blk.59.attn_kv_a_mqa.weightCOUNT[5120 576]
-
blk.59.attn_kv_b.weightCOUNT[512 32768]
-
blk.59.attn_output.weightI8[16384 5120]
-
blk.59.attn_q_a_norm.weightF32[1536]
-
blk.59.attn_q_a.weightCOUNT[5120 1536]
-
blk.59.attn_q_b.weightCOUNT[1536 24576]
-
blk.59.attn_norm.weightF32[5120]
-
blk.59.ffn_down_exps.weightCOUNT[1536 5120 160]
-
blk.59.ffn_gate_exps.weightCOUNT[5120 1536 160]
-
blk.59.ffn_up_exps.weightCOUNT[5120 1536 160]
-
blk.59.ffn_norm.weightF32[5120]
-
output.weightQ5_K[5120 102400]
-
output_norm.weightF32[5120]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35
blk.36
blk.37
blk.38
blk.39
blk.40
blk.41
blk.42
blk.43
blk.44
blk.45
blk.46
blk.47
blk.48
blk.49
blk.50
blk.51
blk.52
blk.53
blk.54
blk.55
blk.56
blk.57
blk.58
blk.59