Low-latency fill-in-the-middle
79 Pulls Updated 6 months ago
2298c7e2ad48 · 1.7GB
-
general.architecturerefact
-
general.file_typeQ8_0
-
refact.attention.head_count32
-
refact.attention.head_count_kv1
-
refact.attention.layer_norm_rms_epsilon1e-05
-
refact.block_count32
-
refact.context_length4096
-
refact.embedding_length2048
-
refact.feed_forward_length5632
-
tokenizer.ggml.bos_token_id0
-
tokenizer.ggml.eos_token_id0
-
tokenizer.ggml.merges[Ġ Ġ ĠĠ ĠĠ ĠĠĠĠ ĠĠĠĠ ĠĠ Ġ e r ...]
-
tokenizer.ggml.modelgpt2
-
tokenizer.ggml.token_type[3 3 3 3 3 ...]
-
tokenizer.ggml.tokens[<|endoftext|> <fim_prefix> <fim_middle> <fim_suffix> <fim_pad> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ8_0[2048 49216]
-
blk.0.attn_norm.weightF32[2048]
-
blk.0.attn_output.weightQ8_0[2048 2048]
-
blk.0.ffn_norm.weightF32[2048]
-
blk.0.ffn_down.weightQ8_0[5632 2048]
-
blk.0.attn_k.weightQ8_0[2048 64]
-
blk.0.attn_v.weightQ8_0[2048 64]
-
blk.0.attn_q.weightQ8_0[2048 2048]
-
blk.0.ffn_gate.weightQ8_0[2048 5632]
-
blk.0.ffn_up.weightQ8_0[2048 5632]
-
blk.1.attn_norm.weightF32[2048]
-
blk.1.attn_output.weightQ8_0[2048 2048]
-
blk.1.ffn_norm.weightF32[2048]
-
blk.1.ffn_down.weightQ8_0[5632 2048]
-
blk.1.attn_k.weightQ8_0[2048 64]
-
blk.1.attn_v.weightQ8_0[2048 64]
-
blk.1.attn_q.weightQ8_0[2048 2048]
-
blk.1.ffn_gate.weightQ8_0[2048 5632]
-
blk.1.ffn_up.weightQ8_0[2048 5632]
-
blk.2.attn_norm.weightF32[2048]
-
blk.2.attn_output.weightQ8_0[2048 2048]
-
blk.2.ffn_norm.weightF32[2048]
-
blk.2.ffn_down.weightQ8_0[5632 2048]
-
blk.2.attn_k.weightQ8_0[2048 64]
-
blk.2.attn_v.weightQ8_0[2048 64]
-
blk.2.attn_q.weightQ8_0[2048 2048]
-
blk.2.ffn_gate.weightQ8_0[2048 5632]
-
blk.2.ffn_up.weightQ8_0[2048 5632]
-
blk.3.attn_norm.weightF32[2048]
-
blk.3.attn_output.weightQ8_0[2048 2048]
-
blk.3.ffn_norm.weightF32[2048]
-
blk.3.ffn_down.weightQ8_0[5632 2048]
-
blk.3.attn_k.weightQ8_0[2048 64]
-
blk.3.attn_v.weightQ8_0[2048 64]
-
blk.3.attn_q.weightQ8_0[2048 2048]
-
blk.3.ffn_gate.weightQ8_0[2048 5632]
-
blk.3.ffn_up.weightQ8_0[2048 5632]
-
blk.4.attn_norm.weightF32[2048]
-
blk.4.attn_output.weightQ8_0[2048 2048]
-
blk.4.ffn_norm.weightF32[2048]
-
blk.4.ffn_down.weightQ8_0[5632 2048]
-
blk.4.attn_k.weightQ8_0[2048 64]
-
blk.4.attn_v.weightQ8_0[2048 64]
-
blk.4.attn_q.weightQ8_0[2048 2048]
-
blk.4.ffn_gate.weightQ8_0[2048 5632]
-
blk.4.ffn_up.weightQ8_0[2048 5632]
-
blk.5.attn_norm.weightF32[2048]
-
blk.5.attn_output.weightQ8_0[2048 2048]
-
blk.5.ffn_norm.weightF32[2048]
-
blk.5.ffn_down.weightQ8_0[5632 2048]
-
blk.5.attn_k.weightQ8_0[2048 64]
-
blk.5.attn_v.weightQ8_0[2048 64]
-
blk.5.attn_q.weightQ8_0[2048 2048]
-
blk.5.ffn_gate.weightQ8_0[2048 5632]
-
blk.5.ffn_up.weightQ8_0[2048 5632]
-
blk.6.attn_norm.weightF32[2048]
-
blk.6.attn_output.weightQ8_0[2048 2048]
-
blk.6.ffn_norm.weightF32[2048]
-
blk.6.ffn_down.weightQ8_0[5632 2048]
-
blk.6.attn_k.weightQ8_0[2048 64]
-
blk.6.attn_v.weightQ8_0[2048 64]
-
blk.6.attn_q.weightQ8_0[2048 2048]
-
blk.6.ffn_gate.weightQ8_0[2048 5632]
-
blk.6.ffn_up.weightQ8_0[2048 5632]
-
blk.7.attn_norm.weightF32[2048]
-
blk.7.attn_output.weightQ8_0[2048 2048]
-
blk.7.ffn_norm.weightF32[2048]
-
blk.7.ffn_down.weightQ8_0[5632 2048]
-
blk.7.attn_k.weightQ8_0[2048 64]
-
blk.7.attn_v.weightQ8_0[2048 64]
-
blk.7.attn_q.weightQ8_0[2048 2048]
-
blk.7.ffn_gate.weightQ8_0[2048 5632]
-
blk.7.ffn_up.weightQ8_0[2048 5632]
-
blk.8.attn_norm.weightF32[2048]
-
blk.8.attn_output.weightQ8_0[2048 2048]
-
blk.8.ffn_norm.weightF32[2048]
-
blk.8.ffn_down.weightQ8_0[5632 2048]
-
blk.8.attn_k.weightQ8_0[2048 64]
-
blk.8.attn_v.weightQ8_0[2048 64]
-
blk.8.attn_q.weightQ8_0[2048 2048]
-
blk.8.ffn_gate.weightQ8_0[2048 5632]
-
blk.8.ffn_up.weightQ8_0[2048 5632]
-
blk.9.attn_norm.weightF32[2048]
-
blk.9.attn_output.weightQ8_0[2048 2048]
-
blk.9.ffn_norm.weightF32[2048]
-
blk.9.ffn_down.weightQ8_0[5632 2048]
-
blk.9.attn_k.weightQ8_0[2048 64]
-
blk.9.attn_v.weightQ8_0[2048 64]
-
blk.9.attn_q.weightQ8_0[2048 2048]
-
blk.9.ffn_gate.weightQ8_0[2048 5632]
-
blk.9.ffn_up.weightQ8_0[2048 5632]
-
blk.10.attn_norm.weightF32[2048]
-
blk.10.attn_output.weightQ8_0[2048 2048]
-
blk.10.ffn_norm.weightF32[2048]
-
blk.10.ffn_down.weightQ8_0[5632 2048]
-
blk.10.attn_k.weightQ8_0[2048 64]
-
blk.10.attn_v.weightQ8_0[2048 64]
-
blk.10.attn_q.weightQ8_0[2048 2048]
-
blk.10.ffn_gate.weightQ8_0[2048 5632]
-
blk.10.ffn_up.weightQ8_0[2048 5632]
-
blk.11.attn_norm.weightF32[2048]
-
blk.11.attn_output.weightQ8_0[2048 2048]
-
blk.11.ffn_norm.weightF32[2048]
-
blk.11.ffn_down.weightQ8_0[5632 2048]
-
blk.11.attn_k.weightQ8_0[2048 64]
-
blk.11.attn_v.weightQ8_0[2048 64]
-
blk.11.attn_q.weightQ8_0[2048 2048]
-
blk.11.ffn_gate.weightQ8_0[2048 5632]
-
blk.11.ffn_up.weightQ8_0[2048 5632]
-
blk.12.attn_norm.weightF32[2048]
-
blk.12.attn_output.weightQ8_0[2048 2048]
-
blk.12.ffn_norm.weightF32[2048]
-
blk.12.ffn_down.weightQ8_0[5632 2048]
-
blk.12.attn_k.weightQ8_0[2048 64]
-
blk.12.attn_v.weightQ8_0[2048 64]
-
blk.12.attn_q.weightQ8_0[2048 2048]
-
blk.12.ffn_gate.weightQ8_0[2048 5632]
-
blk.12.ffn_up.weightQ8_0[2048 5632]
-
blk.13.attn_norm.weightF32[2048]
-
blk.13.attn_output.weightQ8_0[2048 2048]
-
blk.13.ffn_norm.weightF32[2048]
-
blk.13.ffn_down.weightQ8_0[5632 2048]
-
blk.13.attn_k.weightQ8_0[2048 64]
-
blk.13.attn_v.weightQ8_0[2048 64]
-
blk.13.attn_q.weightQ8_0[2048 2048]
-
blk.13.ffn_gate.weightQ8_0[2048 5632]
-
blk.13.ffn_up.weightQ8_0[2048 5632]
-
blk.14.attn_norm.weightF32[2048]
-
blk.14.attn_output.weightQ8_0[2048 2048]
-
blk.14.ffn_norm.weightF32[2048]
-
blk.14.ffn_down.weightQ8_0[5632 2048]
-
blk.14.attn_k.weightQ8_0[2048 64]
-
blk.14.attn_v.weightQ8_0[2048 64]
-
blk.14.attn_q.weightQ8_0[2048 2048]
-
blk.14.ffn_gate.weightQ8_0[2048 5632]
-
blk.14.ffn_up.weightQ8_0[2048 5632]
-
blk.15.attn_norm.weightF32[2048]
-
blk.15.attn_output.weightQ8_0[2048 2048]
-
blk.15.ffn_norm.weightF32[2048]
-
blk.15.ffn_down.weightQ8_0[5632 2048]
-
blk.15.attn_k.weightQ8_0[2048 64]
-
blk.15.attn_v.weightQ8_0[2048 64]
-
blk.15.attn_q.weightQ8_0[2048 2048]
-
blk.15.ffn_gate.weightQ8_0[2048 5632]
-
blk.15.ffn_up.weightQ8_0[2048 5632]
-
blk.16.attn_norm.weightF32[2048]
-
blk.16.attn_output.weightQ8_0[2048 2048]
-
blk.16.ffn_norm.weightF32[2048]
-
blk.16.ffn_down.weightQ8_0[5632 2048]
-
blk.16.attn_k.weightQ8_0[2048 64]
-
blk.16.attn_v.weightQ8_0[2048 64]
-
blk.16.attn_q.weightQ8_0[2048 2048]
-
blk.16.ffn_gate.weightQ8_0[2048 5632]
-
blk.16.ffn_up.weightQ8_0[2048 5632]
-
blk.17.attn_norm.weightF32[2048]
-
blk.17.attn_output.weightQ8_0[2048 2048]
-
blk.17.ffn_norm.weightF32[2048]
-
blk.17.ffn_down.weightQ8_0[5632 2048]
-
blk.17.attn_k.weightQ8_0[2048 64]
-
blk.17.attn_v.weightQ8_0[2048 64]
-
blk.17.attn_q.weightQ8_0[2048 2048]
-
blk.17.ffn_gate.weightQ8_0[2048 5632]
-
blk.17.ffn_up.weightQ8_0[2048 5632]
-
blk.18.attn_norm.weightF32[2048]
-
blk.18.attn_output.weightQ8_0[2048 2048]
-
blk.18.ffn_norm.weightF32[2048]
-
blk.18.ffn_down.weightQ8_0[5632 2048]
-
blk.18.attn_k.weightQ8_0[2048 64]
-
blk.18.attn_v.weightQ8_0[2048 64]
-
blk.18.attn_q.weightQ8_0[2048 2048]
-
blk.18.ffn_gate.weightQ8_0[2048 5632]
-
blk.18.ffn_up.weightQ8_0[2048 5632]
-
blk.19.attn_norm.weightF32[2048]
-
blk.19.attn_output.weightQ8_0[2048 2048]
-
blk.19.ffn_norm.weightF32[2048]
-
blk.19.ffn_down.weightQ8_0[5632 2048]
-
blk.19.attn_k.weightQ8_0[2048 64]
-
blk.19.attn_v.weightQ8_0[2048 64]
-
blk.19.attn_q.weightQ8_0[2048 2048]
-
blk.19.ffn_gate.weightQ8_0[2048 5632]
-
blk.19.ffn_up.weightQ8_0[2048 5632]
-
blk.20.attn_norm.weightF32[2048]
-
blk.20.attn_output.weightQ8_0[2048 2048]
-
blk.20.ffn_norm.weightF32[2048]
-
blk.20.ffn_down.weightQ8_0[5632 2048]
-
blk.20.attn_k.weightQ8_0[2048 64]
-
blk.20.attn_v.weightQ8_0[2048 64]
-
blk.20.attn_q.weightQ8_0[2048 2048]
-
blk.20.ffn_gate.weightQ8_0[2048 5632]
-
blk.20.ffn_up.weightQ8_0[2048 5632]
-
blk.21.attn_norm.weightF32[2048]
-
blk.21.attn_output.weightQ8_0[2048 2048]
-
blk.21.ffn_norm.weightF32[2048]
-
blk.21.ffn_down.weightQ8_0[5632 2048]
-
blk.21.attn_k.weightQ8_0[2048 64]
-
blk.21.attn_v.weightQ8_0[2048 64]
-
blk.21.attn_q.weightQ8_0[2048 2048]
-
blk.21.ffn_gate.weightQ8_0[2048 5632]
-
blk.21.ffn_up.weightQ8_0[2048 5632]
-
blk.22.attn_norm.weightF32[2048]
-
blk.22.attn_output.weightQ8_0[2048 2048]
-
blk.22.ffn_norm.weightF32[2048]
-
blk.22.ffn_down.weightQ8_0[5632 2048]
-
blk.22.attn_k.weightQ8_0[2048 64]
-
blk.22.attn_v.weightQ8_0[2048 64]
-
blk.22.attn_q.weightQ8_0[2048 2048]
-
blk.22.ffn_gate.weightQ8_0[2048 5632]
-
blk.22.ffn_up.weightQ8_0[2048 5632]
-
blk.23.attn_norm.weightF32[2048]
-
blk.23.attn_output.weightQ8_0[2048 2048]
-
blk.23.ffn_norm.weightF32[2048]
-
blk.23.ffn_down.weightQ8_0[5632 2048]
-
blk.23.attn_k.weightQ8_0[2048 64]
-
blk.23.attn_v.weightQ8_0[2048 64]
-
blk.23.attn_q.weightQ8_0[2048 2048]
-
blk.23.ffn_gate.weightQ8_0[2048 5632]
-
blk.23.ffn_up.weightQ8_0[2048 5632]
-
blk.24.attn_norm.weightF32[2048]
-
blk.24.attn_output.weightQ8_0[2048 2048]
-
blk.24.ffn_norm.weightF32[2048]
-
blk.24.ffn_down.weightQ8_0[5632 2048]
-
blk.24.attn_k.weightQ8_0[2048 64]
-
blk.24.attn_v.weightQ8_0[2048 64]
-
blk.24.attn_q.weightQ8_0[2048 2048]
-
blk.24.ffn_gate.weightQ8_0[2048 5632]
-
blk.24.ffn_up.weightQ8_0[2048 5632]
-
blk.25.attn_norm.weightF32[2048]
-
blk.25.attn_output.weightQ8_0[2048 2048]
-
blk.25.ffn_norm.weightF32[2048]
-
blk.25.ffn_down.weightQ8_0[5632 2048]
-
blk.25.attn_k.weightQ8_0[2048 64]
-
blk.25.attn_v.weightQ8_0[2048 64]
-
blk.25.attn_q.weightQ8_0[2048 2048]
-
blk.25.ffn_gate.weightQ8_0[2048 5632]
-
blk.25.ffn_up.weightQ8_0[2048 5632]
-
blk.26.attn_norm.weightF32[2048]
-
blk.26.attn_output.weightQ8_0[2048 2048]
-
blk.26.ffn_norm.weightF32[2048]
-
blk.26.ffn_down.weightQ8_0[5632 2048]
-
blk.26.attn_k.weightQ8_0[2048 64]
-
blk.26.attn_v.weightQ8_0[2048 64]
-
blk.26.attn_q.weightQ8_0[2048 2048]
-
blk.26.ffn_gate.weightQ8_0[2048 5632]
-
blk.26.ffn_up.weightQ8_0[2048 5632]
-
blk.27.attn_norm.weightF32[2048]
-
blk.27.attn_output.weightQ8_0[2048 2048]
-
blk.27.ffn_norm.weightF32[2048]
-
blk.27.ffn_down.weightQ8_0[5632 2048]
-
blk.27.attn_k.weightQ8_0[2048 64]
-
blk.27.attn_v.weightQ8_0[2048 64]
-
blk.27.attn_q.weightQ8_0[2048 2048]
-
blk.27.ffn_gate.weightQ8_0[2048 5632]
-
blk.27.ffn_up.weightQ8_0[2048 5632]
-
blk.28.attn_norm.weightF32[2048]
-
blk.28.attn_output.weightQ8_0[2048 2048]
-
blk.28.ffn_norm.weightF32[2048]
-
blk.28.ffn_down.weightQ8_0[5632 2048]
-
blk.28.attn_k.weightQ8_0[2048 64]
-
blk.28.attn_v.weightQ8_0[2048 64]
-
blk.28.attn_q.weightQ8_0[2048 2048]
-
blk.28.ffn_gate.weightQ8_0[2048 5632]
-
blk.28.ffn_up.weightQ8_0[2048 5632]
-
blk.29.attn_norm.weightF32[2048]
-
blk.29.attn_output.weightQ8_0[2048 2048]
-
blk.29.ffn_norm.weightF32[2048]
-
blk.29.ffn_down.weightQ8_0[5632 2048]
-
blk.29.attn_k.weightQ8_0[2048 64]
-
blk.29.attn_v.weightQ8_0[2048 64]
-
blk.29.attn_q.weightQ8_0[2048 2048]
-
blk.29.ffn_gate.weightQ8_0[2048 5632]
-
blk.29.ffn_up.weightQ8_0[2048 5632]
-
blk.30.attn_norm.weightF32[2048]
-
blk.30.attn_output.weightQ8_0[2048 2048]
-
blk.30.ffn_norm.weightF32[2048]
-
blk.30.ffn_down.weightQ8_0[5632 2048]
-
blk.30.attn_k.weightQ8_0[2048 64]
-
blk.30.attn_v.weightQ8_0[2048 64]
-
blk.30.attn_q.weightQ8_0[2048 2048]
-
blk.30.ffn_gate.weightQ8_0[2048 5632]
-
blk.30.ffn_up.weightQ8_0[2048 5632]
-
blk.31.attn_norm.weightF32[2048]
-
blk.31.attn_output.weightQ8_0[2048 2048]
-
blk.31.ffn_norm.weightF32[2048]
-
blk.31.ffn_down.weightQ8_0[5632 2048]
-
blk.31.attn_k.weightQ8_0[2048 64]
-
blk.31.attn_v.weightQ8_0[2048 64]
-
blk.31.attn_q.weightQ8_0[2048 2048]
-
blk.31.ffn_gate.weightQ8_0[2048 5632]
-
blk.31.ffn_up.weightQ8_0[2048 5632]
-
output_norm.weightF32[2048]
-
output.weightQ8_0[2048 49216]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31