latest
2.3GB
quantization of vonjack/Phi-3-mini-4k-instruct-LLaMAfied
3B
131 Pulls Updated 4 months ago
73a89ade4154 · 2.3GB
-
general.architecturellama
-
general.file_typeQ4_K_M
-
llama.attention.head_count32
-
llama.attention.head_count_kv32
-
llama.attention.layer_norm_rms_epsilon1e-05
-
llama.block_count32
-
llama.context_length4096
-
llama.embedding_length3072
-
llama.feed_forward_length8192
-
llama.rope.dimension_count96
-
llama.rope.freq_base10000
-
llama.vocab_size32064
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id32007
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.padding_token_id32000
-
tokenizer.ggml.scores[0 0 0 0 0 ...]
-
tokenizer.ggml.token_type[2 3 3 6 6 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <0x00> <0x01> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ4_K[3072 32064]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_down.weightQ6_K[8192 3072]
-
blk.0.ffn_gate.weightQ4_K[3072 8192]
-
blk.0.ffn_up.weightQ4_K[3072 8192]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.attn_k.weightQ4_K[3072 3072]
-
blk.0.attn_output.weightQ4_K[3072 3072]
-
blk.0.attn_q.weightQ4_K[3072 3072]
-
blk.0.attn_v.weightQ6_K[3072 3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_down.weightQ6_K[8192 3072]
-
blk.1.ffn_gate.weightQ4_K[3072 8192]
-
blk.1.ffn_up.weightQ4_K[3072 8192]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.attn_k.weightQ4_K[3072 3072]
-
blk.1.attn_output.weightQ4_K[3072 3072]
-
blk.1.attn_q.weightQ4_K[3072 3072]
-
blk.1.attn_v.weightQ6_K[3072 3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_down.weightQ6_K[8192 3072]
-
blk.2.ffn_gate.weightQ4_K[3072 8192]
-
blk.2.ffn_up.weightQ4_K[3072 8192]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.attn_k.weightQ4_K[3072 3072]
-
blk.2.attn_output.weightQ4_K[3072 3072]
-
blk.2.attn_q.weightQ4_K[3072 3072]
-
blk.2.attn_v.weightQ6_K[3072 3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_down.weightQ4_K[8192 3072]
-
blk.3.ffn_gate.weightQ4_K[3072 8192]
-
blk.3.ffn_up.weightQ4_K[3072 8192]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.attn_k.weightQ4_K[3072 3072]
-
blk.3.attn_output.weightQ4_K[3072 3072]
-
blk.3.attn_q.weightQ4_K[3072 3072]
-
blk.3.attn_v.weightQ4_K[3072 3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_down.weightQ4_K[8192 3072]
-
blk.4.ffn_gate.weightQ4_K[3072 8192]
-
blk.4.ffn_up.weightQ4_K[3072 8192]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.attn_k.weightQ4_K[3072 3072]
-
blk.4.attn_output.weightQ4_K[3072 3072]
-
blk.4.attn_q.weightQ4_K[3072 3072]
-
blk.4.attn_v.weightQ4_K[3072 3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_down.weightQ6_K[8192 3072]
-
blk.5.ffn_gate.weightQ4_K[3072 8192]
-
blk.5.ffn_up.weightQ4_K[3072 8192]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.attn_k.weightQ4_K[3072 3072]
-
blk.5.attn_output.weightQ4_K[3072 3072]
-
blk.5.attn_q.weightQ4_K[3072 3072]
-
blk.5.attn_v.weightQ6_K[3072 3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_down.weightQ6_K[8192 3072]
-
blk.6.ffn_gate.weightQ4_K[3072 8192]
-
blk.6.ffn_up.weightQ4_K[3072 8192]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.attn_k.weightQ4_K[3072 3072]
-
blk.6.attn_output.weightQ4_K[3072 3072]
-
blk.6.attn_q.weightQ4_K[3072 3072]
-
blk.6.attn_v.weightQ6_K[3072 3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_down.weightQ6_K[8192 3072]
-
blk.7.ffn_gate.weightQ4_K[3072 8192]
-
blk.7.ffn_up.weightQ4_K[3072 8192]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.7.attn_k.weightQ4_K[3072 3072]
-
blk.7.attn_output.weightQ4_K[3072 3072]
-
blk.7.attn_q.weightQ4_K[3072 3072]
-
blk.7.attn_v.weightQ6_K[3072 3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_down.weightQ6_K[8192 3072]
-
blk.8.ffn_gate.weightQ4_K[3072 8192]
-
blk.8.ffn_up.weightQ4_K[3072 8192]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.attn_k.weightQ4_K[3072 3072]
-
blk.8.attn_output.weightQ4_K[3072 3072]
-
blk.8.attn_q.weightQ4_K[3072 3072]
-
blk.8.attn_v.weightQ6_K[3072 3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_down.weightQ6_K[8192 3072]
-
blk.9.ffn_gate.weightQ4_K[3072 8192]
-
blk.9.ffn_up.weightQ4_K[3072 8192]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.attn_k.weightQ4_K[3072 3072]
-
blk.9.attn_output.weightQ4_K[3072 3072]
-
blk.9.attn_q.weightQ4_K[3072 3072]
-
blk.9.attn_v.weightQ6_K[3072 3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_down.weightQ6_K[8192 3072]
-
blk.10.ffn_gate.weightQ4_K[3072 8192]
-
blk.10.ffn_up.weightQ4_K[3072 8192]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.attn_k.weightQ4_K[3072 3072]
-
blk.10.attn_output.weightQ4_K[3072 3072]
-
blk.10.attn_q.weightQ4_K[3072 3072]
-
blk.10.attn_v.weightQ6_K[3072 3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_down.weightQ6_K[8192 3072]
-
blk.11.ffn_gate.weightQ4_K[3072 8192]
-
blk.11.ffn_up.weightQ4_K[3072 8192]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.attn_k.weightQ4_K[3072 3072]
-
blk.11.attn_output.weightQ4_K[3072 3072]
-
blk.11.attn_q.weightQ4_K[3072 3072]
-
blk.11.attn_v.weightQ6_K[3072 3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_down.weightQ4_K[8192 3072]
-
blk.12.ffn_gate.weightQ4_K[3072 8192]
-
blk.12.ffn_up.weightQ4_K[3072 8192]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.attn_k.weightQ4_K[3072 3072]
-
blk.12.attn_output.weightQ4_K[3072 3072]
-
blk.12.attn_q.weightQ4_K[3072 3072]
-
blk.12.attn_v.weightQ4_K[3072 3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_down.weightQ4_K[8192 3072]
-
blk.13.ffn_gate.weightQ4_K[3072 8192]
-
blk.13.ffn_up.weightQ4_K[3072 8192]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.attn_k.weightQ4_K[3072 3072]
-
blk.13.attn_output.weightQ4_K[3072 3072]
-
blk.13.attn_q.weightQ4_K[3072 3072]
-
blk.13.attn_v.weightQ4_K[3072 3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_down.weightQ6_K[8192 3072]
-
blk.14.ffn_gate.weightQ4_K[3072 8192]
-
blk.14.ffn_up.weightQ4_K[3072 8192]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.attn_k.weightQ4_K[3072 3072]
-
blk.14.attn_output.weightQ4_K[3072 3072]
-
blk.14.attn_q.weightQ4_K[3072 3072]
-
blk.14.attn_v.weightQ6_K[3072 3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_down.weightQ4_K[8192 3072]
-
blk.15.ffn_gate.weightQ4_K[3072 8192]
-
blk.15.ffn_up.weightQ4_K[3072 8192]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.attn_k.weightQ4_K[3072 3072]
-
blk.15.attn_output.weightQ4_K[3072 3072]
-
blk.15.attn_q.weightQ4_K[3072 3072]
-
blk.15.attn_v.weightQ4_K[3072 3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_down.weightQ4_K[8192 3072]
-
blk.16.ffn_gate.weightQ4_K[3072 8192]
-
blk.16.ffn_up.weightQ4_K[3072 8192]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.16.attn_k.weightQ4_K[3072 3072]
-
blk.16.attn_output.weightQ4_K[3072 3072]
-
blk.16.attn_q.weightQ4_K[3072 3072]
-
blk.16.attn_v.weightQ4_K[3072 3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_down.weightQ6_K[8192 3072]
-
blk.17.ffn_gate.weightQ4_K[3072 8192]
-
blk.17.ffn_up.weightQ4_K[3072 8192]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.attn_k.weightQ4_K[3072 3072]
-
blk.17.attn_output.weightQ4_K[3072 3072]
-
blk.17.attn_q.weightQ4_K[3072 3072]
-
blk.17.attn_v.weightQ6_K[3072 3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_down.weightQ4_K[8192 3072]
-
blk.18.ffn_gate.weightQ4_K[3072 8192]
-
blk.18.ffn_up.weightQ4_K[3072 8192]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.attn_k.weightQ4_K[3072 3072]
-
blk.18.attn_output.weightQ4_K[3072 3072]
-
blk.18.attn_q.weightQ4_K[3072 3072]
-
blk.18.attn_v.weightQ4_K[3072 3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_down.weightQ4_K[8192 3072]
-
blk.19.ffn_gate.weightQ4_K[3072 8192]
-
blk.19.ffn_up.weightQ4_K[3072 8192]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.attn_k.weightQ4_K[3072 3072]
-
blk.19.attn_output.weightQ4_K[3072 3072]
-
blk.19.attn_q.weightQ4_K[3072 3072]
-
blk.19.attn_v.weightQ4_K[3072 3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_down.weightQ4_K[8192 3072]
-
blk.20.ffn_gate.weightQ4_K[3072 8192]
-
blk.20.ffn_up.weightQ4_K[3072 8192]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.attn_k.weightQ4_K[3072 3072]
-
blk.20.attn_output.weightQ4_K[3072 3072]
-
blk.20.attn_q.weightQ4_K[3072 3072]
-
blk.20.attn_v.weightQ4_K[3072 3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_down.weightQ4_K[8192 3072]
-
blk.21.ffn_gate.weightQ4_K[3072 8192]
-
blk.21.ffn_up.weightQ4_K[3072 8192]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.attn_k.weightQ4_K[3072 3072]
-
blk.21.attn_output.weightQ4_K[3072 3072]
-
blk.21.attn_q.weightQ4_K[3072 3072]
-
blk.21.attn_v.weightQ4_K[3072 3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_down.weightQ6_K[8192 3072]
-
blk.22.ffn_gate.weightQ4_K[3072 8192]
-
blk.22.ffn_up.weightQ4_K[3072 8192]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.attn_k.weightQ4_K[3072 3072]
-
blk.22.attn_output.weightQ4_K[3072 3072]
-
blk.22.attn_q.weightQ4_K[3072 3072]
-
blk.22.attn_v.weightQ6_K[3072 3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_down.weightQ4_K[8192 3072]
-
blk.23.ffn_gate.weightQ4_K[3072 8192]
-
blk.23.ffn_up.weightQ4_K[3072 8192]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.attn_k.weightQ4_K[3072 3072]
-
blk.23.attn_output.weightQ4_K[3072 3072]
-
blk.23.attn_q.weightQ4_K[3072 3072]
-
blk.23.attn_v.weightQ4_K[3072 3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_down.weightQ4_K[8192 3072]
-
blk.24.ffn_gate.weightQ4_K[3072 8192]
-
blk.24.ffn_up.weightQ4_K[3072 8192]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.attn_k.weightQ4_K[3072 3072]
-
blk.24.attn_output.weightQ4_K[3072 3072]
-
blk.24.attn_q.weightQ4_K[3072 3072]
-
blk.24.attn_v.weightQ4_K[3072 3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_down.weightQ6_K[8192 3072]
-
blk.25.ffn_gate.weightQ4_K[3072 8192]
-
blk.25.ffn_up.weightQ4_K[3072 8192]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.25.attn_k.weightQ4_K[3072 3072]
-
blk.25.attn_output.weightQ4_K[3072 3072]
-
blk.25.attn_q.weightQ4_K[3072 3072]
-
blk.25.attn_v.weightQ6_K[3072 3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_down.weightQ4_K[8192 3072]
-
blk.26.ffn_gate.weightQ4_K[3072 8192]
-
blk.26.ffn_up.weightQ4_K[3072 8192]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.attn_k.weightQ4_K[3072 3072]
-
blk.26.attn_output.weightQ4_K[3072 3072]
-
blk.26.attn_q.weightQ4_K[3072 3072]
-
blk.26.attn_v.weightQ4_K[3072 3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_down.weightQ4_K[8192 3072]
-
blk.27.ffn_gate.weightQ4_K[3072 8192]
-
blk.27.ffn_up.weightQ4_K[3072 8192]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.attn_k.weightQ4_K[3072 3072]
-
blk.27.attn_output.weightQ4_K[3072 3072]
-
blk.27.attn_q.weightQ4_K[3072 3072]
-
blk.27.attn_v.weightQ4_K[3072 3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_down.weightQ6_K[8192 3072]
-
blk.28.ffn_gate.weightQ4_K[3072 8192]
-
blk.28.ffn_up.weightQ4_K[3072 8192]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.attn_k.weightQ4_K[3072 3072]
-
blk.28.attn_output.weightQ4_K[3072 3072]
-
blk.28.attn_q.weightQ4_K[3072 3072]
-
blk.28.attn_v.weightQ6_K[3072 3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_down.weightQ4_K[8192 3072]
-
blk.29.ffn_gate.weightQ4_K[3072 8192]
-
blk.29.ffn_up.weightQ4_K[3072 8192]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.attn_k.weightQ4_K[3072 3072]
-
blk.29.attn_output.weightQ4_K[3072 3072]
-
blk.29.attn_q.weightQ4_K[3072 3072]
-
blk.29.attn_v.weightQ4_K[3072 3072]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.ffn_down.weightQ6_K[8192 3072]
-
blk.30.ffn_gate.weightQ4_K[3072 8192]
-
blk.30.ffn_up.weightQ4_K[3072 8192]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.attn_k.weightQ4_K[3072 3072]
-
blk.30.attn_output.weightQ4_K[3072 3072]
-
blk.30.attn_q.weightQ4_K[3072 3072]
-
blk.30.attn_v.weightQ6_K[3072 3072]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.ffn_down.weightQ4_K[8192 3072]
-
blk.31.ffn_gate.weightQ4_K[3072 8192]
-
blk.31.ffn_up.weightQ4_K[3072 8192]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.attn_k.weightQ4_K[3072 3072]
-
blk.31.attn_output.weightQ4_K[3072 3072]
-
blk.31.attn_q.weightQ4_K[3072 3072]
-
blk.31.attn_v.weightQ4_K[3072 3072]
-
output.weightQ6_K[3072 32064]
-
output_norm.weightF32[3072]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31