Apple OpenELM: An Efficient Language Model Family with Open Training and Inference Framework
186 Pulls Updated 8 weeks ago
8308909dd8bf · 2.2GB
-
general.architectureopenelm
-
general.file_typeQ5_K_M
-
openelm.attention.head_count12
-
openelm.attention.head_count_kv3
-
openelm.attention.key_length128
-
openelm.attention.layer_norm_rms_epsilon1e-06
-
openelm.attention.value_length128
-
openelm.block_count36
-
openelm.context_length2048
-
openelm.embedding_length3072
-
openelm.feed_forward_length1536
-
openelm.rope.dimension_count128
-
openelm.rope.freq_base10000
-
tokenizer.ggml.add_bos_tokentrue
-
tokenizer.ggml.add_eos_tokenfalse
-
tokenizer.ggml.bos_token_id1
-
tokenizer.ggml.eos_token_id2
-
tokenizer.ggml.modelllama
-
tokenizer.ggml.predefault
-
tokenizer.ggml.scores[0 0 0 0 0 ...]
-
tokenizer.ggml.token_type[2 3 3 6 6 ...]
-
tokenizer.ggml.tokens[<unk> <s> </s> <0x00> <0x01> ...]
-
tokenizer.ggml.unknown_token_id0
-
NameTypeShape
-
token_embd.weightQ6_K[3072 32000]
-
blk.0.attn_k_norm.weightF32[128]
-
blk.0.attn_output.weightQ5_K[1536 3072]
-
blk.0.attn_q_norm.weightF32[128]
-
blk.0.attn_qkv.weightQ6_K[3072 2304]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_gate.weightQ5_K[3072 1536]
-
blk.0.ffn_up.weightQ5_K[3072 1536]
-
blk.0.ffn_down.weightQ6_K[1536 3072]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.1.attn_k_norm.weightF32[128]
-
blk.1.attn_output.weightQ5_K[1536 3072]
-
blk.1.attn_q_norm.weightF32[128]
-
blk.1.attn_qkv.weightQ6_K[3072 2304]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_gate.weightQ5_K[3072 1792]
-
blk.1.ffn_up.weightQ5_K[3072 1792]
-
blk.1.ffn_down.weightQ6_K[1792 3072]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.2.attn_k_norm.weightF32[128]
-
blk.2.attn_output.weightQ5_K[1536 3072]
-
blk.2.attn_q_norm.weightF32[128]
-
blk.2.attn_qkv.weightQ6_K[3072 2304]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_gate.weightQ5_K[3072 2048]
-
blk.2.ffn_up.weightQ5_K[3072 2048]
-
blk.2.ffn_down.weightQ6_K[2048 3072]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.3.attn_k_norm.weightF32[128]
-
blk.3.attn_output.weightQ5_K[1536 3072]
-
blk.3.attn_q_norm.weightF32[128]
-
blk.3.attn_qkv.weightQ6_K[3072 2304]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_gate.weightQ5_K[3072 2560]
-
blk.3.ffn_up.weightQ5_K[3072 2560]
-
blk.3.ffn_down.weightQ5_K[2560 3072]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.4.attn_k_norm.weightF32[128]
-
blk.4.attn_output.weightQ5_K[2048 3072]
-
blk.4.attn_q_norm.weightF32[128]
-
blk.4.attn_qkv.weightQ6_K[3072 3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_gate.weightQ5_K[3072 2816]
-
blk.4.ffn_up.weightQ5_K[3072 2816]
-
blk.4.ffn_down.weightQ5_K[2816 3072]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.5.attn_k_norm.weightF32[128]
-
blk.5.attn_output.weightQ5_K[2048 3072]
-
blk.5.attn_q_norm.weightF32[128]
-
blk.5.attn_qkv.weightQ6_K[3072 3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_gate.weightQ5_K[3072 3072]
-
blk.5.ffn_up.weightQ5_K[3072 3072]
-
blk.5.ffn_down.weightQ5_K[3072 3072]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.6.attn_k_norm.weightF32[128]
-
blk.6.attn_output.weightQ5_K[2048 3072]
-
blk.6.attn_q_norm.weightF32[128]
-
blk.6.attn_qkv.weightQ6_K[3072 3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_gate.weightQ5_K[3072 3328]
-
blk.6.ffn_up.weightQ5_K[3072 3328]
-
blk.6.ffn_down.weightQ6_K[3328 3072]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.7.attn_k_norm.weightF32[128]
-
blk.7.attn_output.weightQ5_K[2048 3072]
-
blk.7.attn_q_norm.weightF32[128]
-
blk.7.attn_qkv.weightQ6_K[3072 3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_gate.weightQ5_K[3072 3584]
-
blk.7.ffn_up.weightQ5_K[3072 3584]
-
blk.7.ffn_down.weightQ5_K[3584 3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.8.attn_k_norm.weightF32[128]
-
blk.8.attn_output.weightQ5_K[2048 3072]
-
blk.8.attn_q_norm.weightF32[128]
-
blk.8.attn_qkv.weightQ6_K[3072 3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_gate.weightQ5_K[3072 4096]
-
blk.8.ffn_up.weightQ5_K[3072 4096]
-
blk.8.ffn_down.weightQ5_K[4096 3072]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.9.attn_k_norm.weightF32[128]
-
blk.9.attn_output.weightQ5_K[2048 3072]
-
blk.9.attn_q_norm.weightF32[128]
-
blk.9.attn_qkv.weightQ6_K[3072 3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_gate.weightQ5_K[3072 4352]
-
blk.9.ffn_up.weightQ5_K[3072 4352]
-
blk.9.ffn_down.weightQ6_K[4352 3072]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.10.attn_k_norm.weightF32[128]
-
blk.10.attn_output.weightQ5_K[2048 3072]
-
blk.10.attn_q_norm.weightF32[128]
-
blk.10.attn_qkv.weightQ6_K[3072 3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_gate.weightQ5_K[3072 4608]
-
blk.10.ffn_up.weightQ5_K[3072 4608]
-
blk.10.ffn_down.weightQ6_K[4608 3072]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.11.attn_k_norm.weightF32[128]
-
blk.11.attn_output.weightQ5_K[2048 3072]
-
blk.11.attn_q_norm.weightF32[128]
-
blk.11.attn_qkv.weightQ6_K[3072 3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_gate.weightQ5_K[3072 4864]
-
blk.11.ffn_up.weightQ5_K[3072 4864]
-
blk.11.ffn_down.weightQ6_K[4864 3072]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.12.attn_k_norm.weightF32[128]
-
blk.12.attn_output.weightQ5_K[2048 3072]
-
blk.12.attn_q_norm.weightF32[128]
-
blk.12.attn_qkv.weightQ6_K[3072 3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_gate.weightQ5_K[3072 5120]
-
blk.12.ffn_up.weightQ5_K[3072 5120]
-
blk.12.ffn_down.weightQ5_K[5120 3072]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.13.attn_k_norm.weightF32[128]
-
blk.13.attn_output.weightQ5_K[2048 3072]
-
blk.13.attn_q_norm.weightF32[128]
-
blk.13.attn_qkv.weightQ6_K[3072 3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_gate.weightQ5_K[3072 5632]
-
blk.13.ffn_up.weightQ5_K[3072 5632]
-
blk.13.ffn_down.weightQ5_K[5632 3072]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.14.attn_k_norm.weightF32[128]
-
blk.14.attn_output.weightQ5_K[2048 3072]
-
blk.14.attn_q_norm.weightF32[128]
-
blk.14.attn_qkv.weightQ6_K[3072 3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_gate.weightQ5_K[3072 5888]
-
blk.14.ffn_up.weightQ5_K[3072 5888]
-
blk.14.ffn_down.weightQ6_K[5888 3072]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.15.attn_k_norm.weightF32[128]
-
blk.15.attn_output.weightQ5_K[2048 3072]
-
blk.15.attn_q_norm.weightF32[128]
-
blk.15.attn_qkv.weightQ6_K[3072 3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_gate.weightQ5_K[3072 6144]
-
blk.15.ffn_up.weightQ5_K[3072 6144]
-
blk.15.ffn_down.weightQ5_K[6144 3072]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.16.attn_k_norm.weightF32[128]
-
blk.16.attn_output.weightQ5_K[2048 3072]
-
blk.16.attn_q_norm.weightF32[128]
-
blk.16.attn_qkv.weightQ6_K[3072 3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_gate.weightQ5_K[3072 6400]
-
blk.16.ffn_up.weightQ5_K[3072 6400]
-
blk.16.ffn_down.weightQ5_K[6400 3072]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.17.attn_k_norm.weightF32[128]
-
blk.17.attn_output.weightQ5_K[2048 3072]
-
blk.17.attn_q_norm.weightF32[128]
-
blk.17.attn_qkv.weightQ6_K[3072 3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_gate.weightQ5_K[3072 6656]
-
blk.17.ffn_up.weightQ5_K[3072 6656]
-
blk.17.ffn_down.weightQ6_K[6656 3072]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.18.attn_k_norm.weightF32[128]
-
blk.18.attn_output.weightQ5_K[2560 3072]
-
blk.18.attn_q_norm.weightF32[128]
-
blk.18.attn_qkv.weightQ6_K[3072 3840]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_gate.weightQ5_K[3072 7168]
-
blk.18.ffn_up.weightQ5_K[3072 7168]
-
blk.18.ffn_down.weightQ5_K[7168 3072]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.19.attn_k_norm.weightF32[128]
-
blk.19.attn_output.weightQ5_K[2560 3072]
-
blk.19.attn_q_norm.weightF32[128]
-
blk.19.attn_qkv.weightQ6_K[3072 3840]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_gate.weightQ5_K[3072 7424]
-
blk.19.ffn_up.weightQ5_K[3072 7424]
-
blk.19.ffn_down.weightQ5_K[7424 3072]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.20.attn_k_norm.weightF32[128]
-
blk.20.attn_output.weightQ5_K[2560 3072]
-
blk.20.attn_q_norm.weightF32[128]
-
blk.20.attn_qkv.weightQ6_K[3072 3840]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_gate.weightQ5_K[3072 7680]
-
blk.20.ffn_up.weightQ5_K[3072 7680]
-
blk.20.ffn_down.weightQ5_K[7680 3072]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.21.attn_k_norm.weightF32[128]
-
blk.21.attn_output.weightQ5_K[2560 3072]
-
blk.21.attn_q_norm.weightF32[128]
-
blk.21.attn_qkv.weightQ6_K[3072 3840]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_gate.weightQ5_K[3072 7936]
-
blk.21.ffn_up.weightQ5_K[3072 7936]
-
blk.21.ffn_down.weightQ5_K[7936 3072]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.22.attn_k_norm.weightF32[128]
-
blk.22.attn_output.weightQ5_K[2560 3072]
-
blk.22.attn_q_norm.weightF32[128]
-
blk.22.attn_qkv.weightQ6_K[3072 3840]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_gate.weightQ5_K[3072 8192]
-
blk.22.ffn_up.weightQ5_K[3072 8192]
-
blk.22.ffn_down.weightQ6_K[8192 3072]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.23.attn_k_norm.weightF32[128]
-
blk.23.attn_output.weightQ5_K[2560 3072]
-
blk.23.attn_q_norm.weightF32[128]
-
blk.23.attn_qkv.weightQ6_K[3072 3840]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_gate.weightQ5_K[3072 8704]
-
blk.23.ffn_up.weightQ5_K[3072 8704]
-
blk.23.ffn_down.weightQ5_K[8704 3072]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.24.attn_k_norm.weightF32[128]
-
blk.24.attn_output.weightQ5_K[2560 3072]
-
blk.24.attn_q_norm.weightF32[128]
-
blk.24.attn_qkv.weightQ6_K[3072 3840]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_gate.weightQ5_K[3072 8960]
-
blk.24.ffn_up.weightQ5_K[3072 8960]
-
blk.24.ffn_down.weightQ5_K[8960 3072]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.25.attn_k_norm.weightF32[128]
-
blk.25.attn_output.weightQ5_K[2560 3072]
-
blk.25.attn_q_norm.weightF32[128]
-
blk.25.attn_qkv.weightQ6_K[3072 3840]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_gate.weightQ5_K[3072 9216]
-
blk.25.ffn_up.weightQ5_K[3072 9216]
-
blk.25.ffn_down.weightQ6_K[9216 3072]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.26.attn_k_norm.weightF32[128]
-
blk.26.attn_output.weightQ5_K[2560 3072]
-
blk.26.attn_q_norm.weightF32[128]
-
blk.26.attn_qkv.weightQ6_K[3072 3840]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_gate.weightQ5_K[3072 9472]
-
blk.26.ffn_up.weightQ5_K[3072 9472]
-
blk.26.ffn_down.weightQ5_K[9472 3072]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.27.attn_k_norm.weightF32[128]
-
blk.27.attn_output.weightQ5_K[2560 3072]
-
blk.27.attn_q_norm.weightF32[128]
-
blk.27.attn_qkv.weightQ6_K[3072 3840]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_gate.weightQ5_K[3072 9728]
-
blk.27.ffn_up.weightQ5_K[3072 9728]
-
blk.27.ffn_down.weightQ5_K[9728 3072]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.28.attn_k_norm.weightF32[128]
-
blk.28.attn_output.weightQ5_K[2560 3072]
-
blk.28.attn_q_norm.weightF32[128]
-
blk.28.attn_qkv.weightQ6_K[3072 3840]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_gate.weightQ5_K[3072 10240]
-
blk.28.ffn_up.weightQ5_K[3072 10240]
-
blk.28.ffn_down.weightQ6_K[10240 3072]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.29.attn_k_norm.weightF32[128]
-
blk.29.attn_output.weightQ5_K[2560 3072]
-
blk.29.attn_q_norm.weightF32[128]
-
blk.29.attn_qkv.weightQ6_K[3072 3840]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_gate.weightQ5_K[3072 10496]
-
blk.29.ffn_up.weightQ5_K[3072 10496]
-
blk.29.ffn_down.weightQ5_K[10496 3072]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.30.attn_k_norm.weightF32[128]
-
blk.30.attn_output.weightQ5_K[3072 3072]
-
blk.30.attn_q_norm.weightF32[128]
-
blk.30.attn_qkv.weightQ6_K[3072 4608]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.ffn_gate.weightQ5_K[3072 10752]
-
blk.30.ffn_up.weightQ5_K[3072 10752]
-
blk.30.ffn_down.weightQ6_K[10752 3072]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.31.attn_k_norm.weightF32[128]
-
blk.31.attn_output.weightQ5_K[3072 3072]
-
blk.31.attn_q_norm.weightF32[128]
-
blk.31.attn_qkv.weightQ6_K[3072 4608]
-
blk.31.ffn_gate.weightQ5_K[3072 11008]
-
blk.31.ffn_up.weightQ5_K[3072 11008]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.ffn_down.weightQ6_K[11008 3072]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.32.attn_k_norm.weightF32[128]
-
blk.32.attn_output.weightQ5_K[3072 3072]
-
blk.32.attn_q_norm.weightF32[128]
-
blk.32.attn_qkv.weightQ6_K[3072 4608]
-
blk.32.attn_norm.weightF32[3072]
-
blk.32.ffn_gate.weightQ5_K[3072 11264]
-
blk.32.ffn_up.weightQ5_K[3072 11264]
-
blk.32.ffn_down.weightQ6_K[11264 3072]
-
blk.32.ffn_norm.weightF32[3072]
-
blk.33.attn_k_norm.weightF32[128]
-
blk.33.attn_output.weightQ5_K[3072 3072]
-
blk.33.attn_q_norm.weightF32[128]
-
blk.33.attn_qkv.weightQ6_K[3072 4608]
-
blk.33.attn_norm.weightF32[3072]
-
blk.33.ffn_gate.weightQ5_K[3072 11776]
-
blk.33.ffn_up.weightQ5_K[3072 11776]
-
blk.33.ffn_down.weightQ6_K[11776 3072]
-
blk.33.ffn_norm.weightF32[3072]
-
blk.34.attn_k_norm.weightF32[128]
-
blk.34.attn_output.weightQ5_K[3072 3072]
-
blk.34.attn_q_norm.weightF32[128]
-
blk.34.attn_qkv.weightQ6_K[3072 4608]
-
blk.34.attn_norm.weightF32[3072]
-
blk.34.ffn_gate.weightQ5_K[3072 12032]
-
blk.34.ffn_up.weightQ5_K[3072 12032]
-
blk.34.ffn_down.weightQ6_K[12032 3072]
-
blk.34.ffn_norm.weightF32[3072]
-
blk.35.attn_k_norm.weightF32[128]
-
blk.35.attn_output.weightQ5_K[3072 3072]
-
blk.35.attn_q_norm.weightF32[128]
-
blk.35.attn_qkv.weightQ6_K[3072 4608]
-
blk.35.attn_norm.weightF32[3072]
-
blk.35.ffn_gate.weightQ5_K[3072 12288]
-
blk.35.ffn_up.weightQ5_K[3072 12288]
-
blk.35.ffn_down.weightQ6_K[12288 3072]
-
blk.35.ffn_norm.weightF32[3072]
-
output_norm.weightF32[3072]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31
blk.32
blk.33
blk.34
blk.35