| Key | Value |
|---|---|
| general.architecture | llama |
| general.file_type | Q8_0 |
| llama.attention.head_count | 8 |
| llama.attention.head_count_kv | 4 |
| llama.attention.layer_norm_rms_epsilon | 1e-06 |
| llama.block_count | 2 |
| llama.context_length | 512 |
| llama.embedding_length | 128 |
| llama.feed_forward_length | 384 |
| llama.rope.dimension_count | 16 |
| llama.rope.freq_base | 10000 |
| llama.vocab_size | 2048 |
| tokenizer.ggml.add_bos_token | true |
| tokenizer.ggml.add_eos_token | false |
| tokenizer.ggml.bos_token_id | 1 |
| tokenizer.ggml.eos_token_id | 2 |
| tokenizer.ggml.model | llama |
| tokenizer.ggml.padding_token_id | 0 |
| tokenizer.ggml.pre | default |
| tokenizer.ggml.scores | `[-1000 -1000 -1000 -1000 -1000 ...]` |
| tokenizer.ggml.token_type | `[3 3 3 1 1 ...]` |
| tokenizer.ggml.tokens | `[<unk> <\|start_story\|> <\|end_story\|> ! ...]` |
| tokenizer.ggml.unknown_token_id | 0 |
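A quick arithmetic check makes the attention geometry in these fields concrete. The sketch below is plain Python with variable names that simply mirror the GGUF keys; all values are copied from the table above, nothing is read from the file. With 8 query heads sharing 4 KV heads, this is grouped-query attention with 2 query heads per KV head, and the per-head dimension works out to 16, matching llama.rope.dimension_count.

```python
# Sanity-check the attention geometry implied by the metadata above.
# Every value is copied from the GGUF key/value table; nothing is read from disk.

embedding_length = 128   # llama.embedding_length
head_count       = 8     # llama.attention.head_count
head_count_kv    = 4     # llama.attention.head_count_kv
rope_dims        = 16    # llama.rope.dimension_count

head_dim  = embedding_length // head_count    # 128 / 8 = 16
gqa_group = head_count // head_count_kv       # 2 query heads per KV head
kv_dim    = head_count_kv * head_dim          # 4 * 16 = 64

assert head_dim == rope_dims  # RoPE rotates the full head dimension
print(f"head_dim={head_dim}, gqa_group={gqa_group}, kv_dim={kv_dim}")
# head_dim=16, gqa_group=2, kv_dim=64
```

The derived KV width of 64 is exactly the second dimension of the `attn_k` and `attn_v` tensors in the table below.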
| Name | Type | Shape |
|---|---|---|
| token_embd.weight | Q8_0 | [128 2048] |
| blk.0.attn_norm.weight | F32 | [128] |
| blk.0.ffn_down.weight | Q8_0 | [384 128] |
| blk.0.ffn_gate.weight | Q8_0 | [128 384] |
| blk.0.ffn_up.weight | Q8_0 | [128 384] |
| blk.0.ffn_norm.weight | F32 | [128] |
| blk.0.attn_k.weight | Q8_0 | [128 64] |
| blk.0.attn_output.weight | Q8_0 | [128 128] |
| blk.0.attn_q.weight | Q8_0 | [128 128] |
| blk.0.attn_v.weight | Q8_0 | [128 64] |
| blk.1.attn_norm.weight | F32 | [128] |
| blk.1.ffn_down.weight | Q8_0 | [384 128] |
| blk.1.ffn_gate.weight | Q8_0 | [128 384] |
| blk.1.ffn_up.weight | Q8_0 | [128 384] |
| blk.1.ffn_norm.weight | F32 | [128] |
| blk.1.attn_k.weight | Q8_0 | [128 64] |
| blk.1.attn_output.weight | Q8_0 | [128 128] |
| blk.1.attn_q.weight | Q8_0 | [128 128] |
| blk.1.attn_v.weight | Q8_0 | [128 64] |
| output_norm.weight | F32 | [128] |
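Summing the products of these shapes gives the model's total parameter count. The sketch below is plain Python with the shapes copied verbatim from the table; it arrives at 656,000 parameters. Note that there is no separate output.weight tensor in the list, which suggests the output projection is tied to token_embd.weight, so the head contributes no extra parameters.

```python
# Total the parameters from the tensor shapes listed above.
# Shapes are copied verbatim from the table; nothing is read from disk.
from math import prod

shapes = {
    "token_embd.weight":  (128, 2048),
    "output_norm.weight": (128,),
}
for i in range(2):  # llama.block_count = 2
    shapes.update({
        f"blk.{i}.attn_norm.weight":   (128,),
        f"blk.{i}.attn_q.weight":      (128, 128),
        f"blk.{i}.attn_k.weight":      (128, 64),
        f"blk.{i}.attn_v.weight":      (128, 64),
        f"blk.{i}.attn_output.weight": (128, 128),
        f"blk.{i}.ffn_norm.weight":    (128,),
        f"blk.{i}.ffn_gate.weight":    (128, 384),
        f"blk.{i}.ffn_up.weight":      (128, 384),
        f"blk.{i}.ffn_down.weight":    (384, 128),
    })

total = sum(prod(shape) for shape in shapes.values())
print(f"{total:,} parameters")  # 656,000
```

The embedding table alone (128 × 2048 = 262,144 weights) accounts for roughly 40% of the total, which is typical for a model this small with a 2048-entry vocabulary.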