A family of LLaVA models fine-tuned by XTuner from Llama3-8B Instruct, Phi3-mini, and CLIP-ViT-Large-patch14-336, using ShareGPT4V-PT and InternVL-SFT.
Vision
3B
8B
1,362 Pulls · Updated 4 months ago
ff9c8f58905d · 7.6GB
-
general.architecture: llama
-
general.file_type: F16
-
llama.attention.head_count: 32
-
llama.attention.head_count_kv: 32
-
llama.attention.layer_norm_rms_epsilon: 1e-05
-
llama.block_count: 32
-
llama.context_length: 4096
-
llama.embedding_length: 3072
-
llama.feed_forward_length: 8192
-
llama.rope.dimension_count: 96
-
llama.rope.freq_base: 10000
-
llama.vocab_size: 32064
-
tokenizer.ggml.add_bos_token: true
-
tokenizer.ggml.add_eos_token: false
-
tokenizer.ggml.bos_token_id: 1
-
tokenizer.ggml.eos_token_id: 32000
-
tokenizer.ggml.model: llama
-
tokenizer.ggml.padding_token_id: 32000
-
tokenizer.ggml.scores: [0 0 0 0 0 ...]
-
tokenizer.ggml.token_type: [2 3 3 6 6 ...]
-
tokenizer.ggml.tokens: [<unk> <s> </s> <0x00> <0x01> ...]
-
tokenizer.ggml.unknown_token_id: 0
-
Name | Type | Shape
-
token_embd.weightF16[3072 32064]
-
blk.0.attn_norm.weightF32[3072]
-
blk.0.ffn_down.weightF16[8192 3072]
-
blk.0.ffn_gate.weightF16[3072 8192]
-
blk.0.ffn_up.weightF16[3072 8192]
-
blk.0.ffn_norm.weightF32[3072]
-
blk.0.attn_k.weightF16[3072 3072]
-
blk.0.attn_output.weightF16[3072 3072]
-
blk.0.attn_q.weightF16[3072 3072]
-
blk.0.attn_v.weightF16[3072 3072]
-
blk.1.attn_norm.weightF32[3072]
-
blk.1.ffn_down.weightF16[8192 3072]
-
blk.1.ffn_gate.weightF16[3072 8192]
-
blk.1.ffn_up.weightF16[3072 8192]
-
blk.1.ffn_norm.weightF32[3072]
-
blk.1.attn_k.weightF16[3072 3072]
-
blk.1.attn_output.weightF16[3072 3072]
-
blk.1.attn_q.weightF16[3072 3072]
-
blk.1.attn_v.weightF16[3072 3072]
-
blk.2.attn_norm.weightF32[3072]
-
blk.2.ffn_down.weightF16[8192 3072]
-
blk.2.ffn_gate.weightF16[3072 8192]
-
blk.2.ffn_up.weightF16[3072 8192]
-
blk.2.ffn_norm.weightF32[3072]
-
blk.2.attn_k.weightF16[3072 3072]
-
blk.2.attn_output.weightF16[3072 3072]
-
blk.2.attn_q.weightF16[3072 3072]
-
blk.2.attn_v.weightF16[3072 3072]
-
blk.3.attn_norm.weightF32[3072]
-
blk.3.ffn_down.weightF16[8192 3072]
-
blk.3.ffn_gate.weightF16[3072 8192]
-
blk.3.ffn_up.weightF16[3072 8192]
-
blk.3.ffn_norm.weightF32[3072]
-
blk.3.attn_k.weightF16[3072 3072]
-
blk.3.attn_output.weightF16[3072 3072]
-
blk.3.attn_q.weightF16[3072 3072]
-
blk.3.attn_v.weightF16[3072 3072]
-
blk.4.attn_norm.weightF32[3072]
-
blk.4.ffn_down.weightF16[8192 3072]
-
blk.4.ffn_gate.weightF16[3072 8192]
-
blk.4.ffn_up.weightF16[3072 8192]
-
blk.4.ffn_norm.weightF32[3072]
-
blk.4.attn_k.weightF16[3072 3072]
-
blk.4.attn_output.weightF16[3072 3072]
-
blk.4.attn_q.weightF16[3072 3072]
-
blk.4.attn_v.weightF16[3072 3072]
-
blk.5.attn_norm.weightF32[3072]
-
blk.5.ffn_down.weightF16[8192 3072]
-
blk.5.ffn_gate.weightF16[3072 8192]
-
blk.5.ffn_up.weightF16[3072 8192]
-
blk.5.ffn_norm.weightF32[3072]
-
blk.5.attn_k.weightF16[3072 3072]
-
blk.5.attn_output.weightF16[3072 3072]
-
blk.5.attn_q.weightF16[3072 3072]
-
blk.5.attn_v.weightF16[3072 3072]
-
blk.6.attn_norm.weightF32[3072]
-
blk.6.ffn_down.weightF16[8192 3072]
-
blk.6.ffn_gate.weightF16[3072 8192]
-
blk.6.ffn_up.weightF16[3072 8192]
-
blk.6.ffn_norm.weightF32[3072]
-
blk.6.attn_k.weightF16[3072 3072]
-
blk.6.attn_output.weightF16[3072 3072]
-
blk.6.attn_q.weightF16[3072 3072]
-
blk.6.attn_v.weightF16[3072 3072]
-
blk.7.ffn_gate.weightF16[3072 8192]
-
blk.7.ffn_up.weightF16[3072 8192]
-
blk.7.attn_k.weightF16[3072 3072]
-
blk.7.attn_output.weightF16[3072 3072]
-
blk.7.attn_q.weightF16[3072 3072]
-
blk.7.attn_v.weightF16[3072 3072]
-
blk.7.attn_norm.weightF32[3072]
-
blk.7.ffn_down.weightF16[8192 3072]
-
blk.7.ffn_norm.weightF32[3072]
-
blk.8.attn_norm.weightF32[3072]
-
blk.8.ffn_down.weightF16[8192 3072]
-
blk.8.ffn_gate.weightF16[3072 8192]
-
blk.8.ffn_up.weightF16[3072 8192]
-
blk.8.ffn_norm.weightF32[3072]
-
blk.8.attn_k.weightF16[3072 3072]
-
blk.8.attn_output.weightF16[3072 3072]
-
blk.8.attn_q.weightF16[3072 3072]
-
blk.8.attn_v.weightF16[3072 3072]
-
blk.9.attn_norm.weightF32[3072]
-
blk.9.ffn_down.weightF16[8192 3072]
-
blk.9.ffn_gate.weightF16[3072 8192]
-
blk.9.ffn_up.weightF16[3072 8192]
-
blk.9.ffn_norm.weightF32[3072]
-
blk.9.attn_k.weightF16[3072 3072]
-
blk.9.attn_output.weightF16[3072 3072]
-
blk.9.attn_q.weightF16[3072 3072]
-
blk.9.attn_v.weightF16[3072 3072]
-
blk.10.attn_norm.weightF32[3072]
-
blk.10.ffn_down.weightF16[8192 3072]
-
blk.10.ffn_gate.weightF16[3072 8192]
-
blk.10.ffn_up.weightF16[3072 8192]
-
blk.10.ffn_norm.weightF32[3072]
-
blk.10.attn_k.weightF16[3072 3072]
-
blk.10.attn_output.weightF16[3072 3072]
-
blk.10.attn_q.weightF16[3072 3072]
-
blk.10.attn_v.weightF16[3072 3072]
-
blk.11.attn_norm.weightF32[3072]
-
blk.11.ffn_down.weightF16[8192 3072]
-
blk.11.ffn_gate.weightF16[3072 8192]
-
blk.11.ffn_up.weightF16[3072 8192]
-
blk.11.ffn_norm.weightF32[3072]
-
blk.11.attn_k.weightF16[3072 3072]
-
blk.11.attn_output.weightF16[3072 3072]
-
blk.11.attn_q.weightF16[3072 3072]
-
blk.11.attn_v.weightF16[3072 3072]
-
blk.12.attn_norm.weightF32[3072]
-
blk.12.ffn_down.weightF16[8192 3072]
-
blk.12.ffn_gate.weightF16[3072 8192]
-
blk.12.ffn_up.weightF16[3072 8192]
-
blk.12.ffn_norm.weightF32[3072]
-
blk.12.attn_k.weightF16[3072 3072]
-
blk.12.attn_output.weightF16[3072 3072]
-
blk.12.attn_q.weightF16[3072 3072]
-
blk.12.attn_v.weightF16[3072 3072]
-
blk.13.attn_norm.weightF32[3072]
-
blk.13.ffn_down.weightF16[8192 3072]
-
blk.13.ffn_gate.weightF16[3072 8192]
-
blk.13.ffn_up.weightF16[3072 8192]
-
blk.13.ffn_norm.weightF32[3072]
-
blk.13.attn_k.weightF16[3072 3072]
-
blk.13.attn_output.weightF16[3072 3072]
-
blk.13.attn_q.weightF16[3072 3072]
-
blk.13.attn_v.weightF16[3072 3072]
-
blk.14.attn_norm.weightF32[3072]
-
blk.14.ffn_down.weightF16[8192 3072]
-
blk.14.ffn_gate.weightF16[3072 8192]
-
blk.14.ffn_up.weightF16[3072 8192]
-
blk.14.ffn_norm.weightF32[3072]
-
blk.14.attn_k.weightF16[3072 3072]
-
blk.14.attn_output.weightF16[3072 3072]
-
blk.14.attn_q.weightF16[3072 3072]
-
blk.14.attn_v.weightF16[3072 3072]
-
blk.15.attn_norm.weightF32[3072]
-
blk.15.ffn_down.weightF16[8192 3072]
-
blk.15.ffn_gate.weightF16[3072 8192]
-
blk.15.ffn_up.weightF16[3072 8192]
-
blk.15.ffn_norm.weightF32[3072]
-
blk.15.attn_k.weightF16[3072 3072]
-
blk.15.attn_output.weightF16[3072 3072]
-
blk.15.attn_q.weightF16[3072 3072]
-
blk.15.attn_v.weightF16[3072 3072]
-
blk.16.ffn_gate.weightF16[3072 8192]
-
blk.16.attn_k.weightF16[3072 3072]
-
blk.16.attn_output.weightF16[3072 3072]
-
blk.16.attn_q.weightF16[3072 3072]
-
blk.16.attn_v.weightF16[3072 3072]
-
blk.16.attn_norm.weightF32[3072]
-
blk.16.ffn_down.weightF16[8192 3072]
-
blk.16.ffn_up.weightF16[3072 8192]
-
blk.16.ffn_norm.weightF32[3072]
-
blk.17.attn_norm.weightF32[3072]
-
blk.17.ffn_down.weightF16[8192 3072]
-
blk.17.ffn_gate.weightF16[3072 8192]
-
blk.17.ffn_up.weightF16[3072 8192]
-
blk.17.ffn_norm.weightF32[3072]
-
blk.17.attn_k.weightF16[3072 3072]
-
blk.17.attn_output.weightF16[3072 3072]
-
blk.17.attn_q.weightF16[3072 3072]
-
blk.17.attn_v.weightF16[3072 3072]
-
blk.18.attn_norm.weightF32[3072]
-
blk.18.ffn_down.weightF16[8192 3072]
-
blk.18.ffn_gate.weightF16[3072 8192]
-
blk.18.ffn_up.weightF16[3072 8192]
-
blk.18.ffn_norm.weightF32[3072]
-
blk.18.attn_k.weightF16[3072 3072]
-
blk.18.attn_output.weightF16[3072 3072]
-
blk.18.attn_q.weightF16[3072 3072]
-
blk.18.attn_v.weightF16[3072 3072]
-
blk.19.attn_norm.weightF32[3072]
-
blk.19.ffn_down.weightF16[8192 3072]
-
blk.19.ffn_gate.weightF16[3072 8192]
-
blk.19.ffn_up.weightF16[3072 8192]
-
blk.19.ffn_norm.weightF32[3072]
-
blk.19.attn_k.weightF16[3072 3072]
-
blk.19.attn_output.weightF16[3072 3072]
-
blk.19.attn_q.weightF16[3072 3072]
-
blk.19.attn_v.weightF16[3072 3072]
-
blk.20.attn_norm.weightF32[3072]
-
blk.20.ffn_down.weightF16[8192 3072]
-
blk.20.ffn_gate.weightF16[3072 8192]
-
blk.20.ffn_up.weightF16[3072 8192]
-
blk.20.ffn_norm.weightF32[3072]
-
blk.20.attn_k.weightF16[3072 3072]
-
blk.20.attn_output.weightF16[3072 3072]
-
blk.20.attn_q.weightF16[3072 3072]
-
blk.20.attn_v.weightF16[3072 3072]
-
blk.21.attn_norm.weightF32[3072]
-
blk.21.ffn_down.weightF16[8192 3072]
-
blk.21.ffn_gate.weightF16[3072 8192]
-
blk.21.ffn_up.weightF16[3072 8192]
-
blk.21.ffn_norm.weightF32[3072]
-
blk.21.attn_k.weightF16[3072 3072]
-
blk.21.attn_output.weightF16[3072 3072]
-
blk.21.attn_q.weightF16[3072 3072]
-
blk.21.attn_v.weightF16[3072 3072]
-
blk.22.attn_norm.weightF32[3072]
-
blk.22.ffn_down.weightF16[8192 3072]
-
blk.22.ffn_gate.weightF16[3072 8192]
-
blk.22.ffn_up.weightF16[3072 8192]
-
blk.22.ffn_norm.weightF32[3072]
-
blk.22.attn_k.weightF16[3072 3072]
-
blk.22.attn_output.weightF16[3072 3072]
-
blk.22.attn_q.weightF16[3072 3072]
-
blk.22.attn_v.weightF16[3072 3072]
-
blk.23.attn_norm.weightF32[3072]
-
blk.23.ffn_down.weightF16[8192 3072]
-
blk.23.ffn_gate.weightF16[3072 8192]
-
blk.23.ffn_up.weightF16[3072 8192]
-
blk.23.ffn_norm.weightF32[3072]
-
blk.23.attn_k.weightF16[3072 3072]
-
blk.23.attn_output.weightF16[3072 3072]
-
blk.23.attn_q.weightF16[3072 3072]
-
blk.23.attn_v.weightF16[3072 3072]
-
blk.24.attn_norm.weightF32[3072]
-
blk.24.ffn_down.weightF16[8192 3072]
-
blk.24.ffn_gate.weightF16[3072 8192]
-
blk.24.ffn_up.weightF16[3072 8192]
-
blk.24.ffn_norm.weightF32[3072]
-
blk.24.attn_k.weightF16[3072 3072]
-
blk.24.attn_output.weightF16[3072 3072]
-
blk.24.attn_q.weightF16[3072 3072]
-
blk.24.attn_v.weightF16[3072 3072]
-
blk.25.attn_k.weightF16[3072 3072]
-
blk.25.attn_output.weightF16[3072 3072]
-
blk.25.attn_q.weightF16[3072 3072]
-
blk.25.attn_v.weightF16[3072 3072]
-
blk.25.attn_norm.weightF32[3072]
-
blk.25.ffn_down.weightF16[8192 3072]
-
blk.25.ffn_gate.weightF16[3072 8192]
-
blk.25.ffn_up.weightF16[3072 8192]
-
blk.25.ffn_norm.weightF32[3072]
-
blk.26.attn_norm.weightF32[3072]
-
blk.26.ffn_down.weightF16[8192 3072]
-
blk.26.ffn_gate.weightF16[3072 8192]
-
blk.26.ffn_up.weightF16[3072 8192]
-
blk.26.ffn_norm.weightF32[3072]
-
blk.26.attn_k.weightF16[3072 3072]
-
blk.26.attn_output.weightF16[3072 3072]
-
blk.26.attn_q.weightF16[3072 3072]
-
blk.26.attn_v.weightF16[3072 3072]
-
blk.27.attn_norm.weightF32[3072]
-
blk.27.ffn_down.weightF16[8192 3072]
-
blk.27.ffn_gate.weightF16[3072 8192]
-
blk.27.ffn_up.weightF16[3072 8192]
-
blk.27.ffn_norm.weightF32[3072]
-
blk.27.attn_k.weightF16[3072 3072]
-
blk.27.attn_output.weightF16[3072 3072]
-
blk.27.attn_q.weightF16[3072 3072]
-
blk.27.attn_v.weightF16[3072 3072]
-
blk.28.attn_norm.weightF32[3072]
-
blk.28.ffn_down.weightF16[8192 3072]
-
blk.28.ffn_gate.weightF16[3072 8192]
-
blk.28.ffn_up.weightF16[3072 8192]
-
blk.28.ffn_norm.weightF32[3072]
-
blk.28.attn_k.weightF16[3072 3072]
-
blk.28.attn_output.weightF16[3072 3072]
-
blk.28.attn_q.weightF16[3072 3072]
-
blk.28.attn_v.weightF16[3072 3072]
-
blk.29.attn_norm.weightF32[3072]
-
blk.29.ffn_down.weightF16[8192 3072]
-
blk.29.ffn_gate.weightF16[3072 8192]
-
blk.29.ffn_up.weightF16[3072 8192]
-
blk.29.ffn_norm.weightF32[3072]
-
blk.29.attn_k.weightF16[3072 3072]
-
blk.29.attn_output.weightF16[3072 3072]
-
blk.29.attn_q.weightF16[3072 3072]
-
blk.29.attn_v.weightF16[3072 3072]
-
blk.30.attn_norm.weightF32[3072]
-
blk.30.ffn_down.weightF16[8192 3072]
-
blk.30.ffn_gate.weightF16[3072 8192]
-
blk.30.ffn_up.weightF16[3072 8192]
-
blk.30.ffn_norm.weightF32[3072]
-
blk.30.attn_k.weightF16[3072 3072]
-
blk.30.attn_output.weightF16[3072 3072]
-
blk.30.attn_q.weightF16[3072 3072]
-
blk.30.attn_v.weightF16[3072 3072]
-
blk.31.attn_norm.weightF32[3072]
-
blk.31.ffn_down.weightF16[8192 3072]
-
blk.31.ffn_gate.weightF16[3072 8192]
-
blk.31.ffn_up.weightF16[3072 8192]
-
blk.31.ffn_norm.weightF32[3072]
-
blk.31.attn_k.weightF16[3072 3072]
-
blk.31.attn_output.weightF16[3072 3072]
-
blk.31.attn_q.weightF16[3072 3072]
-
blk.31.attn_v.weightF16[3072 3072]
-
output_norm.weightF32[3072]
-
output.weightF16[3072 32064]
Metadata
Tensors
blk.0
blk.1
blk.2
blk.3
blk.4
blk.5
blk.6
blk.7
blk.8
blk.9
blk.10
blk.11
blk.12
blk.13
blk.14
blk.15
blk.16
blk.17
blk.18
blk.19
blk.20
blk.21
blk.22
blk.23
blk.24
blk.25
blk.26
blk.27
blk.28
blk.29
blk.30
blk.31