mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-29 16:12:06 -04:00
feat(catalog): add Gemma 4 12B/QAT entries and RTX 3050 bandwidth (#4728)
Add official Gemma 4 12B-it plus QAT-INT4/INT8 catalog entries (with their GGUF sources), QAT quantization support across the quant tables and the prequantized-prefix list, and the missing RTX 3050 / 3050 Ti memory bandwidth so speed estimates stop falling back to the generic cuda value.
This commit is contained in:
committed by
GitHub
parent
8f5e36a079
commit
119228a6db
@@ -14059,6 +14059,138 @@
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-12B-it",
|
||||
"provider": "Google",
|
||||
"parameter_count": "12.0B",
|
||||
"parameters_raw": 12000000000,
|
||||
"min_ram_gb": 8.5,
|
||||
"recommended_ram_gb": 11.0,
|
||||
"min_vram_gb": 7.5,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 131072,
|
||||
"use_case": "General purpose, multimodal; unsloth/gemma-4-12B-it-GGUF Dynamic variants reduce VRAM from ~7.5 GB to ~5.5 GB",
|
||||
"is_moe": false,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": null,
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/gemma-4-12B-it-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-12B-it-qat-int4",
|
||||
"provider": "Google",
|
||||
"parameter_count": "12.0B",
|
||||
"parameters_raw": 12000000000,
|
||||
"min_ram_gb": 8.0,
|
||||
"recommended_ram_gb": 9.5,
|
||||
"min_vram_gb": 6.5,
|
||||
"quantization": "QAT-INT4",
|
||||
"context_length": 131072,
|
||||
"use_case": "General purpose, multimodal (QAT quantization-aware training — higher quality than post-train INT4; vLLM native; no GGUF)",
|
||||
"is_moe": false,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": null,
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-12B-it-qat-int8",
|
||||
"provider": "Google",
|
||||
"parameter_count": "12.0B",
|
||||
"parameters_raw": 12000000000,
|
||||
"min_ram_gb": 15.0,
|
||||
"recommended_ram_gb": 20.0,
|
||||
"min_vram_gb": 13.5,
|
||||
"quantization": "QAT-INT8",
|
||||
"context_length": 131072,
|
||||
"use_case": "General purpose, multimodal (QAT INT8 — highest quality, 2x VRAM of QAT-INT4; vLLM native; no GGUF)",
|
||||
"is_moe": false,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": null,
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-12B-it-qat-q4_0-gguf",
|
||||
"provider": "Google",
|
||||
"parameter_count": "12.0B",
|
||||
"parameters_raw": 12000000000,
|
||||
"min_ram_gb": 8.5,
|
||||
"recommended_ram_gb": 11.0,
|
||||
"min_vram_gb": 7.5,
|
||||
"quantization": "QAT-INT4",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, multimodal (vision + audio); official Google QAT int4 GGUF — near-bf16 quality at int4 size, served on llama.cpp/Ollama with CPU offload",
|
||||
"is_moe": false,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": null,
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "google/gemma-4-12B-it-qat-q4_0-gguf",
|
||||
"provider": "Google",
|
||||
"file": "gemma-4-12b-it-qat-q4_0.gguf"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision",
|
||||
"audio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-26B-A4B-it-qat-q4_0-gguf",
|
||||
"provider": "Google",
|
||||
"parameter_count": "25.2B",
|
||||
"parameters_raw": 25200000000,
|
||||
"min_ram_gb": 14.4,
|
||||
"recommended_ram_gb": 18.0,
|
||||
"min_vram_gb": 14.4,
|
||||
"quantization": "QAT-INT4",
|
||||
"context_length": 262144,
|
||||
"use_case": "High-throughput, multimodal MoE (3.8B active); official Google QAT int4 GGUF — near-bf16 quality at int4 size, served on llama.cpp with CPU offload",
|
||||
"is_moe": true,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": 3800000000,
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "google/gemma-4-26B-A4B-it-qat-q4_0-gguf",
|
||||
"provider": "Google"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "google/gemma-4-31B-it",
|
||||
"provider": "Google",
|
||||
@@ -19144,4 +19276,4 @@
|
||||
],
|
||||
"_discovered": true
|
||||
}
|
||||
]
|
||||
]
|
||||
Reference in New Issue
Block a user