2025-12-09 23:34:30 +00:00

126 lines
4.9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Path: 00-Lesson-Site/frontend/src/components/Util/QuantizationCalc.tsx
import { createMemo, createSignal, For, type Component } from "solid-js";
import styles from "./QuantizationCalc.module.scss";
/** One selectable quantization format shown in the calculator's dropdown. */
interface QuantMethod {
  /** Numeric key; used as the `<option>` value and to look the entry up again. */
  id: number;
  /** Label shown to the user, e.g. "Q4_K_M (Q4_K)". */
  name: string;
  /** Bits per weight; multiplied by the parameter count to size the model. */
  bpw: number;
  /** Short optional blurb; an empty string means "no description". */
  desc: string;
}
/**
 * Catalogue of quantization formats offered by the calculator, listed from
 * the largest to the smallest bits-per-weight figure.
 *
 * Each row is `[id, name, bpw, desc]`; rows are expanded into `QuantMethod`
 * objects below. Per the original author, the figures were derived from
 * Llama-3-8B and IQ/TQ specs.
 *
 * NOTE(review): some entries (e.g. Q8_0 at 7.96) sit below the bit width their
 * name suggests — confirm that every figure is a true bits-per-weight value.
 */
const QUANT_DATA: QuantMethod[] = (
  [
    [0, "F32", 32.0, "Standard Float 32 (Uncompressed)"],
    [32, "BF16 / F16", 16.0, "Half Precision"],
    [7, "Q8_0", 7.96, "Almost lossless"],
    [18, "Q6_K", 6.14, "High quality"],
    [9, "Q5_1", 5.65, "High accuracy"],
    [17, "Q5_K_M (Q5_K)", 5.33, "Recommended balance"],
    [16, "Q5_K_S", 5.21, ""],
    [8, "Q5_0", 5.21, "Legacy standard"],
    [3, "Q4_1", 4.78, ""],
    [15, "Q4_K_M (Q4_K)", 4.58, "Most popular daily driver"],
    [25, "IQ4_NL", 4.5, "Non-linear quantization"],
    [14, "Q4_K_S", 4.37, "Fast inference"],
    [2, "Q4_0", 4.34, "Very fast"],
    [30, "IQ4_XS", 4.25, ""],
    [13, "Q3_K_L", 4.03, ""],
    [12, "Q3_K_M (Q3_K)", 3.74, "Decent for lower VRAM"],
    [27, "IQ3_M", 3.66, "Mix quantization"],
    [26, "IQ3_S", 3.44, ""],
    [11, "Q3_K_S", 3.41, ""],
    [22, "IQ3_XS", 3.3, ""],
    [21, "Q2_K_S", 3.18, "Significant quality loss"],
    [23, "IQ3_XXS", 3.06, ""],
    [10, "Q2_K", 2.96, "Legacy 2-bit"],
    [29, "IQ2_M", 2.7, "SOTA 2-bit"],
    [28, "IQ2_S", 2.5, ""],
    [20, "IQ2_XS", 2.31, ""],
    [19, "IQ2_XXS", 2.06, ""],
    [37, "TQ2_0", 2.06, "Ternarization"],
    [31, "IQ1_M", 1.75, "Extreme compression"],
    [36, "TQ1_0", 1.69, "Ternarization"],
    [24, "IQ1_S", 1.56, "Experimental"],
  ] as const
).map(([id, name, bpw, desc]): QuantMethod => ({ id, name, bpw, desc }));
/** Flat allowance, in GB, added for the CUDA context when overhead is enabled. */
const CUDA_CONTEXT_GB = 0.5;

/** Share of the weight size (~15%) budgeted for the KV cache when overhead is enabled. */
const KV_CACHE_FRACTION = 0.15;

/**
 * Converts the raw text of the parameter-count field into a usable number.
 *
 * The input's `min` attribute does not stop a user from typing a negative
 * value, so the guard lives here: blank, non-numeric, non-finite and negative
 * input all collapse to 0 rather than producing a negative or "Infinity"
 * estimate.
 *
 * @param raw - Current text of the number input.
 * @returns The parsed count (in billions), or 0 when the text is unusable.
 */
function parseParamCount(raw: string): number {
  const parsed = Number.parseFloat(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}

/**
 * Interactive calculator that estimates the VRAM needed to load an LLM.
 *
 * Weight size is `params (billions) × bits-per-weight / 8`, reported in GB.
 * When the overhead toggle is on, a flat CUDA-context allowance plus a
 * KV-cache share of the weight size is added on top.
 *
 * Defaults: 8 (billion) parameters, quantization id 15, overhead enabled.
 */
const QuantizationCalculator: Component = () => {
  const [params, setParams] = createSignal<number>(8);
  const [selectedQuantId, setSelectedQuantId] = createSignal<number>(15);
  const [includeOverhead, setIncludeOverhead] = createSignal<boolean>(true);

  // Falls back to the first catalogue entry if the stored id matches nothing.
  const selectedQuant = createMemo(
    () => QUANT_DATA.find((q) => q.id === selectedQuantId()) ?? QUANT_DATA[0],
  );

  // Billions of parameters × bits per weight / 8 bits per byte → GB of weights.
  const modelSizeGB = createMemo(() => (params() * selectedQuant().bpw) / 8);

  const totalVramEstimation = createMemo(() => {
    const size = modelSizeGB();
    const overhead = includeOverhead() ? CUDA_CONTEXT_GB + size * KV_CACHE_FRACTION : 0;
    return size + overhead;
  });

  // Only accept values that parse to an integer id; a malformed option value
  // leaves the current selection untouched instead of storing NaN.
  const handleQuantChange = (raw: string): void => {
    const id = Number.parseInt(raw, 10);
    if (Number.isInteger(id)) {
      setSelectedQuantId(id);
    }
  };

  return (
    <div class={styles.wrapper}>
      <h2 class={styles.title}>LLM VRAM Calculator</h2>

      {/* --- Inputs --- */}
      <div class={styles.controls}>
        {/* Parameter Input */}
        <div class={styles.inputGroup}>
          <label for="model-params">Model Parameters (Billions)</label>
          <input
            id="model-params"
            type="number"
            min="0.1"
            step="0.1"
            value={params()}
            onInput={(e) => setParams(parseParamCount(e.currentTarget.value))}
          />
        </div>

        {/* Quantization Select */}
        <div class={styles.inputGroup}>
          <label for="quant-method">Quantization Method</label>
          <select
            id="quant-method"
            value={selectedQuantId()}
            onChange={(e) => handleQuantChange(e.currentTarget.value)}
          >
            <For each={QUANT_DATA}>
              {(quant) => (
                <option value={quant.id}>
                  {quant.name} ({quant.bpw} bpw) {quant.desc ? `- ${quant.desc}` : ""}
                </option>
              )}
            </For>
          </select>
        </div>

        {/* Overhead Toggle */}
        <div class={styles.checkboxGroup}>
          <input
            type="checkbox"
            id="overhead-check"
            checked={includeOverhead()}
            onChange={(e) => setIncludeOverhead(e.currentTarget.checked)}
          />
          <label for="overhead-check">Include Estimated Overhead (KV Cache + Context)</label>
        </div>
      </div>

      {/* --- Results --- */}
      <div class={styles.resultBox}>
        <div class={styles.resultHeader}>
          <span class={styles.label}>Estimated VRAM:</span>
          <span class={styles.value}>{totalVramEstimation().toFixed(2)} GB</span>
        </div>
        <p class={styles.subtext}>
          (Model Weights: {modelSizeGB().toFixed(2)} GB {includeOverhead() ? "+ Overhead" : ""})
        </p>
      </div>

      {/* --- Equation Display --- */}
      <div class={styles.equationBox}>
        <span class={styles.eqTitle}>Calculation Trace</span>
        <div class={styles.eqMath}>
          VRAM = ( {params()}B params × {selectedQuant().bpw} bpw ) / 8
        </div>
        <div class={styles.eqResult}>
          = {modelSizeGB().toFixed(4)} GB
          {includeOverhead() ? " + KV_Cache_Overhead" : ""}
        </div>
      </div>
    </div>
  );
};

export default QuantizationCalculator;