Vibe Coded Quantize Calc
This commit is contained in:
parent
6c523d019e
commit
82602ea9ef
File diff suppressed because one or more lines are too long
@ -0,0 +1,169 @@
|
|||||||
|
/* Path: 00-Lesson-Site/frontend/src/components/Util/QuantizationCalc.module.scss */
|
||||||
|
|
||||||
|
@use "../../styles/global_vars" as *;
|
||||||
|
|
||||||
|
// Outer card of the calculator (applied to the component's root <div>).
.wrapper {
  font-family: "Geist", sans-serif;
  padding: 2rem;
  border-radius: 12px;

  // Background: Base + slight tint (0.02)
  background-color: color-adjust(background, 0.02, 0);

  // Border: Background + higher contrast (0.1)
  border: 1px solid color-adjust(background, 0.1, 0);

  // Shadow: kept as rgba for transparency, or could be replaced if you have a shadow mixin
  box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);

  color: color-adjust(text, 0, 0);

  margin-bottom: 20px;
}
|
||||||
|
|
||||||
|
// Heading of the calculator card.
.title {
  margin-top: 0;
  margin-bottom: 1.5rem;
  font-size: 1.5rem;
  color: color-adjust(text, 0, 0);
}
|
||||||
|
|
||||||
|
// Vertical stack holding every input control (number field, select, checkbox row).
.controls {
  display: flex;
  flex-direction: column;
  gap: 1.25rem;
  margin-bottom: 2rem;
}
|
||||||
|
|
||||||
|
// Stacked label + control pair, used for the parameter input and the quantization select.
.inputGroup {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;

  label {
    font-weight: 600;
    font-size: 0.95rem;
    // Slightly softer text than title
    color: color-adjust(text, -0.05, 0);
  }

  input[type="number"],
  select {
    width: 100%;
    padding: 0.75rem;

    // Border: Background + medium contrast
    border: 1px solid color-adjust(background, 0.15, 0);
    border-radius: 8px;
    font-size: 1rem;

    // Input BG: Pure base background
    background-color: color-adjust(background, 0, 0);
    color: color-adjust(text, 0, 0);

    transition:
      border-color 0.15s ease,
      box-shadow 0.15s ease;

    &:focus {
      // Keep a transparent outline instead of `outline: none`: it is invisible
      // normally, but forced-colors / high-contrast modes repaint it, so the
      // control still shows a focus indicator when box-shadow is suppressed.
      outline: 2px solid transparent;
      border-color: color-adjust(primary, 0, 0);
      // Focus ring drawn with the solid primary color (no alpha is applied here).
      box-shadow: 0 0 0 2px color-adjust(primary, 0, 0);
    }
  }
}
|
||||||
|
|
||||||
|
// Horizontal row pairing the overhead checkbox with its clickable label.
.checkboxGroup {
  display: flex;
  align-items: center;
  gap: 0.75rem;
  margin-top: 0.5rem;

  input[type="checkbox"] {
    width: 1.2rem;
    height: 1.2rem;
    cursor: pointer;
    accent-color: color-adjust(primary, 0, 0);
  }

  label {
    cursor: pointer;
    // Softer text for label
    color: color-adjust(text, -0.1, 0);
    font-size: 0.95rem;
    // Prevent accidental text selection when the label is clicked repeatedly.
    user-select: none;
  }
}
|
||||||
|
|
||||||
|
// Panel that shows the estimated VRAM figure.
.resultBox {
  // Result box uses pure base background (white in light mode)
  // to stand out from the slightly tinted wrapper
  background: color-adjust(background, 0, 0);
  padding: 1.5rem;
  border-radius: 8px;
  border: 1px solid color-adjust(background, 0.1, 0);
  margin-bottom: 1.5rem;
}
|
||||||
|
|
||||||
|
// Row placing the result caption on the left and the value on the right,
// aligned on their text baselines.
.resultHeader {
  display: flex;
  justify-content: space-between;
  align-items: baseline;
  margin-bottom: 0.5rem;
}
|
||||||
|
|
||||||
|
// Caption next to the result value ("Estimated VRAM:").
.label {
  // Secondary text color
  color: color-adjust(text, -0.2, 0);
  font-weight: 500;
}
|
||||||
|
|
||||||
|
// The headline result number.
.value {
  font-size: 2rem;
  font-weight: 700;
  // Highlight with primary color
  color: color-adjust(primary, 0, 0);
}
|
||||||
|
|
||||||
|
// Right-aligned breakdown line shown under the headline result.
.subtext {
  margin: 0;
  font-size: 0.875rem;
  // Muted text color
  color: color-adjust(text, -0.3, 0);
  text-align: right;
}
|
||||||
|
|
||||||
|
// Monospace panel that prints the calculation trace.
.equationBox {
  background: color-adjust(background, 0.08, 0);

  color: color-adjust(text, 0, 0);
  padding: 1.25rem;
  border-radius: 8px;
  font-family: "GeistMono", monospace;
  font-size: 0.9rem;
  // Long formulas scroll horizontally instead of breaking the card layout.
  overflow-x: auto;
  line-height: 1.6;
  border: 1px solid color-adjust(background, 0.15, 0);
}
|
||||||
|
|
||||||
|
// Small uppercase caption above the trace ("Calculation Trace").
.eqTitle {
  display: block;
  margin-bottom: 0.5rem;
  font-weight: bold;
  color: color-adjust(text, -0.3, 0);
  text-transform: uppercase;
  font-size: 0.75rem;
  letter-spacing: 0.05em;
}
|
||||||
|
|
||||||
|
// Line of the trace that shows the formula with the current inputs substituted.
.eqMath {
  display: block;
}
|
||||||
|
|
||||||
|
// Line of the trace that shows the computed result, emphasized in the primary color.
.eqResult {
  margin-top: 0.5rem;
  color: color-adjust(primary, 0, 0);
  font-weight: bold;
}
|
||||||
125
00-Lesson-Site/frontend/src/components/Util/QuantizationCalc.tsx
Normal file
125
00-Lesson-Site/frontend/src/components/Util/QuantizationCalc.tsx
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
// Path: 00-Lesson-Site/frontend/src/components/Util/QuantizationCalc.tsx
|
||||||
|
|
||||||
|
import { createMemo, createSignal, For, type Component } from "solid-js";
|
||||||
|
import styles from "./QuantizationCalc.module.scss";
|
||||||
|
|
||||||
|
/** One selectable quantization format in the calculator's dropdown. */
type QuantMethod = {
  /**
   * Unique key used as the `<option>` value and for lookup.
   * NOTE(review): the numbers look like llama.cpp quantization type ids — confirm.
   */
  id: number;
  /** Display name shown in the dropdown (e.g. "Q4_K_M (Q4_K)"). */
  name: string;
  /** Bits per weight; multiplied by the parameter count to estimate weight size. */
  bpw: number;
  /** Optional short remark appended to the dropdown entry; empty string when there is none. */
  desc: string;
};
|
||||||
|
|
||||||
|
// Data derived from the provided Llama-3-8B and IQ/TQ specs
//
// Entries are listed from highest to lowest `bpw`; the dropdown renders them in this order.
// NOTE(review): several non-IQ/TQ `bpw` values (e.g. Q8_0 = 7.96, Q6_K = 6.14) look like
// the per-model file sizes in GB that llama.cpp's quantize help lists for Llama-3-8B
// rather than nominal bits-per-weight. They give plausible sizes for ~8B models but
// should be confirmed before being relied on for other parameter counts.
const QUANT_DATA: QuantMethod[] = [
  { id: 0, name: "F32", bpw: 32.0, desc: "Standard Float 32 (Uncompressed)" },
  { id: 32, name: "BF16 / F16", bpw: 16.0, desc: "Half Precision" },
  { id: 7, name: "Q8_0", bpw: 7.96, desc: "Almost lossless" },
  { id: 18, name: "Q6_K", bpw: 6.14, desc: "High quality" },
  { id: 9, name: "Q5_1", bpw: 5.65, desc: "High accuracy" },
  { id: 17, name: "Q5_K_M (Q5_K)", bpw: 5.33, desc: "Recommended balance" },
  { id: 16, name: "Q5_K_S", bpw: 5.21, desc: "" },
  { id: 8, name: "Q5_0", bpw: 5.21, desc: "Legacy standard" },
  { id: 3, name: "Q4_1", bpw: 4.78, desc: "" },
  { id: 15, name: "Q4_K_M (Q4_K)", bpw: 4.58, desc: "Most popular daily driver" },
  { id: 25, name: "IQ4_NL", bpw: 4.5, desc: "Non-linear quantization" },
  { id: 14, name: "Q4_K_S", bpw: 4.37, desc: "Fast inference" },
  { id: 2, name: "Q4_0", bpw: 4.34, desc: "Very fast" },
  { id: 30, name: "IQ4_XS", bpw: 4.25, desc: "" },
  { id: 13, name: "Q3_K_L", bpw: 4.03, desc: "" },
  { id: 12, name: "Q3_K_M (Q3_K)", bpw: 3.74, desc: "Decent for lower VRAM" },
  { id: 27, name: "IQ3_M", bpw: 3.66, desc: "Mix quantization" },
  { id: 26, name: "IQ3_S", bpw: 3.44, desc: "" },
  { id: 11, name: "Q3_K_S", bpw: 3.41, desc: "" },
  { id: 22, name: "IQ3_XS", bpw: 3.3, desc: "" },
  { id: 21, name: "Q2_K_S", bpw: 3.18, desc: "Significant quality loss" },
  { id: 23, name: "IQ3_XXS", bpw: 3.06, desc: "" },
  { id: 10, name: "Q2_K", bpw: 2.96, desc: "Legacy 2-bit" },
  { id: 29, name: "IQ2_M", bpw: 2.7, desc: "SOTA 2-bit" },
  { id: 28, name: "IQ2_S", bpw: 2.5, desc: "" },
  { id: 20, name: "IQ2_XS", bpw: 2.31, desc: "" },
  { id: 19, name: "IQ2_XXS", bpw: 2.06, desc: "" },
  { id: 37, name: "TQ2_0", bpw: 2.06, desc: "Ternarization" },
  { id: 31, name: "IQ1_M", bpw: 1.75, desc: "Extreme compression" },
  { id: 36, name: "TQ1_0", bpw: 1.69, desc: "Ternarization" },
  { id: 24, name: "IQ1_S", bpw: 1.56, desc: "Experimental" },
];
|
||||||
|
|
||||||
|
/** Flat allowance in GB added when overhead is enabled (the "+0.5GB CUDA Context" term). */
const CONTEXT_OVERHEAD_GB = 0.5;

/** Fraction of the weight size added when overhead is enabled (the "~15% for KV Cache" term). */
const KV_CACHE_OVERHEAD_RATIO = 0.15;

/**
 * Parses a numeric text field into a non-negative finite number.
 * Empty, unparsable, non-finite and negative input all collapse to 0 so the
 * estimate can never be negative or NaN.
 */
const parseNonNegative = (raw: string): number => {
  const parsed = Number.parseFloat(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
};

/**
 * Interactive VRAM estimator.
 *
 * The user enters a parameter count (in billions), picks a quantization method
 * from QUANT_DATA and optionally includes a rough runtime overhead. The component
 * shows the estimated total, the weight-only size and the formula used.
 */
const QuantizationCalculator: Component = () => {
  const [params, setParams] = createSignal<number>(8);
  const [selectedQuantId, setSelectedQuantId] = createSignal<number>(15);
  const [includeOverhead, setIncludeOverhead] = createSignal<boolean>(true);

  // Falls back to the first entry when the selected id is unknown (e.g. an unparsable option value).
  const selectedQuant = createMemo(() => QUANT_DATA.find((q) => q.id === selectedQuantId()) ?? QUANT_DATA[0]);

  // Weight size in GB: billions of parameters × bits per weight ÷ 8 bits per byte.
  const modelSizeGB = createMemo(() => {
    return (params() * selectedQuant().bpw) / 8;
  });

  const totalVramEstimation = createMemo(() => {
    const size = modelSizeGB();
    // +0.5GB CUDA Context + ~15% for KV Cache
    const overhead = includeOverhead() ? CONTEXT_OVERHEAD_GB + size * KV_CACHE_OVERHEAD_RATIO : 0;
    return size + overhead;
  });

  return (
    <div class={styles.wrapper}>
      <h2 class={styles.title}>LLM VRAM Calculator</h2>

      {/* --- Inputs --- */}
      <div class={styles.controls}>
        {/* Parameter Input */}
        <div class={styles.inputGroup}>
          <label for="model-params">Model Parameters (Billions)</label>
          <input id="model-params" type="number" min="0.1" step="0.1" value={params()} onInput={(e) => setParams(parseNonNegative(e.currentTarget.value))} />
        </div>

        {/* Quantization Select */}
        <div class={styles.inputGroup}>
          <label for="quant-method">Quantization Method</label>
          <select id="quant-method" value={selectedQuantId()} onChange={(e) => setSelectedQuantId(Number.parseInt(e.currentTarget.value, 10))}>
            <For each={QUANT_DATA}>
              {(quant) => (
                // `selected` keeps the rendered markup in agreement with the signal even
                // before the select's `value` property is applied on the client.
                <option value={quant.id} selected={quant.id === selectedQuantId()}>
                  {quant.name} ({quant.bpw} bpw) {quant.desc ? `- ${quant.desc}` : ""}
                </option>
              )}
            </For>
          </select>
        </div>

        {/* Overhead Toggle */}
        <div class={styles.checkboxGroup}>
          <input type="checkbox" id="overhead-check" checked={includeOverhead()} onChange={(e) => setIncludeOverhead(e.currentTarget.checked)} />
          <label for="overhead-check">Include Estimated Overhead (KV Cache + Context)</label>
        </div>
      </div>

      {/* --- Results --- */}
      <div class={styles.resultBox}>
        <div class={styles.resultHeader}>
          <span class={styles.label}>Estimated VRAM:</span>
          <span class={styles.value}>{totalVramEstimation().toFixed(2)} GB</span>
        </div>
        <p class={styles.subtext}>
          (Model Weights: {modelSizeGB().toFixed(2)} GB {includeOverhead() ? "+ Overhead" : ""})
        </p>
      </div>

      {/* --- Equation Display --- */}
      <div class={styles.equationBox}>
        <span class={styles.eqTitle}>Calculation Trace</span>
        <div class={styles.eqMath}>
          VRAM = ( {params()}B params × {selectedQuant().bpw} bpw ) / 8
        </div>
        <div class={styles.eqResult}>
          = {modelSizeGB().toFixed(4)} GB
          {includeOverhead() ? " + KV_Cache_Overhead" : ""}
        </div>
      </div>
    </div>
  );
};
|
||||||
|
|
||||||
|
export default QuantizationCalculator;
|
||||||
@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
# Path: frontend/src/content/lessons/01-intro.mdx
|
# Path: 00-Lesson-Site/frontend/src/content/lessons/01-intro.mdx
|
||||||
|
|
||||||
title: "Introduction to Web Dev"
|
title: "Introduction to Web Dev"
|
||||||
description: "Setting up the environment"
|
description: "Setting up the environment"
|
||||||
@ -14,6 +14,7 @@ import Info from "../../components/Post/Blockquotes/Info.astro";
|
|||||||
import QA from "../../components/Post/Blockquotes/QA.astro";
|
import QA from "../../components/Post/Blockquotes/QA.astro";
|
||||||
|
|
||||||
import Spoiler from "../../components/Post/Spoiler.tsx";
|
import Spoiler from "../../components/Post/Spoiler.tsx";
|
||||||
|
import QuantizationCalculator from "../../components/Util/QuantizationCalc.tsx";
|
||||||
|
|
||||||
# Hosting a Large Language Model (LLM) Locally
|
# Hosting a Large Language Model (LLM) Locally
|
||||||
|
|
||||||
@ -650,7 +651,7 @@ import Spoiler from "../../components/Post/Spoiler.tsx";
|
|||||||
- TQx = Ternary Quantization with x bits per weight, bpw is on the chart
|
- TQx = Ternary Quantization with x bits per weight, bpw is on the chart
|
||||||
</QA>
|
</QA>
|
||||||
|
|
||||||
**TO BE ADDED** - Quantization Calculator
|
<QuantizationCalculator client:idle />
|
||||||
|
|
||||||
Once we have decided what quantization type to use, we can proceed to quantize the model by running:
|
Once we have decided what quantization type to use, we can proceed to quantize the model by running:
|
||||||
|
|
||||||
|
|||||||
@ -22,7 +22,7 @@ import "../styles/main.scss";
|
|||||||
<link rel="icon" href="favicon/favicon.ico" />
|
<link rel="icon" href="favicon/favicon.ico" />
|
||||||
<link rel="icon" type="image/png" sizes="32x32" href="favicon/favicon-32x32.png" />
|
<link rel="icon" type="image/png" sizes="32x32" href="favicon/favicon-32x32.png" />
|
||||||
<link rel="icon" type="image/png" sizes="16x16" href="favicon/favicon-16x16.png" />
|
<link rel="icon" type="image/png" sizes="16x16" href="favicon/favicon-16x16.png" />
|
||||||
<link rel="apple-touch-icon" sizes="180x180" href="favicon/apple-touch-icon.png" />
|
<link rel="apple-touch-icon" sizes="180x180" href="favicon/apple-icon.png" />
|
||||||
<link rel="manifest" href="favicon/site.webmanifest" />
|
<link rel="manifest" href="favicon/site.webmanifest" />
|
||||||
|
|
||||||
<title>Web Dev Lesson Notes</title>
|
<title>Web Dev Lesson Notes</title>
|
||||||
|
|||||||
@ -29,7 +29,7 @@ const { pageTitle = "Web Dev Lessons" } = Astro.props;
|
|||||||
<link rel="icon" href="favicon/favicon.ico" />
|
<link rel="icon" href="favicon/favicon.ico" />
|
||||||
<link rel="icon" type="image/png" sizes="32x32" href="favicon/favicon-32x32.png" />
|
<link rel="icon" type="image/png" sizes="32x32" href="favicon/favicon-32x32.png" />
|
||||||
<link rel="icon" type="image/png" sizes="16x16" href="favicon/favicon-16x16.png" />
|
<link rel="icon" type="image/png" sizes="16x16" href="favicon/favicon-16x16.png" />
|
||||||
<link rel="apple-touch-icon" sizes="180x180" href="favicon/apple-touch-icon.png" />
|
<link rel="apple-touch-icon" sizes="180x180" href="favicon/apple-icon.png" />
|
||||||
<link rel="manifest" href="favicon/site.webmanifest" />
|
<link rel="manifest" href="favicon/site.webmanifest" />
|
||||||
|
|
||||||
<title>{pageTitle}</title>
|
<title>{pageTitle}</title>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user