diff --git a/rag/chunk.py b/rag/chunk.py
index 81ef39c..c937c1f 100644
--- a/rag/chunk.py
+++ b/rag/chunk.py
@@ -32,8 +32,12 @@ CHARS_PER_TOKEN = 4
 TARGET_TOKENS = 500
 TARGET_CHARS = TARGET_TOKENS * CHARS_PER_TOKEN
 # Hard cap: nomic-embed-text's context is 2048 tokens. Anything larger
-# 400s the entire embed batch. 6000 chars ≈ 1500 tokens leaves headroom.
-MAX_CHARS = 6000
+# 400s the entire embed batch. 6000 chars works for prose but markdown
+# tables with lots of `|` separators tokenize ~1.4× denser; a 5839-char
+# table chunk from the HVM qualification matrix tokenized past 2048 and
+# crashed the rebuild. 4000 chars ≈ 1400 tokens at that density, so it
+# stays under 2048 even for dense tables, with headroom for the query side.
+MAX_CHARS = 4000
 
 
 def _hard_split(text: str) -> list[str]: