diff --git a/services/memory/memory_extractor.py b/services/memory/memory_extractor.py
index 4ea0d6489..e5f609250 100644
--- a/services/memory/memory_extractor.py
+++ b/services/memory/memory_extractor.py
@@ -236,6 +236,43 @@ def _is_text_duplicate(new_text: str, existing: list, threshold: float = 0.6) ->
     return False
 
 
+def _parse_extraction_json(raw: str) -> list:
+    """Parse the extraction LLM's reply into a list of facts, tolerating
+    reasoning-model noise.
+
+    The model emits <think>…</think> (and sometimes a prose preamble or a
+    ```json fence) AROUND the JSON array; without stripping it, json.loads
+    bombs and the run silently yields "0 candidates". Pure str -> list (no
+    LLM/network); returns [] on any parse failure instead of raising.
+    """
+    text = (raw or "").strip()
+    try:
+        from src.text_helpers import strip_think as _strip_think
+        text = _strip_think(text, prose=True, prompt_echo=True).strip()
+    except Exception:
+        pass
+    if text.startswith("```"):
+        text = text.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
+    # JSON may still be embedded in surrounding commentary (leading prose or
+    # trailing remarks like "[...] Done!") — slice from the first '[' to the
+    # last ']' whenever both exist. Slice unconditionally: a reply that starts
+    # with '[' can still carry trailing commentary that breaks json.loads.
+    _start = text.find("[")
+    _end = text.rfind("]")
+    if 0 <= _start < _end:
+        text = text[_start : _end + 1]
+
+    try:
+        facts = json.loads(text)
+    except json.JSONDecodeError:
+        logger.debug("Memory extraction returned non-JSON: %r", (raw or "")[:120])
+        return []
+    except Exception:
+        logger.debug("Memory extraction returned non-JSON: %r", (raw or "")[:120])
+        return []
+    return facts if isinstance(facts, list) else []
+
+
 async def extract_and_store(
     session,
     memory_manager,
@@ -284,9 +321,34 @@ async def extract_and_store(
 
         fallback_facts = _fallback_memory_candidates(stripped_recent)
 
+        # Flatten the window into a SINGLE user message instead of appending the
+        # raw alternating role messages. Passed as raw chat messages, the model
+        # treats the window as a conversation to CONTINUE rather than a transcript
+        # to ANALYZE, so it reliably extracts nothing — typically returning `[]`
+        # (and, depending on the input, sometimes an empty or <think>-only
+        # completion when the window ends on an assistant turn). This was the real
+        # cause of auto-memory logging "0 candidates" on every run. Reframing it as
+        # one "analyze this transcript, return the JSON array" user message makes
+        # the model actually extract. Controlled repro on this model: 0/6 trials
+        # with the old structure vs 6/6 with this one. The skill extractor flattens
+        # for the same reason.
+        def _flatten_msg(m):
+            c = m.get("content", "")
+            if isinstance(c, list):
+                c = " ".join(
+                    b.get("text", "") for b in c
+                    if isinstance(b, dict) and b.get("type") == "text"
+                )
+            return f"{m.get('role', '?')}: {c}"
+
+        transcript = "\n\n".join(_flatten_msg(m) for m in stripped_recent)
         extraction_messages = [
             {"role": "system", "content": EXTRACT_SYSTEM_PROMPT},
-        ] + stripped_recent
+            {"role": "user", "content": (
+                "Conversation to analyze:\n\n" + transcript
+                + "\n\nReturn the JSON array of durable facts now (or [] if none)."
+            )},
+        ]
 
         facts = []
         try:
@@ -295,19 +357,20 @@ async def extract_and_store(
                 model,
                 extraction_messages,
                 temperature=0.1,
-                max_tokens=500,
+                # A reasoning model spends most of its budget on <think> tokens
+                # BEFORE emitting the JSON, so the old 500 truncated the response
+                # before any JSON appeared → every run logged "0 candidates". The
+                # audit path hit the same wall and raised to 16384; extraction's
+                # output (a short facts list) is small, so an ample ceiling is
+                # enough once thinking has room.
+                max_tokens=4096,
                 headers=headers,
             )
 
-            # Parse JSON from response (handle markdown fences if model wraps them)
-            text = raw.strip()
-            if text.startswith("```"):
-                text = text.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
-
-            try:
-                facts = json.loads(text)
-            except json.JSONDecodeError:
-                logger.debug("Memory extraction returned non-JSON")
+            # Parse JSON, tolerating reasoning-model noise (<think> blocks, a
+            # ```json fence, and leading/trailing commentary). See
+            # _parse_extraction_json — returns [] rather than raising.
+            facts = _parse_extraction_json(raw)
         except Exception as e:
             logger.warning(f"LLM memory extraction failed; using fallback candidates if available: {e}")
 
diff --git a/tests/test_memory_extraction_parse.py b/tests/test_memory_extraction_parse.py
new file mode 100644
index 000000000..20d383cc6
--- /dev/null
+++ b/tests/test_memory_extraction_parse.py
@@ -0,0 +1,36 @@
+"""_parse_extraction_json must survive reasoning-model noise.
+
+The extraction model wraps its JSON array in <think> blocks, ```json fences,
+or leading/trailing prose. The helper strips that noise and slices the array
+unconditionally — a reply that starts with '[' can still carry trailing
+commentary like "[...] Done!" that would otherwise break json.loads.
+"""
+
+from services.memory.memory_extractor import _parse_extraction_json
+
+
+def test_think_prefixed_array_parses_to_one_fact():
+    raw = '<think>reasoning...</think>\n[{"text": "x", "category": "fact"}]'
+    assert _parse_extraction_json(raw) == [{"text": "x", "category": "fact"}]
+
+
+def test_fenced_json_block_parses():
+    raw = '```json\n[{"text": "x", "category": "fact"}]\n```'
+    assert _parse_extraction_json(raw) == [{"text": "x", "category": "fact"}]
+
+
+def test_leading_prose_before_array_parses():
+    raw = 'Here are the durable facts:\n[{"text": "x", "category": "fact"}]'
+    assert _parse_extraction_json(raw) == [{"text": "x", "category": "fact"}]
+
+
+def test_trailing_commentary_after_array_parses():
+    # Exercises the unconditional slice: text starts with '[' but has trailing
+    # commentary that the old `text[0] != "["` guard skipped, breaking json.loads.
+    raw = '[{"text": "x", "category": "fact"}] Done!'
+    assert _parse_extraction_json(raw) == [{"text": "x", "category": "fact"}]
+
+
+def test_malformed_no_array_returns_empty():
+    assert _parse_extraction_json("no array here, sorry") == []
+    assert _parse_extraction_json("") == []