fix: handle batch events format in manage_calendar tool (#3503)

* fix: handle batch events format in manage_calendar tool

Models like deepseek-v4-flash emit a batch `events` array instead of individual `create_event` calls. Because that payload has no `action` key, the tool defaulted to `list_events`, so events were never created even though the model confirmed success.

- Add batch normalization in do_manage_calendar

- Map start/end objects to flat dtstart/dtend strings

- Add tests for both object and flat string formats

* fix: surface partial batch failures in manage_calendar

Partial failures were silently dropped: a batch with mixed successes and failures reported only the created count, with no visibility into the errors.

- Return non-zero exit code for any failures

- Surface both created and failed counts in response

- Include first error message for debugging

- Add test for partial failure case

* chore: strip trailing whitespace in batch normalization block

* chore: strip whitespace-only blank lines in batch events test
This commit is contained in:
Srinesh R
2026-06-10 22:43:08 +05:30
committed by GitHub
parent f5b91f1e9e
commit d9a4b99046
2 changed files with 161 additions and 0 deletions
+36
View File
@@ -1453,6 +1453,42 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
except ValueError:
return {"error": "Invalid JSON arguments", "exit_code": 1}
# ── Batch normalization ──
# Some models (e.g. deepseek-v4-flash) emit {"events": [{...}, ...]}
# instead of individual create_event calls. Iterate and create each.
if isinstance(args.get("events"), list) and not args.get("action"):
results = []
for ev in args["events"]:
if not isinstance(ev, dict):
continue
# Normalize start/end from {dateTime: "..."} object to flat string
for field, target in [("start", "dtstart"), ("end", "dtend")]:
val = ev.pop(field, None)
if val and target not in ev:
ev[target] = val.get("dateTime", val) if isinstance(val, dict) else val
ev.setdefault("action", "create_event")
r = await do_manage_calendar(json.dumps(ev), owner=owner)
results.append(r)
created = [r for r in results if r.get("exit_code") == 0 and not r.get("error")]
failed = [r for r in results if r.get("error")]
if not results:
return {"error": "No events to create", "exit_code": 1}
# Surface both successes and failures
parts = []
if created:
summaries = [r.get("response", "") for r in created]
parts.append(f"Created {len(created)} event(s):\n" + "\n".join(summaries))
if failed:
first_error = failed[0].get("error", "Unknown error")
parts.append(f"Failed to create {len(failed)} event(s). First error: {first_error}")
response = "\n\n".join(parts)
# Non-zero exit code for partial or total failure
exit_code = 0 if not failed else 1
return {"response": response, "exit_code": exit_code, "created_count": len(created), "failed_count": len(failed)}
# Normalize action — some models emit hyphens ("list-calendars") instead
# of underscores. Treat them as equivalent so we don't bounce a
# cosmetic typo back to the model and waste a round-trip. Also accept
+125
View File
@@ -0,0 +1,125 @@
"""Test that do_manage_calendar handles the batch {"events": [...]} format
that models like deepseek-v4-flash emit instead of individual create_event calls.
"""
import json
import sys
import uuid
import pytest
from tests.helpers.import_state import clear_fake_database_modules
from tests.helpers.sqlite_db import make_temp_sqlite
# NOTE: ordering matters — this call deliberately runs BEFORE `core.database`
# is imported below. Presumably it evicts stub/fake database modules that
# other tests left in sys.modules so the real module gets loaded; confirm
# against tests/helpers/import_state.
clear_fake_database_modules()
import core.database as cdb
from core.database import CalendarEvent
# Temp SQLite database built from the project's table metadata, created once
# at import time for this module. `_TS` is used as the session factory: the
# autouse fixture patches it in as `cdb.SessionLocal` and the tests call
# `_TS()` directly. `_ENGINE` and `_TMPDB` are not referenced by the visible
# tests — presumably kept so the engine/temp file stay alive (TODO confirm).
_TS, _ENGINE, _TMPDB = make_temp_sqlite(cdb.Base.metadata)
@pytest.fixture(autouse=True)
def _bind_temp_db(monkeypatch):
    """Route ``core.database`` to this module's temp SQLite for every test.

    Registers ``cdb`` as ``sys.modules["core.database"]``, mirrors it onto the
    ``core`` package's ``database`` attribute when that package is loaded, and
    swaps ``cdb.SessionLocal`` for the temp session factory ``_TS``. All
    patches go through ``monkeypatch`` and are undone after the test.
    """
    monkeypatch.setitem(sys.modules, "core.database", cdb)

    # Keep `core.database` attribute access consistent with the sys.modules
    # entry, but only if the parent package has actually been imported.
    core_pkg = sys.modules.get("core")
    if core_pkg is not None:
        monkeypatch.setattr(core_pkg, "database", cdb, raising=False)

    monkeypatch.setattr(cdb, "SessionLocal", _TS)
    yield
async def test_batch_events_with_datetime_objects():
    """Batch events whose start/end are ``{"dateTime": ...}`` objects are all created.

    Covers the payload shape
    ``{"events": [{"summary": ..., "start": {"dateTime": ...}, "end": {"dateTime": ...}}]}``
    and checks the tool's response, its count metadata, and the persisted rows.
    """
    from src.tool_implementations import do_manage_calendar

    owner = "tester-" + uuid.uuid4().hex[:6]
    payload = {
        "events": [
            {
                "summary": "Morning Gym",
                "start": {"dateTime": "2026-06-09T06:00:00+05:30"},
                "end": {"dateTime": "2026-06-09T07:00:00+05:30"},
            },
            {
                "summary": "Morning Gym",
                "start": {"dateTime": "2026-06-10T06:00:00+05:30"},
                "end": {"dateTime": "2026-06-10T07:00:00+05:30"},
            },
        ]
    }
    res = await do_manage_calendar(json.dumps(payload), owner=owner)

    assert res.get("exit_code") == 0, res
    assert "Created 2 event(s)" in res.get("response", "")
    # The batch path also reports counts as metadata; a full success must
    # report every event as created and none as failed.
    assert res.get("created_count") == 2, res
    assert res.get("failed_count") == 0, res

    # Verify events exist in DB. Close the session in `finally` so a failing
    # assertion does not leak it into later tests.
    db = _TS()
    try:
        events = db.query(CalendarEvent).filter(CalendarEvent.summary == "Morning Gym").all()
        assert len(events) == 2
    finally:
        db.close()
async def test_batch_events_with_flat_strings():
    """Batch events whose start/end are plain ISO strings are created.

    Covers the payload shape
    ``{"events": [{"summary": ..., "start": "ISO", "end": "ISO"}]}``.
    """
    from src.tool_implementations import do_manage_calendar

    unique_owner = "tester-" + uuid.uuid4().hex[:6]
    standup = {
        "summary": "Standup",
        "start": "2026-06-09T09:00:00",
        "end": "2026-06-09T09:30:00",
    }
    batch_json = json.dumps({"events": [standup]})

    result = await do_manage_calendar(batch_json, owner=unique_owner)

    assert result.get("exit_code") == 0, result
    assert "Created 1 event(s)" in result.get("response", "")
async def test_batch_events_partial_failure():
    """A batch mixing valid and invalid events surfaces both counts and the first error.

    Two events carry start/end times and one has only a summary. The tool is
    expected to create the two valid events, report the third as failed with
    an error message, and return a non-zero exit code.
    """
    from src.tool_implementations import do_manage_calendar

    owner = "tester-" + uuid.uuid4().hex[:6]
    payload = {
        "events": [
            {
                "summary": "Valid Event 1",
                "start": "2026-06-09T10:00:00",
                "end": "2026-06-09T11:00:00",
            },
            {
                "summary": "Invalid Event",
                # Missing required dtstart — will fail
            },
            {
                "summary": "Valid Event 2",
                "start": "2026-06-09T14:00:00",
                "end": "2026-06-09T15:00:00",
            },
        ]
    }
    res = await do_manage_calendar(json.dumps(payload), owner=owner)

    # Partial failure = non-zero exit code
    assert res.get("exit_code") != 0, "Partial failure should return non-zero exit code"

    # Response should mention both created and failed counts
    response = res.get("response", "")
    assert "Created 2 event(s)" in response, f"Should report 2 created: {response}"
    assert "Failed to create 1 event(s)" in response, f"Should report 1 failed: {response}"

    # The response must carry the actual error text. Checking for the word
    # "error" alone would always pass because the "First error:" label itself
    # contains it, so require non-empty detail after the label instead.
    _, label, detail = response.partition("First error: ")
    assert label and detail.strip(), f"Should include error details: {response}"

    # Metadata fields
    assert res.get("created_count") == 2
    assert res.get("failed_count") == 1

    # Verify only valid events were created. Close the session in `finally`
    # so a failing assertion does not leak it into later tests.
    db = _TS()
    try:
        events = db.query(CalendarEvent).filter(
            CalendarEvent.summary.in_(["Valid Event 1", "Valid Event 2"])
        ).all()
        assert len(events) == 2
        # "Only valid events" also means the failed one left no row behind.
        rejected = db.query(CalendarEvent).filter(
            CalendarEvent.summary == "Invalid Event"
        ).all()
        assert not rejected, "Failed event should not be persisted"
    finally:
        db.close()