fix(search): degrade to empty results on non-JSON provider responses (#1129) (#1352)

tavily_search, serper_search and google_pse_search parsed response.json()
inside the network try block, which only caught httpx.RequestError and
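RateLimitError. When a provider returned a non-JSON body with a success status
(an HTML error page or a truncated/empty body, e.g. served by a gateway or
proxy in front of the API — error statuses such as a 5xx are already raised
earlier by response.raise_for_status()), response.json() raised an UNCAUGHT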
json.JSONDecodeError that aborted the search in the background — exactly the
'search engines other than SearXNG fail in the background' symptom.

brave_search already handles this correctly: it parses JSON in its own try
block and returns [] on json.JSONDecodeError. Mirror that in the other three
providers so a malformed provider response degrades to no-results instead of
propagating an exception.

Adds tests/test_search_provider_json.py: a non-JSON 200 body now yields [] for
tavily, serper, google_pse, and brave (the last guards the reference behaviour).

Co-authored-by: NubsCarson <nubs@nubs.site>
This commit is contained in:
Shaw
2026-06-03 01:24:23 -04:00
committed by GitHub
parent e678ff753f
commit 552bc15067
2 changed files with 77 additions and 3 deletions
+18 -3
View File
@@ -492,7 +492,6 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
if response.status_code == 429:
raise RateLimitError("Google PSE rate limit hit")
response.raise_for_status()
data = response.json()
except httpx.RequestError as e:
error_logger.error(f"Google PSE search failed: {e}")
return []
@@ -500,6 +499,12 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
error_logger.error(str(e))
return []
try:
data = response.json()
except json.JSONDecodeError as e:
error_logger.error(f"Google PSE returned invalid JSON: {e}")
return []
results = []
for item in data.get("items", [])[:count]:
url = item.get("link", "")
@@ -544,7 +549,6 @@ def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None
if response.status_code == 429:
raise RateLimitError("Tavily rate limit hit")
response.raise_for_status()
data = response.json()
except httpx.RequestError as e:
error_logger.error(f"Tavily search failed: {e}")
return []
@@ -552,6 +556,12 @@ def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None
error_logger.error(str(e))
return []
try:
data = response.json()
except json.JSONDecodeError as e:
error_logger.error(f"Tavily returned invalid JSON: {e}")
return []
results = []
for item in data.get("results", [])[:count]:
url = item.get("url", "")
@@ -599,7 +609,6 @@ def serper_search(query: str, count: int = 10, time_filter: Optional[str] = None
if response.status_code == 429:
raise RateLimitError("Serper rate limit hit")
response.raise_for_status()
data = response.json()
except httpx.RequestError as e:
error_logger.error(f"Serper search failed: {e}")
return []
@@ -607,6 +616,12 @@ def serper_search(query: str, count: int = 10, time_filter: Optional[str] = None
error_logger.error(str(e))
return []
try:
data = response.json()
except json.JSONDecodeError as e:
error_logger.error(f"Serper returned invalid JSON: {e}")
return []
results = []
for item in data.get("organic", [])[:count]:
url = item.get("link", "")