From 30a03db92302646bd6bf5782e862432d5fdaf93b Mon Sep 17 00:00:00 2001 From: Salastil <46979341+Salastil@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:00:59 -0500 Subject: [PATCH] Follow nested m3u8 playlists in puppeteer runner --- internal/extractor.go | 59 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/internal/extractor.go b/internal/extractor.go index b20c4fb..91b931f 100644 --- a/internal/extractor.go +++ b/internal/extractor.go @@ -209,16 +209,59 @@ function installTouchAndWindowSpoofing(page) { }); let captured = null; + let resolveCapture; const capturePromise = new Promise(resolve => { - page.on('request', req => { - const url = req.url(); - if (!captured && url.includes('.m3u8')) { - const headers = req.headers(); - captured = { url, headers }; - console.log('[puppeteer] found .m3u8 request: ' + url); - resolve(); + resolveCapture = resolve; + }); + + function findNestedPlaylist(body, baseUrl) { + if (!body) return ''; + const lines = body.split(/\r?\n/); + for (const rawLine of lines) { + const line = (rawLine || '').trim(); + if (!line || line.startsWith('#')) continue; + if (line.toLowerCase().includes('.m3u8')) { + try { + return new URL(line, baseUrl).toString(); + } catch (_) { + return line; + } } - }); + } + return ''; + } + + async function handleM3U8Response(res) { + const url = res.url(); + const headers = res.request().headers(); + let body = ''; + try { + body = await res.text(); + } catch (err) { + console.log('[puppeteer] failed to read m3u8 body for ' + url + ': ' + err.message); + } + + const hasExtinf = body && body.includes('#EXTINF'); + const nested = findNestedPlaylist(body, url); + let finalUrl = url; + let reason = 'first seen'; + if (hasExtinf) { + reason = 'contains #EXTINF segments'; + } else if (nested) { + finalUrl = nested; + reason = 'nested m3u8 discovered in response body'; + } + + if (!captured || hasExtinf) { + captured = { url: finalUrl, headers, hasExtinf }; + console.log('[puppeteer] captured .m3u8 (' + reason + '): ' + finalUrl); + if (resolveCapture) resolveCapture(); + } + } + + page.on('response', res => { + if (!res.url().includes('.m3u8')) return; + handleM3U8Response(res); }); try {