#!/usr/bin/env python3
"""Browser-based probe of betterbrowsing.org via Playwright.

Companion to probe.sh. This script exercises every endpoint a real
browser would touch, plus the things curl can't reach:

- document.cookie visibility (HttpOnly cookies are stored but hidden from JS)
- All login form variants — automation-guard hooks fire on different
  shapes (canonical type=password, type=text masquerade, renamed fields,
  GET-form, JS-driven XHR, multi-step navigation)
- Form-sink submissions (signup, comment, listing, ad-click) via real
  browser navigation, so 302 redirects and post-submission state are
  observed end-to-end
- Engagement like via AJAX
- All diagnostic endpoints exercised via fetch() — captures what the
  intermediary does to browser-originated XHR (headers, cookies, IP)
- OSINT bait paths fetched via the page, to see whether the intermediary
  blocks scan-shaped XHR differently from page navigation
- Robots.txt-disallowed /private/* fetches
- Real browser fingerprint (Sec-CH-UA, Sec-Fetch-*, etc.) reported

Usage:
    pip install playwright && playwright install chromium
    python3 probe_browser.py                              # direct
    python3 probe_browser.py --proxy http://1.2.3.4:8080  # HTTP proxy
    python3 probe_browser.py --cdp wss://USER:PASS@h:9222 # CDP browser
    python3 probe_browser.py --headed                     # watch it
    python3 probe_browser.py --json                       # diff-friendly

Compare across runs:
    python3 probe_browser.py --json > direct.json
    python3 probe_browser.py --json --proxy http://... > proxied.json
    diff direct.json proxied.json

Exits non-zero if any probe fails. Info rows (fingerprint, robots,
ratecap percentage) don't count as failures.
"""
import argparse
import json
import sys
from contextlib import contextmanager

try:
    from playwright.sync_api import sync_playwright, TimeoutError as PWTimeoutError
except ImportError:
    sys.stderr.write("playwright not installed. Run: pip install playwright && playwright install chromium\n")
    sys.exit(2)


# Origin that is probed when --base is not given on the command line.
DEFAULT_BASE = "https://betterbrowsing.org"


# Table-driven login form variants. Each row is a 3-tuple:
#   (form page path, email/username input selector, password input selector)
# main() fills both inputs, clicks the submit button and expects the browser
# to land on "/" with the session cookies set, provided the intermediary does
# not intercept. The XHR and multi-step login shapes are not listed here; they
# are probed by dedicated code in main().
LOGIN_VARIANTS = [
    (
        "/login/post-form",
        'input[name="email"]',
        'input[name="password"]',
    ),
    (
        "/login/post-text-pw",  # password field is rendered as type=text
        'input[name="email"]',
        'input[name="password"]',
    ),
    (
        "/login/post-renamed",
        'input[name="user_credential"]',
        'input[name="secret_key"]',
    ),
    (
        "/login/get-form",  # form uses method=get
        'input[name="email"]',
        'input[name="password"]',
    ),
]

# Form sinks submitted through real navigation, so the browser itself follows
# the server's 302. Row layout:
#   (form page path, {field name: value to enter}, expected final URL path)
FORM_SINKS = [
    (
        "/signup",
        {
            "username": "synth",
            "email": "s@example.invalid",
            "password": "x",
            "password_confirm": "x",
            "agree_terms": "on",
        },
        "/",
    ),
    (
        "/comment",
        {
            "author": "anon",
            "body": "hello world",
        },
        "/comment",
    ),
    (
        "/listing",
        {
            "title": "test listing",
            "category": "misc",
            "price": "10",
        },
        "/listing",
    ),
]

# Recon-shaped "bait" paths, fetched same-origin to see how the intermediary
# treats scan-like requests. Direct (no intermediary) baseline: every path
# answers 200.
OSINT_PATHS = [
    "/admin",
    "/wp-login.php",
    "/server-status",
    "/phpmyadmin",
    "/.env",
    "/.git/config",
]

# Paths that robots.txt disallows. Direct baseline is 200; an intermediary
# that honours robots.txt blocks these fetches.
PRIVATE_PATHS = [
    "/private",
    "/private/data.json",
    "/private/secret",
]

# Diagnostic endpoints to request via fetch(). Row layout:
#   (human-readable label, URL path including any query string, expected status)
DIAG_FETCHES = [
    # Echo endpoints.
    ("echo headers", "/api/echo/headers", 200),
    ("echo ip", "/api/echo/ip", 200),
    ("echo user-agent", "/api/echo/user-agent", 200),
    ("echo get + query", "/api/echo/get?foo=bar&n=42", 200),
    # Status endpoints: the expected status equals the code in the path.
    ("status 200", "/api/status/200", 200),
    ("status 418", "/api/status/418", 418),
    ("status 500", "/api/status/500", 500),
    ("status 599", "/api/status/599", 599),
    # Redirect, delay, health and failure endpoints.
    ("redirect 0", "/api/redirect/0", 200),
    ("delay 0", "/api/delay/0", 200),
    ("health", "/api/health", 200),
    ("fail", "/api/fail", 500),
]


def main():
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--base", default=DEFAULT_BASE, help=f"Base URL (default {DEFAULT_BASE})")
    parser.add_argument("--proxy", help="HTTP/HTTPS proxy URL (e.g. http://user:pass@host:port)")
    parser.add_argument("--cdp", help="CDP wss:// URL — connect to a remote browser instead of launching locally")
    parser.add_argument("--cacert", help="Vendor MITM root CA path. When the proxy re-signs TLS, presence of this flag enables ignore_https_errors on the browser context so the synthetic chain is accepted.")
    parser.add_argument("--headed", action="store_true", help="Show the browser (default headless)")
    parser.add_argument("--json", action="store_true", help="Emit results as JSON to stdout")
    args = parser.parse_args()

    base = args.base.rstrip("/")
    results = []

    def record(name, status, detail=""):
        """Store one probe outcome and echo it unless --json output is active.

        name:   label of the probe.
        status: result keyword; "pass", "fail" and "info" have dedicated
                markers, anything else is printed as " ?? ".
        detail: optional free text, appended after an em dash when non-empty.

        The outcome is always appended to the enclosing ``results`` list as a
        dict with the keys "name", "status" and "detail".
        """
        results.append({"name": name, "status": status, "detail": detail})
        if args.json:
            # JSON mode: nothing is printed per row.
            return
        markers = {"pass": " ok ", "fail": "FAIL", "info": "info"}
        tag = markers.get(status, " ?? ")
        suffix = f" — {detail}" if detail else ""
        print(f"  [{tag}] {name}{suffix}")

    def section(title):
        """Print a "## <title>" heading preceded by a blank line.

        Prints nothing when --json output is active.
        """
        if args.json:
            return
        print(f"\n## {title}")

    def password_block_detail(page):
        """Try to extract an explicit block message after a password-fill timeout.

        Per the original author's note, some intermediaries swallow an
        underlying 'forbidden' / 'blocked' message into a generic Playwright
        timeout when page.fill() targets a password input, and assigning the
        value directly through evaluate often raises that message explicitly.

        The snippet assigns 'probe-pw' to the first input[type="password"] on
        the page; it does nothing when no such input exists.

        Returns " — " followed by the first 150 characters of the exception
        text if evaluate raises, otherwise an empty string. The result is
        meant to be appended to a probe's detail field.
        """
        try:
            page.evaluate("""
                const p = document.querySelector('input[type="password"]');
                if (p) { p.value = 'probe-pw'; }
            """)
        except Exception as exc:
            message = str(exc)
            return " — " + message[:150]
        else:
            # No error raised: no extra signal to report.
            return ""

    if not args.json:
        mode = f"CDP {args.cdp}" if args.cdp else (f"HTTP proxy {args.proxy}" if args.proxy else "direct")
        print(f"## Browser probe of {base}")
        print(f"## Mode: {mode}")

    with sync_playwright() as p:
        # Local Chromium: launch once, reuse the context across tests via
        # fresh pages. CDP mode: some remote browser endpoints leak
        # session state across navigations — a single failed page.fill()
        # on a password input can pollute the session so every subsequent
        # page.goto() also times out, multiplying one real intercept into
        # many cascading failures. Open a fresh CDP connection per test
        # to isolate. ~1s/test overhead in CDP mode.
        shared_browser = shared_ctx = None
        if not args.cdp:
            launch_kwargs = {"headless": not args.headed}
            if args.proxy:
                launch_kwargs["proxy"] = {"server": args.proxy}
            shared_browser = p.chromium.launch(**launch_kwargs)
            context_kwargs = {}
            if args.cacert:
                context_kwargs["ignore_https_errors"] = True
            shared_ctx = shared_browser.new_context(**context_kwargs)

        @contextmanager
        def test_session():
            """Yield a ``(context, page)`` pair scoped to a single probe.

            CDP mode (--cdp given): opens a fresh CDP connection, reuses the
            remote browser's first existing context (or creates one if there
            is none), opens a new page in it, and closes the connection when
            the ``with`` block exits.

            Local mode: opens a new page in the shared context created at
            startup and closes only that page on exit.

            Exceptions raised while connecting or creating the context/page
            propagate to the caller.
            """
            if args.cdp:
                browser = p.chromium.connect_over_cdp(args.cdp)
                try:
                    # Acquire context and page inside the try so that a
                    # failure in either still closes the CDP connection
                    # instead of leaking it.
                    if browser.contexts:
                        ctx = browser.contexts[0]
                    else:
                        # Honour --cacert here as well: its help text promises
                        # ignore_https_errors on the browser context, which
                        # the local-launch path already applies.
                        new_ctx_kwargs = {"ignore_https_errors": True} if args.cacert else {}
                        ctx = browser.new_context(**new_ctx_kwargs)
                    page = ctx.new_page()
                    yield ctx, page
                finally:
                    browser.close()
            else:
                page = shared_ctx.new_page()
                try:
                    yield shared_ctx, page
                finally:
                    page.close()

        # =========================================================
        # 1. Cookies / SameSite via document.cookie
        # =========================================================
        section("Cookies (client-side via document.cookie)")

        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/cookies", timeout=20000)
                record("/cookies loads", "pass" if "Cookie tests" in page.content() else "fail")
            except Exception as e:
                record("/cookies loads", "fail", repr(e)[:120])

        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/cookies", timeout=20000)
                page.evaluate("setCookies({probe:'hello', visitor:'anon'}, 'Lax'); refresh();")
                shown = page.text_content("#doc-cookie") or ""
                record("plain cookie via JS visible to document.cookie",
                       "pass" if "probe=hello" in shown and "visitor=anon" in shown else "fail",
                       shown[:80])
            except Exception as e:
                record("plain cookie via JS visible to document.cookie", "fail", repr(e)[:120])

        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/cookies", timeout=20000)
                page.evaluate("setCookies({strict_c:'yes'}, 'Strict'); refresh();")
                jar = {c["name"]: c for c in ctx.cookies()}
                ss_ok = jar.get("strict_c", {}).get("sameSite") == "Strict"
                record("SameSite=Strict attribute reaches the jar", "pass" if ss_ok else "fail",
                       f"strict_c.sameSite={jar.get('strict_c', {}).get('sameSite')}")
            except Exception as e:
                record("SameSite=Strict attribute reaches the jar", "fail", repr(e)[:120])

        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/cookies", timeout=20000)
                page.evaluate("setCookies({probe:'hello'}, 'Lax'); refresh();")
                page.evaluate("clearAll(); refresh();")
                empty = page.text_content("#doc-cookie") or ""
                record("clearAll empties document.cookie", "pass" if "probe=" not in empty else "fail")
            except Exception as e:
                record("clearAll empties document.cookie", "fail", repr(e)[:120])

        # =========================================================
        # 2. Login variants — every form shape submitted via the real browser
        # =========================================================
        section("Login variants (form fill + submit through the browser)")
        for path, email_sel, pw_sel in LOGIN_VARIANTS:
            label = f"variant {path}: form -> 302 -> /"
            step = "init"
            with test_session() as (ctx, page):
                try:
                    step = "goto"
                    page.goto(f"{base}{path}", timeout=20000)
                    step = "fill_email"
                    page.fill(email_sel, "variant@example.invalid", timeout=8000)
                    step = "fill_password"
                    page.fill(pw_sel, "synthetic-pw", timeout=8000)
                    step = "click_submit"
                    page.click('button[type="submit"]', timeout=8000)
                    step = "wait_for_redirect"
                    page.wait_for_url(f"{base}/", timeout=15000)
                    step = "verify_cookies"
                    cookies = {c["name"]: c for c in ctx.cookies()}
                    ok = "__session" in cookies and "bb_session_js" in cookies
                    record(label,
                           "pass" if ok else "fail",
                           f"cookies={[k for k in cookies.keys() if k.startswith(('__session','bb_session'))]}")
                except PWTimeoutError:
                    record(label, "fail", f"timeout at step={step}{password_block_detail(page)}")
                except Exception as e:
                    record(label, "fail", f"step={step}: {repr(e)[:100]}")

        # =========================================================
        # 2b. DOM-mutation bypasses for the password-typing block
        # =========================================================
        # If an intermediary forbids typing into <input type=password>, can
        # an attacker mutate type -> 'text' before filling? Four mutation
        # styles; each is its own probe. "pass" here = the bypass worked =
        # the intermediary failed to block (a finding against the
        # intermediary's protection). "fail" = the intermediary blocked the
        # bypass attempt = protection held.
        section("Password-type DOM mutations + fill + submit")
        MUTATIONS = [
            ("type-property-setter", "document.querySelector('input[name=password]').type='text'"),
            ("setAttribute('type','text')",
             "document.querySelector('input[name=password]').setAttribute('type','text')"),
            ("removeAttribute('type')",
             "document.querySelector('input[name=password]').removeAttribute('type')"),
            ("replace-element-with-text-input", '''
             const old = document.querySelector('input[name=password]');
             const nu = document.createElement('input');
             nu.type = 'text'; nu.name = 'password'; nu.id = 'password';
             old.parentNode.replaceChild(nu, old);
             '''),
        ]
        for mut_label, mutation_js in MUTATIONS:
            label = f"mutation '{mut_label}': flip -> fill -> 302"
            step = "init"
            with test_session() as (ctx, page):
                try:
                    step = "goto"
                    page.goto(f"{base}/login/post-form", timeout=20000)
                    step = "mutation"
                    try:
                        page.evaluate(mutation_js)
                    except Exception as me:
                        record(label, "fail", f"step=mutation: {repr(me)[:160]}")
                        continue
                    step = "verify_type"
                    new_type = page.evaluate("document.querySelector('input[name=password]').type")
                    if new_type != 'text':
                        record(label, "fail", f"flip did not take; type still {new_type!r}")
                        continue
                    step = "fill_email"
                    page.fill('input[name="email"]', "synth@example.invalid", timeout=5000)
                    step = "fill_password"
                    page.fill('input[name="password"]', "synthetic-pw", timeout=5000)
                    step = "click_submit"
                    page.click('button[type="submit"]', timeout=5000)
                    step = "wait_for_redirect"
                    page.wait_for_url(f"{base}/", timeout=15000)
                    record(label, "pass", "BYPASS — intermediary did not block at any step")
                except PWTimeoutError:
                    # Surface the BD-style "Forbidden action" error if it's actually
                    # there, by re-running the failing JS to capture the real msg.
                    bd_detail = ""
                    try:
                        page.evaluate("document.querySelector('input[type=password]')?.setAttribute('type','text')")
                    except Exception as me:
                        bd_detail = f" — {str(me)[:140]}"
                    record(label, "fail", f"timeout at step={step}{bd_detail}")
                except Exception as e:
                    record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # =========================================================
        # 2c. Submit-trigger bypasses
        # =========================================================
        # Same logic as 2b but applied to the submit step. If an intermediary
        # hooks page.click() / submit-button click events to block credential
        # submission, equivalent submission APIs may slip past: form.submit()
        # skips the submit event entirely, form.requestSubmit() is the newer
        # API, synthetic dispatchEvent forges the click, and a raw fetch(POST)
        # bypasses the form DOM entirely. Each is its own probe.
        #
        # Uses /login/post-text-pw so any password-typing block doesn't gate
        # this — we're isolating the submit-trigger surface, not the typing
        # surface.
        section("Submit trigger via alternative APIs")
        SUBMIT_TRIGGERS = [
            ("button.click()",
             "document.querySelector('button[type=submit]').click()"),
            ("form.submit() (no submit event)",
             "document.querySelector('form').submit()"),
            ("form.requestSubmit()",
             "document.querySelector('form').requestSubmit()"),
            ("dispatch synthetic click MouseEvent",
             "document.querySelector('button[type=submit]').dispatchEvent(new MouseEvent('click', {bubbles:true, cancelable:true}))"),
        ]
        for trig_label, trig_js in SUBMIT_TRIGGERS:
            label = f"submit via {trig_label}: form -> 302"
            step = "init"
            with test_session() as (ctx, page):
                try:
                    step = "goto"
                    page.goto(f"{base}/login/post-text-pw", timeout=20000)
                    step = "fill_email"
                    page.fill('input[name="email"]', "synth@example.invalid", timeout=8000)
                    step = "fill_password"
                    page.fill('input[name="password"]', "synthetic-pw", timeout=8000)
                    step = "trigger_submit"
                    page.evaluate(trig_js)
                    step = "wait_for_redirect"
                    page.wait_for_url(f"{base}/", timeout=15000)
                    record(label, "pass", "BYPASS — intermediary did not block submission")
                except PWTimeoutError:
                    record(label, "fail", f"timeout at step={step}")
                except Exception as e:
                    record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # Form-bypass: skip every form-related DOM API and just POST via fetch
        # using URLSearchParams (matches the form's default urlencoded encoding).
        # An intermediary that hooks DOM submit / click events but not fetch()
        # POSTs to login endpoints will let this through.
        label = "submit via fetch(POST URLSearchParams): -> 200 (followed)"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/post-text-pw", timeout=20000)
                step = "fill_email"
                page.fill('input[name="email"]', "synth@example.invalid", timeout=8000)
                step = "fill_password"
                page.fill('input[name="password"]', "synthetic-pw", timeout=8000)
                step = "fetch_post"
                result = page.evaluate("""
                    async () => {
                        const f = document.querySelector('form');
                        const params = new URLSearchParams();
                        for (const [k, v] of new FormData(f)) params.append(k, v);
                        const r = await fetch(f.action, {
                            method: 'POST',
                            headers: {'Content-Type': 'application/x-www-form-urlencoded'},
                            body: params.toString(),
                            redirect: 'follow',
                        });
                        return { status: r.status, finalUrl: r.url };
                    }
                """)
                # Follow-redirect lands on /; final status 200, final URL is base/.
                ok = result["status"] == 200 and result["finalUrl"].rstrip("/") == base
                record(label,
                       "pass" if ok else "fail",
                       f"BYPASS — status={result['status']}, final={result['finalUrl']}" if ok
                       else f"status={result['status']}, final={result['finalUrl']}")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # Content-type variants: intermediaries that detect credential POSTs
        # by parsing 'application/x-www-form-urlencoded' bodies may miss the
        # same payload in multipart/form-data or application/json. Probe both.
        ENCODING_VARIANTS = [
            ("fetch(POST FormData) multipart", """
                async () => {
                    const f = document.querySelector('form');
                    const fd = new FormData(f);  // fetch + FormData -> multipart/form-data
                    const r = await fetch(f.action, {method:'POST', body:fd, redirect:'follow'});
                    return { status: r.status, finalUrl: r.url };
                }
            """),
            ("fetch(POST JSON body)", """
                async () => {
                    const f = document.querySelector('form');
                    const obj = {};
                    for (const [k, v] of new FormData(f)) obj[k] = v;
                    const r = await fetch(f.action, {
                        method:'POST',
                        headers:{'Content-Type':'application/json'},
                        body: JSON.stringify(obj),
                        redirect:'follow',
                    });
                    return { status: r.status, finalUrl: r.url };
                }
            """),
        ]
        for enc_label, enc_js in ENCODING_VARIANTS:
            label = f"submit via {enc_label}: -> 200 (followed)"
            step = "init"
            with test_session() as (ctx, page):
                try:
                    step = "goto"
                    page.goto(f"{base}/login/post-text-pw", timeout=20000)
                    step = "fill_email"
                    page.fill('input[name="email"]', "synth@example.invalid", timeout=8000)
                    step = "fill_password"
                    page.fill('input[name="password"]', "synthetic-pw", timeout=8000)
                    step = "fetch_post"
                    result = page.evaluate(enc_js)
                    ok = result["status"] == 200 and result["finalUrl"].rstrip("/") == base
                    record(label,
                           "pass" if ok else "fail",
                           f"BYPASS — status={result['status']}" if ok
                           else f"status={result['status']}, final={result['finalUrl']}")
                except Exception as e:
                    record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # =========================================================
        # 2d. Cross-realm (iframe.contentWindow) bypasses
        # =========================================================
        # Meta-bypass: intermediary hooks usually only patch the top-frame
        # realm's globals/prototypes (via evaluateOnNewDocument / CDP
        # injection). A same-origin <iframe> has its own pristine realm —
        # its HTMLInputElement.prototype setters, HTMLFormElement.submit,
        # fetch, etc. are unhooked. Calling those on parent-realm objects
        # (setter.call(parentEl, ...) or submit.call(parentForm)) executes
        # the unhooked impl. References:
        #   https://canalun.company/posts/domdomtimes_iframe_is_not_secure_escape_hatch_en
        #   https://github.com/berstend/puppeteer-extra/issues/60
        section("Cross-realm iframe bypasses (meta-bypass for top-frame DOM hooks)")

        # 2d.1: type-attribute flip via iframe-realm setter (vs 2b's
        # top-realm setter row, which BD blocks). If the intermediary only
        # patched the top realm, this slips through.
        label = "iframe-realm type setter (flip type=text) -> fill -> 302"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/post-form", timeout=20000)
                step = "iframe_flip_type"
                new_type = page.evaluate("""
                    () => {
                        const f = document.createElement('iframe');
                        f.style.display = 'none';
                        document.body.appendChild(f);
                        const setter = Object.getOwnPropertyDescriptor(
                            f.contentWindow.HTMLInputElement.prototype, 'type').set;
                        const el = document.querySelector('input[name=password]');
                        setter.call(el, 'text');
                        f.remove();
                        return el.type;
                    }
                """)
                if new_type != 'text':
                    record(label, "fail", f"flip did not take; type still {new_type!r}")
                else:
                    step = "fill_email"
                    page.fill('input[name="email"]', "synth@example.invalid", timeout=5000)
                    step = "fill_password"
                    page.fill('input[name="password"]', "synthetic-pw", timeout=5000)
                    step = "click_submit"
                    page.click('button[type="submit"]', timeout=5000)
                    step = "wait_for_redirect"
                    page.wait_for_url(f"{base}/", timeout=15000)
                    record(label, "pass", "BYPASS — iframe-realm setter unhooked")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # 2d.2: input value set entirely via iframe-realm setter — no
        # page.fill() / typing at all. Bypasses any hook on value setting,
        # including CDP Input.insertText interception. Uses post-text-pw so
        # any password-type hook is also off the table.
        label = "iframe-realm value setter (no typing) -> form.submit -> 302"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/post-text-pw", timeout=20000)
                step = "iframe_set_values_and_submit"
                page.evaluate("""
                    () => {
                        const f = document.createElement('iframe');
                        f.style.display = 'none';
                        document.body.appendChild(f);
                        const valSet = Object.getOwnPropertyDescriptor(
                            f.contentWindow.HTMLInputElement.prototype, 'value').set;
                        valSet.call(document.querySelector('input[name=email]'),
                                    'synth@example.invalid');
                        valSet.call(document.querySelector('input[name=password]'),
                                    'synthetic-pw');
                        // Use top-realm form.submit here — purpose of this probe
                        // is the value-set bypass; submit-trigger bypass is 2d.3.
                        document.querySelector('form').submit();
                        f.remove();
                    }
                """)
                step = "wait_for_redirect"
                page.wait_for_url(f"{base}/", timeout=15000)
                record(label, "pass", "BYPASS — iframe-realm value setter unhooked")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # 2d.3: form submission via iframe-realm HTMLFormElement.prototype.submit
        # called on the parent form. Distinct hook surface from top-realm
        # form.submit (which 2c covers).
        label = "iframe-realm form.submit on parent form -> 302"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/post-text-pw", timeout=20000)
                step = "fill_email"
                page.fill('input[name="email"]', "synth@example.invalid", timeout=8000)
                step = "fill_password"
                page.fill('input[name="password"]', "synthetic-pw", timeout=8000)
                step = "iframe_submit"
                page.evaluate("""
                    () => {
                        const f = document.createElement('iframe');
                        f.style.display = 'none';
                        document.body.appendChild(f);
                        const cleanSubmit = f.contentWindow.HTMLFormElement.prototype.submit;
                        cleanSubmit.call(document.querySelector('form'));
                        f.remove();
                    }
                """)
                step = "wait_for_redirect"
                page.wait_for_url(f"{base}/", timeout=15000)
                record(label, "pass", "BYPASS — iframe-realm form.submit unhooked")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:140]}")

        # HttpOnly invisibility — needs cookies from a successful login. Do
        # the login inline (multi-turn test) using the type=text password
        # variant, which is the variant most likely to clear intermediaries
        # that block value-assignment to type=password inputs.
        label = "HttpOnly __session hidden from document.cookie"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/post-text-pw", timeout=20000)
                step = "fill_email"
                page.fill('input[name="email"]', "ho@example.invalid", timeout=8000)
                step = "fill_password"
                page.fill('input[name="password"]', "synthetic-pw", timeout=8000)
                step = "click_submit"
                page.click('button[type="submit"]', timeout=8000)
                step = "wait_for_redirect"
                page.wait_for_url(f"{base}/", timeout=15000)
                step = "verify"
                cookies = {c["name"]: c for c in ctx.cookies()}
                doc_cookie = page.evaluate("document.cookie")
                ho_invisible = (
                    cookies.get("__session", {}).get("httpOnly") is True
                    and "__session=" not in doc_cookie
                    and "bb_session_js=" in doc_cookie
                )
                record(label,
                       "pass" if ho_invisible else "fail",
                       doc_cookie[:80])
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}{password_block_detail(page)}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:120]}")

        # XHR login: fetch({redirect: manual}) observes the 302 directly
        label = "xhr login fetch sees 302"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/xhr", timeout=20000)
                step = "fill_email"
                page.fill("#x-email", "xhr@example.invalid", timeout=8000)
                step = "fill_pw"
                page.fill("#x-pw", "synthetic-pw", timeout=8000)
                step = "click+wait_response"
                with page.expect_response(
                    lambda r: r.url.endswith("/login/post") and r.request.method == "POST",
                    timeout=15000,
                ) as info:
                    page.click("#x-submit", timeout=8000)
                resp = info.value
                record(label, "pass" if resp.status == 302 else "fail", f"status={resp.status}")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}{password_block_detail(page)}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:100]}")

        # Multi-step nav: email page -> password page
        label = "multistep step1 -> step2"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/login/multistep", timeout=20000)
                step = "fill_email"
                page.fill('input[name="email"]', "step1@example.invalid", timeout=8000)
                step = "click_submit"
                page.click('button[type="submit"]', timeout=8000)
                step = "wait_for_step2_url"
                page.wait_for_url("**/login/multistep-step2*", timeout=15000)
                step = "locate_password_input"
                has_pw = page.locator('input[type="password"]').count() == 1
                record(label, "pass" if has_pw else "fail")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:100]}")

        # =========================================================
        # 3. Other form sinks (signup, comment, listing) — browser navigation
        # =========================================================
        section("Form sinks (browser navigation, 302 follow)")
        for form_path, fields, expected_path in FORM_SINKS:
            label = f"{form_path}: form -> 302 -> {expected_path}"
            step = "init"
            with test_session() as (ctx, page):
                try:
                    step = "goto"
                    page.goto(f"{base}{form_path}", timeout=20000)
                    for field_name, value in fields.items():
                        step = f"fill_{field_name}"
                        # Use [name=...] selector; handle checkbox separately.
                        el = page.locator(f'[name="{field_name}"]').first
                        tag_type = el.evaluate("e => (e.type || e.tagName).toLowerCase()")
                        if tag_type == "checkbox":
                            el.check(timeout=8000)
                        else:
                            el.fill(str(value), timeout=8000)
                    step = "click_submit"
                    page.click('button[type="submit"]', timeout=8000)
                    step = "wait_for_redirect"
                    page.wait_for_url(f"{base}{expected_path}", timeout=15000)
                    record(label, "pass")
                except PWTimeoutError:
                    record(label, "fail", f"timeout at step={step}{password_block_detail(page)}")
                except Exception as e:
                    record(label, "fail", f"step={step}: {repr(e)[:100]}")

        # Ad-click — same-origin navigation to /ad/click (a GET that 302s to /)
        label = "/ad/click navigation -> 302 -> /"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/ad", timeout=20000)
                step = "click_ad"
                page.click('#ad-click', timeout=8000)
                step = "wait_for_redirect"
                page.wait_for_url(f"{base}/", timeout=15000)
                record(label, "pass")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:100]}")

        # Engagement like (AJAX) via the page's button
        label = "/engagement/like AJAX -> {liked:true}"
        step = "init"
        with test_session() as (ctx, page):
            try:
                step = "goto"
                page.goto(f"{base}/engagement", timeout=20000)
                step = "click_like"
                with page.expect_response(
                    lambda r: r.url.endswith("/engagement/like"), timeout=15000,
                ) as info:
                    page.click("#like-btn", timeout=8000)
                    step = "expect_response"
                r = info.value
                body = r.json() if r.headers.get("content-type", "").startswith("application/json") else {}
                ok = r.status == 200 and body.get("liked") is True
                record(label, "pass" if ok else "fail", f"status={r.status}, body={body}")
            except PWTimeoutError:
                record(label, "fail", f"timeout at step={step}")
            except Exception as e:
                record(label, "fail", f"step={step}: {repr(e)[:100]}")

        # =========================================================
        # 4. Diagnostic endpoints via browser fetch()
        # =========================================================
        section("Diagnostic endpoints via browser fetch()")
        for diag_label, url, expected_status in DIAG_FETCHES:
            with test_session() as (ctx, page):
                try:
                    page.goto(f"{base}/", timeout=20000)  # same-origin for fetch
                    got = page.evaluate(f"""
                        async () => {{
                            const r = await fetch('{url}', {{ redirect: 'manual' }});
                            return r.status;
                        }}
                    """)
                    record(f"fetch {diag_label}", "pass" if got == expected_status else "fail",
                           f"got {got}, expected {expected_status}")
                except Exception as e:
                    record(f"fetch {diag_label}", "fail", repr(e)[:120])

        # Redirect chain via fetch (follow mode) — should end at 200 after 3 hops
        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/", timeout=20000)
                r = page.evaluate("""
                    async () => {
                        const resp = await fetch('/api/redirect/3');
                        return { status: resp.status, body: await resp.text() };
                    }
                """)
                ok = r["status"] == 200 and "done" in r["body"]
                record("fetch /api/redirect/3 (follow)", "pass" if ok else "fail", f"status={r['status']}")
            except Exception as e:
                record("fetch /api/redirect/3 (follow)", "fail", repr(e)[:120])

        # =========================================================
        # 5. Volumetric / ratecap from a real browser fetch
        # =========================================================
        section("Volumetric (real 429s)")
        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/", timeout=20000)
                res = page.evaluate("""
                    async () => {
                        const r = await fetch('/api/ratecap?fail_pct=100');
                        return { status: r.status, retryAfter: r.headers.get('retry-after') };
                    }
                """)
                ok = res["status"] == 429 and res["retryAfter"] == "30"
                record("ratecap fail_pct=100 -> 429 + Retry-After",
                       "pass" if ok else "fail",
                       f"status={res['status']}, retry-after={res['retryAfter']}")
            except Exception as e:
                record("ratecap fail_pct=100 -> 429 + Retry-After", "fail", repr(e)[:120])

        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/", timeout=20000)
                samples = page.evaluate("""
                    async () => {
                        const out = [];
                        for (let i = 0; i < 20; i++) {
                            const r = await fetch('/api/ratecap?fail_pct=50');
                            out.push(r.status);
                        }
                        return out;
                    }
                """)
                mix = set(samples)
                record("ratecap fail_pct=50 mixes 200/429",
                       "pass" if mix == {200, 429} else "info",
                       f"sample={samples}")
            except Exception as e:
                record("ratecap fail_pct=50 mixes 200/429", "fail", repr(e)[:120])

        # =========================================================
        # 6. OSINT bait via browser fetch
        # =========================================================
        section("OSINT bait via browser fetch (direct: all 200)")
        for osint_path in OSINT_PATHS:
            with test_session() as (ctx, page):
                try:
                    page.goto(f"{base}/", timeout=20000)
                    status = page.evaluate(f"""
                        async () => {{
                            try {{
                                const r = await fetch({json.dumps(osint_path)}, {{ redirect: 'manual' }});
                                return r.status;
                            }} catch (e) {{
                                return 'ERR:' + e.message;
                            }}
                        }}
                    """)
                    ok = status == 200
                    record(f"fetch {osint_path}",
                           "pass" if ok else "info" if isinstance(status, int) else "fail",
                           f"status={status}")
                except Exception as e:
                    record(f"fetch {osint_path}", "fail", repr(e)[:120])

        # =========================================================
        # 7. Robots.txt-disallowed /private/*
        # =========================================================
        section("robots.txt enforcement (direct: 200; through robots-respecting intermediary: blocked)")
        for private_path in PRIVATE_PATHS:
            with test_session() as (ctx, page):
                try:
                    page.goto(f"{base}/", timeout=20000)
                    status = page.evaluate(f"""
                        async () => {{
                            try {{
                                const resp = await fetch({json.dumps(private_path)}, {{ redirect: 'manual' }});
                                return resp.status;
                            }} catch (e) {{
                                return 'ERR:' + e.message;
                            }}
                        }}
                    """)
                    if status == 200:
                        record(f"fetch {private_path}", "pass", "200 (direct baseline)")
                    else:
                        record(f"fetch {private_path}", "info", f"{status} (intermediary may be enforcing robots.txt)")
                except Exception as e:
                    record(f"fetch {private_path}", "fail", repr(e)[:120])

        # =========================================================
        # 7b. Path-fetch via alternative request APIs
        # =========================================================
        # A path-blocking intermediary (robots-respecting, OSINT-shape
        # detector) most likely hooks fetch() and main-frame navigation.
        # Less-obvious request issuers may slip through. For each target,
        # probe the same path via three alternative APIs:
        #   - XMLHttpRequest   (older XHR; separate hook surface from fetch)
        #   - sendBeacon       (POST-only fire-and-forget; often unmonitored)
        #   - dynamic import() (module loader; issues a network request even
        #                       when the response isn't valid JS)
        #
        # "pass" = request reached the server (intermediary did not block at
        # the API layer). The server's eventual HTTP status doesn't matter
        # here — we're mapping which request-issuing APIs are instrumented.
        # On direct baseline all six should pass.
        section("Path access via alternative request APIs (direct: all reach server)")
        PATH_BYPASS_TARGETS = [
            "/private/data.json",  # robots.txt-disallowed
            "/admin",              # OSINT-shaped recon target
        ]
        PATH_BYPASS_APIS = [
            ("XMLHttpRequest GET", """
                (url) => new Promise(resolve => {
                    const x = new XMLHttpRequest();
                    try { x.open('GET', url, true); } catch(e) { return resolve('open-threw:' + e.message); }
                    x.timeout = 5000;
                    x.onload = () => resolve('status=' + x.status);
                    x.onerror = () => resolve('onerror');
                    x.ontimeout = () => resolve('ontimeout');
                    try { x.send(); } catch(e) { resolve('send-threw:' + e.message); }
                })
            """),
            ("navigator.sendBeacon (POST)", """
                (url) => {
                    try {
                        return navigator.sendBeacon(url, 'probe') ? 'queued' : 'rejected';
                    } catch(e) { return 'threw:' + e.message; }
                }
            """),
            ("dynamic import()", """
                async (url) => {
                    try {
                        await import(url);
                        return 'imported';
                    } catch(e) {
                        // MIME / module-syntax errors are expected when the
                        // response isn't valid JS — the request still went out,
                        // which is what we're measuring. NetworkError /
                        // intermediary-blocked errors look different.
                        const m = (e.message || String(e)).toLowerCase();
                        if (/mime|module|syntax|unexpected token|failed to parse/i.test(m))
                            return 'request-issued (parse/MIME error)';
                        return 'error:' + (e.message || String(e)).slice(0, 100);
                    }
                }
            """),
        ]
        for target in PATH_BYPASS_TARGETS:
            for api_label, api_js in PATH_BYPASS_APIS:
                label = f"{target} via {api_label}"
                with test_session() as (ctx, page):
                    try:
                        page.goto(f"{base}/", timeout=20000)
                        result = page.evaluate(f"({api_js})({json.dumps(target)})")
                        # "request reached server" markers: status=, queued,
                        # imported, request-issued. Anything else (onerror,
                        # rejected, threw, error:, ontimeout) suggests the
                        # intermediary blocked at the API layer.
                        s = str(result)
                        reached = s.startswith(("status=", "queued", "imported", "request-issued"))
                        record(label,
                               "pass" if reached else "info",
                               f"result={result}")
                    except Exception as e:
                        record(label, "fail", repr(e)[:140])

        # =========================================================
        # 7c. Cross-realm path access (iframe + Worker)
        # =========================================================
        # Same iframe-realm meta-bypass as 2d, applied to path-blocking:
        #   - iframe.contentWindow.fetch  — top-frame fetch hook doesn't catch
        #   - new Worker(blob)            — WorkerGlobalScope is a separate
        #                                   realm; CDP page-injection scripts
        #                                   don't auto-propagate to workers
        #                                   (requires Target.setAutoAttach +
        #                                   worker-side injection, often missed)
        section("Cross-realm path access (iframe + Worker)")
        CROSS_REALM_FETCHERS = [
            ("iframe.contentWindow.fetch", """
                async (url) => {
                    const f = document.createElement('iframe');
                    f.style.display = 'none';
                    document.body.appendChild(f);
                    try {
                        const r = await f.contentWindow.fetch(url, {redirect:'manual'});
                        return 'status=' + r.status;
                    } catch (e) {
                        return 'error:' + (e.message || String(e)).slice(0,100);
                    } finally {
                        f.remove();
                    }
                }
            """),
            ("Worker (Blob) fetch", """
                (url) => new Promise((resolve) => {
                    // Blob-URL Workers have no HTTP base, so relative URLs
                    // fail to parse inside the worker. Absolutize first.
                    const absUrl = new URL(url, location.href).href;
                    const js = `
                        fetch(${JSON.stringify(absUrl)}, {redirect:'manual'})
                            .then(r => postMessage('status=' + r.status))
                            .catch(e => postMessage('error:' + (e.message || String(e)).slice(0,100)));
                    `;
                    const blob = new Blob([js], {type:'application/javascript'});
                    const w = new Worker(URL.createObjectURL(blob));
                    const timer = setTimeout(() => { try{w.terminate()}catch(_){}; resolve('timeout'); }, 8000);
                    w.onmessage = e => { clearTimeout(timer); w.terminate(); resolve(e.data); };
                    w.onerror = e => { clearTimeout(timer); w.terminate(); resolve('worker-error:' + (e.message || 'unknown')); };
                })
            """),
        ]
        for target in PATH_BYPASS_TARGETS:
            for realm_label, realm_js in CROSS_REALM_FETCHERS:
                label = f"{target} via {realm_label}"
                with test_session() as (ctx, page):
                    try:
                        page.goto(f"{base}/", timeout=20000)
                        result = page.evaluate(f"({realm_js})({json.dumps(target)})")
                        reached = str(result).startswith("status=")
                        record(label, "pass" if reached else "info", f"result={result}")
                    except Exception as e:
                        record(label, "fail", repr(e)[:140])

        # =========================================================
        # 7d. URL obfuscation against static path matchers
        # =========================================================
        # If an intermediary string-matches '/admin' (or '/private/') in the
        # URL argument passed to fetch/XHR, simple obfuscations that the
        # browser/server canonicalize transparently may bypass the match.
        # Direct baseline reaches the server for all variants below (200,
        # 301, or even 404 — anything that returns an int status counts as
        # "JS-level filter did not block"). References:
        #   https://portswigger.net/research/introducing-the-url-validation-bypass-cheat-sheet
        section("URL obfuscation against static path filters (target: /admin)")
        OBFUSCATIONS = [
            ("/%61dmin",   "percent-encoded 'a' (server decodes; matcher sees raw string)"),
            ("/Admin",     "case variant (case-sensitive matcher misses)"),
            ("/admin/",    "trailing slash"),
            ("/admin/./",  "embedded dot-segment (browser normalizes to /admin/)"),
        ]
        for url, why in OBFUSCATIONS:
            label = f"fetch({url!r})  [{why}]"
            with test_session() as (ctx, page):
                try:
                    page.goto(f"{base}/", timeout=20000)
                    status = page.evaluate(f"""
                        async () => {{
                            try {{
                                const r = await fetch({json.dumps(url)}, {{redirect:'manual'}});
                                // status=0 with redirect:manual means the server
                                // returned a 3xx (opaque redirect) — request did
                                // reach the server. Annotate for clarity.
                                return r.status === 0 && r.type === 'opaqueredirect'
                                    ? '3xx (opaque-redirect; reached server)'
                                    : r.status;
                            }} catch (e) {{
                                return 'ERR:' + (e.message || String(e)).slice(0,80);
                            }}
                        }}
                    """)
                    # Any int status or the opaque-redirect string = reached
                    # server / not blocked at JS layer. ERR = intermediary
                    # almost certainly blocked.
                    reached = isinstance(status, int) or (isinstance(status, str) and status.startswith("3xx"))
                    record(label, "pass" if reached else "info", f"status={status}")
                except Exception as e:
                    record(label, "fail", repr(e)[:140])

        # =========================================================
        # 8. Fingerprint — what does the server see this browser as?
        # =========================================================
        section("Fingerprint (informational)")
        with test_session() as (ctx, page):
            try:
                page.goto(f"{base}/fingerprint/data", timeout=20000)
                fp = json.loads(page.locator("body").text_content() or "{}")
                record("fingerprint", "info",
                       f"UA={fp.get('user_agent', '')[:60]!r}, "
                       f"Sec-CH-UA={fp.get('sec_ch_ua') or '(none)'!r}, "
                       f"platform={fp.get('sec_ch_ua_platform') or '(none)'!r}, "
                       f"sec-fetch-mode={fp.get('sec_fetch_mode') or '(none)'!r}")
            except Exception as e:
                record("fingerprint", "fail", repr(e)[:120])

        if shared_browser is not None:
            shared_browser.close()

    passed = sum(1 for r in results if r["status"] == "pass")
    failed = sum(1 for r in results if r["status"] == "fail")
    info_n = sum(1 for r in results if r["status"] == "info")

    if args.json:
        json.dump({"results": results, "summary": {"pass": passed, "fail": failed, "info": info_n}},
                  sys.stdout, indent=2)
        print()
    else:
        print(f"\n## Summary: {len(results)} probes — {passed} pass, {failed} fail, {info_n} info")

    sys.exit(1 if failed else 0)


# Script entry point: run the probe only when this file is executed directly,
# not when it is imported. main() ends by calling sys.exit(), so the process
# exit status is 1 if any probe recorded "fail" and 0 otherwise.
if __name__ == "__main__":
    main()
