"""
YakSpider — a tiny reference Python client (stdlib only, no dependencies).

Usage:
    from yakspider import YakSpider
    ys = YakSpider("ysk_your_api_key")

    # Synchronous scrape → clean Markdown
    page = ys.scrape("https://example.com", format="markdown")
    print(page["result"]["content"])

    # Batch (async) then poll for results
    run = ys.batch(["https://a.com/1", "https://a.com/2"], tier="t2")
    result = ys.wait(run["run_id"])

    # Screenshot (returns base64)
    shot = ys.screenshot("https://example.com")

Generated/maintained alongside the OpenAPI spec at /openapi.yaml.
"""
from __future__ import annotations

import json
import time
import urllib.error
import urllib.request

# Base URL that endpoint paths are appended to; the default value of
# YakSpider's ``base_url`` constructor argument.
DEFAULT_BASE = "https://yakspider.com/api/v1"


class YakSpiderError(Exception):
    """Error carrying an HTTP status code and the decoded error payload.

    Attributes:
        status: The status code passed to the constructor.
        body: The error payload, stored exactly as passed in.

    The exception message is ``"YakSpider <status>: <detail>"`` where
    ``<detail>`` is ``body["errors"]["detail"]`` when the payload has that
    shape, and the whole body otherwise.
    """

    def __init__(self, status: int, body: dict):
        self.status = status
        self.body = body
        # An error payload is not guaranteed to look like
        # {"errors": {"detail": ...}}: it may be a list, a string, None, or
        # carry a non-dict "errors" member. Building the message must never
        # raise itself, or the original HTTP failure would be masked by an
        # AttributeError, so anything unexpected falls back to the whole body.
        detail = body
        if isinstance(body, dict):
            errors = body.get("errors")
            if isinstance(errors, dict):
                detail = errors.get("detail", body)
        super().__init__(f"YakSpider {status}: {detail}")


class YakSpider:
    """Small HTTP client built on ``urllib`` only.

    Every request sends the API key in the ``X-API-Key`` header. The public
    endpoint methods return the ``"data"`` member of the decoded JSON
    response. An HTTP error status is raised as ``YakSpiderError``.
    """

    def __init__(self, api_key: str, base_url: str = DEFAULT_BASE, timeout: int = 60):
        """Store the credentials and connection settings.

        Args:
            api_key: Value sent in the ``X-API-Key`` header of every request.
            base_url: URL prefix for all endpoint paths; trailing slashes are
                stripped so paths can always be appended with a leading ``/``.
            timeout: Timeout in seconds passed to ``urlopen`` for each request.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    # --- endpoints --------------------------------------------------------

    def scrape(self, url: str, tier: str = "t1", format: str = "html",
               extract: list | None = None, session: str | None = None) -> dict:
        """Synchronous single-URL fetch (tiers t1/t2). Returns the result envelope.

        ``extract`` and ``session`` are only included in the request body
        when they are truthy.
        """
        body = {"url": url, "tier": tier, "format": format}
        if extract:
            body["extract"] = extract
        if session:
            body["session"] = session
        return self._post("/scrape", body)["data"]

    def run_spider(self, spider_id: int) -> dict:
        """Start a saved spider's run. Returns {run_id, status}."""
        return self._post("/scrape", {"spider_id": spider_id})["data"]

    def batch(self, urls: list[str], tier: str = "t1", extract: list | None = None) -> dict:
        """Start an async run over many URLs. Returns {run_id, status, urls}.

        ``extract`` is only included in the request body when it is truthy.
        """
        body = {"urls": urls, "tier": tier}
        if extract:
            body["extract"] = extract
        return self._post("/batch", body)["data"]

    def screenshot(self, url: str, format: str = "png", full_page: bool = False) -> dict:
        """Render a page and return {result: {image_b64, ...}}."""
        return self._post("/screenshot", {"url": url, "format": format, "full_page": full_page})["data"]

    def get_run(self, run_id: int) -> dict:
        """Fetch a run's status and results envelope."""
        return self._get(f"/scrape/{run_id}")["data"]

    def wait(self, run_id: int, interval: float = 2.0, max_wait: float = 300.0) -> dict:
        """Poll get_run until it reaches a terminal status (or max_wait).

        Args:
            run_id: Identifier of the run to poll.
            interval: Seconds to sleep between polls.
            max_wait: Wall-clock budget in seconds, measured from the start
                of this call and including the time spent in requests.

        Returns:
            The most recently fetched run envelope. Its status may be
            non-terminal when the budget ran out first. The run is always
            fetched at least once, even with ``max_wait=0``.
        """
        terminal = {"succeeded", "failed", "partial", "canceled"}
        # A monotonic deadline counts request latency against the budget and
        # guarantees the loop ends even when ``interval`` is 0.
        deadline = time.monotonic() + max_wait
        while True:
            data = self.get_run(run_id)
            if data.get("status") in terminal:
                return data
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return data
            # Never sleep past the deadline; the final poll happens right
            # when the budget expires.
            time.sleep(min(interval, remaining))

    # --- transport --------------------------------------------------------

    def _post(self, path: str, body: dict) -> dict:
        """Send ``body`` as JSON in a POST to ``path``; return the decoded response."""
        return self._request("POST", path, body)

    def _get(self, path: str) -> dict:
        """Send a GET to ``path``; return the decoded response."""
        return self._request("GET", path, None)

    def _request(self, method: str, path: str, body: dict | None) -> dict:
        """Perform one HTTP request and decode the JSON response.

        Args:
            method: HTTP method name.
            path: Endpoint path appended to ``self.base_url``.
            body: JSON-serialisable payload, or ``None`` for a bodyless request.

        Returns:
            The decoded JSON response; an empty response body decodes to ``{}``.

        Raises:
            YakSpiderError: The server answered with an HTTP error status.
                Other ``urllib`` errors and JSON decoding errors on a
                successful response are not caught here.
        """
        data = json.dumps(body).encode() if body is not None else None
        req = urllib.request.Request(self.base_url + path, data=data, method=method)
        req.add_header("X-API-Key", self.api_key)
        if data is not None:
            req.add_header("Content-Type", "application/json")
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                return json.loads(resp.read() or "{}")
        except urllib.error.HTTPError as e:
            try:
                payload = json.loads(e.read() or "{}")
            except Exception:
                # Deliberate best-effort: an unreadable or non-JSON error body
                # must not hide the HTTP status, so report an empty payload.
                payload = {}
            if not isinstance(payload, dict):
                # YakSpiderError expects a dict; wrap list/string/null bodies
                # so raising it cannot fail with an AttributeError.
                payload = {"errors": {"detail": payload}}
            raise YakSpiderError(e.code, payload) from None
