# -*- coding: utf-8 -*-
"""
Resolución de URLs de descarga: redirecciones HTTP y meta-refresh.

Muchas páginas usan redirecciones o <meta http-equiv="refresh"> antes de
servir el binario. Este módulo sigue esas cadenas hasta la URL final.
"""

import logging
import re
from html import unescape
from urllib.parse import urljoin, urlparse

import requests

from . import config


def _buscar_meta_refresh(html_content, base_url):
    """
    Find the target of a ``<meta http-equiv="refresh">`` tag in an HTML snippet.

    Each ``<meta ...>`` tag is inspected on its own, so ``http-equiv`` and
    ``content`` may appear in either order and may be mixed with other
    attributes. Attribute quotes are optional, whitespace around ``=`` is
    tolerated, and the delay may be fractional. Matching is case-insensitive.

    Args:
        html_content: HTML text to scan (may be a truncated prefix of a page).
        base_url: URL the HTML came from; relative targets are resolved
            against it with ``urljoin``.

    Returns:
        The absolute target URL of the first usable meta-refresh, or None
        when there is none (including empty/None input).
    """
    if not html_content:
        return None

    # No closing '>' is required: callers pass truncated HTML, so the tag may
    # be cut off at the end of the snippet.
    for etiqueta in re.finditer(r'<meta\b[^>]*', html_content, re.IGNORECASE):
        atributos = etiqueta.group(0)
        if not re.search(
            r'\bhttp-equiv\s*=\s*["\']?\s*refresh\b', atributos, re.IGNORECASE
        ):
            continue

        destino = re.search(
            r'\bcontent\s*=\s*["\']?\s*\d+(?:\.\d+)?\s*[;,]\s*url\s*=\s*["\']?\s*([^"\'>]+)',
            atributos,
            re.IGNORECASE,
        )
        if not destino:
            continue

        # The URL lives inside an HTML attribute, so "&amp;" and friends must
        # be decoded. Only semicolon-terminated references are decoded, so a
        # raw query parameter such as "&timestamp=1" is never mistaken for a
        # legacy entity like "&times".
        url_relativa = re.sub(
            r'&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);',
            lambda ref: unescape(ref.group(0)),
            destino.group(1),
            flags=re.IGNORECASE,
        ).strip()
        if url_relativa:
            return urljoin(base_url, url_relativa)
    return None


# Upper bound on chained meta-refresh hops, so a page that refreshes to itself
# (or an A -> B -> A cycle) cannot trigger an unbounded series of requests.
_MAX_SALTOS_META_REFRESH = 5


def _es_host_sourceforge(url):
    """Return True when the URL's host is sourceforge.net / sf.net or a subdomain of them."""
    host = (urlparse(url).hostname or "").lower()
    return any(
        host == dominio or host.endswith("." + dominio)
        for dominio in ("sourceforge.net", "sf.net")
    )


def _resolver_url_sourceforge(url, headers, max_saltos=_MAX_SALTOS_META_REFRESH):
    """
    Resolve the final URL behind a SourceForge download page (including the
    "your download will start shortly" interstitial).

    A single streamed GET is issued. If the response is not HTML, its body is
    never read and the response is returned as the final target. Otherwise
    the first 10000 characters of the page are scanned for a meta-refresh,
    and then for a direct ``downloads.sourceforge.net`` link.

    Args:
        url: SourceForge URL to resolve.
        headers: HTTP headers sent with the request.
        max_saltos: Remaining number of meta-refresh hops that may be followed.

    Returns:
        (final_url, content_type, response_headers). When a direct link is
        scraped from the page, content_type is the assumed value
        "application/octet-stream" and the headers are those of the HTML page.

    Raises:
        Whatever ``requests`` raises; ``resolver_url_descarga`` catches it.
    """
    # The context manager guarantees the streamed connection is released even
    # if reading or decoding the body raises.
    with requests.get(url, headers=headers, timeout=30, allow_redirects=True, stream=True) as resp:
        content_type = (resp.headers.get("Content-Type") or "").lower()
        final_url = resp.url
        resp_headers = resp.headers
        if "text/html" not in content_type:
            return final_url, content_type, resp_headers
        # Reuse this response's body rather than fetching the page again.
        pagina = (resp.text or "")[:10000]

    url_redirect = _buscar_meta_refresh(pagina, final_url)
    if url_redirect:
        logging.info(f"Meta-refresh detectado en SF: {url_redirect}")
        if max_saltos <= 0:
            logging.warning(
                f"Límite de meta-refresh alcanzado en {final_url}; no se sigue {url_redirect}"
            )
            return final_url, content_type, resp_headers
        return resolver_url_descarga(url_redirect, headers=headers, max_saltos=max_saltos - 1)

    # Whitespace and angle brackets are excluded so the match cannot run
    # across markup between an unrelated "http://" and the mirror host name.
    enlace = re.search(
        r'https?://[^"\'\s<>]*downloads\.sourceforge\.net[^"\'\s<>]+',
        pagina,
        re.IGNORECASE,
    )
    if enlace:
        # The link is scraped from HTML: decode semicolon-terminated entities
        # (e.g. "&amp;") so the query string is usable.
        directo = re.sub(
            r'&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);',
            lambda ref: unescape(ref.group(0)),
            enlace.group(0),
            flags=re.IGNORECASE,
        )
        return directo, "application/octet-stream", resp_headers
    return final_url, content_type, resp_headers


def resolver_url_descarga(url, headers=None, max_saltos=_MAX_SALTOS_META_REFRESH):
    """
    Resolve the final download URL by following HTTP redirects and meta-refresh.

    SourceForge hosts are delegated to ``_resolver_url_sourceforge``. Any
    other URL is probed with HEAD; only when the result is HTML is the page
    fetched with GET and its first 5000 bytes scanned for a meta-refresh.

    Args:
        url: Initial URL.
        headers: Optional HTTP headers. The mapping is copied, never mutated.
            When empty/None, ``config.DEFAULT_HEADERS`` is used.
        max_saltos: Maximum number of meta-refresh hops to follow before
            giving up and returning the current page.

    Returns:
        (final_url, content_type, response_headers). On any error the result
        is ``(url, None, None)`` and a warning is logged.
    """
    base = headers if headers else config.DEFAULT_HEADERS
    # Build a private copy so the caller's dict is left untouched, dropping
    # any existing Accept-Encoding regardless of letter case.
    headers = {k: v for k, v in base.items() if k.lower() != "accept-encoding"}
    # NOTE(review): presumably forced so servers do not answer with an
    # encoding requests cannot decode (e.g. brotli) — confirm.
    headers["Accept-Encoding"] = "gzip, deflate"
    try:
        if _es_host_sourceforge(url):
            return _resolver_url_sourceforge(url, headers, max_saltos)
        r = requests.head(url, headers=headers, timeout=30, allow_redirects=True)
        content_type = (r.headers.get("Content-Type") or "").lower()
        if "text/html" not in content_type:
            return r.url, content_type, r.headers
        r = requests.get(url, headers=headers, timeout=30, allow_redirects=True)
        content = r.content[:5000].decode("utf-8", errors="ignore")
        url_redirect = _buscar_meta_refresh(content, r.url)
        if url_redirect:
            logging.info(f"Meta-refresh detectado: {url_redirect}")
            if max_saltos <= 0:
                logging.warning(
                    f"Límite de meta-refresh alcanzado en {r.url}; no se sigue {url_redirect}"
                )
                return r.url, "text/html", r.headers
            return resolver_url_descarga(url_redirect, headers, max_saltos - 1)
        return r.url, "text/html", r.headers
    except Exception as e:
        # Deliberate best-effort boundary: callers get the original URL back
        # with no metadata instead of an exception.
        logging.warning(f"Error resolviendo URL de descarga para {url}: {e}")
        return url, None, None
