import codecs
from urllib.parse import unquote, urljoin
import requests
from bs4 import BeautifulSoup
import re
import wget
import os
import logging
import datetime

# Configure the log level and format.
# The file handler is created explicitly so the log encoding is pinned to
# UTF-8 instead of depending on the locale the script happens to run under
# (messages and the run header contain non-ASCII characters).
LOG_FILE = '/var/log/estandares.log'
logging.basicConfig(
    handlers=[logging.FileHandler(LOG_FILE, encoding='utf-8')],
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Current date, formatted as day-month-year
current_date = datetime.datetime.now().strftime("%d-%m-%Y")

# Header line that marks the start of this run
header = f"=== Ejecución Actualizador {current_date} ==="

# Append the header to the log file. It is written directly (not through
# logging) so the line carries no timestamp/level prefix; the encoding
# matches the one used by the logging handler above.
with open(LOG_FILE, 'a', encoding='utf-8') as f:
    f.write(header + '\n')

# Extract the version text from a file name using a regular expression
def obtener_version(pattern, filename):
    """Return the first substring of *filename* matched by *pattern*.

    Args:
        pattern: Regular expression describing the version text.
        filename: Text to search (typically a file name).

    Returns:
        The whole matched text (group 0), or None when nothing matches.
    """
    hallazgo = re.compile(pattern).search(filename)
    return None if hallazgo is None else hallazgo.group(0)

# Fetch a page and return the absolute URL of the first <a> tag matching a filter
def obtener_enlace_descarga(url, filtro):
    """Return the absolute download URL found on the page at *url*.

    Args:
        url: Address of the page to fetch.
        filtro: Dict of keyword arguments forwarded to BeautifulSoup's
            ``find`` to select the ``<a>`` tag (e.g. ``{'id': ...}`` or
            ``{'href': callable}``).

    Returns:
        The absolute URL taken from the matching anchor's ``href``, or None
        when the filter is not a dict, the request fails, no anchor matches,
        or the anchor has no ``href``.
    """
    # Anything that is not a mapping (several entries use "") cannot be
    # unpacked with ** and used to abort the whole run with a TypeError.
    # NOTE(review): entries with an empty filter look like direct-download
    # URLs; confirm whether they should resolve to the final response URL.
    if not isinstance(filtro, dict):
        logging.error(f"Filtro no válido para {url}: se esperaba un diccionario y se recibió {filtro!r}")
        return None

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }
    try:
        # The timeout keeps an unresponsive server from hanging the run.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # Raises for 4xx/5xx responses
    except requests.RequestException as e:
        logging.error(f"Error al obtener el enlace de descarga: {e}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    link = soup.find('a', **filtro)
    if link is None:
        return None

    # Filters that do not select on href (e.g. by id) may match an anchor
    # that has no href attribute at all.
    href = link.get('href')
    if not href:
        return None

    if href.startswith('//'):
        # Protocol-relative link
        return 'https:' + href
    if href.startswith('/'):
        # Host-relative link: resolve against the final (post-redirect) URL
        return urljoin(response.url, href)
    # Absolute links pass through unchanged; relative ones are resolved
    # beneath the requested URL (a trailing slash is appended to it first).
    return urljoin(url + '/', href)

# Find the link with the highest version on the page at the given URL
def encontrar_version_mas_alta(url, filtro, pattern, simbolo):
    """Locate the matching link that carries the highest version.

    Args:
        url: Address of the page to fetch.
        filtro: Dict of keyword arguments forwarded to BeautifulSoup's
            ``find_all`` to select candidate ``<a>`` tags.
        pattern: Regular expression searched in each candidate ``href``; the
            version is group 1 when the pattern defines groups, otherwise
            the whole match.
        simbolo: Separator handed to ``comparar_versiones``.

    Returns:
        The absolute URL of the winning link with the leading *url* removed
        (the whole absolute URL when it does not start with *url*), or None
        when the filter is not a dict, the request fails, or no candidate
        contains a version.
    """
    # A non-mapping filter (several entries use "") cannot be unpacked
    # with ** and used to abort the whole run with a TypeError.
    if not isinstance(filtro, dict):
        logging.error(f"Filtro no válido para {url}: se esperaba un diccionario y se recibió {filtro!r}")
        return None

    # Same browser User-Agent as obtener_enlace_descarga, so both requests
    # to the same page are served the same content.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }
    try:
        # The timeout keeps an unresponsive server from hanging the run.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        logging.error(f"Error al encontrar la versión más alta: {e}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    version_mayor = None
    parte_diferente = None
    for enlace in soup.find_all('a', **filtro):
        # Anchors selected by something other than href may lack one.
        href = enlace.get('href')
        if not href:
            continue
        match = re.search(pattern, href)
        if match is None:
            continue
        version = match.group(1) if match.lastindex is not None else match.group(0)
        if version is None:
            # Group 1 exists in the pattern but did not take part in the match.
            continue
        if version_mayor is None or comparar_versiones(version, version_mayor, simbolo) > 0:
            version_mayor = version
            # Keep only what follows the base URL.
            parte_diferente = urljoin(url, href).replace(url, "", 1)
    return parte_diferente

# Compare two version strings component by component
def comparar_versiones(version1, version2, simbolo):
    """Compare two version strings numerically.

    Args:
        version1: First version, e.g. "1.2.3".
        version2: Second version.
        simbolo: Separator between components; when empty, each version is
            treated as a single number.

    Returns:
        -1 if version1 < version2, 1 if version1 > version2, 0 if they are
        equal. Also returns 0 (after logging an error) when a component that
        has to be compared is not an integer.
    """
    try:
        partes_version1 = version1.split(simbolo) if simbolo else [version1]
        partes_version2 = version2.split(simbolo) if simbolo else [version2]

        # Pad the shorter version with zeros so "1.2" compares equal to
        # "1.2.0" and lower than "1.2.1"; without padding, a longer first
        # version ran past the end of the second one (IndexError).
        longitud = max(len(partes_version1), len(partes_version2))
        partes_version1 = partes_version1 + ["0"] * (longitud - len(partes_version1))
        partes_version2 = partes_version2 + ["0"] * (longitud - len(partes_version2))

        for parte1, parte2 in zip(partes_version1, partes_version2):
            numero1, numero2 = int(parte1), int(parte2)
            if numero1 < numero2:
                return -1
            if numero1 > numero2:
                return 1
        return 0
    except ValueError as e:
        logging.error(f"Error comparando versiones: {e}")
        return 0

# Look for an already-downloaded file of the application; returns 'primera descarga' when there is none
def buscar_archivo_a_reemplazar(version_descargada, nombre, simbolo,
                                directorio='/srv/repositorios/estandares/aplicacion/'):
    """Return the newest already-downloaded file for an application.

    A file is a candidate when its name starts with *nombre* and a version
    can be read from the rest of the name. Among the candidates, the one
    with the highest version is returned, so the caller compares the new
    version against the newest copy on disk. An up-to-date copy is therefore
    reported as the existing file, not as 'primera descarga'.

    Args:
        version_descargada: Version about to be downloaded; when empty or
            None the function returns 'primera descarga'.
        nombre: File-name prefix identifying the application.
        simbolo: Separator between version components ("" when the version
            is a single run of digits).
        directorio: Directory to scan; defaults to the downloads directory.

    Returns:
        The name of the candidate with the highest version, or the string
        'primera descarga' when there is no candidate.
    """
    if not version_descargada:
        return 'primera descarga'

    # Version text is digits joined by the application's own separator.
    if simbolo:
        patron = re.compile(r'\d+(?:' + re.escape(simbolo) + r'\d+)*')
    else:
        patron = re.compile(r'\d+')

    mejor_archivo = None
    mejor_clave = None
    for filename in os.listdir(directorio):
        if not filename.startswith(nombre):
            continue
        # Search only after the prefix so digits that belong to the
        # application name (e.g. "7Zip") are not mistaken for the version.
        match = patron.search(filename[len(nombre):])
        if match is None:
            continue
        texto = match.group(0)
        partes = texto.split(simbolo) if simbolo else [texto]
        clave = tuple(int(parte) for parte in partes)
        if mejor_clave is None or clave > mejor_clave:
            mejor_archivo, mejor_clave = filename, clave

    return mejor_archivo if mejor_archivo is not None else 'primera descarga'

# Update the download link and the displayed version in the given HTML files
def modificar_htmls(html_files, enlace, version, aplicacion):
    """Rewrite the download link and version text of one application.

    In every file the function looks for elements whose id derives from the
    lower-cased application name: ``<a id="app">`` gets a new ``href``,
    ``<span id="app_version">`` gets *version* as its text, and in
    ``<li id="app_version">`` the text node following the first ``<strong>``
    is replaced by *version*. The file is read trying several encodings in
    order and written back with the one that worked.

    Args:
        html_files: File names; the first lives under ``aplicacion/`` and
            the remaining ones under ``aplicaciones/``.
        enlace: Path appended to the public download base URL.
        version: Version text to display.
        aplicacion: Application name; its lower-case form is the id prefix.
    """
    id_base = aplicacion.lower()
    codificaciones = ["UTF-8", "iso-8859-15", "latin1", "windows-1252"]

    for index, html_file in enumerate(html_files):
        ruta = "/srv/repositorios/webnueva/aplicacion/" + html_file if index == 0 else "/srv/repositorios/webnueva/aplicaciones/" + html_file

        for codificacion in codificaciones:
            try:
                with codecs.open(ruta, "r", encoding=codificacion) as f:
                    html_content = f.read()
            except FileNotFoundError as e:
                # A missing file is not an encoding problem: report it once
                # and move on to the next file instead of retrying.
                logging.warning(f"No se encontró el archivo {html_file}: {e}")
                break
            except UnicodeDecodeError as e:
                logging.warning(f"No se pudo procesar el archivo {html_file} con la codificación {codificacion}: {e}")
                continue

            soup = BeautifulSoup(html_content, "html.parser")
            link = soup.find("a", id=id_base)
            span = soup.find('span', id=id_base + "_version")
            li = soup.find('li', id=id_base + "_version")
            if link:
                link["href"] = "https://softlibre.unizar.es/estandares/aplicacion/" + enlace
            if span:
                span.string = version
            if li:
                strong_tag = li.find('strong')
                if strong_tag:
                    texto_despues = strong_tag.find_next_sibling(text=True)
                    if texto_despues:
                        texto_despues.replace_with(" " + version)

            # The parser expands entities into real characters, which may not
            # exist in a legacy encoding. Emitting them as numeric character
            # references keeps the write from failing after the file has
            # already been truncated.
            with open(ruta, "w", encoding=codificacion, errors="xmlcharrefreplace") as f:
                f.write(str(soup))
            break

# List of downloads with the information for each application.
# Keys of every entry, as used by the code in this file:
#   "url"       page passed to obtener_enlace_descarga and encontrar_version_mas_alta
#   "nombre"    application name: prefix of the saved file name and, lower-cased,
#               the id prefix of the HTML elements that get updated
#   "filtro"    keyword arguments forwarded to BeautifulSoup find/find_all to select
#               the <a> tag; several entries hold "" instead of a dict
#               (NOTE(review): confirm how the helpers are expected to treat that)
#   "pattern"   regular expression searched in each candidate href to read the version
#   "extension" extension appended to the saved file name
#   "find_tipe" not read by the code visible in this file (purpose: TODO confirm)
#   "v_simb"    separator between version components; "" means a single number
#   "html"      HTML files to update: the first one under aplicacion/, the rest
#               under aplicaciones/
descargas = [
    {"url": "https://www.thunderbird.net/es-ES/", "nombre": "Thunderbird",
     "filtro": {'href': lambda href: href and re.search(r'os=win64&lang=es-ES', href)}, "pattern": r'\d+\.\d+\.\d+',
     "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["thunderbird.html", "internet.html"]},
    {"url": "https://download.mozilla.org/?product=firefox-latest-ssl&os=win64&lang=es-ES", "nombre": "Firefox",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["firefox.html", "internet.html"]},
    {"url": "https://filezilla-project.org/download.php?show_all=1", "nombre": "FileZilla",
     "filtro": {'href': lambda href: href and re.search(r'win64-setup.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["filezilla.html", "internet.html"]},
    {"url": "https://sourceforge.net/projects/atunes/files/latest/download", "nombre": "aTunes",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["atunes.html", "multimedia.html"]},
    {"url": "https://www.gimp.org/downloads/", "nombre": "gimp",
     "filtro": {'id': 'win-download-link'}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["gimp.html", "multimedia.html"]},
    {"url": "https://www.videolan.org/", "nombre": "vlc",
     "filtro": {'href': lambda href: href and re.search(r'win64.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["vlc.html", "multimedia.html"]},
    {"url": "https://obsproject.com/es", "nombre": "OBSStudio",
     "filtro": {'href': lambda href: href and re.search(r'x64.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["obsstudio.html", "multimedia.html"]},
    {"url": "https://sourceforge.net/projects/pidgin/files/latest/download", "nombre": "pidgin",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["pidgin.html", "utilidades.html"]},
    {"url": "https://boinc.berkeley.edu/download.php", "nombre": "boinc",
     "filtro": {'href': lambda href: href and re.search(r'64.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["boinc.html", "cientifico.html"]},
    {"url": "https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html", "nombre": "putty",
     "filtro": {'href': lambda href: href and re.search(r'w64/putty-64bit', href)}, "pattern": r'\d+\.\d+', "extension": ".msi", "find_tipe" : 0, "v_simb": ".", "html":["putty.html", "internet.html"]},
    {"url": "https://www.mirc.com/get.php", "nombre": "mIRC", "filtro": "", "pattern": r'\d+\d+\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["mirc.html", "internet.html"]},
    {"url": "https://sourceforge.net/projects/libreoffice/files/latest/download", "nombre": "LibreOffice",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".msi", "find_tipe" : 0, "v_simb": ".", "html":["libreoffice.html", "oficina.html"]},
    {"url": "https://download.pdfforge.org/download/pdfcreator/PDFCreator-stable?download", "nombre": "PDFCreator",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["pdfcreator.html", "oficina.html"]},
    {"url": "https://www.audacityteam.org/download/windows/", "nombre": "Audacity",
     "filtro": {'href': lambda href: href and re.search(r'64bit.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["audacity.html", "multimedia.html"]},
    {"url": "https://sourceforge.net/projects/imgseek/files/latest/download", "nombre": "imgSeek",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["imgseek.html", "multimedia.html"]},
    {"url": "https://www.openshot.org/es/download/", "nombre": "OpenShot",
     "filtro": {'href': lambda href: href and re.search('x86_64.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["openshot.html", "multimedia.html"]},
    {"url": "https://sourceforge.net/projects/shotcut/files/latest/download", "nombre": "Shotcut", "filtro": "", "pattern": r'\d+\d+\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["shotcut.html", "multimedia.html"]},
    {"url": "https://ftp.cixug.es/CRAN/bin/windows/base/", "nombre": "R",
     "filtro": {'href': lambda href: href and re.search(r'win.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["r.html", "cientifico.html"]},
    {"url": "https://caeis.etech.fh-augsburg.de/downloads/windows/latest-release/", "nombre": "pspp",
     "filtro": {'href': lambda href: href and re.search(r'pspp-64bit-install', href)}, "pattern": r'\d+\-\d+\-\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": "-", "html":["pspp.html", "cientifico.html"]},
    {"url": "https://www.octave.org/download", "nombre": "octave",
     "filtro": {'href': lambda href: href and re.search(r'w64-installer', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["octave.html", "cientifico.html"]},
    {"url":  "https://sourceforge.net/projects/maxima/files/latest/download", "nombre": "Maxima", "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["maxima.html", "cientifico.html"]},
    {"url": "https://sourceforge.net/projects/dia-installer/files/latest/download", "nombre": "Dia", "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["dia.html", "cientifico.html"]},
    {"url": "https://sourceforge.net/projects/ganttproject/files/latest/download", "nombre": "GanttProject",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["ganttproject.html", "cientifico.html"]},
    {"url": "https://sourceforge.net/projects/sevenzip/files/latest/download", "nombre": "7Zip", "filtro": "", "pattern": r'\d+\d+\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": "", "html":["7zip.html", "utilidades.html"]},
    {"url": "https://sourceforge.net/projects/clamwin/files/latest/download", "nombre": "ClamWin",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["clamwin.html", "utilidades.html"]},
    {"url":  "https://sourceforge.net/projects/notepadplusplus.mirror/files/latest/download", "nombre": "Notepad",
     "filtro": "", "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["notepad.html", "utilidades.html"]},
    {"url": "https://www.virtualbox.org/wiki/Downloads", "nombre": "VirtualBox",
     "filtro": {'href': lambda href: href and re.search(r'Win.exe', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["virtualbox.html", "utilidades.html"]},
    {"url": "https://sourceforge.net/projects/azureus/files/latest/download", "nombre": "vuze", "filtro": "", "pattern": r'\d+\d+\d+\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": "", "html":["vuze.html", "utilidades.html"]},
    {"url": "https://sourceforge.net/projects/infrarecorder/files/latest/download", "nombre": "InfraRecorder",
     "filtro": "", "pattern": r'\d+\d+\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": "", "html":["infrarecorder.html", "utilidades.html"]},
    {"url": "https://mirrors.sahilister.in/blender/release/", "nombre": "Blender",
      "filtro": {'href': lambda href: href and re.search(r'Blender(\d+\.\d+)', href)}, "pattern": r'(\d+\.\d+)', "extension": ".msi", "find_tipe" : 1, "v_simb": ".", "html":["blender.html", "multimedia.html"]},
    ### Disabled: resolving the URL through 'location' yields an old version
    # scribus
    #{"url": "https://sourceforge.net/projects/scribus/files/scribus/", "nombre": "scribus",
    # "filtro": {'href': lambda href: href and re.search(r'scribus/(\d+\.\d+\.\d+)', href)}, "pattern": r'(\d+\.\d+\.\d+)', "extension": ".exe", "find_tipe" : 1, "v_simb": ".", "html":["scribus.html", "oficina.html"]},
    ### Disabled: HTTP Error 403: Forbidden
    # sumatrapdf
    ## {"url": "https://www.sumatrapdfreader.org/download-free-pdf-viewer", "nombre": "Sumatra",
    ##  "filtro": {'href': lambda href: href and re.search(r'64-install', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["sumatra.html", "oficina.html"]},
    # inkscape
    {"url": "https://inkscape.org/release/all/windows/64-bit/exe/", "nombre": "inkscape",
      "filtro": {'href': lambda href: href and re.search(r'inkscape-(\d+\.\d+\.\d+)_', href)}, "pattern": r'(\d+\.\d+\.\d+)', "extension": ".exe", "find_tipe" : 2, "v_simb": ".", "html":["inkscape.html", "multimedia.html"]},   
    ### Disabled (zotero): HTTP Error 403: Forbidden
    ####{"url": "https://www.zotero.org/download/", "nombre": "zotero",
    ## "filtro": {'href': lambda href: href and re.search(r'version', href)}, "pattern": r'\d+\.\d+\.\d+', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["zotero.html", "utilidades.html"]},
    {"url": "https://ftp.osuosl.org/pub/osgeo/download/qgis/windows/", "nombre": "qgis",
     "filtro": {'href': lambda href: href and re.search(r'QGIS-OSGeo4W-(\d+\.\d+\.\d+)', href)}, "pattern": r'(\d+\.\d+\.\d+)', "extension": ".msi", "find_tipe" : 3, "v_simb": ".", "html":["qgis.html", "cientifico.html"]},
    {"url": "https://cran.rstudio.com/bin/windows/base/", "nombre": "rstudio",
     "filtro": {'href': lambda href: href and re.search(r'win.exe', href)}, "pattern": r'(\d+\.\d+\.\d+)', "extension": ".exe", "find_tipe" : 0, "v_simb": ".", "html":["rstudio.html", "cientifico.html"]},
]

# Download and HTML update logic.
# For every entry: resolve the download link, work out the newest published
# version, download the installer when it is not on disk yet and point the
# HTML pages at the file that was saved.
for descarga in descargas:
    nombre = descarga["nombre"]
    simbolo = descarga["v_simb"]

    # Resolve the download link
    enlace_descarga = obtener_enlace_descarga(descarga["url"], descarga["filtro"])
    if not enlace_descarga:
        logging.error(f"Error al obtener enlace de descarga para {nombre}.")
        continue

    # encontrar_version_mas_alta returns the part of the newest link's URL
    # that follows the base URL, not a bare version. Extract the version
    # from it so that comparisons, the saved file name and the HTML text
    # all use the real version string.
    parte_diferente = encontrar_version_mas_alta(descarga["url"], descarga["filtro"], descarga["pattern"], simbolo)
    version = obtener_version(descarga["pattern"], parte_diferente) if parte_diferente else None
    if not version:
        logging.error(f"No se encontró la versión para {nombre}.")
        continue

    archivo_nuevo = f"{nombre}{version}{descarga['extension']}"
    ruta_destino = f"/srv/repositorios/estandares/aplicacion/{archivo_nuevo}"

    # Nothing to download when this exact version is already on disk.
    if not os.path.exists(ruta_destino):
        nombre_archivo = buscar_archivo_a_reemplazar(version, nombre, simbolo)
        if nombre_archivo == 'primera descarga':
            hay_que_descargar = True
        else:
            # Read the existing version from what follows the name prefix,
            # with the same pattern used for the published version.
            version_existente = obtener_version(descarga["pattern"], nombre_archivo[len(nombre):])
            # Download unless a strictly newer version is already present
            # (an equal version under a different file name is fetched
            # again under the canonical name).
            hay_que_descargar = (
                version_existente is None
                or comparar_versiones(version, version_existente, simbolo) >= 0
            )
        if hay_que_descargar:
            try:
                logging.info(f"Descargando {nombre} versión {version} desde {enlace_descarga}")
                wget.download(enlace_descarga, ruta_destino)
            except Exception as e:
                logging.error(f"Error al descargar {nombre}: {e}")
                continue

    # Do not point the web pages at a file that does not exist.
    if not os.path.exists(ruta_destino):
        logging.info(f"{nombre}: ya existe una versión más reciente que {version}; no se modifican los HTMLs.")
        continue

    # Update the corresponding HTML files with the saved file name and version
    modificar_htmls(descarga["html"], archivo_nuevo, version, nombre)
    logging.info(f"Se actualizó {nombre} a la versión {version} en los HTMLs correspondientes.")

