# taiage/scripts/md_a_audio_con_fichero.py

import sys
import os
import re
from gtts import gTTS


def limpiar_markdown(texto: str) -> str:
    """Strip basic Markdown syntax so the text sounds natural when spoken.

    Removes fenced code blocks, heading markers, link/image wrappers
    (keeping the visible text) and bold/italic asterisks, then collapses
    all whitespace into single spaces.

    Args:
        texto: Raw Markdown source.

    Returns:
        The cleaned text on a single line, without leading or trailing
        whitespace.
    """
    # Fenced code blocks go first so nothing inside them is ever treated
    # as Markdown by the steps below.
    texto = re.sub(r'```.*?```', '', texto, flags=re.DOTALL)

    # Heading markers: only a run of '#' at the start of a line is markup.
    # A '#' elsewhere ("C#", "issue #12") is content and must be kept.
    texto = re.sub(r'^[ \t]*#+[ \t]*', '', texto, flags=re.MULTILINE)

    # Links [text](url) and images ![alt](url): keep only the visible text.
    # The optional '!' stops images from leaving a stray "!" behind.
    texto = re.sub(r'!?\[(.*?)\]\(.*?\)', r'\1', texto)

    # Bold and italic markers.
    texto = texto.replace("**", "")
    texto = texto.replace("*", "")

    # Collapse newlines and repeated blanks into single spaces so the
    # synthesized text is not padded with long runs of whitespace.
    texto = re.sub(r'\s+', ' ', texto).strip()

    return texto


def convertir_md_a_audio(md_input):
    """Convert a Markdown file into an MP3 written next to it.

    The output path is the input path with its extension replaced by
    ``.mp3``. The file is read as UTF-8, cleaned with
    ``limpiar_markdown`` and, when the cleaned text has at least 10
    characters after trimming, handed to gTTS with Spanish as the
    language and saved to the output path.

    Progress and error messages are printed. Any exception raised while
    reading, cleaning or generating the audio is caught and printed
    rather than propagated.

    Args:
        md_input: Path of the Markdown file to convert.

    Returns:
        None.
    """
    raiz, _extension = os.path.splitext(md_input)
    destino_mp3 = f"{raiz}.mp3"

    print(f"--- Procesando: {md_input} ---")

    try:
        with open(md_input, "r", encoding="utf-8") as origen:
            contenido = origen.read()

        hablado = limpiar_markdown(contenido)

        # The length check ignores surrounding whitespace, but the text
        # passed to gTTS is the untrimmed cleaned text.
        n_caracteres = len(hablado.strip())

        if n_caracteres >= 10:
            print(f"Texto leído correctamente ({n_caracteres} caracteres).")
            print("Generando audio con Google TTS...")

            gTTS(text=hablado, lang="es").save(destino_mp3)

            print(f"¡Audio generado!: {destino_mp3}")
        else:
            print("Error: El fichero no tiene texto suficiente.")

    except Exception as fallo:
        # Top-level boundary of the script: report the failure and return.
        print(f"Error: {fallo}")


if __name__ == "__main__":
    # Command-line entry point: the first argument is the path of the
    # Markdown file to convert.
    if len(sys.argv) < 2:
        # Build the usage line from the name the script was invoked with,
        # so it cannot drift from the real file name.
        print(f"Uso: python3 {os.path.basename(sys.argv[0])} <archivo.md>")
        # Non-zero status lets calling shells/scripts detect the failure.
        sys.exit(1)

    fichero = sys.argv[1]

    if not os.path.exists(fichero):
        print(f"Error: el archivo '{fichero}' no existe.")
        sys.exit(1)

    convertir_md_a_audio(fichero)