# taiage/scripts/md_a_audio_con_edge_y_metad...

import asyncio
import edge_tts
import re
import os
from pathlib import Path

def limpiar_markdown(texto):
    """Reduce Markdown source to plain text suitable for speech synthesis.

    Steps, in order:
      1. Fenced code blocks are replaced by the spoken placeholder
         ``[código]``.
      2. Table rows (lines that start and end with ``|``) are removed
         entirely.
      3. Any remaining inline ``|...|`` span is removed.
      4. The markup characters ``# * _ ~ ` >`` are stripped.
      5. All runs of whitespace (including newlines) collapse to one space.

    Args:
        texto: Raw Markdown text.

    Returns:
        The cleaned text as a single line; ``""`` when nothing is left.
    """
    texto = re.sub(r'```.*?```', ' [código] ', texto, flags=re.DOTALL)
    # Remove whole table rows first. The pairwise rule below matches pipes
    # two at a time, so on a row like "| a | b | c |" it would leave " b "
    # behind (and "---" on a separator row), which then gets read aloud.
    texto = re.sub(r'^[ \t]*\|.*\|[ \t\r]*$', '', texto, flags=re.MULTILINE)
    # Inline pipe-delimited spans on non-table lines.
    texto = re.sub(r'\|.*?\|', '', texto)
    texto = re.sub(r'[#*_~`>]', '', texto)
    return ' '.join(texto.split())

async def convertir_archivo(path_md):
    """Convert one Markdown file into a tagged MP3 stored next to it.

    The file is read as UTF-8, cleaned with ``limpiar_markdown``, synthesized
    with edge-tts (voice ``es-ES-AlvaroNeural``) into a temporary
    ``<stem>.temp.mp3`` file, and then re-encoded by ffmpeg (libmp3lame,
    192 kbit/s, 44.1 kHz, ID3v2.3 + ID3v1) into ``<stem>.mp3`` with the
    title tag derived from the file name.

    Args:
        path_md: ``pathlib.Path`` of the Markdown file to convert.

    Returns:
        None. Success, skips and ffmpeg failures are reported on stdout;
        errors from reading the file or from edge-tts propagate.
    """
    temp_output = path_md.with_suffix('.temp.mp3')
    final_output = path_md.with_suffix('.mp3')

    # Title tag derived from the file name (e.g. "tema6_audio" -> "Tema6 audio").
    nombre_tema = path_md.stem.replace('_', ' ').capitalize()

    texto_limpio = limpiar_markdown(path_md.read_text(encoding="utf-8"))
    if not texto_limpio:
        # Nothing speakable is left after cleaning; skip instead of sending
        # an empty synthesis request.
        print(f"⚠️ Sin texto para convertir, se omite: {path_md}")
        return

    try:
        # 1. Synthesize the speech into the temporary file.
        comunicar = edge_tts.Communicate(texto_limpio, "es-ES-AlvaroNeural")
        await comunicar.save(temp_output)

        # 2. Re-encode and tag. Arguments are passed as a list (no shell), so
        #    file names or titles containing quotes, "$" or backticks cannot
        #    break the command or be interpreted by a shell.
        argumentos = [
            'ffmpeg', '-i', str(temp_output),
            '-codec:a', 'libmp3lame', '-b:a', '192k', '-ar', '44100',
            '-metadata', f'title={nombre_tema}',
            '-id3v2_version', '3', '-write_id3v1', '1',
            '-y', str(final_output),
        ]
        try:
            # Async subprocess: the event loop is not blocked while ffmpeg runs.
            proceso = await asyncio.create_subprocess_exec(
                *argumentos,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
            )
        except OSError as error:
            # Typically ffmpeg is not installed or not on PATH.
            print(f"❌ No se pudo ejecutar ffmpeg: {error}")
            return
        codigo_salida = await proceso.wait()
    finally:
        # Always remove the intermediate file, including when a step above fails.
        if temp_output.exists():
            temp_output.unlink()

    if codigo_salida != 0:
        # Do not report success when the final MP3 was not produced correctly.
        print(f"❌ ffmpeg falló (código {codigo_salida}) al generar: {final_output}")
        return

    print(f"✅ Listo para el coche: {final_output} (Título: {nombre_tema})")

async def main():
    """Convert every ``bloque1/*.md`` file, one at a time, in sorted path order."""
    # Change the glob pattern below to process a different set of .md files.
    patron = 'bloque1/*.md'
    directorio_base = Path('.')
    for ruta_md in sorted(directorio_base.glob(patron)):
        print(f"Procesando: {ruta_md.name}...")
        await convertir_archivo(ruta_md)

# Script entry point: start the event loop and run the batch conversion only
# when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())