From 58bc4ef47eec31da0e19b42550782a54d0d398c7 Mon Sep 17 00:00:00 2001
From: Kris Villa <krisvillaema@gmail.com>
Date: Fri, 29 May 2026 21:10:11 +0200
Subject: [PATCH] lectura boe

---
 .../convocatorias/busquedas-boe/leer-boe.py   | 106 +++++++++++++++++-
 1 file changed, 102 insertions(+), 4 deletions(-)

diff --git a/src/main/resources/convocatorias/busquedas-boe/leer-boe.py b/src/main/resources/convocatorias/busquedas-boe/leer-boe.py
index da90a38..926923a 100644
--- a/src/main/resources/convocatorias/busquedas-boe/leer-boe.py
+++ b/src/main/resources/convocatorias/busquedas-boe/leer-boe.py
@@ -3,9 +3,107 @@ import xml.etree.ElementTree as ET
 import datetime
 
 today = datetime.date.today()
-url = f"https://www.boe.es/datosabiertos/api/boe/sumario/{today.strftime('%Y%m%d')}"
 
-response = requests.get(url)
+# Base URL of the BOE open-data summary endpoint; buscarenboe() appends the date to it.
+urlraiz = "https://www.boe.es/datosabiertos/api/boe/sumario/"
+
+def buscarenboe(fecha):
+    """Print one day's BOE summary items that look like selection processes.
+
+    Fetches the XML summary at ``urlraiz + fecha`` and walks
+    seccion > departamento > epigrafe > item. An item is printed when its
+    epigraph name is in INCLUDE_EPIGRAFES (and not in EXCLUDE_EPIGRAFES) and
+    its lower-cased title contains at least one INCLUDE_TITULOS term and no
+    EXCLUDE_TITULOS term.
+
+    Args:
+        fecha: Date suffix appended verbatim to the summary URL; the caller
+            in this file passes YYYYMMDD strings.
+
+    Returns:
+        None. Matches are written to stdout; a non-success HTTP response is
+        reported on stdout and the day is skipped.
+    """
+    INCLUDE_EPIGRAFES = [
+        "Oposiciones y concursos",
+        "Personal funcionario",
+        "Procesos selectivos",
+    ]
+    EXCLUDE_EPIGRAFES = [
+        "Anuncios",
+        "Contratación",
+        "Licitaciones",
+    ]
+    EXCLUDE_TITULOS = [
+        "libre designación",
+        "proveer puesto de trabajo",
+        "sistema de concurso",
+        "concurso específico",
+        "concurso de méritos",
+    ]
+    INCLUDE_TITULOS = [
+        "proceso selectivo",
+        "pruebas selectivas",
+        "oposición",
+        "concurso-oposición",
+        "bolsa de trabajo",
+        "interinos",
+        "estabilización",
+        "ingreso libre",
+        "cuerpo general",
+        "tecnologías de la información",
+    ]
+
+    # A timeout keeps one stalled request from hanging the whole scan.
+    response = requests.get(
+        f"{urlraiz}{fecha}",
+        headers={"Accept": "application/xml"},
+        timeout=30,
+    )
+    if not response.ok:
+        # An error body is not a summary; report it instead of parsing it.
+        print(f"HTTP {response.status_code} al consultar {fecha}; se omite")
+        return
+
+    root = ET.fromstring(response.content)
+    for seccion in root.iter("seccion"):
+        nombre_seccion = seccion.attrib.get("nombre", "")
+        for departamento in seccion.iter("departamento"):
+            nombre_departamento = departamento.attrib.get("nombre", "")
+            for epigrafe in departamento.iter("epigrafe"):
+                nombre_epigrafe = epigrafe.attrib.get("nombre", "")
+                # Skip noise epigraphs, then keep only the wanted ones.
+                if nombre_epigrafe in EXCLUDE_EPIGRAFES:
+                    continue
+                if nombre_epigrafe not in INCLUDE_EPIGRAFES:
+                    continue
+
+                # Items are scanned per epigraph so every title is judged
+                # against the epigraph that actually contains it.
+                for item in epigrafe.iter("item"):
+                    titulo = item.find("titulo")
+                    if titulo is None:
+                        continue
+                    texto = titulo.text or ""
+                    texto_lower = texto.lower()
+                    # Exclusion terms take precedence over inclusion terms.
+                    if any(x in texto_lower for x in EXCLUDE_TITULOS):
+                        continue
+                    if not any(x in texto_lower for x in INCLUDE_TITULOS):
+                        continue
+
+                    print()
+                    print("=" * 80)
+                    print("SECCIÓN:", nombre_seccion)
+                    print("DEPARTAMENTO:", nombre_departamento)
+                    print("EPÍGRAFE:", nombre_epigrafe)
+                    print("TÍTULO:", texto)
+
+
+for dia in range(1, 32):
+    # December has 31 days; range() excludes its stop value, hence 32.
+    fecha = f"202512{dia:02d}"
+    print(f"Consultando {fecha}...")
+    buscarenboe(fecha)
+
 
-print(response.status_code)
-print(response.text[:1000])  # primeros caracteres
\ No newline at end of file