From c8e351da000d01206e3cd578c20e0af238dd498c Mon Sep 17 00:00:00 2001 From: Tatiana Villa Date: Thu, 30 Apr 2026 12:48:31 +0200 Subject: [PATCH] Primer commit --- data/clasificacion2026.csv | 21 ++++++ data/espana | 1 + data/estadisticas_por_equipo.csv | 21 ++++++ data/prediccion20260430.txt | 19 ++++++ main.py | 11 ++++ models/modelo_rf.pkl | Bin 0 -> 86641 bytes partidos_jornada.txt | 15 +++++ requirements.txt | 3 + scripts/descargar_datos.py | 21 ++++++ scripts/entrenar_modelo.py | 33 ++++++++++ scripts/predecir_jornada.py | 56 ++++++++++++++++ scripts/predecir_jornada_mejorado.py | 83 +++++++++++++++++++++++ scripts/predecir_jornada_stats.py | 94 +++++++++++++++++++++++++++ scripts/procesar_txt_a_csv.py | 65 ++++++++++++++++++ scripts/unir_y_analizar_csv.py | 30 +++++++++ 15 files changed, 473 insertions(+) create mode 100644 data/clasificacion2026.csv create mode 160000 data/espana create mode 100644 data/estadisticas_por_equipo.csv create mode 100644 data/prediccion20260430.txt create mode 100644 main.py create mode 100644 models/modelo_rf.pkl create mode 100644 partidos_jornada.txt create mode 100644 requirements.txt create mode 100644 scripts/descargar_datos.py create mode 100644 scripts/entrenar_modelo.py create mode 100644 scripts/predecir_jornada.py create mode 100644 scripts/predecir_jornada_mejorado.py create mode 100644 scripts/predecir_jornada_stats.py create mode 100644 scripts/procesar_txt_a_csv.py create mode 100644 scripts/unir_y_analizar_csv.py diff --git a/data/clasificacion2026.csv b/data/clasificacion2026.csv new file mode 100644 index 0000000..5d1b012 --- /dev/null +++ b/data/clasificacion2026.csv @@ -0,0 +1,21 @@ +posicion,equipo,PJ,PG,PE,PP,GF,GC,DG,Pts,Racha1,Racha2,Racha3,Racha4,Racha5 +1,Barcelona,33,28,1,4,87,30,57,85,G,G,G,G,G +2,Real Madrid,33,23,5,5,68,31,37,74,G,P,E,G,E +3,Villarreal,33,20,5,8,59,38,21,65,G,P,G,E,G +4,Atletico Madrid,33,18,6,9,56,37,19,60,P,P,P,P,G +5,Real Betis,33,12,14,7,49,41,8,50,P,E,E,G,E +6,Getafe,33,13,5,15,28,34,-6,44,G,G,P,G,P +7,Celta Vigo,33,11,11,11,45,43,2,44,P,G,P,P,P +8,Real Sociedad,33,11,10,12,52,52,0,43,P,G,E,P,E +9,Osasuna,33,11,9,13,39,40,-1,42,G,E,E,P,G +10,Athletic Club,33,12,5,16,36,48,-12,41,G,P,P,G,P +11,Rayo Vallecano,33,9,12,12,33,41,-8,39,P,G,P,G,E +12,Valencia,33,10,9,14,37,48,-11,39,G,P,P,E,G +13,Espanyol,33,10,9,14,37,49,-12,39,P,E,P,P,E +14,Elche,33,9,11,13,44,50,-6,38,G,P,G,G,G +15,Girona,33,9,11,13,36,50,-14,38,P,G,E,P,P +16,Deportivo Alaves,33,9,9,15,38,49,-11,36,G,E,E,P,G +17,Mallorca,33,9,8,16,41,51,-10,35,P,G,G,E,P +18,Sevilla,33,9,7,17,40,55,-15,34,P,P,G,P,P +19,Levante,33,8,9,16,37,50,-13,33,G,P,G,G,E +20,Real Oviedo,33,6,10,17,26,51,-25,28,P,G,G,E,P diff --git a/data/espana b/data/espana new file mode 160000 index 0000000..98851d5 --- /dev/null +++ b/data/espana @@ -0,0 +1 @@ +Subproject commit 98851d5d630896c679facf4fa693e48462566575 diff --git a/data/estadisticas_por_equipo.csv b/data/estadisticas_por_equipo.csv new file mode 100644 index 0000000..4855db9 --- /dev/null +++ b/data/estadisticas_por_equipo.csv @@ -0,0 +1,21 @@ +equipo,goles_favor,goles_contra,disparos_puerta,faltas,amarillas,rojas +Real Oviedo,26,51,109,426,67,8 +Getafe CF,28,34,91,508,92,7 +Rayo Vallecano,33,41,156,434,85,9 +Girona FC,36,50,126,349,65,7 +Athletic Club,36,48,149,450,66,7 +Levante UD,37,50,116,420,75,4 +Valencia CF,37,48,105,412,64,2 +RCD Espanyol de Barcelona,37,49,134,455,74,5 +Deportivo Alavés,38,49,130,501,71,5 +CA Osasuna,39,40,126,445,80,6 +Sevilla FC,40,55,113,481,92,5 +RCD Mallorca,41,51,133,382,69,4 +Elche CF,44,50,130,426,63,6 +Celta,45,43,137,388,64,1 +Real Betis,49,41,155,351,67,1 +Real Sociedad,52,52,160,469,71,4 +Atlético de Madrid,56,37,172,371,67,4 +Villarreal CF,59,38,148,403,71,3 +Real Madrid,68,31,231,330,58,7 +FC Barcelona,87,30,233,309,50,2 diff --git a/data/prediccion20260430.txt b/data/prediccion20260430.txt new file mode 100644 index 0000000..3210e26 --- /dev/null +++ b/data/prediccion20260430.txt @@ -0,0 +1,19 @@ +Predicciones para la jornada (con features de clasificación y estadísticas): + local visitante ... dif_rojas prediccion +0 Villarreal Levante ... 0.00 1 +1 Valencia At. Madrid ... 0.00 2 +2 Alavés Athletic Club ... -2.15 1 +3 Osasuna Barcelona ... 0.00 2 +4 Celta Elche ... -3.85 1 +5 Getafe Rayo Vallecano ... -4.15 1 +6 Betis R. Oviedo ... 0.00 X +7 Espanyol Real Madrid ... -2.15 2 +8 Cultural Leonesa Cádiz ... 0.00 X +9 Castellón Córdoba ... 0.00 X +10 Eibar Málaga ... 0.00 X +11 Racing Santander Huesca ... 0.00 X +12 Sporting Ceuta ... 0.00 X +13 Las Palmas Valladolid ... 0.00 X +14 Sevilla Real Sociedad ... 0.85 2 + +[15 rows x 12 columns] diff --git a/main.py b/main.py new file mode 100644 index 0000000..0fe4639 --- /dev/null +++ b/main.py @@ -0,0 +1,11 @@ +# main.py +""" +Script principal para predecir quinielas de fútbol español. +""" + +def main(): + print("Bienvenido al predictor de quinielas de fútbol español.") + # Aquí se cargarán los datos, el modelo y se harán predicciones + +if __name__ == "__main__": + main() diff --git a/models/modelo_rf.pkl b/models/modelo_rf.pkl new file mode 100644 index 0000000000000000000000000000000000000000..136590dd098a0da5bfb8fa3e8031e3b581e0ebf0 GIT binary patch literal 86641 zcmeI5d0Z36`^UqfTq3B5cvSGl8x;{nohaTYqKJY@LpXF%jtHTGCu#wY=od9utyZab zZB?rEzVNDoSDaQBIs`dY_b#D--{W->$QI@FLXn)lih6Qz0WhBdFFZI zjZ^%Mc$GgAyXd)LA;PGLULg_DA>n~xA-x2%BBMg0#ZrmwWMM>bWcaAcKi$KG(b1u^ zLPMgY8>L;BNf%2UC1!B9&~TwRGD<3GPQQ;hDg?eqkb3u!X!s^0LV`l0Ln9-mz~}$< zO@g9A#UW8}52?g>c4$PXRAL$}3JVp(C*d=Jm8W7BE{qifhls?K)Kk(VJTyWOEesch zg+vP~ca?e=O01dB!jCjd>R}+UQ$IOBBy=_<7R-tg28rQe1eGt2uKN5uo}`iby=R38 z#WC<>!?$m&{#(6|AR;oj@|i|al@~$~Ef$JH;2zD@?;08|ih*Yy2Tv;~Dnu9^B4xZ~ z<;Sl+Fl=BJ6eJP|!$g#jv6E$ZWJIJmG9olc5Cku`r^H%NeGLWQg+!?@nN-rCPcPSA zPSTh_NuvmX=4wYvJ%T0Hnm+|1VU#dDTIxtyP*#*R)r4wFHKS}OTdFx_N413Cq*-8O zq&Qj}B@{_lO3Wf70|n7R@LQFxk~EAE%!v$yADe3K(M@6k4;&Z?KZ*xWVm@CO6|Vls zQDW&Ts)eV-LVaDTFFV}df@(xrc~aIMhEx*|1FEU!(^JhP#?kX&E6T=`veiD_(vz~& z{3U5peZ>V4!f^OC!V4;uG=rxS78=+qA|_lEClG}O!LBKlH1SmbIV>tl7&i@m0ui$Z znnuS2R$gm(sVZAZB_$gi zslltLA=MXR6MZPXUI_KU@Gpd;#GnuC2z}u{Uuj6?l_;Q34}MI|CxiXKMtcSv)jP+JjrD0m&v zUZ*Cas@M5xKA|c7`RGHYgc?+RAP)mINzjB*R?u>otMBs=%$NwV)T05skyPIHbq>%# za{w2K(Nq^_?Y)8;X|j6<2;6-6!f zFooM_xV45`F*V7<4sK)M)&Xw6gU>s{?R@yR6WqqC-#Mla@_~_CqxyUGa7~oPc)Z{H(uI^6gMhW(|kDY5fNd-KZJ2-CT)9nna8_?gQPWubxyanclY>m)Uv*T?{h<0JbT{RDlW19WZzrZd@>G z@OuRq!|ado_&hn%C#pWV)Wv-DM*YLQHtNU!#ghZGeE>dB4VdRITg9(DIQ%cAYxN^a zqI=o@10+l2l!dQu33tzt198^K7XIQa(7h5UnTDIznR6Wv|EvU+o3nDw^HoRH^f&r+ zbhh;C^j7&nym?`PdLnx6w2?#y04#3&1dKKygb`Gjl@ zH`gyGIu}Fx)n@zIyoQIAn&+#oUrkQv>x#Z!bH6(C)HtQ27snGePxi&1%33W_fnkI^ zm7hB1olvF#4#P=M##NBO=DWf4zvJ_NaOJYn;cZc%RKB~<};LY|@$ zAJ4Uat^giPo?_QxkDq!o`?CYSeQCpNFT?luN}25+=+8bmHMAN_jmiaHPCeRho0<*c zGp2smeCwf*gOq6xWpoCH~IK!Q;Oe>_8%2s z3X`Xk_&hl?`*ZpdZ2N+YpVIK{+b8DnH|vk5!RuQTRg~p`Et{hRjoM~|?)PKTN}g+Z zI(M7*s@*yzNDTba_3Sei@pSgku$CN8*gXC3+58_Hu2%s!Li^PBZN(1-UloAQ{b%O!OZCUof%Mfe--P9YEe&4wZ|<7~&JC1Ix#gzy_trgiZ*FU; z1kVP=-uo+$Mf#pij~jLzPuM(7j*fZU`4<)NBjjmyqv;<YhP$eh-;)AZw7o~AX6&DePa+V_v{TMu<+ z(fDcB{h}@$PuM(t<2>2abF&JB67m%LM_H1fL;(a$p5pO&a%J}CWPBdFFx!jp{XKK$ z`OEajQ>D8%I2WA1*?h?rgKY47>#`O6i&}s0W>EL#i5r!Ga{c9V@dFn5dvSK{qd1pnFHf+a7eCWe4&H_wD(cW!2G%Z6u3NfG>+kXX9(y%dDhC1H z8>THzXOX{m|HBWQ9)Zu(v$Z~rlwuW_OvqEy-&b^2!FVGdW`9)T$43lg@<5kvq7ocfb}nd60E_g9aqH!cIe!n^ z-}Cjg%$&DI1;&%|^s!+4zArjF@$vn=Ud;YX5O0Kxp9tbBkojC|{qd9_c`-F)Wv(XP zh>nw>?e~}!&hS?WM4vWAe%iz$o-%#b$8bDh^K@rM-i%>mRbU(;Pu9XX(-~0To4~YB zPWbi-iQgm86FM<@EW-B}5&Pu8h=;9X{N9EkMw4D%muqy0b9)Wp6qMFJb>xpUoYGMV zPArZ2JnaOFc$#+doIl4CHcyYtPtM+xOg{hAzT)9_uL=bi%;X8qKhgK!+K!(Pe@%w( zuOaifO8oH?qLOtSzq7IMPAk)N2 z(D$3?6K7EA8|1*!H~Yqp<1FGS=(new9)Yh%Y`GS=V|=6vcoFjS+;aM|mroU7GSfai z!1wnMJ%V8VsU_3jbHMlazA)Rz>TmqC;XquMVnexR{wc7@lRq|myra>@4@g~ento7% zgZI~se$;|R^G~y1ZRp6^Cv5xVGkwv4wa`8dCgkbL^3J9ww(6{Z+K;bC3_$vGJhs2s zgxNk|0e1Tj%=Y{BH~$nD;gk5lG8c?+J-EAK_Fo|JNZBvmHd>y%+idmkbM*GM98cIhogOg%xcNR67)!{Lqtr}aaF#{($-roQEN7pv?bGMH_ZRbiRDt1yJlR}Ze!SCX1(?LNPb=~Dh`!AJ z%*XfFkntD6{M9?=@vrO8-+S+vb!2V7T(GBu8mufp&qL8XxivhMmlmW1tKYA6-tNk# z`P`RxT{)hxc^YqZYt@yVI{%Kv_vCtn z!}jg@KdE&5Jtutogv7%t@p(Y?2z>j5%;!4jZ~SyAw9^}~73Mcmr;TvR29!g$Zyxs3 z>JeV8iacenVLp}{_JM-VESk^l^mv8~#}hVB2l88)tnwj`FCSVQXkVsO05|6SU4$PW zf#i>H!siKzk06-8YQj8!rT%!@e%+#U$znM$=|A&~b4HeC9*Xv7FIttSTa_!p5~T_J zWf4!y%=+;-p0Ifu;TpB4&{qXK3H1m^p`v1SjslEg_D6#Jut@#~g88G?%<)eu{(P`0 z^Ze@}KP)}(v;amQofl}wPY=Alb&s=r0w$?9dAaO~kGNnX;qqDHkDrpSxh6Qm_&w76 z37hT>hWRW>pbsMP zM)i=-(lu#4j5N>I=zKI!{eLobxUfkHHeG#tVOchd#!smunziNZ6SjRCDjr&}8P?Zu zA{;-Bdoce(d#IQ9W7;Qz@e^WS2=Yf*GkHqJ*Y}Y1x%Du9qVri+re7YS)%U_+b&a!$ zN>I5eW)V*}vYt-hc*5psL*)J|54Wg*KOs*_+uwa3!T3Gh{Co8Mx3>OXPbLop{u&Zr z5r98_LiF-_$iGMXd1bbl1^Ya;Z6NV2~X{VT+z>0ZELP;`uEps?pNpaPt8SB8*)5h^JG)PJK1$5dHmjzi=lxDaQuYi z--Ge6boZ{UeM0m-FZ}Tn5})pX&m)q*ydL6VM>PF$l2uL?px3?ATx?0RT<*O^32tpy zefGV@B7d*k?v4S+6E;ur8Pw3m^HpFdp?&hYGAjIUsP7@`HS+Q2bCLLPKE8cH=5v$l z$P=YMz1-)P8%(*JtEmIBHx!F()qeQ0hkkwd`Zui}u~>Y!^R^RmaPex& z>pr_#G=6gWvUVJ2pRnyydf&gloY_D=eoAcix+Mkmh>1*|0`SLAh@Oy&@9!b&p9u7b z_e}d#q`&!Gi|o1G(gL;lVRiM07vDFv%xSL#eLp_R8-1EZ_UX~j(>Of>-#%Shb^2Zd z=v`=2kKBoJ^`tC?PEP=m=ydhqw?y1BaX<%@tj*?K zVE&CxKO9}~+LT2+{USfsf#V6Ar?Rn6CqMKe*CUL#8VLDN-$Uxvc;V}N$a+G8cp+r| z${OEa{L1wAc>1dcV7DTvqw_8~pyQ05hWCz6u-EDlpSG4y_|jPcjE)#Qj1OZGPhUqD zTW~yK^R#hZk0T~9e*|eh7Sk|CLH8PKRvF<;PgHG@l()<_w)T% zlgF1QE?VNcP_6);On=V{Ki&wbHzUI50ny8y@OebyBY67r_r@Il1y*>^1sQ$RN}miA zw)F@%3&;la_%iowan9B*{Sd`ui?pRnyy z=I!NAewL8OM+{Qg_{g5<%;&;-HT3Piw)Uw%-9X(@9PoKS^7j$=i?5i+uZMayUQRvQ zZ=0H}@%v~ll4WMnU8PE({Py+VCmXX!-@EfHgRB39Z=Z@cP5LVv#+Q@U&;2fX-RNJT z9)Z+Ti^sQ59hv<(4S)QD*e5Ui_K1C|hxqbl>A``Si?e|4`rI?U|98-|l@hE;eX{%Q z4HnHmHGZ+ljk8bK_UX_5eT$mzAdffta%lQ56_BSuroXot-##Jh36t@8K=g8g`QQR3 z&nNXapL^V6W1FK6Fu&2$&NmC^WNGSt()KBOa?AohxPI)DSBNzoi^jfo}=*a^;?P*UMr42qBZFe0-kJ^-K8iLdbd#5k8N|dO}KnJSBYc?-_9^ zM^guYUROx-bl~pTnf>@m@Y~d|(OZ_X$UdF>+@}S{6E;uDw!ize5UIcrLZ03Z^1s>c zqfWg~8NOcLj%goc_<97A-$;hflL7O75U)QyLS3as&`1vAGt?8gGT>ENa6_w?m-QJc zJNQfvj9w^PbX~_H`*diW!B~zbY@Q~Z7@QGvi@ZMJk}Gx(IzWFf80nAe`1Jj^wt57T zFN`4n-av#$eEaqqX`iGy|CBn`qrL>I<-&zen*4io9tryToSQf=!}>P`Xzx3E>pxvt z#M8Moo?QM2d_BUXaNNCxFkcvH{=FV2G9M*Eo{)NKFkcwmJ!{JoqDK&{Uqb8~58pnS zG4BWUkS~m0_vF6R+R@HdYoERke}3ldMFkj|(P(EodlvE3H~X7@oPEN!PYu$IRez+a z0H4s`OLLFeHwW^B)YnMGA3q`aBb4|&AUp=(w@2y$r0H+`)WTn#IU-k6*PUJmMcbzz zo3}Zi4CBjh+`YC!&ZhYCxh5}#98cIhMOij%I&iBBxDoQSQ#f|AAJij;BlmZFo)CS` z8sA^DqYYwZN5$s}Sx;CG^=dMwwe2+hcoxvLPg{+5j66J62{J!k6#pE?B0a)IzT1f7 z37e-aX%9T_2vxw3kf#rM!jckbpOE;7WPE=Qi8o5cj}LcX@)VEX-hz2QNY|ep(Xnyz zjf`p9cv#)~ggKz?f{!o`c1-+bi)Jt`mbP0}+nR9?|NeT-{puVa@!t5@LXIbFp1PcO z6tsZzxuo&s0Rvvd{sH4*$1(dO!F(T__;`DYMdK&1a6hM)CpPcaLpOE-+Cwx7j55fa}JS`G$Vv)b6vg|aIvrpLeX^`y^6N?Mv>z~#=ALx+?$4`h}?uyTo zBa;V$cp=1J&P@g`Jrv;dA?L5E zP!{pj*J@>JjwfuM_-6t#hSp*HlxAvo?=_r{MD#rozQ2dmqvpl)>G4Hv$6rmEJSy?) zsg)w_^~clg4?U&#Cgy_AgL8*$dXxgc9Z}VJOw4N|= zrz2Od2H)Si((jOA-X<0BCLBL`t!*-;39R>ttao3D&lBRW1>oBkWIi_yKmHHNA5kCk zPp}HWq(#|)&cjFFFUqZa_MZN#1ZGN~qpNu=8b8^@?(5FkCv5vvx-4LEij+KlFSBI$ z;R4ZMM8SS9{^E|SlZpnfhgpBsR0 zpOAcEBK_%m@6Sxn`TkR`X1zxDN&}_U_u6_JExdMD39k5kGMhJ*MfPd0>pp9aCv2X+ zzuU6YA#d{e+y`z!(dzYtNIa}Be!eh7@AksiyAk^qfZx8DIeseDA5U>o>fxVTw3ERmdj0= zD9ot)0zf}D36SDp(S%3as_;>eB z2ieOt>oxXN%szT_l-A!{eZ1A+(J+7c>kXMn^DeMx{N(maKQ7(~-`}J2N08<>qVq@i zGW{ij{1J#=PB5Q~z>vwP_A3LOUsiq>&nRA7b}3v58wEA8OEac zh<6U}Jvo05+u!Ti&gyg^Tu(@vk0t+54~J-2?{gZHCxZE0Wc?F?o`C2P1Ux-w@>CD= zxwHrfSlns|IXL7PXzg4 zk$fyNeEWjk$iD3ML)MF#?7m@WE zsrvKx9xZion`^9{&(&S;p0vYM_UKzBc$fZGFm5=Dc=GeNHRgE2<|+D3(7hR_RA3?@ zPfpaW>}Am33u5|vcKGq-NPGmrdO}1WOvATtNWL&v{qb~LxOwjgp<=z13MpgRFHgqT zBib{^FRA$c8e-oFctq?|y8d{YGU&)`hiAR<%pVBjjgWeu`S|lsh+a;>6SCgj3Ew^;^`9vH%|H3v za$Da5#y+flF*Rgmu1vG8kmhNw&|$Pk8qB|OCvxVaE-d27JFjbR&OTw=C-?ko@y@%+ z^BYz53nf72pWyh3?k}}nzl7-B1pXSL4-<@-5fH_=)!O=q-JH=5i zTl#f+t9`Z3dc=$eM~84cVe{1Xa)%%@e-#KI96t@3cJ81zT%U{Z6pwG8T$uhIAK$(p z{$e_Qe1s`;{3p|&eG85uYc-z7WKRBlWBacr<68e?8O#pySGEu4qdl z@88;D+X@9}5)uVV1F*>7J2RtzTc3+RelosiKH7M%3XCM=X$ke4)moU}2(eG;b@caS z`1!&R9@Fsk3?%i~4WACp${T&9_4>NIvk$FUof;6(W2CuF0077Q<0+pcFUT;Ul!S?U(Y$V;dsL4X-?9BO*amZuXkVPabiPD$P^*$%61CHSO-0C{IG*7Sk4Q_S8Spm$SKZ!~WVUc|@aUbu)@r2Ej_w~D1 zhQs_3QwhgU`I*zxU&8s^>CEx`1ALy4{6t0g`W&JUrsLZuWIi`me>`nC5Z9&H5bAq- z4dKv927W)Z;^2TJZG3sc`}?1_eUXEqi8jlBk7p52hYsH|<#@v8sYB$}#kQMOz@3n% zsW-QqUxogj7n3KrUW1muYdimh#D}Ni^MJ&|df~T!PoJ>baMZ(k4RsYLSPH5}efM)o ze`K6!4)yY)O<~D*S>*4vPZjpyc*5q%DKWStV~+~VB;@Jz>GM5CK%N4bJe|bnsXwzn z^YF({h`%So=Ls1SE8=ozCAfC(RQtFi zEaK^wYX-&fgw4~;ysrf_pnW2(_nA4uYW*jeFKh;rCxZ1)h#p~$pFaX!kBC41LhMsL ztbaN(%<}JE6SBd~ly={_FV*Vhqb!$h4TSl^#CZl8gJ7K~nzO2H&A5kuf4$~@b&ikl zlC%ltc*5rC=~%Oq8L-~xSVH?WXv+KpAEA9h))S`U>*a{QN1!Jl^H0h6_6f-sRuA<) zX&vJJ)6O>w=V-@Ivl}G!-VN)i^(%RodV)>$)OO7V!#SR?dD{I~mA71Q7J)n9%0*fGQV6)bxU{5{VXJ7#5T{65;w zJV?y6ZU=RUiN$l|=f1MYKJ{t1kBg7MkB_*We95PImI?quo;u6d+kPn0(f3mE*FPcl zi9k<4;?oIu%wzJDrazv>z-l$yUOv|75HwdbPgVt~jSEuc;M3~ErK=CKNZ*@nwY4i} zpRnyyV&6ZfWUVKke>%Ogo7t&zI_otE#!q9J{n;*_PxrUl>Vp={_5}4c5WTz}#!r=5 z!#>umuVK=n^~^BQ6HuI$F}jOAi}LTS@D~i@c*5o>o7eHmh1n`FkkCHO>bYi_I^VJz z)8E^V9}kPnKM};=wP5l@uzm`uud!c$^G}uTo|_!daeMSSDEfZUtGC;t`7qD&?w?ng zG=Y9I{e0E7rq1y1uh-nK&U%Ew_?r_sp0Ii11z(dMoKuJV5l27Vd;{|>Bk@Kj@p(el zyA$XMNd5>V{(SI1OddDuk0*F!xue+{uuGJL%osjoqx z4}PLgSZx66As&`eXNCP#Gv3HIHep(9f&$!i9|C__G=56x*`Wc)6E;sRChSdE4c8Nr z<~Oo^)+2jCp#lIVPb=~56QW1R@bzw6CJ%gk`_!0u{@3+qpDvXjvK=6l1F!0QVQ)hY z6?JH=wNGp3j2K^ZLJ6LCn?Kt=nML|u{{wFQIi9e2GPvJm^?5(?^-qB!x7FoP-$Uws z#^Uodh{*$ieL?C05b%Wfdk^%-lR7G*iCC_QgVp7!t$FZnKd9?nc{Sj~_@7zCQ>b$Z zSKl39FYgvU>_8)!&vFEz9&yRvx5c1OI`uvY@>wE!w-bK65#le#)Bj5ewxVimt673Psn_%5}yZzCjvbKsaIpCKl{{ip*kaME}-+m(z*n#)1~fE z|0sY{}bT>AFSf5||%0ymxehea-cR7SI38zj;Xk$|T#?Y-!6Po^m=FbL%zm zdHV7o-()bXPe{sBTak0Sqz^jlH3;exIy3z(C;WJ$Hq7=#`1Xy*ydUuO7hg`-CoF91 z5pEWc4fZ6z%Q?_U8^0$tJM?h$7A5%m^{(-vTY4NnSyx-Ym_Vu()jB3Hq-xXa{{Wk( Bdzk goles_visitante: + resultado = '1' + elif goles_local < goles_visitante: + resultado = '2' + else: + resultado = 'X' + partidos.append([ + temporada, division, jornada, fecha, equipo_local, equipo_visitante, goles_local, goles_visitante, resultado + ]) + + with open(salida_csv, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerow(['temporada', 'division', 'jornada', 'fecha', 'local', 'visitante', 'goles_local', 'goles_visitante', 'resultado']) + writer.writerows(partidos) + print(f"Procesado {ruta_txt} -> {salida_csv}") + +if __name__ == "__main__": + # Procesar todos los archivos txt de todas las temporadas y divisiones + for temporada in os.listdir(BASE_DIR): + temp_path = os.path.join(BASE_DIR, temporada) + if os.path.isdir(temp_path): + for archivo in os.listdir(temp_path): + if archivo.endswith('.txt'): + division = archivo.split('-')[1].replace('.txt', '') if '-' in archivo else '1' + ruta_txt = os.path.join(temp_path, archivo) + salida_csv = os.path.join(temp_path, archivo.replace('.txt', '.csv')) + procesar_archivo_txt(ruta_txt, temporada, division, salida_csv) diff --git a/scripts/unir_y_analizar_csv.py b/scripts/unir_y_analizar_csv.py new file mode 100644 index 0000000..02fe6e0 --- /dev/null +++ b/scripts/unir_y_analizar_csv.py @@ -0,0 +1,30 @@ +# unir_y_analizar_csv.py +""" +Une todos los CSV de partidos en uno solo y analiza la cantidad de datos. +""" +import os +import pandas as pd +from glob import glob + +BASE_DIR = os.path.join(os.path.dirname(__file__), '../data/espana') +SALIDA = os.path.join(BASE_DIR, 'partidos_todos.csv') + +# Buscar todos los CSV de partidos +csvs = glob(os.path.join(BASE_DIR, '*', '*.csv')) + +# Unir todos los CSV +dfs = [] +for csv_file in csvs: + df = pd.read_csv(csv_file) + dfs.append(df) + +df_total = pd.concat(dfs, ignore_index=True) +df_total.to_csv(SALIDA, index=False) + +# Análisis básico +total_partidos = len(df_total) +print(f"Total de partidos: {total_partidos}") +print("Primeras filas:") +print(df_total.head()) +print("Distribución de resultados:") +print(df_total['resultado'].value_counts())