1782 lines
71 KiB
Python
1782 lines
71 KiB
Python
"""Synchronisation des PDFs d'absences depuis Escadaweb.
|
|
|
|
Usage:
|
|
python scripts/sync_esacada.py --list-classes
|
|
python scripts/sync_esacada.py --sync "EM-AU 1" "EM-AU 2"
|
|
python scripts/sync_esacada.py --sync # toutes les classes
|
|
|
|
Le script lance Chromium avec un profil persistant (headless=True dans _launch_context ; passer headless=False pour obtenir une fenêtre visible lors d'une connexion manuelle).
|
|
Si la session est expirée, connectez-vous manuellement (2FA inclus) ;
|
|
le script reprend automatiquement dès que vous êtes sur la page des classes.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
_root = Path(__file__).resolve().parent.parent
|
|
if str(_root) not in sys.path:
|
|
sys.path.insert(0, str(_root))
|
|
|
|
# Force UTF-8 sur stdout/stderr pour éviter les UnicodeEncodeError sur les pipes Windows
|
|
if hasattr(sys.stdout, "reconfigure"):
|
|
sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
|
if hasattr(sys.stderr, "reconfigure"):
|
|
sys.stderr.reconfigure(encoding="utf-8", errors="replace")
|
|
|
|
from playwright.sync_api import Page, sync_playwright, TimeoutError as PWTimeout, Error as PWError
|
|
|
|
BASE_URL = "https://escadaweb.vs.ch"
|
|
LEHRPERSONEN_URL = f"{BASE_URL}/Lehrpersonen"
|
|
CLASSES_URL = f"{BASE_URL}/Lehrpersonen/ViewKlassen.aspx"
|
|
EINSTELLUNGEN_URL = f"{BASE_URL}/Lehrpersonen/Dialogs/DlgEinstellungen.aspx"
|
|
PROFILE_DIR = _root / "data" / "browser_profile"
|
|
PDFS_DIR = _root / "data" / "pdfs"
|
|
HREF_CACHE_FILE = _root / "data" / "class_href_cache.json"
|
|
CLASSES_CACHE_FILE = _root / "data" / "esacada_classes.json"
|
|
LOG_FILE = _root / "data" / "logs" / "operations.log"
|
|
|
|
_href_cache: dict[str, str] = {}
|
|
_lang_ok = False # True après que la langue a été vérifiée/changée
|
|
|
|
|
|
def _cache_load() -> None:
    """Populate the in-memory href cache from disk, when a cache file exists."""
    global _href_cache
    if not HREF_CACHE_FILE.exists():
        return
    try:
        raw = HREF_CACHE_FILE.read_text(encoding="utf-8")
        _href_cache = json.loads(raw)
    except Exception:
        # Corrupt or unreadable cache file: fall back to an empty mapping.
        _href_cache = {}
|
|
|
|
|
|
def _cache_save() -> None:
    """Persist the in-memory href cache to disk as pretty-printed JSON."""
    HREF_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(_href_cache, ensure_ascii=False, indent=2)
    HREF_CACHE_FILE.write_text(payload, encoding="utf-8")
|
|
|
|
_HEADER_SKIP = {"Identifiant", "Classe", "Description", "Type", "Nom", "Désignation", "Designation"}
|
|
|
|
# JS finder : trouve le href du premier lien ViewAbsenzenErweitert ou ViewLernende
|
|
# dans la ligne du tableau qui contient exactement le nom de la classe.
|
|
# Argument : [className, preferredPart, fallbackPart]
|
|
_JS_FIND_CLASS_HREF = """([className, pref, fallback]) => {
|
|
for (const tr of document.querySelectorAll('tr')) {
|
|
const tds = Array.from(tr.querySelectorAll(':scope > td'));
|
|
if (!tds.some(td => (td.innerText || td.textContent || '').trim() === className))
|
|
continue;
|
|
for (const part of [pref, fallback]) {
|
|
for (const td of tds) {
|
|
for (const a of td.querySelectorAll('a[href]')) {
|
|
if (!a.href.includes(part)) continue;
|
|
let p = a.parentElement, nested = false;
|
|
while (p && p !== td) {
|
|
if (p.tagName === 'TABLE') { nested = true; break; }
|
|
p = p.parentElement;
|
|
}
|
|
if (!nested) return a.getAttribute('href');
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return null;
|
|
}"""
|
|
|
|
|
|
# ── Utilitaires ───────────────────────────────────────────────────────────────
|
|
|
|
def _log(msg: str) -> None:
    """Print *msg* with a Zurich-local timestamp and best-effort append it to LOG_FILE."""
    from datetime import datetime
    from zoneinfo import ZoneInfo

    stamp = datetime.now(tz=ZoneInfo("Europe/Zurich")).strftime("%H:%M:%S")
    entry = f"[{stamp}] {msg}"
    print(entry, flush=True)
    try:
        LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
        with LOG_FILE.open("a", encoding="utf-8") as fh:
            fh.write(entry + "\n")
    except Exception:
        # Disk logging is best-effort; never let it break the sync run.
        pass
|
|
|
|
|
|
def _load_settings() -> dict:
    """Read data/settings.json; return {} when the file is missing or unparseable."""
    settings_path = _root / "data" / "settings.json"
    try:
        # EAFP: a missing file raises FileNotFoundError, handled like a parse error.
        return json.loads(settings_path.read_text(encoding="utf-8"))
    except Exception:
        return {}
|
|
|
|
|
|
def _load_totp_secret() -> str | None:
    """Return the configured TOTP secret, or None when absent or empty."""
    secret = _load_settings().get("totp_secret")
    return secret if secret else None
|
|
|
|
|
|
def _load_escada_creds() -> tuple[str, str]:
    """Return (username, password) from the settings file, or ('', '') when unset."""
    settings = _load_settings()
    username = settings.get("escada_username", "")
    password = settings.get("escada_password", "")
    return username, password
|
|
|
|
|
|
def _try_fill_login(page: Page) -> bool:
    """Detect the Keycloak login form (edusso.apps.vs.ch) and fill in the credentials.

    Exact selectors of the form:
        <input id="username" name="username" type="text">
        <input id="password" name="password" type="password">
        <input id="kc-login" name="login" type="submit">
    Returns True if the form was found and submitted, False otherwise.
    """
    username, password = _load_escada_creds()
    if not username or not password:
        # No credentials configured — leave the login to the user.
        return False
    try:
        # Wait for the username field to become visible (up to 5 s for JS rendering)
        page.wait_for_selector("input#username", state="visible", timeout=5_000)
        page.wait_for_selector("input#password", state="visible", timeout=2_000)
        _log(" [LOGIN] Formulaire Keycloak détecté — saisie automatique des identifiants.")
        page.locator("input#username").fill(username)
        page.locator("input#password").fill(password)
        try:
            page.locator("input#kc-login").click(timeout=2_000)
        except Exception:
            # Submit button not clickable — fall back to Enter in the password field.
            page.locator("input#password").press("Enter")
        return True
    except Exception:
        # Form not present (e.g. already authenticated) — nothing was submitted.
        return False
|
|
|
|
|
|
def _try_fill_totp(page: Page, secret: str) -> bool:
    """Fill the Keycloak OTP field via JavaScript (bypasses Playwright visibility checks).

    Generates the current TOTP code from *secret*, injects it into the first
    plausible OTP input, then submits the form.
    Returns True only when the code was filled AND the form was submitted.
    """
    import pyotp
    _log(f" [2FA] Tentative remplissage OTP sur: {page.url[:80]}")
    try:
        code = pyotp.TOTP(secret).now()
        # Try the usual OTP selectors in decreasing order of specificity.
        result = page.evaluate("""(code) => {
            const inp = document.querySelector('#otp')
                || document.querySelector('[name="otp"]')
                || document.querySelector('[autocomplete="one-time-code"]')
                || document.querySelector('input[type="text"]:not([type="hidden"])');
            if (!inp) return 'not_found';
            inp.value = code;
            inp.dispatchEvent(new Event('input', {bubbles: true}));
            inp.dispatchEvent(new Event('change', {bubbles: true}));
            return 'filled';
        }""", code)
        _log(f" [2FA] JS fill result: {result}")
        if result != "filled":
            return False
        _log(f" [2FA] OTP saisi via JS — soumission du formulaire.")
        # Submit: click the submit button if present, otherwise form.submit()
        submitted = page.evaluate("""() => {
            const btn = document.querySelector('input[type="submit"]')
                || document.querySelector('button[type="submit"]');
            if (btn) { btn.click(); return 'clicked'; }
            const form = document.querySelector('form');
            if (form) { form.submit(); return 'submitted'; }
            return 'no_submit';
        }""")
        _log(f" [2FA] submit result: {submitted}")
        return submitted in ("clicked", "submitted")
    except Exception as _e:
        _log(f" [2FA] JS fill err: {_e}")
        return False
|
|
|
|
|
|
def _ensure_french_language(page: Page) -> None:
    """Force the UI language to French by navigating directly to DlgEinstellungen.

    URL confirmed by HAR capture: /Lehrpersonen/Dialogs/DlgEinstellungen.aspx
    After 'Speichern' the page's own script redirects back to ViewKlassen.aspx.
    Blocking: sys.exit(1) if the change fails — all later parsing depends on French.
    """
    global _lang_ok
    if _lang_ok:
        # Language already verified/changed once during this run.
        return
    try:
        _log(" [LANG] Navigation vers DlgEinstellungen…")
        page.goto(EINSTELLUNGEN_URL, wait_until="domcontentloaded", timeout=15_000)
        try:
            page.wait_for_load_state("networkidle", timeout=8_000)
        except Exception:
            pass

        inp_loc = page.locator("#ContentPlaceHolderSite_DropDownList_sprache_I")
        try:
            inp_loc.wait_for(state="visible", timeout=8_000)
        except Exception:
            _log("ERR [LANG] Dropdown langue introuvable dans DlgEinstellungen — arrêt.")
            sys.exit(1)

        cur_val = inp_loc.input_value()
        _log(f" [LANG] Valeur actuelle: {cur_val!r}")

        if cur_val != "français":
            _log(" [LANG] Changement en français")
            # Set the DevExpress combo value and notify its client-side API
            # so the server-side state is updated before saving.
            page.evaluate("""() => {
                const inp = document.querySelector('#ContentPlaceHolderSite_DropDownList_sprache_I');
                if (inp) {
                    inp.value = 'français';
                    ASPx.ETextChanged('ContentPlaceHolderSite_DropDownList_sprache');
                }
            }""")
            page.locator("span.dx-vam:has-text('Speichern')").first.click()
            try:
                page.wait_for_load_state("networkidle", timeout=10_000)
            except Exception:
                pass
            # Wait for the grid to be ready before handing control back.
            # Without this the first _go_to_students_page hits its 20 s timeout.
            try:
                page.wait_for_selector(
                    "a[href*='ViewAbsenzenErweitert']", state="attached", timeout=30_000
                )
            except Exception:
                pass
            _log(" [LANG] Langue changée en français — grid prêt")
        else:
            _log(" [LANG] Déjà en français")
            page.goto(CLASSES_URL, wait_until="domcontentloaded", timeout=15_000)

        _lang_ok = True
    except (SystemExit, KeyboardInterrupt):
        # Deliberate exits must propagate untouched.
        raise
    except Exception as _e:
        _log(f"ERR [LANG] Echec inattendu: {_e} — arrêt.")
        sys.exit(1)
|
|
|
|
|
|
def _ensure_logged_in(page: Page) -> None:
    """Handle re-authentication: automatic login + TOTP when credentials are configured.

    Blocks until the browser reaches ViewKlassen (success) or 5 minutes elapse
    (then sys.exit(1)). On every success path the UI language is forced to
    French via _ensure_french_language(), since all later parsing depends on it.
    """
    if "ViewKlassen" in page.url:
        # Already authenticated — just make sure the UI is in French.
        _ensure_french_language(page)
        return

    _totp_secret = _load_totp_secret()
    _username, _password = _load_escada_creds()
    _log("SESSION_EXPIRED")

    cur = page.url.lower()
    if "login" not in cur and "logon" not in cur and "viewklassen" not in cur:
        # Not on a known page: navigate to the portal to trigger the login flow.
        page.goto(LEHRPERSONEN_URL)

    if _username and _password:
        _log(" [LOGIN] Identifiants configurés — connexion automatique en cours.")
    else:
        _log(" Connectez-vous avec votre identifiant et mot de passe dans la fenetre.")
    if _totp_secret:
        _log(" [2FA] Secret TOTP configure - code saisi automatiquement quand demande.")

    deadline = time.time() + 300  # 5 min
    _last_login = 0.0
    _last_totp = 0.0

    while time.time() < deadline:
        try:
            _log(f" [LOGIN] url: {page.url[:100]}")
            if "ViewKlassen" in page.url:
                _log("LOGIN_OK")
                _ensure_french_language(page)
                return

            # Retry the automatic Keycloak login at most every 5 s
            if _username and _password and (time.time() - _last_login) > 5:
                if _try_fill_login(page):
                    _last_login = time.time()
                    # Wait for the redirect (towards TOTP or ViewKlassen)
                    try:
                        page.wait_for_load_state("networkidle", timeout=8_000)
                    except (PWTimeout, PWError):
                        pass

            if _totp_secret and (time.time() - _last_totp) > 5:
                if _try_fill_totp(page, _totp_secret):
                    _last_totp = time.time()
                    try:
                        page.wait_for_url("**ViewKlassen**", timeout=10_000)
                        _log("LOGIN_OK")
                        # BUG FIX: this success path previously returned without
                        # the language check performed on every other success path.
                        _ensure_french_language(page)
                        return
                    except (PWTimeout, PWError):
                        pass

            page.wait_for_timeout(800)
        except PWError:
            # A navigation may destroy the execution context mid-poll; if that
            # navigation actually landed on ViewKlassen, treat it as success.
            if "ViewKlassen" in page.url:
                _log("LOGIN_OK")
                _ensure_french_language(page)
                return

    _log("ERR Delai de connexion depasse (5 min).")
    sys.exit(1)
|
|
|
|
|
|
def _launch_context():
    """Start Playwright and open a persistent Chromium context.

    Returns (playwright, context, page); the caller owns the shutdown.
    """
    PROFILE_DIR.mkdir(parents=True, exist_ok=True)
    pw = sync_playwright().start()
    ctx = pw.chromium.launch_persistent_context(
        str(PROFILE_DIR),
        headless=True,
        args=["--start-maximized", "--disable-popup-blocking"],
        accept_downloads=True,
    )
    # Reuse the context's initial tab when one exists, otherwise open one.
    if ctx.pages:
        page = ctx.pages[0]
    else:
        page = ctx.new_page()
    return pw, ctx, page
|
|
|
|
|
|
# ── Récupération des classes ──────────────────────────────────────────────────
|
|
|
|
def _next_page(page: Page, current: int) -> bool:
    """Click the DevExpress pager link leading to page *current* + 1.

    DevExpress renders <a class="dxp-num"> links whose text is the page
    number; all of them are scanned and compared via inner_text().
    Returns True when navigation happened, False otherwise.
    """
    target = str(current + 1)
    try:
        page.wait_for_selector("a.dxp-num", state="attached", timeout=3_000)
    except Exception:
        _log(f" [pagination] pas de pager sur la page")
        return False
    for link in page.locator("a.dxp-num").all():
        try:
            if link.inner_text().strip() != target:
                continue
            _log(f" [pagination] -> page {target}")
            link.click()
            try:
                # Wait for the grid of the next page to render.
                page.wait_for_selector(
                    "a[href*='ViewAbsenzenErweitert']", state="attached", timeout=15_000
                )
                page.wait_for_timeout(300)
            except Exception:
                pass
            return True
        except Exception:
            # Stale or detached link — keep scanning the remaining ones.
            continue
    _log(f" [pagination] aucun lien vers page {target}")
    return False
|
|
|
|
|
|
def _scrape_classes(page: Page) -> list[str]:
    """Walk every page of the grid and collect class names.

    Assumes the browser is already on ViewKlassen and authenticated.
    Returns the names sorted and de-duplicated.
    """
    found: set[str] = set()
    pg = 1

    while True:
        try:
            page.wait_for_selector(
                "a[href*='ViewAbsenzenErweitert']", state="attached", timeout=15_000
            )
        except Exception:
            break
        for row in page.locator("tr:has(a[href*='ViewAbsenzenErweitert'])").all():
            for cell in row.locator("td").all():
                label = cell.inner_text().strip()
                # Heuristic: a class name is short, starts with a letter and
                # is not one of the known column headers.
                looks_like_class = (
                    label
                    and 1 < len(label) <= 20
                    and label[0].isalpha()
                    and label not in _HEADER_SKIP
                )
                if looks_like_class:
                    found.add(label)
                    break

        if not _next_page(page, pg):
            break
        pg += 1

    return sorted(found)
|
|
|
|
|
|
def _all_classes(page: Page) -> list[str]:
    """Open the class list, handle any required login, then scrape every page."""
    page.goto(CLASSES_URL)
    _ensure_logged_in(page)
    return _scrape_classes(page)
|
|
|
|
|
|
# ── Navigation et téléchargement ──────────────────────────────────────────────
|
|
|
|
def _go_to_class_page(page: Page, class_name: str, cache_type: str = "abs") -> "Page | None":
    """Generic navigation to a class page.

    Tries the cached URL first; if invalid, falls back to scraping the grid.
    Uses Playwright locators (not page.evaluate) for the click so navigation
    stays robust even if the session expires between two navigations.

    Parameters:
        page: current Playwright page, reused for all navigations.
        class_name: exact class label as displayed in the grid.
        cache_type: 'abs' or 'lernende' — selects the preferred link type
            and the cache slot used for this class.
    Returns the page on success, None when the class could not be reached.
    """
    cache_key = f"{class_name}:{cache_type}"

    # ── Cache attempt ─────────────────────────────────────────────────────────
    cached_url = _href_cache.get(cache_key)
    if cached_url:
        # Always go through CLASSES_URL before the cached URL: this resets the
        # ASP.NET server context (needed after a Notes/BN download on ViewLernende).
        try:
            page.goto(CLASSES_URL, wait_until="domcontentloaded", timeout=10_000)
        except Exception:
            pass
        try:
            page.goto(cached_url, wait_until="domcontentloaded", timeout=15_000)
            page.wait_for_load_state("networkidle", timeout=15_000)
            cur = page.url
            # Accept the cached URL only if we did not bounce back to the class
            # list or a login page, and the class name appears in the content.
            if ("ViewKlassen" not in cur
                    and "login" not in cur.lower()
                    and class_name in (page.content() or "")):
                _log(f"CACHE {class_name}")
                return page
        except Exception:
            pass
        # Cached URL is stale — drop it and fall through to scraping.
        _log(f"CACHE_MISS {class_name} — retour au scraping")
        del _href_cache[cache_key]
        _cache_save()

    # ── Scraping ──────────────────────────────────────────────────────────────
    page.goto(CLASSES_URL)
    _ensure_logged_in(page)  # handles session expiry / 2FA

    # Wait until the grid is rendered (at least one ViewAbsenzenErweitert link).
    try:
        page.wait_for_selector(
            "a[href*='ViewAbsenzenErweitert']", state="attached", timeout=20_000
        )
        page.wait_for_timeout(500)
    except Exception:
        _log(f"WARN {class_name}: grille non chargée après 20s")
        return None

    # DevExpress restores the grid's last state (pagination included).
    # Force a return to page 1 when a '1' link is present in the pager.
    try:
        p1 = page.locator("a.dxp-num:has-text('1')").first
        if p1.count():
            _log(f" [scan] retour page 1 du grid DevExpress")
            p1.click()
            page.wait_for_load_state("networkidle", timeout=10_000)
            page.wait_for_timeout(300)
    except Exception:
        pass

    # Preferred / fallback link types depend on the requested page kind.
    pref, fallback = (
        ("ViewLernende", "ViewAbsenzenErweitert")
        if cache_type == "lernende"
        else ("ViewAbsenzenErweitert", "ViewLernende")
    )

    current_pg = 1
    while True:
        _log(f" [scan page={current_pg}] recherche '{class_name}'…")

        href = None
        try:
            href = page.evaluate(_JS_FIND_CLASS_HREF, [class_name, pref, fallback])
        except Exception as e:
            _log(f" [scan page={current_pg}] evaluate ERR: {e}")

        _log(f" [scan page={current_pg}] -> {'TROUVE' if href else 'pas trouve'}")

        if href:
            full_url = (
                href if href.startswith("http")
                else f"{BASE_URL}/Lehrpersonen/{href.lstrip('/')}"
            )
            _href_cache[cache_key] = full_url
            _cache_save()
            try:
                # Prefer a real click (keeps DevExpress client state consistent)…
                page.locator(f"a[href='{href}']").first.click()
                page.wait_for_load_state("networkidle", timeout=15_000)
            except Exception:
                # …but fall back to direct navigation when the click fails.
                page.goto(full_url, wait_until="domcontentloaded")
                try:
                    page.wait_for_load_state("networkidle", timeout=15_000)
                except Exception:
                    pass
            _log(f" [nav {cache_type}] url après nav: {page.url[:80]}")
            return page

        if not _next_page(page, current_pg):
            break
        current_pg += 1

    _log(f"WARN {class_name}: classe introuvable")
    return None
|
|
|
|
|
|
def _go_to_absence_page(page: Page, class_name: str) -> "Page | None":
    """Open the class's absence page (ViewAbsenzenErweitert or ViewLernende).

    The 'abs' cache entry is always dropped before navigating: Escada GUIDs
    are session-scoped, so a GUID cached from a previous session triggers a
    double navigation that corrupts the ASP.NET context.
    """
    stale_key = f"{class_name}:abs"
    if _href_cache.pop(stale_key, None) is not None:
        _cache_save()
    target = _go_to_class_page(page, class_name, cache_type="abs")
    if target is not None:
        _log(f" [abs nav] url: {page.url[:80]}")
    return target
|
|
|
|
|
|
def _has_bn_button(page: Page) -> bool:
    """Return True when the BN button (French or German label) is in the ribbon."""
    selector = (
        "a.dxr-item.dxr-buttonItem:has-text('Bulletins de notes'),"
        "a.dxr-item.dxr-buttonItem:has-text('Zeugnisse')"
    )
    if page.locator(selector).count():
        return True
    # No BN button found: log the buttons that ARE present (diagnosis aid).
    try:
        buttons = page.locator("a.dxr-item.dxr-buttonItem").all()
        if buttons:
            _log(f" [BN] boutons sur ViewLernende: {[b.inner_text() for b in buttons[:8]]}")
    except Exception:
        pass
    return False
|
|
|
|
|
|
def _go_to_students_page(page: Page, class_name: str) -> "Page | None":
    """Open ViewLernende (student list) by navigating DIRECTLY from ViewKlassen.

    Strict rules:
      - Always start over from CLASSES_URL (never from the absences page).
      - Only return the page when the 'Bulletins de notes' button is visible.
      - No caching: Escada GUIDs are context-dependent (the 'Classes→Students'
        path yields a valid GUID; 'Classes→Absences→Students' yields a
        different GUID with an empty list and no BN button).
    """
    # Purge any cached 'lernende' entry (GUIDs are not reliable across sessions).
    cache_key = f"{class_name}:lernende"
    if cache_key in _href_cache:
        del _href_cache[cache_key]
        _cache_save()

    # ── Navigate from the class list ──────────────────────────────────────────
    page.goto(CLASSES_URL)
    _ensure_logged_in(page)
    try:
        page.wait_for_selector(
            "a[href*='ViewAbsenzenErweitert']", state="attached", timeout=20_000
        )
        page.wait_for_timeout(500)
    except Exception:
        _log(f"WARN {class_name}: grille ViewKlassen non chargée")
        return None
    # Force the DevExpress grid back to page 1 (it restores its last state).
    try:
        p1 = page.locator("a.dxp-num:has-text('1')").first
        if p1.count():
            p1.click()
            page.wait_for_load_state("networkidle", timeout=10_000)
            page.wait_for_timeout(300)
    except Exception:
        pass

    current_pg = 1
    while True:
        _log(f" [lrn p={current_pg}] '{class_name}'")

        # ── Attempt A: href*ViewLernende link inside the class's row ──────────
        # Looks in the direct TDs AND inside DevExpress sub-tables.
        lrn_href = page.evaluate("""([className]) => {
            for (const tr of document.querySelectorAll('tr')) {
                const tds = Array.from(tr.querySelectorAll(':scope > td'));
                if (!tds.some(td => (td.innerText || td.textContent || '').trim() === className))
                    continue;
                for (const td of tds) {
                    for (const a of td.querySelectorAll('a[href]')) {
                        if (a.href.includes('ViewLernende')) return a.getAttribute('href');
                    }
                }
            }
            return null;
        }""", [class_name])

        if lrn_href:
            _log(f" [lrn] lien trouvé : {lrn_href[:70]}")
            try:
                page.locator(f"a[href='{lrn_href}']").first.click()
                page.wait_for_load_state("networkidle", timeout=15_000)
            except Exception:
                # Click failed — navigate straight to the (absolutized) href.
                full = (lrn_href if lrn_href.startswith("http")
                        else f"{BASE_URL}/Lehrpersonen/{lrn_href.lstrip('/')}")
                page.goto(full, wait_until="domcontentloaded")
                try:
                    page.wait_for_load_state("networkidle", timeout=15_000)
                except Exception:
                    pass
            if "ViewLernende" in page.url and _has_bn_button(page):
                _log(f" [lrn] OK bouton BN présent")
                return page
            _log(f" [lrn] WARN: ViewLernende atteint mais bouton BN absent — URL: {page.url[:80]}")
            # Go back to ViewKlassen to try approach B.
            page.goto(CLASSES_URL)
            try:
                page.wait_for_selector(
                    "a[href*='ViewAbsenzenErweitert']", state="attached", timeout=15_000
                )
                page.wait_for_timeout(300)
            except Exception:
                pass
            try:
                p1 = page.locator("a.dxp-num:has-text('1')").first
                if p1.count():
                    p1.click()
                    page.wait_for_load_state("networkidle", timeout=10_000)
                    page.wait_for_timeout(300)
            except Exception:
                pass

        # ── Attempt B: click the class name inside its row ────────────────────
        # The class name may be a DevExpress link (onclick) without a direct href.
        _log(f" [lrn] tentative B: clic sur '{class_name}' dans la grille")
        loc = page.locator(
            "tr:has(a[href*='ViewAbsenzenErweitert']) td"
        ).filter(has_text=class_name).first
        if loc.count():
            try:
                loc.click()
                page.wait_for_load_state("networkidle", timeout=15_000)
                if "ViewLernende" in page.url and _has_bn_button(page):
                    _log(f" [lrn] OK via clic nom classe")
                    return page
                _log(f" [lrn] clic nom classe -> {page.url[:80]} (bouton BN: {_has_bn_button(page)})")
            except Exception as e:
                _log(f" [lrn] clic nom ERR: {e}")

        if not _next_page(page, current_pg):
            break
        current_pg += 1

    _log(f"WARN {class_name}: ViewLernende avec bouton BN introuvable")
    return None
|
|
|
|
|
|
# ID DevExpress de la grille ViewLernende (stable sur Escada EPTM)
|
|
_GRID_ID = "ContentPlaceHolder_site_GridLernende"
|
|
|
|
|
|
def _parse_fiche_text(raw: str) -> dict:
|
|
"""Parse le texte brut d'une ligne de détail Escada en dict fiche."""
|
|
import re
|
|
|
|
# Fusionner les colonnes (séparées par |||) en un seul bloc de lignes
|
|
text = raw.replace('|||', '\n')
|
|
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
|
|
|
fiche: dict = {}
|
|
section = ''
|
|
reEmail = re.compile(r'^[^@\s]+@[^@\s]+\.[^@\s]+$')
|
|
reTel = re.compile(r'^\+?[\d\s\/\-\.]{7,}$')
|
|
reDate = re.compile(r'\d{2}\.\d{2}\.\d{4}')
|
|
reCp = re.compile(r'^(\d{4})\s+(.+)$')
|
|
reTelPfx = re.compile(r'^(Mobile|Tel|Tél)[^\d+]*', re.I)
|
|
|
|
for line in lines:
|
|
if re.match(r'^El[eè]ve\s*:', line, re.I): section = 'eleve'; continue
|
|
if re.match(r'^Entreprise\s*:', line, re.I): section = 'entreprise'; continue
|
|
if re.match(r'^Formateur\s*:', line, re.I): section = 'formateur'; continue
|
|
if re.match(r'^Remarques?\s*:', line, re.I): section = ''; continue
|
|
|
|
if section == 'eleve':
|
|
if 'nom_eleve' not in fiche \
|
|
and not reEmail.match(line) and not reTel.match(line) \
|
|
and not reDate.search(line) and not reCp.match(line) \
|
|
and not re.match(r'^(Mobile|Tel|Tél|Majeur)', line, re.I):
|
|
fiche['nom_eleve'] = line; continue
|
|
if 'adresse' not in fiche \
|
|
and not reEmail.match(line) and not reTel.match(line) \
|
|
and not reDate.search(line) and not reCp.match(line) \
|
|
and not re.match(r'^(Mobile|Tel|Tél|Majeur)', line, re.I):
|
|
fiche['adresse'] = line; continue
|
|
m = reCp.match(line)
|
|
if m and 'code_postal' not in fiche:
|
|
fiche['code_postal'] = m.group(1); fiche['localite'] = m.group(2); continue
|
|
if re.match(r'^(Mobile|Tel|Tél)', line, re.I) and 'telephone' not in fiche:
|
|
fiche['telephone'] = reTelPfx.sub('', line).strip(); continue
|
|
if reEmail.match(line) and 'email' not in fiche:
|
|
fiche['email'] = line; continue
|
|
dm = reDate.search(line)
|
|
if dm and 'date_naissance' not in fiche:
|
|
fiche['date_naissance'] = dm.group(0)
|
|
if re.search(r'Majeur', line, re.I):
|
|
fiche['majeur'] = bool(re.search(r'\boui\b', line, re.I))
|
|
|
|
elif section == 'entreprise':
|
|
if 'entreprise_nom' not in fiche \
|
|
and not reEmail.match(line) and not reTel.match(line) \
|
|
and not reCp.match(line) \
|
|
and not re.match(r'^(Mobile|Tel|Tél)', line, re.I):
|
|
fiche['entreprise_nom'] = line; continue
|
|
if 'entreprise_adresse' not in fiche \
|
|
and not reEmail.match(line) and not reTel.match(line) \
|
|
and not reCp.match(line) \
|
|
and not re.match(r'^(Mobile|Tel|Tél)', line, re.I):
|
|
fiche['entreprise_adresse'] = line; continue
|
|
m = reCp.match(line)
|
|
if m and 'entreprise_code_postal' not in fiche:
|
|
fiche['entreprise_code_postal'] = m.group(1)
|
|
fiche['entreprise_localite'] = m.group(2); continue
|
|
if re.match(r'^(Mobile|Tel|Tél)', line, re.I) and 'entreprise_telephone' not in fiche:
|
|
fiche['entreprise_telephone'] = reTelPfx.sub('', line).strip(); continue
|
|
if reEmail.match(line) and 'entreprise_email' not in fiche:
|
|
fiche['entreprise_email'] = line; continue
|
|
|
|
elif section == 'formateur':
|
|
if 'formateur_nom' not in fiche and not reEmail.match(line):
|
|
fiche['formateur_nom'] = line; continue
|
|
if reEmail.match(line) and 'formateur_email' not in fiche:
|
|
fiche['formateur_email'] = line; continue
|
|
|
|
return fiche
|
|
|
|
|
|
def _scrape_student_details(page: Page, class_name: str) -> list[dict]:
    """Scrape student record sheets from ViewLernende (DevExpress ASPxGridView).

    Known structure of the Escada DOM:
      - Data rows:      tr#<GRID_ID>_DXDataRow{N}
      - Expand button:  img.dxGridView_gvDetailCollapsedButton_MetropolisBlue
                        with onclick="ASPx.GVShowDetailRow(...,N,...)"
      - Detail cell:    td#<GRID_ID>_tcdxdt{N} (colspan=15, AJAX-loaded on click)
                        └ inner table → tr → td[0] = student | td[2] = company + trainer
    Returns one dict per successfully parsed student (see _parse_fiche_text).
    """
    _log(f" [fiches] scraping {class_name}…")

    try:
        page.wait_for_load_state("networkidle", timeout=15_000)
    except Exception:
        pass

    gid = _GRID_ID

    # Count the data rows via their DevExpress IDs.
    n = page.evaluate(
        "(gid) => document.querySelectorAll(`[id^='${gid}_DXDataRow']`).length",
        gid
    )
    _log(f" [fiches] {n} élève(s) trouvé(s)")
    if n == 0:
        return []

    # Expand + read one row at a time (Escada cannot handle concurrent AJAX).
    fiches: list[dict] = []
    for i in range(n):
        # Click row i's expand button.
        clicked = page.evaluate("""([gid, i]) => {
            const row = document.getElementById(`${gid}_DXDataRow${i}`);
            if (!row) return false;
            const img = row.querySelector(
                'img.dxGridView_gvDetailCollapsedButton_MetropolisBlue'
            );
            if (!img) return false;
            img.click();
            return true;
        }""", [gid, i])

        if not clicked:
            _log(f" [fiches] {i}: WARNING bouton expand introuvable")
            continue

        # Wait for this row's detail cell to be AJAX-loaded (max 15 s, 1 s poll).
        ready = False
        for _ in range(15):
            page.wait_for_timeout(1_000)
            ready = page.evaluate("""([gid, i]) => {
                const cell = document.getElementById(`${gid}_tcdxdt${i}`);
                if (!cell) return false;
                return (cell.innerText || cell.textContent || '').trim().length >= 10;
            }""", [gid, i])
            if ready:
                break

        if not ready:
            _log(f" [fiches] {i}: WARNING cellule non chargée après 15s")
            continue

        # Read the detail cell; columns are joined with a '||||' separator so
        # _parse_fiche_text can split them back into lines.
        raw = page.evaluate("""([gid, i]) => {
            const cell = document.getElementById(`${gid}_tcdxdt${i}`);
            if (!cell) return null;
            const inner = cell.querySelector('table tr');
            if (!inner) {
                return (cell.innerText || '').trim() || null;
            }
            const tds = inner.querySelectorAll(':scope > td');
            // td[0] = Élève, td[2] = Entreprise + Formateur
            const eleve = tds[0] ? (tds[0].innerText || '').trim() : '';
            const ent = tds[2] ? (tds[2].innerText || '').trim() : '';
            if (!eleve && !ent) return null;
            return eleve + (ent ? ('\\n||||\\n' + ent) : '');
        }""", [gid, i])

        if raw:
            fiche = _parse_fiche_text(raw)
            if fiche.get('nom_eleve') or fiche.get('entreprise_nom'):
                fiches.append(fiche)
                _log(f" [fiches] {i}: {fiche.get('nom_eleve', '?')}")
            else:
                _log(f" [fiches] {i}: WARNING données vides — raw[:80]={raw[:80]!r}")
        else:
            _log(f" [fiches] {i}: WARNING cellule vide")

    _log(f" [fiches] {len(fiches)} fiche(s) extraite(s)")
    return fiches
|
|
|
|
|
|
def _download_pdf(page: Page, class_name: str) -> Path | None:
    """Click 'Contrôle des absences (apprenants)' and fetch the resulting PDF.

    The click opens the PDF in a new tab. We grab its URL, close the tab
    immediately (avoids Chrome's PDF-viewer cache), then re-download it via
    context.request.get() — a plain HTTP GET with the session cookies,
    bypassing the browser's PDF viewer entirely.
    Returns the destination path on success, None on failure.
    """
    PDFS_DIR.mkdir(parents=True, exist_ok=True)
    dest = PDFS_DIR / f"esacada_{class_name.replace(' ', '_')}.pdf"

    _log(f" [abs] page url avant clic: {page.url[:80]}")
    # Wait for the DevExpress ribbon to render.
    try:
        page.wait_for_selector("a.dxr-item.dxr-buttonItem", timeout=15_000)
    except Exception:
        pass
    # Target the DevExpress ribbon <a>, not the inner <span> (no JS handler there).
    btn = page.locator("a.dxr-item.dxr-buttonItem:has-text('Contrôle des absences (apprenants)')").first
    if not btn.count():
        # German fallback (in case the server locale is DE).
        btn = page.locator("a.dxr-item.dxr-buttonItem:has-text('Absenzenkontrolle (Lernende)')").first
    if not btn.count():
        try:
            _all = page.locator("a.dxr-item.dxr-buttonItem").all()
            _texts = [b.inner_text() for b in _all]
            _log(f" [abs] boutons disponibles: {_texts}")
        except Exception:
            pass
        _log(f"ERR {class_name}: bouton introuvable")
        return None

    # Strategy 0: extract the button's href and do a direct GET.
    # The <a target="Blank"> button uses a named window that can be blocked
    # after a previous download (Notes). The href holds the full report URL —
    # use it directly, without clicking.
    try:
        href_attr = page.evaluate("el => el.getAttribute('href')", btn.element_handle())
    except Exception:
        href_attr = None

    if href_attr:
        full_pdf_url = (
            href_attr if href_attr.startswith("http")
            else f"{BASE_URL}/{href_attr.lstrip('/')}"
        )
        _log(f" [abs] GET direct: {full_pdf_url[:80]}")
        try:
            resp = page.context.request.get(full_pdf_url, timeout=30_000)
            if resp.ok:
                body = resp.body()
                # Anything under ~1 KiB is almost certainly an error page.
                if len(body) > 1_000:
                    dest.write_bytes(body)
                    _log(f"OK {class_name} [href size={len(body)}]")
                    return dest
            _log(f" [abs] GET status={resp.status} len={len(resp.body())}")
        except Exception as e:
            _log(f" [abs] GET err: {e}")

    # Snapshot the existing tabs so freshly-opened ones can be identified.
    pages_before = {id(p) for p in page.context.pages}

    try:
        # Strategy 1: direct download (Content-Disposition: attachment)
        try:
            with page.expect_download(timeout=10_000) as dl_info:
                btn.click()
            dl_info.value.save_as(dest)
            _log(f"OK {class_name} [direct size={dest.stat().st_size}]")
            return dest
        except PWTimeout:
            pass

        # Strategy 2: the PDF opens in a new tab
        page.wait_for_timeout(2_000)
        new_tabs = [p for p in page.context.pages
                    if p is not page and id(p) not in pages_before]

        pdf_url: str | None = None
        for tab in new_tabs:
            try:
                tab.wait_for_load_state("domcontentloaded", timeout=10_000)
            except Exception:
                pass
            url = tab.url
            if url.startswith("http") and ("Reports" in url or ".pdf" in url.lower()):
                pdf_url = url
            elif "chrome-extension" in url and "http" in url:
                # Chrome's PDF viewer wraps the real URL inside an extension URL.
                m = re.search(r"(https?://\S+)", url)
                if m:
                    pdf_url = m.group(1)
            # Close the tab right away to avoid the PDF-viewer cache.
            try:
                tab.close()
            except Exception:
                pass

        if pdf_url:
            resp = page.context.request.get(pdf_url)
            if resp.ok:
                body = resp.body()
                if len(body) > 1_000:
                    dest.write_bytes(body)
                    _log(f"OK {class_name} [request size={len(body)}]")
                    return dest
            _log(f"ERR {class_name}: GET HTTP {resp.status}")
            return None

        _log(f"ERR {class_name}: aucun onglet PDF trouvé")
        return None

    except Exception as e:
        _log(f"ERR {class_name}: {e}")
        return None
|
|
|
|
|
|
def _download_bn_pdf(page: Page, class_name: str) -> Path | None:
    """On the student-list page, click 'Bulletins de notes' → trigger the
    report-card print dialog and download the class's BN PDF.

    Returns the destination path on success, None on any failure (every
    failure path is logged with an "ERR BN" prefix).
    """
    PDFS_DIR.mkdir(parents=True, exist_ok=True)
    # Spaces in class names are not filesystem-friendly; normalise to '_'.
    dest = PDFS_DIR / f"bn_{class_name.replace(' ', '_')}.pdf"

    # Wait until the DevExpress ribbon has rendered (best-effort).
    try:
        page.wait_for_selector("a.dxr-item.dxr-buttonItem", timeout=15_000)
    except Exception:
        pass
    # DevExpress ribbon button — no inline onclick attribute, so it is
    # targeted by CSS class + visible text (<A class="dxr-item dxr-buttonItem">).
    # French label first, German ('Zeugnisse') as fallback.
    btn_bn = page.locator("a.dxr-item.dxr-buttonItem:has-text('Bulletins de notes')").first
    if not btn_bn.count():
        btn_bn = page.locator("a.dxr-item.dxr-buttonItem:has-text('Zeugnisse')").first
    if not btn_bn.count():
        # Diagnostic aid: dump the labels of the ribbon buttons we did find.
        try:
            _all = page.locator("a.dxr-item.dxr-buttonItem").all()
            _log(f" [BN] boutons disponibles: {[b.inner_text() for b in _all]}")
        except Exception:
            pass
        _log(f"ERR BN {class_name}: bouton 'Bulletins de notes' introuvable")
        return None

    btn_bn.click()

    # The DevExpress popup loads DlgZeugnisse.aspx inside an iframe;
    # poll page.frames (up to ~20s) until that iframe shows up.
    dlg_frame = None
    for _ in range(20):
        for frame in page.frames:
            if "DlgZeugnisse" in frame.url:
                dlg_frame = frame
                break
        if dlg_frame:
            break
        page.wait_for_timeout(1_000)

    if dlg_frame is None:
        _log(f"ERR BN {class_name}: iframe DlgZeugnisse introuvable")
        return None

    # Let the dialog finish its own requests (best-effort).
    try:
        dlg_frame.wait_for_load_state("networkidle", timeout=20_000)
    except Exception:
        pass

    # The DevExpress button's wrapper div (the inner input type=submit is hidden).
    try:
        dlg_frame.wait_for_selector(
            "#ContentPlaceHolderSite_Button_Zeugnisdruck",
            state="visible",
            timeout=15_000,
        )
    except PWTimeout:
        _log(f"ERR BN {class_name}: bouton Zeugnisdruck non visible dans DlgZeugnisse")
        return None

    # Intercept window.open inside the iframe to capture the report URL
    # before the popup opens — same strategy as the abs/Notes/MATU flows.
    try:
        dlg_frame.evaluate("""
            window.__bnOpenedUrl = null;
            const _orig = window.open.bind(window);
            window.open = function(url, ...rest) {
                window.__bnOpenedUrl = (url || '');
                return _orig(url, ...rest);
            };
        """)
    except Exception as e:
        _log(f" [BN] intercept window.open err: {e}")

    # Also listen for downloads on the main page (in case the PDF arrives
    # via the parent frame rather than via window.open).
    _dl_main = [False]  # single-element list: mutable flag shared with closure

    def _on_main_dl(dl):
        try:
            dl.save_as(str(dest))
            _dl_main[0] = True
            _log(f" [BN] download main page capturé: {dl.suggested_filename}")
        except Exception as ex:
            _log(f" [BN] main dl err: {ex}")

    page.on("download", _on_main_dl)

    try:
        dlg_frame.click("#ContentPlaceHolderSite_Button_Zeugnisdruck")
    except Exception as e:
        page.remove_listener("download", _on_main_dl)
        _log(f"ERR BN {class_name}: clic Zeugnisdruck: {e}")
        return None

    # Poll up to 120s for any of: window.open URL, main-page download, new page.
    pages_before = {id(p) for p in page.context.pages}
    bn_report_url: str | None = None

    for _i in range(120):
        page.wait_for_timeout(1_000)

        if _dl_main[0]:
            break

        # Did the patched window.open record a URL?
        try:
            opened = dlg_frame.evaluate("window.__bnOpenedUrl")
        except Exception:
            opened = None
        if opened and opened.startswith("/"):
            # Relative URL → resolve against the site root.
            bn_report_url = f"{BASE_URL}/{opened.lstrip('/')}"
            _log(f" [BN] window.open URL: {bn_report_url[:80]}")
            break
        if opened and opened.startswith("http"):
            bn_report_url = opened
            _log(f" [BN] window.open URL: {bn_report_url[:80]}")
            break

        # Any newly created pages/tabs?
        new_pages = [p for p in page.context.pages if id(p) not in pages_before]
        for np in new_pages:
            pages_before.add(id(np))  # never re-examine the same page
            try:
                np.wait_for_load_state("domcontentloaded", timeout=5_000)
            except Exception:
                pass
            np_url = np.url
            _log(f" [BN] nouvelle page: {np_url[:80]}")
            if np_url.startswith("http"):
                bn_report_url = np_url
                try:
                    np.close()
                except Exception:
                    pass
                break

        if bn_report_url:
            break

        # Periodic heartbeat so long waits are visible in the log.
        if _i in (0, 4, 9, 29, 59):
            _log(f" [BN] +{_i+1}s attente…")

    page.remove_listener("download", _on_main_dl)

    # Case 1: download captured on the main page.
    # Files below ~1 KB are treated as error pages, not real PDFs.
    if _dl_main[0] and dest.exists() and dest.stat().st_size > 1_000:
        _log(f"OK BN {class_name} [main download size={dest.stat().st_size}]")
        return dest

    # Case 2: report URL recovered → fetch it directly over the same session.
    if bn_report_url:
        try:
            resp = page.context.request.get(bn_report_url, timeout=60_000)
            if resp.ok:
                body = resp.body()
                if len(body) > 1_000:
                    dest.write_bytes(body)
                    _log(f"OK BN {class_name} [href size={len(body)}]")
                    return dest
            _log(f"ERR BN {class_name}: GET HTTP {resp.status} len={len(resp.body())}")
        except Exception as e:
            _log(f"ERR BN {class_name}: GET err: {e}")
        return None

    _log(f"ERR BN {class_name}: ni download ni URL après 120s")
    return None
|
|
|
|
|
|
def _download_ribbon_pdf(page: Page, btn_locator, dest: Path, label: str) -> Path | None:
    """Download a PDF triggered by a DevExpress ribbon button.

    Strategy 0: direct href on the <a> (works around target=Blank issues).
    Strategy 1: click, then poll for 90s — captures both a direct
    Content-Disposition: attachment download AND PDFs opened in new tabs.

    ``label`` is only used as a prefix in log messages; ``dest`` is where the
    PDF is written. Returns ``dest`` on success, None otherwise.
    """
    # Strategy 0: direct href (independent of server-side session state).
    try:
        href_attr = page.evaluate("el => el.getAttribute('href')", btn_locator.element_handle())
    except Exception:
        href_attr = None

    if href_attr and href_attr.startswith("/"):
        full_url = f"{BASE_URL}/{href_attr.lstrip('/')}"
        _log(f" [{label}] href direct: {full_url[:80]}")
        try:
            resp = page.context.request.get(full_url, timeout=30_000)
            if resp.ok:
                body = resp.body()
                # Bodies under ~1 KB are assumed to be error pages, not PDFs.
                if len(body) > 1_000:
                    dest.write_bytes(body)
                    _log(f"OK {label} [href size={len(body)}]")
                    return dest
            _log(f" [{label}] href GET status={resp.status if resp else '?'}")
        except Exception as e:
            _log(f" [{label}] href GET err: {e}")

    # Make sure the button is actually clickable (best-effort).
    try:
        btn_locator.scroll_into_view_if_needed(timeout=3_000)
        page.wait_for_timeout(300)
    except Exception:
        pass

    # Direct-download listener (Content-Disposition: attachment) on the current page.
    _dl_ok = [False]  # mutable flag shared with the closure below

    def _on_download(dl):
        try:
            dl.save_as(str(dest))
            _dl_ok[0] = True
            _log(f" [{label}] download direct capturé")
        except Exception as e:
            _log(f" [{label}] download save err: {e}")

    page.on("download", _on_download)

    # Snapshot existing pages so new tabs can be detected by identity.
    pages_before = {id(p) for p in page.context.pages}

    try:
        btn_locator.click()
    except Exception as e:
        page.remove_listener("download", _on_download)
        _log(f"ERR {label}: click failed: {e}")
        return None

    # Poll for 90s (45 × 2s) — check direct download AND new tabs.
    for _i in range(45):
        page.wait_for_timeout(2_000)

        # Direct download captured by the listener?
        if _dl_ok[0] and dest.exists() and dest.stat().st_size > 1_000:
            page.remove_listener("download", _on_download)
            _log(f"OK {label} [direct size={dest.stat().st_size}]")
            return dest

        # New tab opened via window.open? Grab its URL, close it, re-fetch
        # the URL through the context's request API to get the raw bytes.
        new_tabs = [p for p in page.context.pages if id(p) not in pages_before]
        for tab in new_tabs:
            pages_before.add(id(tab))  # never re-examine the same tab
            try:
                tab.wait_for_load_state("domcontentloaded", timeout=10_000)
            except Exception:
                pass
            url = tab.url
            try:
                tab.close()
            except Exception:
                pass
            if url.startswith("http"):
                try:
                    resp = page.context.request.get(url, timeout=30_000)
                    if resp.ok and len(resp.body()) > 1_000:
                        page.remove_listener("download", _on_download)
                        dest.write_bytes(resp.body())
                        _log(f"OK {label} [new_page size={len(resp.body())}]")
                        return dest
                except Exception:
                    pass

        # Diagnostics: page state 2s and 10s after the click.
        if _i in (0, 4):
            try:
                cur = page.url
                extra_frames = [f.url for f in page.frames
                                if f.url and "about:blank" not in f.url and f.url != cur]
                _log(f" [{label}] +{(_i+1)*2}s url={cur[:70]}"
                     + (f" frames={extra_frames[:2]}" if extra_frames else ""))
            except Exception:
                pass

    page.remove_listener("download", _on_download)
    _log(f"ERR {label}: aucun PDF récupéré après 90s")
    return None
|
|
|
|
|
|
def _download_notes_pdf(page: Page, class_name: str) -> Path | None:
    """On the student-list page, click 'Moyennes des notes d'examen (apprenant)'
    and download the per-student exam-grade PDF for the class.
    """
    PDFS_DIR.mkdir(parents=True, exist_ok=True)
    target = PDFS_DIR / f"notes_{class_name.replace(' ', '_')}.pdf"

    # Give the DevExpress ribbon a chance to render before probing it.
    try:
        page.wait_for_selector("a.dxr-item.dxr-buttonItem", timeout=15_000)
    except Exception:
        pass

    # Try the French label first, then the German fallback.
    btn = None
    for label_text in ("Moyennes des notes", "Prüfungsnotenliste (Lernende)"):
        candidate = page.locator(
            f"a.dxr-item.dxr-buttonItem:has-text('{label_text}')"
        ).first
        if candidate.count():
            btn = candidate
            break

    if btn is None:
        # Dump the ribbon buttons we did find, to help diagnose the miss.
        try:
            _all = page.locator("a.dxr-item.dxr-buttonItem").all()
            _log(f" [NOTES] boutons disponibles: {[b.inner_text() for b in _all]}")
        except Exception:
            pass
        _log(f"ERR NOTES {class_name}: bouton 'Moyennes des notes' introuvable")
        return None

    return _download_ribbon_pdf(page, btn, target, f"NOTES {class_name}")
|
|
|
|
|
|
def _download_matu_pdf(page: Page, class_name: str) -> Path | None:
    """On an MP class's student-list page, download the MP grade control list PDF."""
    PDFS_DIR.mkdir(parents=True, exist_ok=True)
    target = PDFS_DIR / f"matu_{class_name.replace(' ', '_')}.pdf"

    # Give the DevExpress ribbon a chance to render before probing it.
    try:
        page.wait_for_selector("a.dxr-item.dxr-buttonItem", timeout=15_000)
    except Exception:
        pass

    # Try the French label first, then the German fallback.
    btn = None
    for label_text in ("notes MP du bulletin", "BM-Zeugnisnoten-Kontrollliste"):
        candidate = page.locator(
            f"a.dxr-item.dxr-buttonItem:has-text('{label_text}')"
        ).first
        if candidate.count():
            btn = candidate
            break

    if btn is None:
        # Dump the ribbon buttons we did find, to help diagnose the miss.
        try:
            _all = page.locator("a.dxr-item.dxr-buttonItem").all()
            _log(f" [MATU] boutons disponibles: {[b.inner_text() for b in _all]}")
        except Exception:
            pass
        _log(f"ERR MATU {class_name}: bouton 'notes MP du bulletin' introuvable")
        return None

    return _download_ribbon_pdf(page, btn, target, f"MATU {class_name}")
|
|
|
|
|
|
# ── Commandes principales ─────────────────────────────────────────────────────
|
|
|
|
def cmd_list_classes() -> None:
    """Scrape the class list and emit it as a single CLASSES_JSON log line."""
    playwright, context, page = _launch_context()
    try:
        payload = json.dumps(_all_classes(page), ensure_ascii=False)
        _log(f"CLASSES_JSON:{payload}")
    finally:
        context.close()
        playwright.stop()
|
|
|
|
|
|
def cmd_sync(selected: list[str]) -> None:
    """Download the absence PDFs for the given classes (all classes if empty)."""
    playwright, context, page = _launch_context()
    try:
        _cache_load()
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        if not selected:
            _log("INFO Récupération de toutes les classes...")
            selected = _scrape_classes(page)

        total = len(selected)
        _log(f"TOTAL {total}")
        saved: list[str] = []

        for idx, class_name in enumerate(selected, 1):
            _log(f"PROGRESS {idx}/{total} {class_name}")
            abs_page = _go_to_absence_page(page, class_name)
            if abs_page is None:
                continue
            pdf_path = _download_pdf(abs_page, class_name)
            if pdf_path:
                saved.append(str(pdf_path))
            # Close the per-class page unless we were handed the main page.
            if abs_page is not page:
                try:
                    abs_page.close()
                except Exception:
                    pass

        _log(f"DONE {json.dumps(saved, ensure_ascii=False)}")
    finally:
        context.close()
        playwright.stop()
|
|
|
|
|
|
def cmd_sync_bn(selected: list[str]) -> None:
    """Download the report-card (BN) PDFs for the given classes (all if empty)."""
    playwright, context, page = _launch_context()
    try:
        _cache_load()
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        if not selected:
            _log("INFO Récupération de toutes les classes...")
            selected = _scrape_classes(page)

        total = len(selected)
        _log(f"TOTAL {total}")
        saved: list[str] = []

        for idx, class_name in enumerate(selected, 1):
            _log(f"PROGRESS {idx}/{total} {class_name}")
            roster = _go_to_students_page(page, class_name)
            if roster is None:
                continue
            pdf_path = _download_bn_pdf(roster, class_name)
            if pdf_path:
                saved.append(str(pdf_path))
            # Close the per-class page unless we were handed the main page.
            if roster is not page:
                try:
                    roster.close()
                except Exception:
                    pass

        _log(f"BN_DONE {json.dumps(saved, ensure_ascii=False)}")
    finally:
        context.close()
        playwright.stop()
|
|
|
|
|
|
def cmd_sync_fiches(selected: list[str]) -> None:
    """Scrape the detailed student record sheets from ViewLernende."""
    playwright, context, page = _launch_context()
    try:
        _cache_load()
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        if not selected:
            _log("INFO Récupération de toutes les classes...")
            selected = _scrape_classes(page)

        total = len(selected)
        _log(f"TOTAL {total}")
        records: dict[str, list[dict]] = {}

        for idx, class_name in enumerate(selected, 1):
            _log(f"PROGRESS {idx}/{total} {class_name}")
            roster = _go_to_students_page(page, class_name)
            if roster is None:
                continue
            records[class_name] = _scrape_student_details(roster, class_name)
            # Close the per-class page unless we were handed the main page.
            if roster is not page:
                try:
                    roster.close()
                except Exception:
                    pass

        _log(f"FICHES_DONE {json.dumps(records, ensure_ascii=False)}")
    finally:
        context.close()
        playwright.stop()
|
|
|
|
|
|
def cmd_sync_notes(selected: list[str]) -> None:
    """Download the per-student grade-average PDFs for the given classes."""
    playwright, context, page = _launch_context()
    try:
        _cache_load()
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        if not selected:
            _log("INFO Récupération de toutes les classes...")
            selected = _scrape_classes(page)

        total = len(selected)
        _log(f"TOTAL {total}")
        saved: list[str] = []

        for idx, class_name in enumerate(selected, 1):
            _log(f"PROGRESS {idx}/{total} {class_name}")
            roster = _go_to_students_page(page, class_name)
            if roster is None:
                continue
            pdf_path = _download_notes_pdf(roster, class_name)
            if pdf_path:
                saved.append(str(pdf_path))
            # Close the per-class page unless we were handed the main page.
            if roster is not page:
                try:
                    roster.close()
                except Exception:
                    pass

        _log(f"NOTES_DONE {json.dumps(saved, ensure_ascii=False)}")
    finally:
        context.close()
        playwright.stop()
|
|
|
|
|
|
def cmd_sync_matu(selected: list[str]) -> None:
    """Download the MP grade control lists for the given classes (MP classes only)."""
    playwright, context, page = _launch_context()
    try:
        _cache_load()
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        if not selected:
            _log("INFO Récupération de toutes les classes MP...")
            every_class = _scrape_classes(page)
            # Keep only MP classes (names starting with "MP" + a digit).
            selected = [name for name in every_class if re.match(r"MP\d", name, re.I)]

        total = len(selected)
        _log(f"TOTAL {total}")
        saved: list[str] = []

        for idx, class_name in enumerate(selected, 1):
            _log(f"PROGRESS {idx}/{total} {class_name}")
            roster = _go_to_students_page(page, class_name)
            if roster is None:
                continue
            pdf_path = _download_matu_pdf(roster, class_name)
            if pdf_path:
                saved.append(str(pdf_path))
            # Close the per-class page unless we were handed the main page.
            if roster is not page:
                try:
                    roster.close()
                except Exception:
                    pass

        _log(f"MATU_DONE {json.dumps(saved, ensure_ascii=False)}")
    finally:
        context.close()
        playwright.stop()
|
|
|
|
|
|
# ── Debug ────────────────────────────────────────────────────────────────────
|
|
|
|
def cmd_debug_bn(class_name: str) -> None:
    """Open the student-list page and print every clickable ribbon element.

    Interactive debugging helper: pauses twice with input() so the operator
    can inspect the visible browser window between dumps.
    """
    pw, ctx, page = _launch_context()
    try:
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        students_page = _go_to_students_page(page, class_name)
        if students_page is None:
            _log(f"ERR: classe '{class_name}' introuvable")
            return

        students_page.wait_for_load_state("networkidle")

        # ── Step: click 'Bulletins de notes' → click 'Impression des bulletins'
        # via JS, then dump every visible button.
        btn_bn = students_page.locator("a.dxr-item.dxr-buttonItem:has-text('Bulletins de notes')").first
        if btn_bn.count():
            _log("\n=== Clic sur 'Bulletins de notes' ===")
            btn_bn.click()
            try:
                btn_imp = students_page.wait_for_selector(
                    "text='Impression des bulletins'", state="visible", timeout=12_000
                )
                _log("'Impression des bulletins' visible — clic via JS")
                # JS click bypasses Playwright's actionability checks.
                students_page.evaluate("(el) => el.click()", btn_imp)
                students_page.wait_for_timeout(6_000)
            except PWTimeout:
                _log("Popup 'Impression des bulletins' non visible après 12s")

        # Dump all visible interactive elements (non-zero bounding box,
        # attached to the layout via offsetParent).
        visible_btns = students_page.evaluate("""() => {
            const results = [];
            for (const el of document.querySelectorAll('a, button, input[type=submit], input[type=button]')) {
                const r = el.getBoundingClientRect();
                if (r.width > 0 && r.height > 0 && el.offsetParent !== null) {
                    results.push({
                        tag: el.tagName, id: el.id || '',
                        text: (el.innerText || el.value || '').trim().slice(0, 80),
                        title: el.title || '',
                        onclick: el.getAttribute('onclick') || '',
                    });
                }
            }
            return results;
        }""")
        _log(f"\n=== Boutons VISIBLES après clic 'Impression des bulletins' ({len(visible_btns)}) ===")
        for el in visible_btns:
            _log(f" [{el['tag']}#{el['id']}] text={el['text']!r:40s} title={el['title']!r:20s} onclick={el['onclick']!r}")
        input("\nInspecte le navigateur puis appuie sur Entrée pour continuer...")

        # 1. All DevExpress ribbon elements (class containing "dxr-") that
        # carry either text or a title attribute.
        ribbon_items = students_page.evaluate("""() => {
            const results = [];
            for (const el of document.querySelectorAll('[class*="dxr-"]')) {
                const text = (el.innerText || el.textContent || '').trim().slice(0, 80);
                if (!text && !el.getAttribute('title')) continue;
                results.push({
                    tag: el.tagName,
                    id: el.id || '',
                    cls: el.className || '',
                    text: text,
                    title: el.getAttribute('title') || '',
                    onclick: el.getAttribute('onclick') || '',
                });
            }
            return results;
        }""")

        _log(f"=== Boutons ribbon (dxr-) sur la page de '{class_name}' ===")
        for el in ribbon_items:
            _log(f" [{el['tag']}#{el['id']}] cls={el['cls']!r:35s} text={el['text']!r:35s} title={el['title']!r}")

        # 2. Leaf-ish elements whose text/title contains one of the keywords
        # 'zeugnis', 'bulletin', 'impression', 'note' (case-insensitive).
        keyword_items = students_page.evaluate("""() => {
            const kw = ['zeugnis', 'bulletin', 'impression', 'note'];
            const results = [];
            for (const el of document.querySelectorAll('*')) {
                const text = (el.innerText || el.textContent || '').trim().toLowerCase();
                const title = (el.getAttribute('title') || '').toLowerCase();
                if (kw.some(k => text.includes(k) || title.includes(k))) {
                    const direct = el.childElementCount === 0 || ['A','BUTTON','INPUT'].includes(el.tagName);
                    if (!direct) continue;
                    results.push({
                        tag: el.tagName,
                        id: el.id || '',
                        cls: el.className || '',
                        text: (el.innerText || el.textContent || '').trim().slice(0, 80),
                        title: el.getAttribute('title') || '',
                        onclick: el.getAttribute('onclick') || '',
                        href: el.getAttribute('href') || '',
                    });
                }
            }
            return results;
        }""")

        _log(f"\n=== Éléments contenant 'zeugnis/bulletin/note' ===")
        for el in keyword_items:
            _log(f" [{el['tag']}#{el['id']}] text={el['text']!r:40s} title={el['title']!r:30s} onclick={el['onclick']!r}")

        input("\nAppuie sur Entrée pour fermer...")
    finally:
        ctx.close()
        pw.stop()
|
|
|
|
|
|
# ── Synchro unifiée ───────────────────────────────────────────────────────────
|
|
|
|
def _year_of_class(cls: str) -> "int | None":
|
|
"""Return the year number embedded in a class name.
|
|
|
|
"AUTOMAT 1" → 1, "EM-AU 2" → 2, "MP1-TASV 1A" → 1, "MP1-TASV 2B" → 2
|
|
"""
|
|
# MP-style: trailing digit + letter (e.g. "MP1-TASV 1A")
|
|
m = re.search(r"\s(\d+)[A-Za-z]\s*$", cls)
|
|
if m:
|
|
return int(m.group(1))
|
|
# Regular: trailing digit (e.g. "AUTOMAT 1", "EM-AU 2")
|
|
m = re.search(r"\s(\d+)\s*$", cls)
|
|
if m:
|
|
return int(m.group(1))
|
|
return None
|
|
|
|
|
|
def cmd_sync_all(
    selected: list[str],
    skip_abs: bool = False,
    skip_bn: bool = False,
    skip_fiches: bool = False,
    skip_notes: bool = False,
    force_abs: bool = False,
) -> None:
    """For each selected class: absences → BN → Matu → Notes → record sheets.

    skip_abs : do not download the absence PDFs.
    skip_bn : do not download the BN PDFs nor the Matu grade lists.
    skip_fiches : do not scrape the detailed student record sheets.
    skip_notes : do not download the grade-average PDFs.
    force_abs : forwarded to run_imports.py (re-import existing absences).

    Output: one ALL_DONE <json> line with keys abs/bn/matu/notes/fiches/errors;
    the same payload is also written to data/sync_all_done.json.
    """
    pw, ctx, page = _launch_context()
    try:
        _cache_load()
        page.goto(CLASSES_URL)
        _ensure_logged_in(page)

        if not selected:
            _log("INFO Récupération de toutes les classes...")
            selected = _scrape_classes(page)

        _log(f"TOTAL {len(selected)}")

        # Accumulators for the final ALL_DONE payload.
        abs_downloaded: list[str] = []
        bn_downloaded: list[str] = []
        matu_downloaded: list[str] = []
        notes_downloaded: list[str] = []
        all_fiches: dict[str, list[dict]] = {}
        errors: list[str] = []

        for i, cls in enumerate(selected, 1):
            _log(f"PROGRESS {i}/{len(selected)} {cls}")

            # ── Students (Notes + BN + record sheets) FIRST ────────────────
            # Must precede the absences: visiting ViewAbsenzenErweitert
            # corrupts the server context and breaks the Notes button.
            if not skip_bn or not skip_fiches or not skip_notes:
                try:
                    sp = _go_to_students_page(page, cls)
                    if sp is None:
                        # One error entry per feature that was requested.
                        if not skip_bn:
                            errors.append(f"{cls}: page élèves introuvable (BN)")
                        if not skip_notes:
                            errors.append(f"{cls}: page élèves introuvable (Notes)")
                        if not skip_fiches:
                            errors.append(f"{cls}: page élèves introuvable (fiches)")
                    else:
                        if not skip_notes:
                            pdf_notes = _download_notes_pdf(sp, cls)
                            if pdf_notes:
                                notes_downloaded.append(str(pdf_notes))
                            else:
                                errors.append(f"{cls}: téléchargement Notes échoué")
                        if not skip_bn:
                            pdf_bn = _download_bn_pdf(sp, cls)
                            if pdf_bn:
                                bn_downloaded.append(str(pdf_bn))
                            else:
                                errors.append(f"{cls}: téléchargement BN échoué")
                        if not skip_fiches:
                            try:
                                fiches = _scrape_student_details(sp, cls)
                                all_fiches[cls] = fiches
                            except Exception as e:
                                _log(f"ERR {cls} [fiches]: {e}")
                                errors.append(f"{cls} [fiches]: {e}")
                except Exception as e:
                    _log(f"ERR {cls} [bn/notes/fiches]: {e}")
                    errors.append(f"{cls} [bn/notes/fiches]: {e}")

            # ── Absences AFTERWARDS (see ordering note above) ──────────────
            if not skip_abs:
                try:
                    abs_page = _go_to_absence_page(page, cls)
                    if abs_page is None:
                        errors.append(f"{cls}: page absences introuvable")
                    else:
                        pdf = _download_pdf(abs_page, cls)
                        if pdf:
                            abs_downloaded.append(str(pdf))
                        else:
                            errors.append(f"{cls}: téléchargement absences échoué")
                except Exception as e:
                    _log(f"ERR {cls} [abs]: {e}")
                    errors.append(f"{cls} [abs]: {e}")

        # ── Matu: MP classes matching the years present in the selection ──
        if not skip_bn:
            years_needed: set[int] = set()
            for c in selected:
                y = _year_of_class(c)
                if y is not None:
                    years_needed.add(y)

            if years_needed:
                # The full class list comes from the on-disk cache, not a
                # fresh scrape — stale cache means missed MP classes.
                all_known: list[str] = []
                if CLASSES_CACHE_FILE.exists():
                    try:
                        all_known = json.loads(CLASSES_CACHE_FILE.read_text(encoding="utf-8"))
                    except Exception:
                        pass

                mp_targets = [
                    c for c in all_known
                    if re.match(r"MP\d", c, re.I) and _year_of_class(c) in years_needed
                ]

                if mp_targets:
                    _log(f"MATU classes cibles: {mp_targets}")
                    for j, mp_cls in enumerate(mp_targets, 1):
                        _log(f"MATU {j}/{len(mp_targets)} {mp_cls}")
                        try:
                            sp_matu = _go_to_students_page(page, mp_cls)
                            if sp_matu is None:
                                _log(f"INFO MATU {mp_cls}: page eleves introuvable")
                            else:
                                pdf_matu = _download_matu_pdf(sp_matu, mp_cls)
                                if pdf_matu:
                                    matu_downloaded.append(str(pdf_matu))
                        except Exception as e:
                            _log(f"ERR {mp_cls} [matu]: {e}")
                else:
                    _log(f"INFO Aucune classe MP trouvee pour annees {sorted(years_needed)}")

        # ── Persist results + hand off to the import subprocess ────────────
        from datetime import datetime as _dt
        _all_done_payload = {'abs': abs_downloaded, 'bn': bn_downloaded, 'matu': matu_downloaded, 'notes': notes_downloaded, 'fiches': all_fiches, 'errors': errors}
        try:
            _adf = _root / 'data' / 'sync_all_done.json'
            _adf.parent.mkdir(parents=True, exist_ok=True)
            _adf.write_text(json.dumps({'timestamp': _dt.now().isoformat(), 'payload': _all_done_payload}, ensure_ascii=False), encoding='utf-8')
            _log('sync_all_done.json ecrit par subprocess')
            try:
                import subprocess as _sp
                # Detached best-effort launch of the import step.
                # NOTE(review): start_new_session is POSIX-only; on Windows this
                # raises and lands in the WARN branch below — confirm intended.
                _imp = _sp.Popen(
                    [sys.executable, str(_root / 'scripts' / 'run_imports.py'), str(_root / 'data'), 'escada', '1' if force_abs else '0'],
                    start_new_session=True, stdout=_sp.DEVNULL, stderr=_sp.DEVNULL,
                )
                _log(f'run_imports lance (pid={_imp.pid})')
            except Exception as _ie:
                _log(f'WARN run_imports non lance: {_ie}')
        except Exception as _e:
            _log(f'WARN sync_all_done.json non ecrit: {_e}')
        # ALL_DONE is always emitted, even if the file write failed above.
        _log(
            f"ALL_DONE {json.dumps(_all_done_payload, ensure_ascii=False)}"
        )
    finally:
        ctx.close()
        pw.stop()
|
|
|
|
|
|
# ── Point d'entrée ────────────────────────────────────────────────────────────
|
|
|
|
if __name__ == "__main__":
    import argparse

    # CLI entry point — one mutually-exclusive-by-priority action per run.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--list-classes", action="store_true",
                        help="Récupère la liste des classes et l'affiche en JSON")
    parser.add_argument("--sync-all", nargs="*", metavar="CLASSE",
                        help="Synchro unifiée : absences + BN + Matu pour chaque classe")
    parser.add_argument("--skip-abs", action="store_true",
                        help="Ne pas télécharger les PDFs d'absences (utilisé avec --sync-all)")
    parser.add_argument("--skip-bn", action="store_true",
                        help="Ne pas télécharger les PDFs BN + Matu (utilisé avec --sync-all)")
    parser.add_argument("--skip-fiches", action="store_true",
                        help="Ne pas scraper les fiches détaillées des élèves (utilisé avec --sync-all)")
    parser.add_argument("--skip-notes", action="store_true",
                        help="Ne pas télécharger les PDFs de moyennes de notes (utilisé avec --sync-all)")
    parser.add_argument("--sync", nargs="*", metavar="CLASSE",
                        help="Télécharge les PDFs d'absences uniquement")
    parser.add_argument("--sync-bn", nargs="*", metavar="CLASSE",
                        help="Télécharge les PDFs de bulletins de notes uniquement")
    parser.add_argument("--sync-fiches", nargs="*", metavar="CLASSE",
                        help="Scrape les fiches détaillées des élèves uniquement")
    parser.add_argument("--sync-matu", nargs="*", metavar="CLASSE",
                        help="Télécharge les listes de contrôle des notes MP uniquement")
    parser.add_argument("--sync-notes", nargs="*", metavar="CLASSE",
                        help="Télécharge les PDFs de moyennes de notes (apprenant) uniquement")
    parser.add_argument("--debug-bn", metavar="CLASSE",
                        help="Affiche les boutons du ribbon sur la page liste-élèves (debug)")
    parser.add_argument("--force-abs", action="store_true",
                        help="Reimporter les absences existantes (efface les EscadaPending)")
    opts = parser.parse_args()

    # `is not None` distinguishes "flag given with zero classes" (sync all)
    # from "flag not given at all" for the nargs="*" options.
    if opts.list_classes:
        cmd_list_classes()
    elif opts.sync_all is not None:
        cmd_sync_all(opts.sync_all, skip_abs=opts.skip_abs, skip_bn=opts.skip_bn,
                     skip_fiches=opts.skip_fiches, skip_notes=opts.skip_notes,
                     force_abs=opts.force_abs)
    elif opts.sync is not None:
        cmd_sync(opts.sync)
    elif opts.sync_bn is not None:
        cmd_sync_bn(opts.sync_bn)
    elif opts.sync_fiches is not None:
        cmd_sync_fiches(opts.sync_fiches)
    elif opts.sync_matu is not None:
        cmd_sync_matu(opts.sync_matu)
    elif opts.sync_notes is not None:
        cmd_sync_notes(opts.sync_notes)
    elif opts.debug_bn:
        cmd_debug_bn(opts.debug_bn)
    else:
        parser.print_help()
|