import représentant légal

2026-05-11 21:27:13 +02:00 · 2026-05-11 21:27:13 +02:00 · 9188e6ba1e
commit 9188e6ba1e
parent 7431339ce5
6 changed files with 494 additions and 11 deletions
--- a/TODO.md
+++ b/TODO.md
@ -8,7 +8,7 @@ en haut de la section concernée.
 - [ ] Ajouter sur le dashboard l'affichage des notes insuffisantes
 - [X] Afficher toutes les notes du BN
 - [ ] Mettre à jour les MD
- [ ] Ajouter l'indication des compensation des désavantages
+- [X] Ajouter l'indication des compensation des désavantages
 - [X] Ajouter le TAB notices aussi sur la vue classe 
 - [ ] Réussir à récupérer le fichier de session Escada d'un utilisateur pour l'utiliser sur le serveur, afin de récupérer la liste des classes auxquelles il a accès et de pouvoir uploader les notices sous son propre nom
 - [X] Filtrer que les classes EM pour les avis de sanction
--- a/eptm_dashboard/pages/fiche.py
+++ b/eptm_dashboard/pages/fiche.py
@ -480,6 +480,17 @@ class FicheState(AuthState):
    fiche_date_naissance: str = ""
    fiche_majeur: str = ""
    fiche_compensation: str = ""
    # Représentant légal (mineurs)
    fiche_resp_legal_nom:         str = ""
    fiche_resp_legal_adresse:     str = ""
    fiche_resp_legal_cp_localite: str = ""
    fiche_resp_legal_tel_p:       str = ""   # numéro brut
    fiche_resp_legal_tel_n:       str = ""   # numéro brut
    # URLs Google Maps construites depuis adresse+CP+localité
    fiche_map_url:            str = ""
    fiche_entreprise_map_url: str = ""
    fiche_resp_legal_map_url: str = ""
    fiche_entreprise_nom: str = ""
    fiche_entreprise_adresse: str = ""
    fiche_entreprise_cp_localite: str = ""
@ -1027,6 +1038,15 @@ class FicheState(AuthState):
                 else "Compensation des désavantages : non")
                if fiche.compensation_desavantages is not None else ""
            )
            self.fiche_resp_legal_nom     = fiche.resp_legal_nom or ""
            self.fiche_resp_legal_adresse = fiche.resp_legal_adresse or ""
            self.fiche_resp_legal_cp_localite = (
                f"{fiche.resp_legal_code_postal or ''} "
                f"{fiche.resp_legal_localite or ''}".strip()
            )
            self.fiche_resp_legal_tel_p = fiche.resp_legal_telephone_p or ""
            self.fiche_resp_legal_tel_n = fiche.resp_legal_telephone_n or ""
            self.fiche_entreprise_nom = fiche.entreprise_nom or ""
            self.fiche_entreprise_adresse = fiche.entreprise_adresse or ""
            self.fiche_entreprise_cp_localite = (
@ -1040,12 +1060,35 @@ class FicheState(AuthState):
            self.fiche_updated_at = (
                fiche.updated_at.strftime("%d.%m.%Y %H:%M") if fiche.updated_at else ""
            )
            # URLs Google Maps construites APRÈS l'assignation de tous les
            # champs (sinon on utiliserait les valeurs de l'apprenti précédent).
            # Pour l'entreprise on inclut le nom → Maps trouve la fiche
            # établissement si elle existe.
            from urllib.parse import quote_plus as _qp
            def _maps(*parts: str) -> str:
                """Build a Google Maps search URL from the non-blank parts.

                Falsy or whitespace-only parts are skipped; the remaining ones
                are stripped and joined with ", " before being URL-encoded.
                Returns "" when no usable part is left.
                """
                cleaned = []
                for part in parts:
                    if part and part.strip():
                        cleaned.append(part.strip())
                query = ", ".join(cleaned)
                if not query:
                    return ""
                return f"https://www.google.com/maps/search/?api=1&query={_qp(query)}"
            self.fiche_map_url            = _maps(self.fiche_adresse, self.fiche_cp_localite)
            self.fiche_entreprise_map_url = _maps(
                self.fiche_entreprise_nom,
                self.fiche_entreprise_adresse,
                self.fiche_entreprise_cp_localite,
            )
            self.fiche_resp_legal_map_url = _maps(
                self.fiche_resp_legal_adresse, self.fiche_resp_legal_cp_localite,
            )
        else:
            self.fiche_available = False
            for attr in [
                "fiche_adresse", "fiche_cp_localite", "fiche_telephone",
                "fiche_email_val", "fiche_date_naissance", "fiche_majeur",
                "fiche_compensation",
                "fiche_resp_legal_nom", "fiche_resp_legal_adresse",
                "fiche_resp_legal_cp_localite",
                "fiche_resp_legal_tel_p", "fiche_resp_legal_tel_n",
                "fiche_map_url", "fiche_entreprise_map_url",
                "fiche_resp_legal_map_url",
                "fiche_entreprise_nom", "fiche_entreprise_adresse",
                "fiche_entreprise_cp_localite", "fiche_entreprise_telephone",
                "fiche_entreprise_email", "fiche_formateur_nom",
@ -1335,6 +1378,62 @@ def _info_line(icon: str, value) -> rx.Component:
    )
 def _info_line_email(icon: str, value) -> rx.Component:
    """Build an info row whose text is a clickable ``mailto:`` link.

    Args:
        icon: Icon name handed to ``rx.icon``.
        value: E-mail address; used as the link label and, prefixed with
            ``mailto:``, as the link target.

    Returns:
        An ``rx.cond`` that yields the icon + link row only while
        ``value != ""``.
    """
    has_value = value != ""
    leading_icon = rx.icon(icon, size=14, color="#9e9e9e")
    mail_link = rx.link(
        value,
        href="mailto:" + value,
        size="2",
        color="var(--brand-accent)",
        text_decoration="none",
        _hover={"text_decoration": "underline"},
    )
    row = rx.hstack(leading_icon, mail_link, spacing="2", align="center")
    return rx.cond(has_value, row)
 def _info_line_tel(icon: str, value, label_prefix: str = "") -> rx.Component:
    """Build an info row whose text is a clickable ``tel:`` link.

    Args:
        icon: Icon name handed to ``rx.icon``.
        value: Phone number as displayed; spaces are removed from it to build
            the ``tel:`` target.
        label_prefix: Text placed in front of the number in the link label
            (e.g. "Fixe : ").

    Returns:
        An ``rx.cond`` that yields the icon + link row only while
        ``value != ""``.
    """
    has_value = value != ""
    link_label = label_prefix + value
    dial_target = "tel:" + value.replace(" ", "")
    leading_icon = rx.icon(icon, size=14, color="#9e9e9e")
    phone_link = rx.link(
        link_label,
        href=dial_target,
        size="2",
        color="var(--brand-accent)",
        text_decoration="none",
        _hover={"text_decoration": "underline"},
    )
    row = rx.hstack(leading_icon, phone_link, spacing="2", align="center")
    return rx.cond(has_value, row)
 def _info_line_map(line1, line2, map_url) -> rx.Component:
    """Build an address block: one clickable map-pin icon and up to two text lines.

    Args:
        line1: First address line (street).
        line2: Second address line (postal code and locality).
        map_url: Target of the map-pin link, opened as an external link.

    Returns:
        An ``rx.cond`` that yields the block while at least one of the two
        lines is non-empty; each line is itself only rendered when non-empty.
    """
    def _optional_line(text):
        # Each address line is rendered only when it is not the empty string.
        return rx.cond(text != "", rx.text(text, size="2", color="#555"))

    has_address = (line1 != "") | (line2 != "")
    pin_link = rx.link(
        rx.icon("map-pin", size=14, color="var(--brand-accent)"),
        href=map_url,
        is_external=True,
        _hover={"opacity": "0.7"},
        title="Voir sur Google Maps",
    )
    address_lines = rx.vstack(
        _optional_line(line1),
        _optional_line(line2),
        spacing="0",
        align="start",
    )
    block = rx.hstack(pin_link, address_lines, spacing="2", align="start")
    return rx.cond(has_address, block)
 def _cal_day_cell(d) -> rx.Component:
    is_selected = d["date_str"] == FicheState.edit_date
    return rx.cond(
@ -1782,10 +1881,9 @@ def fiche_page() -> rx.Component:
                                rx.flex(
                                    rx.vstack(
                                        rx.text("Élève", size="2", font_weight="700", color="var(--text-strong)"),
-                                        _info_line("map-pin", FicheState.fiche_adresse),
+                                        _info_line_map(FicheState.fiche_adresse, FicheState.fiche_cp_localite, FicheState.fiche_map_url),
-                                        _info_line("map-pin", FicheState.fiche_cp_localite),
+                                        _info_line_tel("phone", FicheState.fiche_telephone),
-                                        _info_line("phone", FicheState.fiche_telephone),
+                                        _info_line_email("mail", FicheState.fiche_email_val),
                                        _info_line("mail", FicheState.fiche_email_val),
                                        _info_line("cake", FicheState.fiche_date_naissance),
                                        _info_line("user-check", FicheState.fiche_majeur),
                                        _info_line("scale", FicheState.fiche_compensation),
@ -1794,18 +1892,29 @@ def fiche_page() -> rx.Component:
                                    rx.vstack(
                                        rx.text("Entreprise", size="2", font_weight="700", color="var(--text-strong)"),
                                        _info_line("building-2", FicheState.fiche_entreprise_nom),
-                                        _info_line("map-pin", FicheState.fiche_entreprise_adresse),
+                                        _info_line_map(FicheState.fiche_entreprise_adresse, FicheState.fiche_entreprise_cp_localite, FicheState.fiche_entreprise_map_url),
-                                        _info_line("map-pin", FicheState.fiche_entreprise_cp_localite),
+                                        _info_line_tel("phone", FicheState.fiche_entreprise_telephone),
-                                        _info_line("phone", FicheState.fiche_entreprise_telephone),
+                                        _info_line_email("mail", FicheState.fiche_entreprise_email),
                                        _info_line("mail", FicheState.fiche_entreprise_email),
                                        spacing="1", align="start", flex="1", min_width="200px",
                                    ),
                                    rx.vstack(
                                        rx.text("Formateur", size="2", font_weight="700", color="var(--text-strong)"),
                                        _info_line("user", FicheState.fiche_formateur_nom),
-                                        _info_line("mail", FicheState.fiche_formateur_email),
+                                        _info_line_email("mail", FicheState.fiche_formateur_email),
                                        spacing="1", align="start", flex="1", min_width="200px",
                                    ),
                                    # Représentant légal (mineurs uniquement)
                                    rx.cond(
                                        FicheState.fiche_resp_legal_nom != "",
                                        rx.vstack(
                                            rx.text("Représentant légal", size="2", font_weight="700", color="var(--text-strong)"),
                                            _info_line("user", FicheState.fiche_resp_legal_nom),
                                            _info_line_map(FicheState.fiche_resp_legal_adresse, FicheState.fiche_resp_legal_cp_localite, FicheState.fiche_resp_legal_map_url),
                                            _info_line_tel("phone", FicheState.fiche_resp_legal_tel_p, label_prefix="Fixe : "),
                                            _info_line_tel("phone", FicheState.fiche_resp_legal_tel_n, label_prefix="Mobile : "),
                                            spacing="1", align="start", flex="1", min_width="200px",
                                        ),
                                    ),
                                    gap="1.5rem", flex_wrap="wrap", width="100%",
                                ),
                                rx.text(
--- a/scripts/sync_esacada.py
+++ b/scripts/sync_esacada.py
@ -844,10 +844,125 @@ def _scrape_student_details(page: Page, class_name: str) -> list[dict]:
        else:
            _log(f"  [fiches]  {i}: WARNING cellule vide")
    # Récupération du PDF "Liste des classes" + injection des représentants
    # légaux dans les fiches (uniquement pour les mineurs).
    try:
        lc_pdf = _download_liste_classe_pdf(page, class_name)
        if lc_pdf:
            from src.parser_liste_classe import parse_liste_classe_pdf
            lc_data = parse_liste_classe_pdf(lc_pdf)
            _merge_resp_legaux(fiches, lc_data.get("apprentis", []))
    except Exception as _e:
        _log(f"  [resp.lég.] WARN: {_e}")
    _log(f"  [fiches] {len(fiches)} fiche(s) extraite(s)")
    return fiches
 _LISTES_CLASSES_DIR = _root / "data" / "pdfs" / "listes_classes"
 def _download_liste_classe_pdf(page: Page, class_name: str) -> Path | None:
    """Download the class-list PDF report linked from the current page.

    The ribbon button labelled "Liste des classes" / "Klassenliste" carries a
    direct href to the report. The href is read from the DOM and fetched
    through ``page.context.request`` so the request goes through the same
    browser context as the page.

    Args:
        page: Page currently showing the class view.
        class_name: Class label; used in log messages and, with spaces
            replaced by underscores, in the destination file name.

    Returns:
        The path of the written PDF, or ``None`` when the button is missing,
        the HTTP status is not 200, the body does not start with ``%PDF`` or
        the request raises.
    """
    from urllib.parse import urljoin

    _LISTES_CLASSES_DIR.mkdir(parents=True, exist_ok=True)
    dest = _LISTES_CLASSES_DIR / f"liste_{class_name.replace(' ', '_')}.pdf"
    try:
        page.wait_for_selector("a.dxr-item.dxr-buttonItem", timeout=10_000)
    except Exception:
        # Best effort: if the ribbon never shows up, the lookup below finds no
        # link and the function logs it and returns None.
        pass
    href = page.evaluate("""() => {
        const links = document.querySelectorAll('a.dxr-item.dxr-buttonItem');
        for (const a of links) {
            const txt = (a.innerText || '').trim();
            if (txt === 'Liste des classes' || txt === 'Klassenliste') {
                return a.getAttribute('href');
            }
        }
        return null;
    }""")
    if not href:
        _log(f"  [liste] {class_name}: bouton 'Liste des classes' introuvable")
        return None
    if href.startswith("/"):
        full_url = f"{BASE_URL}{href}"
    else:
        # getAttribute('href') returns the raw attribute, which may be relative
        # (e.g. "Reports/RptEscada.aspx?..."). urljoin resolves it against the
        # current page and leaves absolute URLs untouched.
        full_url = urljoin(page.url, href)
    try:
        resp = page.context.request.get(full_url, timeout=30_000)
        if resp.status != 200:
            _log(f"  [liste] {class_name}: HTTP {resp.status}")
            return None
        body = resp.body()
        # Reject anything that is not a PDF (checked via the "%PDF" magic bytes).
        if not body.startswith(b"%PDF"):
            _log(f"  [liste] {class_name}: réponse n'est pas un PDF")
            return None
        dest.write_bytes(body)
        _log(f"  [liste] {class_name}: {dest.name} ({len(body)} bytes)")
        return dest
    except Exception as e:
        _log(f"  [liste] {class_name}: {e}")
        return None
 def _merge_resp_legaux(fiches: list[dict], lc_apprentis: list[dict]) -> None:
    """Merge data parsed from the class-list PDF into the scraped fiches.

    Entries are matched on ``nom_eleve`` after normalisation (accents removed,
    lower-cased, whitespace collapsed). For every matched fiche, in place:
      - each non-empty ``resp_legal_*`` value from the PDF entry is copied;
      - ``entreprise_nom`` is replaced by the PDF's ``entreprise_nom_pdf`` when
        the current value is empty or looks like a street address; in that
        case the old value is moved to ``entreprise_adresse`` if that field
        is empty.

    Args:
        fiches: Scraped fiches, mutated in place.
        lc_apprentis: Apprentice dicts produced by the class-list PDF parser.
    """
    import re as _re
    import unicodedata

    def _norm(s: str) -> str:
        decomposed = unicodedata.normalize("NFKD", s or "")
        ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
        return " ".join(ascii_only.lower().split())

    street_like = _re.compile(
        r"^(Chemin|Rue|Route|Avenue|Impasse|Ruelle|Allée|Place|Boulevard|Bd|Av\.|Ch\.|Rte)\s",
        _re.I,
    )
    rl_keys = (
        "resp_legal_nom", "resp_legal_adresse", "resp_legal_code_postal",
        "resp_legal_localite", "resp_legal_telephone_p", "resp_legal_telephone_n",
    )

    # Index PDF entries by normalised name; a later duplicate replaces an earlier one.
    pdf_by_name: dict[str, dict] = {}
    for entry in lc_apprentis:
        key = _norm(entry.get("nom_eleve") or "")
        if key:
            pdf_by_name[key] = entry

    rl_count = 0
    ent_count = 0
    for fiche in fiches:
        entry = pdf_by_name.get(_norm(fiche.get("nom_eleve") or ""))
        if not entry:
            continue

        # Legal representative: copy only the values the PDF actually provides.
        provided = [k for k in rl_keys if entry.get(k)]
        if provided:
            for k in provided:
                fiche[k] = entry[k]
            rl_count += 1

        # Company-name fallback for fiches whose scraped name is missing or is
        # in fact an address.
        pdf_company = entry.get("entreprise_nom_pdf")
        current_company = (fiche.get("entreprise_nom") or "").strip()
        if not pdf_company:
            continue
        if current_company and not street_like.match(current_company):
            continue
        address_missing = not (fiche.get("entreprise_adresse") or "").strip()
        if current_company and address_missing:
            fiche["entreprise_adresse"] = current_company
        fiche["entreprise_nom"] = pdf_company
        ent_count += 1

    _log(f"  [resp.lég.] {rl_count} apprenti(s) avec représentant légal")
    if ent_count:
        _log(f"  [entreprise] {ent_count} apprenti(s) avec entreprise_nom corrigé depuis le PDF")
 def _download_pdf(page: Page, class_name: str) -> Path | None:
    """Clique sur 'Contrôle des absences (apprenants)' et récupère le PDF.
--- a/src/db.py
+++ b/src/db.py
@ -192,6 +192,15 @@ class ApprentiFiche(Base):
    formateur_nom:   Mapped[Optional[str]] = mapped_column(String, nullable=True)
    formateur_email: Mapped[Optional[str]] = mapped_column(String, nullable=True)
    # Représentant légal (uniquement pour les mineurs ; scrapé depuis le PDF
    # "Liste des classes" sur Escada).
    resp_legal_nom:         Mapped[Optional[str]] = mapped_column(String, nullable=True)
    resp_legal_adresse:     Mapped[Optional[str]] = mapped_column(String, nullable=True)
    resp_legal_code_postal: Mapped[Optional[str]] = mapped_column(String, nullable=True)
    resp_legal_localite:    Mapped[Optional[str]] = mapped_column(String, nullable=True)
    resp_legal_telephone_p: Mapped[Optional[str]] = mapped_column(String, nullable=True)  # fixe
    resp_legal_telephone_n: Mapped[Optional[str]] = mapped_column(String, nullable=True)  # mobile
    # Profession dérivée du préfixe de classe (mapping dans data/settings.json)
    profession: Mapped[Optional[str]] = mapped_column(String, nullable=True)
@ -345,6 +354,12 @@ def init_db(engine=None):
            "ALTER TABLE cron_jobs ADD COLUMN notify_level TEXT DEFAULT 'normal'",
            "ALTER TABLE apprenti_fiches ADD COLUMN profession TEXT",
            "ALTER TABLE apprenti_fiches ADD COLUMN compensation_desavantages BOOLEAN",
            "ALTER TABLE apprenti_fiches ADD COLUMN resp_legal_nom         TEXT",
            "ALTER TABLE apprenti_fiches ADD COLUMN resp_legal_adresse     TEXT",
            "ALTER TABLE apprenti_fiches ADD COLUMN resp_legal_code_postal TEXT",
            "ALTER TABLE apprenti_fiches ADD COLUMN resp_legal_localite    TEXT",
            "ALTER TABLE apprenti_fiches ADD COLUMN resp_legal_telephone_p TEXT",
            "ALTER TABLE apprenti_fiches ADD COLUMN resp_legal_telephone_n TEXT",
            "ALTER TABLE cron_jobs ADD COLUMN sync_notices BOOLEAN DEFAULT 0",
            # Migration cron task_kind — schéma 3 valeurs + checkbox sync_notices.
            # Étape A : pour les rows qui ciblaient les notices, on flag sync_notices=1
@ -416,6 +431,8 @@ def upsert_apprenti_fiche(session: Session, apprenti_id: int, data: dict) -> Non
        "entreprise_localite", "entreprise_telephone", "entreprise_email",
        "formateur_nom", "formateur_email",
        "profession",
        "resp_legal_nom", "resp_legal_adresse", "resp_legal_code_postal",
        "resp_legal_localite", "resp_legal_telephone_p", "resp_legal_telephone_n",
    ]
    if existing:
        for f in fields:
--- a/src/parser_bn.py
+++ b/src/parser_bn.py
@ -71,7 +71,14 @@ def _extract_name(page) -> tuple[str, str]:
    )
    skip_kw = {"EPTM", "Professionnelle", "Technique", "Département",
               "Service", "Ecole", "École", "formation", "Canton",
-               "Kanton", "page", "Sion", "Saint", "BULLETIN", "NOTES"}
+               "Kanton", "page", "Sion", "Saint", "BULLETIN", "NOTES",
               # En-têtes de colonnes du PDF qui peuvent être pris pour un
               # nom d'élève si le bloc adresse n'est pas trouvé.
               "Profession", "Automaticien", "Monteur", "Electronicien",
               "Polymécanicien", "CFC", "AFP", "Classe", "Titulaire",
               # Libellés du tableau de notes (cas où le tableau commence
               # haut sur la page et le bloc adresse est absent).
               "Moyenne", "Branches", "Travaux", "Culture", "globale", "groupe"}
    for line_words in lines:
        text = " ".join(w["text"] for w in line_words).strip()
--- a/src/parser_liste_classe.py
+++ b/src/parser_liste_classe.py
@ -0,0 +1,235 @@
 """PDF parser for the EPTM "Liste de la classe" report.
 Source : bouton "Liste des classes" sur ViewLernende d'une classe Escada
 (`Reports/RptEscada.aspx?...`).
 Layout par apprenti (4 colonnes alignées verticalement) :
    Col 1 (Apprenti)   : Nom Prénom / Adresse / CP Localité / Tél / Email
    Col 2 (Formation)  : Métier / Date naissance / Origine
    Col 3 (Entreprise) : Nom / Adresse / CP Loc / Tél / (Formateur en dessous)
    Col 4 (Resp. lég.) : Civilité Nom / Adresse / CP Localité / P:tél / N:tél
 Le resp. légal n'est présent que pour les apprentis MINEURS — les majeurs
 ont une 4e colonne vide.
 Sortie de parse_liste_classe_pdf() :
    {
      "classe": "AUTOMAT 1",
      "apprentis": [
          {
            "nom_eleve": "Clivaz Eloan",
            "resp_legal_nom":         "Madame Diana Linda Clivaz",
            "resp_legal_adresse":     "Route du Fougir 6",
            "resp_legal_code_postal": "3971",
            "resp_legal_localite":    "Chermignon",
            "resp_legal_telephone_p": "+41 27 483 36 27",
            "resp_legal_telephone_n": "+41 79 103 14 79",
          },
          ...
      ]
    }
 """
 from __future__ import annotations
 import re
 from pathlib import Path
 import pdfplumber
 # Title line of the report: captures whatever follows "Liste de la classe"
 # up to the end of that line (the class label).
 _RE_CLASSE     = re.compile(r"Liste de la classe\s+([^\n]+?)\s*$", re.I | re.M)
 # "<4-digit postal code> <locality>" line.
 _RE_CP_LOC     = re.compile(r"^(\d{4})\s+(.+)$")
 # dd.mm.yyyy date (not referenced by the functions visible in this file).
 _RE_DATE       = re.compile(r"\b\d{2}\.\d{2}\.\d{4}\b")
 # First line of a legal representative: civility followed by the name.
 _RE_CIVILITE   = re.compile(r"^(Monsieur|Madame)\s+(.+)$")
 # Phone lines of the legal representative, prefixed "P:" (landline) / "N:" (mobile).
 _RE_TEL_P      = re.compile(r"^P:\s*(.+)$")
 _RE_TEL_N      = re.compile(r"^N:\s*(.+)$")
 # Column x-ranges of the report, in PDF points. Calibrated on the Escada
 # template (headers "Apprenti / Formation / Entreprise / Resp. légal" at
 # x0 ≈ 56 / 184 / 302 / 433).
 # NOTE(review): the original comment called 612pt the A4 portrait width;
 # 612pt is the US Letter width, A4 portrait is about 595pt wide. The upper
 # bound below still covers a full A4 page.
 _COL_APPRENTI   = ( 50, 184)
 _COL_FORMATION  = (184, 302)
 _COL_ENTREPRISE = (302, 425)
 _COL_RESP_LEGAL = (425, 612)
 def _group_words_by_line(words: list[dict], y_tol: float = 3.0) -> list[list[dict]]:
    """Cluster word dicts into visual lines based on their ``top`` coordinate.

    Words are ordered by ``(top, x0)``. A word joins the current line when its
    ``top`` differs from the previously processed word's ``top`` by less than
    ``y_tol``; otherwise it starts a new line. Each returned line is sorted by
    ``x0``.

    Args:
        words: Word dicts carrying at least ``top`` and ``x0``.
        y_tol: Maximum (exclusive) vertical gap between consecutive words of
            the same line.

    Returns:
        The lines from top to bottom; ``[]`` when ``words`` is empty.
    """
    ordered = sorted(words, key=lambda word: (word["top"], word["x0"]))
    grouped: list[list[dict]] = []
    previous_top = None
    for word in ordered:
        if grouped and abs(word["top"] - previous_top) < y_tol:
            grouped[-1].append(word)
        else:
            grouped.append([word])
        previous_top = word["top"]
    return [sorted(row, key=lambda word: word["x0"]) for row in grouped]
 def _words_in_col(line: list[dict], x_min: float, x_max: float) -> str:
    """Join the texts of the words whose ``x0`` lies in ``[x_min, x_max)``.

    The lower bound is inclusive and the upper bound exclusive. Texts are
    joined with single spaces in the order they appear in ``line`` and the
    result is stripped.
    """
    selected = []
    for word in line:
        if x_min <= word["x0"] < x_max:
            selected.append(word["text"])
    return " ".join(selected).strip()
 def _is_header_line(line_text: str) -> bool:
    """Tell whether a line belongs to the page header/footer and must be ignored.

    A line qualifies when it starts with "Total:" (case-sensitive) or when its
    lower-cased text contains one of the known header fragments.
    """
    if line_text.startswith("Total:"):
        return True
    lowered = line_text.lower()
    for marker in (
        "département de l'économie", "ecole professionnelle",
        "liste de la classe", "titulaire", "apprenti portable",
        "formation date de", "origine", "resp. légal",
        "chemin st-hubert", "entreprise formateur",
    ):
        if marker in lowered:
            return True
    return False
 def _parse_apprenti_block(block_lines: list[tuple[str, str, str, str]]) -> dict:
    """Parse the four column texts of one apprentice block.

    Args:
        block_lines: Rows of the block, each a tuple
            ``(col_apprenti, col_formation, col_entreprise, col_resp_legal)``.

    Returns:
        A dict that always holds ``nom_eleve`` (first non-empty cell of the
        first column, or "") and ``entreprise_nom_pdf`` (or ``None``). When a
        legal representative is found in the fourth column (a line starting
        with "Monsieur"/"Madame"), the ``resp_legal_*`` keys are added;
        details that were not found are ``None``.
    """
    # Street keywords used to recognise an address line that carries no house
    # number (e.g. "Chemin de la Chapelle"); without this such a line would be
    # appended to the representative's name.
    street_prefixes = (
        "chemin ", "rue ", "route ", "avenue ", "impasse ", "ruelle ",
        "allée ", "place ", "boulevard ", "bd ", "av. ", "ch. ", "rte ",
    )

    # Apprentice column: the name is the first non-empty cell.
    col_ap = [c[0] for c in block_lines if c[0]]
    nom_eleve = col_ap[0] if col_ap else ""

    # Company name, rebuilt from the Formation + Entreprise cells of the first
    # row that has either. The report may glue "CFC"/"AFP" to the start of the
    # name ("CFCTelsa") and spill the rest into the next column ("SA"), so
    # both cells are joined (e.g. "Monteur automaticien CFCTelsa SA" →
    # "Telsa SA"). Used as a fallback when the scraped page lacks the name.
    entreprise_nom_pdf = None
    first_row = next((r for r in block_lines if r[1] or r[2]), None)
    if first_row:
        line_text = f"{first_row[1]} {first_row[2]}".strip()
        m_ent = re.search(r"\b(?:CFC|AFP)\s*([A-ZÀ-Ÿ].*)", line_text)
        if m_ent:
            entreprise_nom_pdf = m_ent.group(1).strip()

    result = {
        "nom_eleve":           nom_eleve,
        "entreprise_nom_pdf":  entreprise_nom_pdf,
    }

    # Legal representative column; empty for apprentices of legal age.
    col_rl = [c[3] for c in block_lines if c[3]]
    if not col_rl:
        return result

    rl_nom = ""
    rl_adresse = ""
    rl_cp = ""
    rl_loc = ""
    rl_tp = ""
    rl_tn = ""
    for line in col_rl:
        m = _RE_CIVILITE.match(line)
        if m and not rl_nom:
            rl_nom = f"{m.group(1)} {m.group(2)}".strip()
            continue
        m = _RE_CP_LOC.match(line)
        if m and not rl_cp:
            rl_cp = m.group(1)
            rl_loc = m.group(2).strip()
            continue
        m = _RE_TEL_P.match(line)
        if m:
            rl_tp = m.group(1).strip()
            continue
        m = _RE_TEL_N.match(line)
        if m:
            rl_tn = m.group(1).strip()
            continue
        # Unmatched line, only considered once the name is known and before an
        # address was found: a line with a digit or a leading street keyword
        # is the address; anything else continues a name wrapped over two
        # lines (e.g. "Madame Séverine Massy" / "Luisier").
        if rl_nom and not rl_adresse:
            text = line.strip()
            looks_like_street = text.lower().startswith(street_prefixes)
            if looks_like_street or any(c.isdigit() for c in text):
                rl_adresse = text
            else:
                rl_nom = f"{rl_nom} {text}".strip()

    if not rl_nom:
        return result

    result.update({
        "resp_legal_nom":         rl_nom,
        "resp_legal_adresse":     rl_adresse or None,
        "resp_legal_code_postal": rl_cp or None,
        "resp_legal_localite":    rl_loc or None,
        "resp_legal_telephone_p": rl_tp or None,
        "resp_legal_telephone_n": rl_tn or None,
    })
    return result
 def parse_liste_classe_pdf(pdf_path: Path) -> dict:
    """Parse a class-list PDF into the class label and its apprentices.

    Args:
        pdf_path: Location of the PDF report.

    Returns:
        ``{"classe": <label or "">, "apprentis": [<dict per apprentice>]}``.
        Each apprentice dict comes from ``_parse_apprenti_block``; blocks
        without a ``nom_eleve`` are dropped.
    """
    pdf_path = Path(pdf_path)
    class_label = ""
    parsed: list[dict] = []
    column_ranges = (
        _COL_APPRENTI, _COL_FORMATION, _COL_ENTREPRISE, _COL_RESP_LEGAL,
    )

    with pdfplumber.open(str(pdf_path)) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text() or ""
            if not class_label:
                title = _RE_CLASSE.search(page_text)
                if title:
                    class_label = title.group(1).strip()

            # Turn every visual line into a 4-tuple of column texts, dropping
            # header lines and lines with no text in any column.
            rows = []
            for visual_line in _group_words_by_line(page.extract_words()):
                cells = tuple(
                    _words_in_col(visual_line, lo, hi) for lo, hi in column_ranges
                )
                if _is_header_line(" ".join(c for c in cells if c).strip()):
                    continue
                if any(cells):
                    rows.append(cells)

            # Split into apprentice blocks, page by page: a row whose
            # Formation column holds "CFC"/"AFP" at the start of a word opens
            # a new block (no right-hand boundary is required because the
            # report may glue the company name on, e.g. "CFCBOBST"). Rows seen
            # before the first such row on a page are ignored.
            groups: list[list[tuple]] = []
            for cells in rows:
                if re.search(r"(\s|^)(CFC|AFP)", cells[1]):
                    groups.append([])
                if groups:
                    groups[-1].append(cells)

            for group in groups:
                fiche = _parse_apprenti_block(group)
                if fiche.get("nom_eleve"):
                    parsed.append(fiche)

    return {"classe": class_label, "apprentis": parsed}