# eptm_dashboard/.venv/lib/python3.12/site-packages/pypdf/generic/_link.py

# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


# This module contains code used by _writer.py to track links in pages
# being added to the writer until the links can be resolved.

from typing import TYPE_CHECKING, Optional, Union, cast

from .._utils import logger_warning
from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject, is_null_or_none

if TYPE_CHECKING:
    from .._page import PageObject
    from .._reader import PdfReader
    from .._writer import PdfWriter


class NamedReferenceLink:
    """Link to a named destination, held until it can be resolved.

    The destination name is stored together with the reader it came from so
    that the name can be looked up in the source document later on.
    """

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """Store the destination name and the document it belongs to.

        Args:
            reference: TextStringObject holding the named reference.
            source_pdf: PdfReader whose named destinations are consulted.
        """
        self._source_pdf = source_pdf
        self._reference = reference

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Look up the page the named destination points to in the source PDF.

        Returns:
            The ``page`` attribute of the matching destination, or ``None``
            when no (truthy) destination is registered under the name.
        """
        name = str(self._reference)
        match = self._source_pdf.named_destinations.get(name)
        if not match:
            return None
        return match.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Register the named destination in the writer, pointing at the new page.

        Nothing happens when the writer already has a destination with the
        same name.

        Args:
            target_pdf: PdfWriter which the new link went into.
            new_page: Page in the writer that the destination should target;
                its ``page_number`` is what gets registered.
        """
        name = str(self._reference)
        if name in target_pdf.named_destinations:
            return
        target_pdf.add_named_destination(name, new_page.page_number)


class DirectReferenceLink:
    """Link that addresses its page directly, held until it can be resolved.

    The wrapped destination array is kept by reference, so patching the link
    rewrites the array in place.
    """

    def __init__(self, reference: ArrayObject) -> None:
        """Keep hold of the destination array.

        Args:
            reference: an ArrayObject whose first element is the Page
                indirect object.
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the destination array (the page)."""
        destination = self._reference
        return destination[0]

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Point the destination array at ``new_page``.

        Args:
            target_pdf: PdfWriter which the new link went into (not used here).
            new_page: Page that replaces the first element of the array.
        """
        destination = self._reference
        destination[0] = new_page


# Either kind of not-yet-resolved link. Both classes provide
# find_referenced_page() and patch_reference(target_pdf, new_page).
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]


def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the links of two pages that are assumed to be the same page.

    Annotations that do not yield a link are dropped from each page *before*
    pairing, so extra non-link annotations on one side do not shift or drop
    valid links.

    Args:
        new_page: The page whose links form the first tuple element.
        old_page: The page whose links form the second tuple element.

    Returns:
        A list of (new link, old link) tuples, paired by position. The list
        is empty when either ``/Annots`` entry is not an array. When the two
        pages yield a different number of links, a warning is logged and the
        surplus links of the longer side are left unpaired.
    """
    # Resolve the /Annots entry of each page; missing or null means "no annotations".
    resolved = []
    for page in (new_page, old_page):
        annots = page.get("/Annots", ArrayObject()).get_object()
        resolved.append(ArrayObject() if is_null_or_none(annots) else annots)
    new_annots, old_annots = resolved

    if not (isinstance(new_annots, ArrayObject) and isinstance(old_annots, ArrayObject)):
        logger_warning(
            f"Expected annotation arrays: {old_annots} {new_annots}. Ignoring annotations.",
            __name__,
        )
        return []

    # Build the links page by page, keeping only annotations that produced one.
    collected = []
    for annots, page in ((new_annots, new_page), (old_annots, old_page)):
        candidates = (_build_link(entry, page) for entry in annots)
        collected.append([link for link in candidates if link is not None])
    new_links, old_links = collected

    if len(new_links) != len(old_links):
        logger_warning(
            f"Annotation sizes differ: {old_links} vs. {new_links}",
            __name__,
        )

    return list(zip(new_links, old_links))


def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Build a reference link for one annotation, if it is a document-internal link.

    Only ``/Link`` annotations are considered. The destination is taken from
    a ``/GoTo`` action (``/A`` dictionary with a ``/D`` entry) or, when there
    is no usable action dictionary, from the annotation's ``/Dest`` entry.

    Args:
        indirect_object: One entry of a page's ``/Annots`` array.
        page: Page the annotation belongs to; its ``pdf`` attribute is handed
            on as the source document of the link.

    Returns:
        The link built from the destination, or ``None`` when the annotation
        is not a link dictionary, its action is not a ``/GoTo`` with a
        destination, it has no destination at all, or the destination has an
        unsupported type.
    """
    src = cast("PdfReader", page.pdf)
    link = indirect_object.get_object()
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = link["/A"]
        # A malformed file may carry a null (or otherwise non-dictionary) /A.
        # Treat that like a missing entry instead of failing on ``.get``.
        if isinstance(action, DictionaryObject):
            if action.get("/S") != "/GoTo" or "/D" not in action:
                return None
            return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here


def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination in the link class matching its type.

    Args:
        reference: Destination value taken from a link annotation.
        source_pdf: Document the destination comes from; only used for
            named (text string) destinations.

    Returns:
        A NamedReferenceLink for a TextStringObject, a DirectReferenceLink
        for an ArrayObject, and ``None`` for anything else.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, source_pdf)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link