# SPDX-FileCopyrightText: 2026 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause

# TODO test-confirm filter and info params

from collections import OrderedDict
import pypdfium2._helpers as pdfium
import pypdfium2.internal as pdfium_i
from pypdfium2_cli._parsers import (
    add_input,
    add_n_digits,
    get_input,
    round_list,
    iterator_hasvalue,
)


# Detail categories selectable through --info. The tuple fixes their order;
# the individual names are unpacked from it so both always stay in sync.
INFO_PARAMS = ("pos", "imginfo", "text")
PARAM_POS, PARAM_IMGINFO, PARAM_TEXT = INFO_PARAMS


def attach(parser):
    """Register this subcommand's command-line arguments on *parser*.

    The shared helpers ``add_input`` (called with ``pages=True``) and
    ``add_n_digits`` are applied first; afterwards the options ``--filter``,
    ``--max-depth`` and ``--info`` are added.
    """
    
    add_input(parser, pages=True)
    add_n_digits(parser)
    
    # TODO think out strategy for choices (see https://github.com/python/cpython/issues/69247)
    type_names = [*pdfium_i.ObjectTypeToConst.keys()]
    
    # One (flag, keyword arguments) entry per option, registered in this order.
    option_table = (
        ("--filter", dict(
            nargs = "+",
            metavar = "T",
            choices = type_names,
            help = f"Object types to include. Choices: {type_names}",
        )),
        ("--max-depth", dict(
            type = int,
            default = 2,
            help = "Maximum recursion depth to consider when descending into Form XObjects.",
        )),
        ("--info", dict(
            nargs = "+",
            type = str.lower,  # values are lowercased before being checked against choices
            choices = INFO_PARAMS,
            default = INFO_PARAMS,
            help = "Object details to show.",
        )),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)


def print_img_metadata(m, n_digits, pad=""):
    """Print the fields of image metadata *m*, one ``key: value`` line each.

    The two DPI values are rounded to *n_digits* decimals, and the colorspace
    value is looked up in ``pdfium_i.ColorspaceToStr``. The
    ``marked_content_id`` line is only emitted when that field differs
    from -1. Every printed line is prefixed with *pad*.
    """
    
    # (label, value) pairs in output order
    rows = [
        ("width", m.width),
        ("height", m.height),
        ("horizontal_dpi", round(m.horizontal_dpi, n_digits)),
        ("vertical_dpi", round(m.vertical_dpi, n_digits)),
        ("bits_per_pixel", m.bits_per_pixel),
        ("colorspace", pdfium_i.ColorspaceToStr.get(m.colorspace)),
    ]
    mcid = m.marked_content_id
    if mcid != -1:
        rows.append(("marked_content_id", mcid))
    
    for label, value in rows:
        print(pad + f"{label}: {value}")


def _print_object(obj, n_digits, want_pos, want_imginfo, want_text):
    # Print the type line of one page object, followed by the requested details.
    
    # Indentation follows the object's nesting level; details sit one step deeper.
    indent = "    " * obj.level
    detail_indent = indent + "    "
    print(indent + pdfium_i.ObjectTypeToStr.get(obj.type))
    
    if want_pos:
        bbox = round_list(obj.get_bounds(), n_digits)
        print(detail_indent + f"Bounding Box: {bbox}")
        # Quad points are only queried for image and text objects.
        if isinstance(obj, (pdfium.PdfImage, pdfium.PdfTextObj)):
            quads = [round_list(pt, n_digits) for pt in obj.get_quad_points()]
            print(detail_indent + f"Quad Points: {quads}")
    
    if want_imginfo and isinstance(obj, pdfium.PdfImage):
        print(detail_indent + f"Filters: {obj.get_filters()}")
        metadata = obj.get_metadata()
        # Sanity check: the metadata size must agree with the reported pixel size.
        assert (metadata.width, metadata.height) == obj.get_px_size()
        print_img_metadata(metadata, n_digits, pad=detail_indent)
    elif want_text and isinstance(obj, pdfium.PdfTextObj):
        print(detail_indent + repr(obj.extract()))


def main(args):
    """Print the page objects of the selected pages of the input document.

    For each page in ``args.pages`` that yields at least one object, a page
    header, one entry per object and a per-page count are printed. A total
    count follows at the end if any object was printed. ``args.info`` selects
    which details (position, image info, text) are shown per object.
    """
    
    pdf = get_input(args)
    
    # Only translate type names when a filter was actually given; otherwise the
    # filter stays None, which makes a difference for unhandled object types.
    if args.filter:
        args.filter = [pdfium_i.ObjectTypeToConst[name] for name in args.filter]
    
    requested = args.info
    want_pos = PARAM_POS in requested
    want_imginfo = PARAM_IMGINFO in requested
    want_text = PARAM_TEXT in requested
    assert want_pos or want_imginfo or want_text
    
    grand_total = 0
    for index in args.pages:
        
        page = pdf[index]
        # A textpage is only created when text output was requested.
        textpage = None
        if want_text:
            textpage = page.get_textpage()
        
        found, objects = iterator_hasvalue(
            page.get_objects(args.filter, max_depth=args.max_depth, textpage=textpage)
        )
        if not found:
            # nothing to report for this page, so no header is printed either
            continue
        
        print(f"# Page {index+1}")
        
        n_objects = 0
        for n_objects, obj in enumerate(objects, 1):
            _print_object(obj, args.n_digits, want_pos, want_imginfo, want_text)
        
        if n_objects > 0:
            print(f"-> Count: {n_objects}\n")
            grand_total += n_objects
    
    if grand_total > 0:
        print(f"-> Total count: {grand_total}")