
    )iO\                     j   S r SSKrSSKJrJr  SSKrSSKJr  SSKJr  SSK	J
r  SSKJr  SSKJr  SSKJr  SSKJr  SS	KJr  \R.                  " \5      r " S
 S\R4                  5      r " S S\5      r " S S\R:                  5      r " S S\5      r\" SS5      r  " S S\!5      r"S r#SS jr$S r%g))	PdfObjectPdfImage
PdfTextObjPdfFont    N)c_uintc_float)Path)
namedtuple)PdfiumError)	PdfMatrix)	PdfBitmap)Lazyc                   j   ^  \ rS rSrSrU 4S jrSU 4S jjr\S 5       rS r	S r
S rS	 rS
 rSrU =r$ )r      a  
Pageobject helper class.

When constructing a :class:`.PdfObject`, an instance of a more specific subclass may be returned instead, depending on the object's :attr:`.type` (e.g. :class:`.PdfImage`, :class:`.PdfTextObj`).

Note:
    :meth:`.PdfObject.close` only takes effect on loose pageobjects.
    It is a no-op otherwise, because pageobjects that are part of a page are owned by pdfium, not the caller.

Attributes:
    raw (FPDF_PAGEOBJECT):
        The underlying PDFium pageobject handle.
    type (int):
        The object's type (:data:`FPDF_PAGEOBJ_*`).
    page (PdfPage):
        Reference to the page this pageobject belongs to. May be None if not part of a page (e.g. new or detached object).
    pdf (PdfDocument):
        Reference to the document this pageobject belongs to. May be None if the object does not belong to a document yet.
        This attribute is always set if :attr:`.page` is set.
    container (PdfObject | None):
        PdfObject handle to parent Form XObject, if the pageobject is nested in a Form XObject, None otherwise.
    level (int):
        Nesting level signifying the number of parent Form XObjects, at the time of construction.
        Zero if the object is not nested in a Form XObject.
c                   > [         R                  " U5      nU[         R                  :X  a  [        TU ]  [
        5      nO;U[         R                  :X  a  [        TU ]  [        5      nO[        TU ]  [        5      nXEl	        U$ N)
pdfium_cFPDFPageObj_GetTypeFPDF_PAGEOBJ_IMAGEsuper__new__r   FPDF_PAGEOBJ_TEXTr   r   type)clsrawargskwargsr   instance	__class__s         ^/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/pypdfium2/_helpers/pageobjects.pyr   PdfObject.__new__0   sc    ++C08...wx0HX///wz2Hwy1H    c                 
  > Xl         X l        X0l        X@l        XPl        UbC  U R                  c  UR                  U l        O$U R                  UR                  La  [        S5      e[        TU ]  [        R                  US L S9  g )Nz;*page* must belong to *pdf* when constructing a pageobject.)
needs_free)
r   pagepdf	containerlevel
ValueErrorr   __init__r   FPDFPageObj_Destroy)selfr   r%   r&   r'   r(   r   s         r    r*   PdfObject.__init__>   sq    	"
xx88) !^__ 	5544<Qr"   c                 L    U R                   c  U R                  $ U R                   $ r   r%   r&   r,   s    r    parentPdfObject.parentP   s       99,txx;$));r"   c                 4   U R                   c  [        S5      e[        5       [        5       [        5       [        5       4u  pp4[        R                  " XX#U5      nU(       d  [        S5      eUR                  UR                  UR                  UR                  4$ )z
Get the bounds of the object on the page.

Returns:
    tuple[float * 4]: Left, bottom, right and top, in PDF page coordinates.
z1Must not call get_bounds() on a loose pageobject.zFailed to locate pageobject.)r%   RuntimeErrorr   r   FPDFPageObj_GetBoundsr   value)r,   lbrtoks         r    
get_boundsPdfObject.get_boundsV   sv     99RSSY	79gi?
a++DQ1=<==!''17733r"   c                    U R                   [        R                  [        R                  4;  a  [	        S5      e[        R
                  " 5       n[        R                  " X5      nU(       d  [        S5      eUR                  UR                  4UR                  UR                  4UR                  UR                  4UR                  UR                  44$ )u  
Get the object's quadriliteral points (i.e. the positions of its corners).
For transformed objects, this may provide tighter bounds than a rectangle (e.g. rotation by a non-multiple of 90°, shear).

Note:
    This function only supports image and text objects.

Returns:
    tuple[tuple[float*2] * 4]: Corner positions as (x, y) tuples, counter-clockwise from origin, i.e. bottom-left, bottom-right, top-right, top-left, in PDF page coordinates.
z6Quad points only supported for image and text objects.zFailed to get quad points.)r   r   r   r   r4   FS_QUADPOINTSFFPDFPageObj_GetRotatedBoundsr   x1y1x2y2x3y3x4y4)r,   qr;   s      r    get_quad_pointsPdfObject.get_quad_pointsh   s     99X88(:T:TUUWXX##%224;:;;add|addADD\ADD!$$<!$$EEr"   c                     [         R                  " 5       n[         R                  " X5      nU(       d  [        S5      e[        R
                  " U5      $ )zD
Returns:
    PdfMatrix: The pageobject's current transform matrix.
z#Failed to get matrix of pageobject.)r   	FS_MATRIXFPDFPageObj_GetMatrixr   r   from_raw)r,   	fs_matrixr;   s      r    
get_matrixPdfObject.get_matrix   sB    
 &&(	++D<CDD!!),,r"   c                 T    [         R                  " X5      nU(       d  [        S5      eg)z[
Parameters:
    matrix (PdfMatrix): Set this matrix as the pageobject's transform matrix.
z#Failed to set matrix of pageobject.N)r   FPDFPageObj_SetMatrixr   r,   matrixr;   s      r    
set_matrixPdfObject.set_matrix   s(    
 ++D9CDD r"   c                 T    [         R                  " X5      nU(       d  [        S5      eg)zh
Parameters:
    matrix (PdfMatrix): Multiply the pageobject's current transform matrix by this matrix.
z+Failed to transform pageobject with matrix.N)r   FPDFPageObj_TransformFr   rU   s      r    	transformPdfObject.transform   s(    
 ,,T:KLL r"   )r'   r(   r%   r&   r   )NNNr   )__name__
__module____qualname____firstlineno____doc__r   r*   propertyr1   r<   rJ   rQ   rW   r[   __static_attributes____classcell__r   s   @r    r   r      sI    4R$ < <
4$F0	-EM Mr"   r   c                   F   ^  \ rS rSrSrSS.U 4S jjrS rS rS rSr	U =r
$ )	r      z
Textobject helper class.

You may want to call :meth:`.PdfPage.get_objects` or :meth:`.PdfTextPage.get_textobj` to obtain an instance of this class.
N)textpagec                   > Ub.  UR                  UR                  UR                  R                  S9  [        TU ]  " U0 UD6  Xl        g )Nr/   )updater%   r&   r   r*   rh   )r,   rh   r   r   r   s       r    r*   PdfTextObj.__init__   s>    MMx}}(--2C2CMD$)&) r"   c                    [         R                  " X R                  SS5      nUS:X  a  [        S5      e[        R
                  " U5      n[        R                  " U[        R                  " [         R                  5      5      n[         R                  " X R                  X15        UR                  SUS-
   R                  S5      $ )z/
Returns:
    str: The objects's text content.
Nr   z#Failed to get text from textobject.   z	utf-16-le)r   FPDFTextObj_GetTextrh   r   ctypescreate_string_buffercastPOINTER
FPDF_WCHARr   decode)r,   bufsizebuffer
buffer_ptrs       r    extractPdfTextObj.extract   s    
 ..t]]D!La<CDD,,W5[[8K8K)LM
$$T==*Nzz*719%,,[99r"   c                 D    [         R                  " U 5      n[        X5      $ )zS
Returns:
    PdfFont: Handle to the object's font. Provides name and weight info.
)r   FPDFTextObj_GetFontr   )r,   raw_fonts     r    get_fontPdfTextObj.get_font   s     //5x&&r"   c                     [         R                  " 5       n[        R                  " X5      nU(       d  [	        S5      eUR
                  $ )zb
Returns:
    float: Font size used by the object's text, in PDF canvas units (typically 1/72in).
zFailed to get font size.)ro   r   r   FPDFTextObj_GetFontSizer   r6   )r,   r_sizer;   s      r    get_font_sizePdfTextObj.get_font_size   s9    
 !--d;899||r"   )r]   r^   r_   r`   ra   r*   rx   r}   r   rc   rd   re   s   @r    r   r      s+     (, ! !:'	 	r"   r   c                   :    \ rS rSrSrS
S jrS rS rS rS r	S	r
g)r      z
Font helper class.
Nc                     Xl         X l        g r   )r   r1   )r,   r   r1   s      r    r*   PdfFont.__init__   s    r"   c                     U" U S S5      nUS:X  a  [        SU S35      e[        R                  " U5      nU" XU5        UR                  R	                  S5      $ )Nr   zFailed to get font z name.utf-8)r   ro   rp   r6   rt   )r,   apiwhichru   rv   s        r    _get_name_implPdfFont._get_name_impl   s[    dD!$a< 3E7&ABB,,W5D'"||""7++r"   c                 B    U R                  [        R                  S5      $ )z'
Returns:
    str: The base font name.
base)r   r   FPDFFont_GetBaseFontNamer0   s    r    get_base_namePdfFont.get_base_name   s    
 ""8#D#DfMMr"   c                 B    U R                  [        R                  S5      $ )z)
Returns:
    str: The font family name.
family)r   r   FPDFFont_GetFamilyNamer0   s    r    get_family_namePdfFont.get_family_name   s    
 ""8#B#BHMMr"   c                 T    [         R                  " U 5      nUS:X  a  [        S5      eU$ )zV
Returns:
    int: The font's weight. Typical values are 400 (normal) and 700 (bold).
zFailed to get font weight.)r   FPDFFont_GetWeightr   )r,   weights     r    
get_weightPdfFont.get_weight   s,    
 ,,T2R<:;;r"   )r1   r   r   )r]   r^   r_   r`   ra   r*   r   r   r   r   rc    r"   r    r   r      s"    
	,NNr"   r   c                   v    \ rS rSrSrSr\S 5       rS rS r	SS jr
SS	 jrS
 rSS jrSS jrSS jrS rSrg)r      z:
Image object helper class (specific kind of pageobject).
)ASCIIHexDecodeASCII85DecodeRunLengthDecodeFlateDecode	LZWDecodec                 >    [         R                  " U5      nU " USUS9$ )a  
Parameters:
    pdf (PdfDocument): The document to which the new image object shall be added.
Returns:
    PdfImage: Handle to a new, empty image.
    Note that position and size of the image are defined by its matrix, which defaults to the identity matrix.
    This means that new images will appear as a tiny square of 1x1 canvas units on the bottom left corner of the page.
    Use :class:`.PdfMatrix` and :meth:`.set_matrix` to adjust size and position.
Nr/   )r   FPDFPageObj_NewImageObj)r   r&   raw_imgs      r    newPdfImage.new
  s#     223773//r"   c                     [         R                  " 5       n[         R                  " X R                  U5      nU(       d  [	        S5      eU$ )a  
Retrieve image metadata including DPI, bits per pixel, color space, and size.
If the image does not belong to a page yet, bits per pixel and color space will be unset (0).

Note:
    * The DPI values signify the resolution of the image on the PDF page, not the DPI metadata embedded in the image file.
    * Due to issues in pdfium, this function might be slow on some kinds of images. If you only need size, prefer :meth:`.get_px_size` instead.

Returns:
    FPDF_IMAGEOBJ_METADATA: Image metadata structure
zFailed to get image metadata.)r   FPDF_IMAGEOBJ_METADATAFPDFImageObj_GetImageMetadatar%   r   )r,   metadatar;   s      r    get_metadataPdfImage.get_metadata  s<     22433D))XN=>>r"   c                     [        5       [        5       p![        R                  " XU5      nU(       d  [        S5      eUR                  UR                  4$ )zJ
Returns:
    (int, int): Image dimensions as a tuple of (width, height).
zFailed to get image size.)r   r   FPDFImageObj_GetImagePixelSizer   r6   )r,   whr;   s       r    get_px_sizePdfImage.get_px_size-  sC     x144Ta@9::wwr"   Nc                    [        U[        [        45      (       a  [        US5      nSnO.[        R
                  " US5      (       a  UnO[        SU S35      e[        R                  " U5      u  pgU(       a  [        R                  O[        R                  n[        R                  " U5      u  pU" XX5      nU(       d  [        S5      eU(       a-  U H  n[        U5        M     U(       a  UR                  5         ggU R                  =R                   U-  sl        U(       a&  U R                  R"                  R%                  U5        gg)a1  
Set a JPEG as the image object's content.

Parameters:
    source (str | pathlib.Path | typing.BinaryIO):
        Input JPEG, given as file path or readable byte stream.
    pages (list[PdfPage] | None):
        If replacing an image, pass in a list of loaded pages that might contain it, to update their cache.
        (The same image may be shown multiple times in different transforms across a PDF.)
        May be None or an empty sequence if the image is not shared.
    inline (bool):
        Whether to load the image content into memory. If True, the buffer may be closed after this function call.
        Otherwise, the buffer needs to remain open until the PDF is closed.
    autoclose (bool):
        If the input is a buffer, whether it should be automatically closed once not needed by the PDF anymore.
rbTr9   zCannot load JPEG from z" - not a file path or byte stream.z&Failed to load JPEG into image object.N)
isinstancestrr	   openpdfium_i	is_streamr)   get_bufreaderr   FPDFImageObj_LoadJpegFileInlineFPDFImageObj_LoadJpegFilepages_c_arrayr   idcloser&   _data_holder_data_closerappend)r,   sourcepagesinline	autocloserv   	bufaccessto_holdloaderc_pages
page_countr;   datas                r    	load_jpegPdfImage.load_jpeg:  s   $ fsDk**&$'FI,,F5fX=_`aa%33F;	=C9933 	 '44U;G9FGG4    HH!!W,!%%,,V4 r"   c                     [         R                  " U5      u  p4[        R                  " X4X5      nU(       d  [	        S5      eg)aA  
Set a bitmap as the image object's content.
The pixel data will be flate compressed (as of PDFium 5418).

Parameters:
    bitmap (PdfBitmap):
        The bitmap to inject into the image object.
    pages (list[PdfPage] | None):
        A list of loaded pages that might contain the image object. See :meth:`.load_jpeg`.
zFailed to set image to bitmap.N)r   r   r   FPDFImageObj_SetBitmapr   )r,   bitmapr   r   r   r;   s         r    
set_bitmapPdfImage.set_bitmaph  s=     '44U;,,W$O>?? r"   c                 .   U R                   c  [        S5      eU(       a  U R                  5       u  p#U R                  5       u  pEpg[	        Xd-
  5      [	        Xu-
  5      pX#:  X:  :g  n
U
(       a  X2p2[        X(-  X9-  5      nU R                  5       nUR                  X5      nU R                  U5         [        R                  " U R                   U R                  U 5      nU(       a  U R                  W5        U$ ! U(       a  U R                  W5        f f = f)zBThis is a private implementation function. Do not use externally. z/Cannot get rendered bitmap of loose pageobject.)r&   r4   r   r<   absmaxrQ   scalerW   r   FPDFImageObj_GetRenderedBitmapr%   )r,   scale_to_originalpx_wpx_hr7   r8   r9   r:   	content_w	content_hswapscale_factororig_mat
scaled_mat
raw_bitmaps                  r    _get_rendered_bitmapPdfImage._get_rendered_bitmapy  s     88PQQ ))+JD*JA!#&qs8SXy KY%:;D!d t~t~>L(H!CJOOJ'	*!@@499VZ[J ) !) !s   3,C9 9Dc                 6   U(       a  U R                  U5      nO[        R                  " U 5      nU(       d  [        SU  S35      e[        R
                  " U5      nU(       a6  U(       a/  [        R                  SUR                   SUR                   35        U$ )a  
Get a bitmap rasterization of the image.

Parameters:
    render (bool):
        Whether the image should be rendered, thereby applying possible transform matrices and alpha masks.
    scale_to_original (bool):
        If *render* is True, whether to temporarily scale the image to its native resolution, or close to that (defaults to True). This should improve output quality. Ignored if *render* is False.
Returns:
    PdfBitmap: Image bitmap (with a buffer allocated by PDFium).
zFailed to get bitmap of image .zExtracted size: z, )
r   r   FPDFImageObj_GetBitmapr   r   rO   loggerdebugwidthheight)r,   renderr   r   r   s        r    
get_bitmapPdfImage.get_bitmap  s}     223DEJ!88>J >tfAFGG##J/'LL+FLL>FMM?KLr"   c                     U(       a  [         R                  O[         R                  nU" U SS5      n[        R                  U-  " 5       nU" XU5        U$ )a~  
Parameters:
    decode_simple (bool):
        If True, decode simple filters (see :attr:`.SIMPLE_FILTERS`), so only complex filters will remain, if any. If there are no complex filters, this provides the decoded pixel data.
        If False, the raw stream data will be returned instead.
Returns:
    ctypes.Array: The data of the image stream (as :class:`~ctypes.c_ubyte` array).
Nr   )r    FPDFImageObj_GetImageDataDecodedFPDFImageObj_GetImageDataRawro   c_ubyte)r,   decode_simplefuncn_bytesrv   s        r    get_dataPdfImage.get_data  sL     =Jx8844 	tT1%..7*-T7#r"   c                    / n[         R                  " U 5      n[        U5       Ht  n[         R                  " XSS5      n[        R
                  " U5      n[         R                  " XXe5        UR                  R                  S5      nUR                  U5        Mv     U(       a#  U Vs/ s H  owU R                  ;  d  M  UPM     nnU$ s  snf )z
Parameters:
    skip_simple (bool):
        If True, exclude simple filters.
Returns:
    list[str]: A list of image filters, to be applied in order (from lowest to highest index).
Nr   r   )
r    FPDFImageObj_GetImageFilterCountrangeFPDFImageObj_GetImageFilterro   rp   r6   rt   r   SIMPLE_FILTERS)r,   skip_simplefilterscountilengthrv   fs           r    get_filtersPdfImage.get_filters  s     99$?uA99$4KF008F00&I##G,ANN1  ")J'Qd6I6I-Iq'GJ Ks   'C>Cc                 `   [        U /UQ70 UD6n[        U5      n[        U[        [        45      (       a,  [        U SU 3S5       nUR                  U5        SSS5        g[        R                  " US5      (       a  UR                  U5        g[        SU S35      e! , (       d  f       g= f)a  
Extract the image into an independently usable file or byte stream, attempting to avoid re-encoding or quality loss, as far as pdfium's limited API permits.

This method can only extract DCTDecode (JPEG) and JPXDecode (JPEG 2000) images directly.
Otherwise, the pixel data is decoded and re-encoded using :mod:`PIL`, which is slower and loses the original encoding.
For images with simple filters only, ``get_data(decode_simple=True)`` is used to preserve higher bit depth or special color formats not supported by ``FPDF_BITMAP``.
For images with complex filters other than those extracted directly, we have to resort to :meth:`.get_bitmap`.

Note, this method is not able to account for alpha masks, and potentially other data stored separately of the main image stream, which might lead to incorrect representation of the image.

Tip:
    The ``pikepdf`` library is capable of preserving the original encoding in many cases where this method is not.

Parameters:
    dest (str | pathlib.Path | io.BytesIO):
        File path prefix or byte stream to which the image shall be written.
    fb_format (str):
        The image format to use in case it is necessary to (re-)encode the data.
r   wbNr   zCannot extract to '')
_extract_smartnextr   r   r	   r   sendr   r   r)   )r,   destr   r   extraction_genformatbufs          r    rx   PdfImage.extract  s    . (>t>v>n%dS$K((ax($/3##C( 0/c**%24&:;; 0/s   B
B-r   )NFTr   )FT)F)r]   r^   r_   r`   ra   r   classmethodr   r   r   r   r   r   r   r   r  rx   rc   r   r"   r    r   r      sS     hN 0 0(
 +5\@"%P8"2 <r"   r   
_ImageInfoz0format mode metadata all_filters complex_filtersc                       \ rS rSrSrg)_ImageExtractionErrori  r   N)r]   r^   r_   r`   rc   r   r"   r    r  r    s    r"   r  c                     U [         R                  :X  a
  US:X  a  S$ S$ U [         R                  :X  a  gU [         R                  :X  a  gg )N   1LRGBCMYK)r   FPDF_COLORSPACE_DEVICEGRAYFPDF_COLORSPACE_DEVICERGBFPDF_COLORSPACE_DEVICECMYK)csbpps     r    _get_pil_moder#    sH     
X000Qhs'C'	x11	1	x22	2r"   c           	   #   l  #     [        U 5      u  p#S nUR                  nUS:X  ah  UR                  n[        R                  R                  UR                  UR                  UR                  4U R                  SS9SUR                  SS5      nU(       a  UnU(       d  UR                  S:X  a  S	OS
nWv nU(       a  UR!                  XS9  OUR#                  W5        S v   g ! [         aE  n[        R                  [        U5      5        U R                  SS9R                  5       n S nANS nAff = f7f)Nr   Tr   r   r  F)r   r  tiffpng)r  )_extract_directr  r   r   	PIL_Image
frombuffermoder   r   r   r  r   r   r   r   to_pilsavewrite)		image_obj	fb_formatr   info	pil_imager  r   erv   s	            r    r  r    s    $Y/

 	U?}}H11		1"""6tyy!Q	I (~~7VUF\Fv-T	5 ! @SV(((6==?	@s.   D4C" CD4"
D1,;D,'D4,D11D4c                 z   U R                  5       nU Vs/ s H  o"[        R                  ;  d  M  UPM     nnU R                  5       n[	        UR
                  UR                  5      n[        U5      S:X  aO  U(       a  U R                  SS9nSnO[        S[        R                  R                  UR
                  5       S35      e[        U5      S:X  aD  US   nUS:X  a  U R                  SS9nS	nO6US
:X  a  U R                  SS9nSnO[        SU S35      e[        SU S35      e[        XuXAU5      nXh4$ s  snf )Nr   Tr%  r   zUnhandled color space z  - don't know how to treat data.r  	DCTDecodejpg	JPXDecodejp2zUnhandled complex filter r   z'Cannot handle multiple complex filters )r  r   r   r   r#  
colorspacebits_per_pixellenr   r  r   ColorspaceToStrgetr  )	r/  all_filtersr  complex_filtersr   r+  out_data
out_formatr1  s	            r    r(  r(  @  sX   '')K"-R+Q(:Q:Q1Qq+OR%%'H,,h.E.EFD
?q  )))=HJ'*@AYAYA]A]^f^q^qAr@s  tT  )U  V  V	_		"A )))=HJ+ )))=HJ'*CA3a(HII#&MoM^^_$`aajOD>1 Ss
   D8D8r   )&__all__ro   r   r   loggingpathlibr	   collectionsr
   pypdfium2.rawr   r   pypdfium2.internalinternalr   pypdfium2._helpers.miscr   pypdfium2._helpers.matrixr   pypdfium2._helpers.bitmapr   pypdfium2._lazyr   	getLoggerr]   r   AutoCloseabler   r   AutoCastabler   r   r  	Exceptionr  r#  r  r(  r   r"   r    <module>rQ     s    =  "   "   % / / /  			8	$HM'' HMV/) /d+x$$ +\G<	 G<T &XY
	Y 	

Dr"   