
    %oi                          S SK r S SKrS SKJrJr  S SKJrJrJr  S SK	J
r
  S SKJrJrJr  S SKJrJr  S SKJr  S SKJrJrJrJr  S S	KJr  S S
KJrJr  \R<                  " \5      r \" S5      r!\" S5      r" " S S5      r#g)    N)	ContainerIterator)AnyBinaryIOClassVar)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectPagePagesc                      \ rS rSr% SrS\S\S\S\S-  SS4
S	 jrS\4S
 jr	1 Skr
\\\      \S'   \S\S\S    4S j5       r\     SS\S\\   S-  S\S\S\S\S\S    4S jj5       rS\S\4S jrS\S\S\4S jrS\S\\   4S jrSrg)PDFPage   a2  An object that holds the information about a page.

A PDFPage object is merely a convenience class that has a set
of keys and values, which describe the properties of a page
and point to its contents.

Attributes
----------
  doc: a PDFDocument object.
  pageid: any Python object that can uniquely identify the page.
  attrs: a dictionary of page attributes.
  contents: a list of PDFStream objects that represents the page content.
  lastmod: the last modified time of the page.
  resources: a dictionary of resources used by the page.
  mediabox: the physical size of the page.
  cropbox: the crop rectangle of the page.
  rotate: the page rotation (in degree).
  annots: the page annotations.
  beads: a chain that represents natural reading order.
  label: the page's label (typically, the logical page number).

docpageidattrslabelNreturnc                    Xl         X l        [        U5      U l        X@l        [        U R                  R                  S5      5      U l        [        U R                  R                  S0 5      5      U l        U R                  U R                  R                  S5      5      U l
        U R                  U R                  R                  S5      U R                  5      U l        U R                  U R                  R                  S5      5      U l        [        U R                  R                  SS5      5      S-   S-  U l        U R                  R                  S	5      U l        U R                  R                  S
5      U l        g)zInitialize a page object.

doc: a PDFDocument object.
pageid: any Python object that can uniquely identify the page.
attrs: a dictionary of page attributes.
label: page label string.
LastModified	ResourcesMediaBoxCropBoxContentsRotater   ih  AnnotsBN)r   r   r   r   r   r   getlastmod	resources_parse_mediaboxmediabox_parse_cropboxcropbox_parse_contentscontentsr   rotateannotsbeads)selfr   r   r   r   s        L/var/www/html/land-ocr/venv/lib/python3.13/site-packages/pdfminer/pdfpage.py__init__PDFPage.__init__1   s    &



~ >?/7JJNN;+0
 ,,TZZ^^J-GH**4::>>)+DdmmT,,TZZ^^J-GH !!<=CsJjjnnX.ZZ^^C(
    c                 @    SU R                   < SU R                  < S3$ )Nz<PDFPage: Resources=z, MediaBox=>)r+   r-   )r5   s    r6   __repr__PDFPage.__repr__P   s"    %dnn%7{4==BSSTUUr9   >   r&   r$   r#   r"   INHERITABLE_ATTRSdocumentc              #   4  ^ ^^	#     S
S[         S[        [        [         4   S[        [            S -  S[        [
        [        [        [         [        [         [         4   4   4      4U U	U4S jjjm	 TR                  5       nSnSTR                  ;   aB  T	" TR                  S   TR                  5      nU H  u  pVT " TXV[        U5      5      v   SnM     U(       d  TR                   Hr  nUR                  5        H[  n TR                  U5      n[!        U[        5      (       a1  UR#                  S	5      [$        L a  T " TXX[        U5      5      v   MY  M[  M]     Mt     g g ! [         a    [        R                  " S 5      n GNf = f! [&         a     M  f = f7f)Nobjparentvisitedr   c              3     >#    [        U [        5      (       a+  U n[        TR                  U5      5      R	                  5       nO%U R
                  n[        U 5      R	                  5       nUc
  [        5       nX2;   a  g UR                  U5        UR                  5        H"  u  pVUT	R                  ;   d  M  XT;  d  M  XdU'   M$     UR                  S5      nUc&  [        R                  (       d  UR                  S5      nU[        L aF  SU;   a@  [        R                  SUS   5        [!        US   5       H  nT
" XU5       S h  vN   M     g U["        L a  [        R                  SU5        X44v   g g  N/7f)NTypetypeKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsr>   r)   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)rA   rB   rC   	object_idobject_propertieskvobject_typechildclsdepth_first_searchr?   s            r6   r\   0PDFPage.create_pages.<locals>.depth_first_search\   sK    
 #s##	$.xy/I$J$O$O$Q!  II	$.sO$8$8$:! %#KK	"---!2L+,a( ' ,//7K"8??/33F;m+:K0K		*,=f,EF'(9&(ABE1%GTTT C ,		*&78 44 - Us   B-E44E4;BE4E20E4Fr   TrE   N)r   dictstrrM   r   tuplerI   get_page_labelsr
   	itertoolsrepeatcatalognextxrefs
get_objidsrJ   rH   r)   rT   r   )
r[   r?   page_labelspagesobjectsrL   treexrefrA   r\   s
   ``       @r6   create_pagesPDFPage.create_pagesZ   s}    
 (,$	5$	5cN$	5 X_$	5 eCc4S>&9!::;<	$	5 $	5L	1080H0H0JK h&&&()9)9')BHDTDTUG&(Ek1BCC  '  !__.E&ooe4%c400SWWV_5T"%hD<M"NN 6U0 / '   	1#**40K	1" - sP   A+F1E  A=F?AFF  F FFF
FFFFfppagenosmaxpagespasswordcachingcheck_extractablec              #   P  #    [        U5      n[        XtUS9nUR                  (       d4  U(       a  SU< 3n	[        U	5      eSU< S3n
[        R                  U
5        [        U R                  U5      5       H*  u  pU(       a  X;  a  M  Uv   U(       d  M   X;S-   ::  d  M*    g    g 7f)N)rs   rt   z Text extraction is not allowed: zThe PDF z contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r	   is_extractabler   rR   warning	enumeratern   )r[   rp   rq   rr   rs   rt   ru   parserr   	error_msgwarning_msgpagenopages                r6   	get_pagesPDFPage.get_pages   s      2&WE !! >rfE	1)<< rf %A A  K(%c&6&6s&;<LFF1JxH
2 =s   BB&B&!B&valuec                     SnUc  [         R                  S5        U$  [        S [        U5       5       5      $ ! [         a    [         R                  S5        Us $ f = f)N)        r   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc              3   8   #    U  H  n[        U5      v   M     g 7fr^   r   .0vals     r6   	<genexpr>*PDFPage._parse_mediabox.<locals>.<genexpr>        Ghsmm   z2Invalid MediaBox in /Page, defaulting to US Letter)rR   ry   r   r   r   )r5   r   	us_letters      r6   r,   PDFPage._parse_mediabox   sa    ,	=KK* 	GxGGG 	KKLM	s   9 !AAr-   c                     Uc  U$  [        S [        U5       5       5      $ ! [         a    [        R	                  S5        Us $ f = f)Nc              3   8   #    U  H  n[        U5      v   M     g 7fr^   r   r   s     r6   r   )PDFPage._parse_cropbox.<locals>.<genexpr>   r   r   z0Invalid CropBox in /Page, defaulting to MediaBox)r   r   r   rR   ry   )r5   r   r-   s      r6   r.   PDFPage._parse_cropbox   sG    =O	GxGGG 	KKJKO	s   " !AAc                 V    / nUb#  [        U5      n[        U[        5      (       d  U/nU$ r^   )r   rH   list)r5   r   r1   s      r6   r0   PDFPage._parse_contents   s/     Hh--$:r9   )r3   r   r4   r1   r/   r   r   r*   r-   r   r+   r2   )Nr    TF)__name__
__module____qualname____firstlineno____doc__r	   objectr`   r7   r<   r>   r   rM   __annotations__classmethodr   rn   r   r   rI   boolr   r   r   r,   r.   r   r0   __static_attributes__ r9   r6   r   r      sJ   .)) ) 	)
 Tz) 
)>V# V-xC)  ;K ;HY4G ; ;z  *."'"" 3$&" 	"
 " "  " 
)	" "HS T "
C 
4 
D 
S T#Y r9   r   )$rc   loggingcollections.abcr   r   typingr   r   r   pdfminerr   pdfminer.pdfdocumentr	   r
   r   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   r   	getLoggerr   rR   rT   rQ   r   r   r9   r6   <module>r      sf      / * *  
 D ( I I ! +! 6{GG Gr9   