
    %oi
9                        S r SSKrSSKrSSKrSSKrSSKJr  SSKJ	r	J
r
  SSKJrJrJr  SSKrSSKJrJrJr  SSKJrJrJrJr  SSKJr  SS	KJr  SS
KJrJrJ r J!r!  SSK"J#r#J$r$J%r%  SSK&J'r'  \RP                  " 5         \RR                  " \*5      r+\RX                  " S5      r-S\.\/-  S\.4S jr0S)S\S\1S\.S-  SS4S jjr2 S*S\S\S\3SS4S jjr4  S+S\S\S\.S-  S\3SS4
S jjr5    S,S\S\.S\S\	\6   S\.S\3S\.S-  S\.S-  SS4S  jjr7\#" S!5      r8\#" S"5      r9S\.S\.S\.SS4S# jr:     S-S\S\.S\
\6   S\	\6   S\.S\3S\.S-  S\.S-  S\3SS4S$ jjr;S\4S% jr<S)S&\=\.   S-  SS4S' jjr>\*S(:X  a  \>" 5         gg).z#Extract pdf structure in XML format    N)ArgumentParser)	ContainerIterable)AnyTextIOcast)PDFDocumentPDFNoOutlinesPDFXRefFallback)
PDFIOErrorPDFObjectNotFoundPDFTypeErrorPDFValueError)PDFPage)	PDFParser)	PDFObjRef	PDFStreamresolve1stream_value)LIT	PSKeyword	PSLiteral)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                 v    [        U [        5      (       a  [        U S5      OU n[        R	                  S U5      $ )Nzlatin-1c                 >    S[        U R                  S5      5       S3$ )Nz&#r   ;)ordgroup)ms    P/var/www/html/land-ocr/venv/lib/python3.13/site-packages/../../../bin/dumppdf.py<lambda>escape.<locals>.<lambda>"   s    2c!''!*o%6a!8    )
isinstancebytesstrESC_PATsub)r   uss     r"   escaper,       s.    (E22Q		B;;8"==r%   outobjcodecc                 @   Uc  U R                  S5        g [        U[        5      (       a  U R                  S[        U5       S35        UR	                  5        HG  u  p4U R                  SU S35        U R                  S5        [        X5        U R                  S5        MI     U R                  S5        g [        U[        5      (       aU  U R                  S	[        U5       S35        U H  n[        X5        U R                  S
5        M!     U R                  S5        g [        U[        [        45      (       a+  U R                  S[        U5       S[        U5       S35        g [        U[        5      (       a  US:X  a   U R                  UR                  5       5        g US:X  a   U R                  UR                  5       5        g U R                  S5        [        XR                  5        U R                  S5        US:X  a:  UR                  5       nU R                  S[        U5       S[        U5       S35        U R                  S5        g [        U[        5      (       a   U R                  SUR                   S35        g [        U[         5      (       a   U R                  SUR"                   S35        g [        U[$        5      (       a   U R                  SUR"                   S35        g ['        U5      (       a  U R                  SU S35        g [)        U5      e)Nz<null />z<dict size="">
z<key>z</key>
z<value>z	</value>
z</dict>z<list size="
z</list>z<string size="z">z	</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="z</data>
z	</stream>z	<ref id="z" />z	<keyword>z
</keyword>z	<literal>z
</literal>z<number>z	</number>)writer&   dictlenitemsdumpxmllistr(   r'   r,   r   get_rawdataget_dataattrsr   objidr   namer   r   r   )r-   r.   r/   kvdatas         r"   r:   r:   %   s|   
{		*#t		LS
$/0IIKDAIIaS)*IIi COIIl#	  
 			)#t		LS
$/0ACOIIdO  			)#U|$$		N3s8*Bvc{m9EF#y!!E>IIcoo'( 	 hIIclln% 	 II+,C#IIn%||~		LT2fTl^9MNIIk"#y!!		Icii[-.#y!!		IchhZz23#y!!		IchhZz23}}		HSE+,
s
r%   docshow_fallback_xrefc                 b   UR                    H\  n[        U[        5      (       a	  U(       d  M!  U R                  S5        [	        XR                  5       5        U R                  S5        M^     [        S UR                    5       5      nU(       a   U(       d  Sn[        R                  U5        g g g )Nz
<trailer>
z
</trailer>

c              3   B   #    U  H  n[        U[        5      v   M     g 7fN)r&   r   ).0xrefs     r"   	<genexpr>dumptrailers.<locals>.<genexpr>p   s     K:dO44s   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr&   r   r6   r:   get_trailerallloggerwarning)r-   rD   rE   rJ   no_xrefsmsgs         r"   dumptrailersrT   f   s    
 		$004F4FIIm$C))+,II()	 
 KKKH*$ 	
 	s +xr%   c                    [        5       nU R                  S5        UR                   Hy  nUR                  5        Hb  nXd;   a  M
  UR	                  U5         UR                  U5      nUc  M2  U R                  SU S35        [        XUS9  U R                  S5        Md     M{     [        XU5        U R                  S5        g ! [         a  n[        SU< 35         S nAM  S nAff = f)Nz<pdf>z<object id="r1   r/   z
</object>

znot found: z</pdf>)
setr6   rM   
get_objidsaddgetobjr:   r   printrT   )	r-   rD   r/   rE   visitedrJ   r?   r.   es	            r"   dumpallobjsr^   z   s     eGIIg		__&EKK+jj';		Lt45.		+, '  -.IIh % +A5)**+s   C-0C
C'C""C'outfpfnameobjidspagenospassworddumpall
extractdirc           	      .  ^ [        US5       n[        U5      n	[        X5      m[        [        R
                  " T5      S5       V
Vs0 s H  u  pUR                  U
_M     nn
nS[        S[        4U4S jjn TR                  5       nU R                  S5        U GH%  u  nnnnnS n
U(       a  U" U5      nUUS   R                     n
OxU(       aq  Un[        U[        5      (       aZ  UR                  S5      nU(       aB  [        U5      S	:X  a3  UR                  S
5      (       a  U" US
   5      nUUS   R                     n
[!        U5      nU R                  SU< SU S35        Ub.  U R                  S5        [#        U U5        U R                  S5        U
b  U R                  SU
< S35        U R                  S5        GM(     U R                  S5        U	R'                  5         S S S 5        g s  snn
f ! [$         a     N+f = f! , (       d  f       g = f)Nrb   destr   c                 d  > [        U [        [        45      (       a  [        TR	                  U 5      5      n O9[        U [
        5      (       a$  [        TR	                  U R                  5      5      n [        U [        5      (       a  U S   n [        U [        5      (       a  U R                  5       n U $ )ND)
r&   r(   r'   r   get_destr   r@   r7   r   resolve)ri   rD   s    r"   resolve_dest!dumpoutline.<locals>.resolve_dest   s~    $e--T 23D),,TYY 78$%%Cy$	**||~Kr%   z<outlines>
r   Sz/'GoTo'rk   z<outline level="z	" title="r1   z<dest>z</dest>
z<pageno>z
</pageno>
z</outline>
z</outlines>
)openr   r	   	enumerater   create_pagespageidobjectr   get_outlinesr6   r?   r&   r7   getreprr,   r:   r
   close)r_   r`   ra   rb   rc   rd   r/   re   fpparserpagenopagepagesrn   outlinesleveltitleri   a_seactionsubtyper   rD   s                          @r"   dumpoutliner      s    
eT	b2&+ #,G,@,@,Eq"I
"I KK"I 	 

		v 		# 			'')HKK'.6*udAs'-D"47==1FF!&$//"(**S/"tG}	'AfjjQToo#/s#<D%*47==%9F5M.uiy4HI#KK)E4(KK,%KK(6*K @AN+) /7* KK( 	] 
	
R  		Y 
	sB   :HG0!H9EG6H0H6
H HHH
HFilespecEmbeddedFilec                   ^^
 S[         S[        [        [        4   SS 4U
U4S jjn[	        U S5       n[        U5      n[        XQ5      m
[        5       nT
R                   H|  nUR                  5        He  nT
R                  U5      n	X;  d  M  [        U	[        5      (       d  M2  U	R                  S5      [        L d  ML  UR                  U5        U" X5        Mg     M~     S S S 5        g ! , (       d  f       g = f)Nr?   r.   r   c                   > [         R                  R                  UR                  S5      =(       d-    [	        [
        UR                  S5      5      R                  5       5      nUS   R                  S5      =(       d    US   R                  S5      nTR                  UR                  5      n[        U[        5      (       d  SU< S3n[        U5      eUR                  S5      [        La  [        SU< S35      e[         R                  R                  T	U S S	U 35      n[         R                  R                  U5      (       a  [        S
U< 35      e[!        SU< 35        [         R"                  " [         R                  R%                  U5      SS9  ['        US5       nUR)                  UR+                  5       5        S S S 5        g ! , (       d  f       g = f)NUFFEFz%unable to process PDF: reference for z is not a PDFStreamTypez is not an EmbeddedFile06d-zfile exists: zextracting: T)exist_okwb)ospathbasenamerw   r   r'   decoderZ   r?   r&   r   r   LITERAL_EMBEDDEDFILEjoinexistsr   r[   makedirsdirnamerq   r6   r=   )
r?   r.   filenamefilereffileobj	error_msgr   r-   rD   re   s
           r"   extract1!extractembedded.<locals>.extract1   sn   77##CGGDM$WT%5N5U5U5WXd)--%;Ts);**W]]+'9--7|CVW   	**;;v&::7| D) )  ww||J5+Qxj(AB77>>$}TH566TH%&
BGGOOD)D9$IIg&&() s   # G
Grg   r   )intr7   r(   r   rq   r   r	   rW   rM   rX   rZ   r&   rw   LITERAL_FILESPECrY   )r`   rc   re   r   rz   r{   extracted_objidsrJ   r?   r.   rD   s     `       @r"   extractembeddedr      s    * *$sCx. *T * *, 
eT	b2&+5IID*jj'1"3--+;;$((/U( + 	 
		s   AC)C)%C)?!C))
C7c	           	      b   [        US5       n	[        U	5      n
[        X5      nU(       a$  U H  nUR                  U5      n[	        XUS9  M      U(       au  [        [        R                  " U5      5       HR  u  pX;   d  M  U(       a*  UR                   H  n[        U5      n[	        XUS9  M     M=  [	        XR                  5        MT     U(       a  [        XXh5        U(       d  U(       d  U(       d  [        XU5        S S S 5        US;  a  U R                  S5        g g ! , (       d  f       N'= f)Nrg   rV   )r3   r4   r2   )rq   r   r	   rZ   r:   rr   r   rs   contentsr   r>   r^   rT   r6   )r_   r`   ra   rb   rc   rd   r/   re   rE   rz   r{   rD   r?   r.   r|   r}   s                   r"   dumppdfr      s     
eT	b2&+jj'%0    )'*>*>s*C D$#'==C".s"3C#Ee< $1  zz2 !E E>7%78' 
( %%D &) 
	s   A0D A>D  
D.c                  *   [        [        SS9n U R                  S[        S SSS9  U R                  SSS	S
[        R
                   3S9  U R                  SSSSSS9  U R                  5       nUR                  SSSSSS9  UR                  SS[        SS9  U R                  SSS9nUR                  S[        S SSS9  UR                  SS[        S S9  UR                  S!S"[        S#S9  UR                  S$S%SSS&S9  UR                  S'SS(S)9  UR                  S*S+[        S,S-S.9  U R                  S/S0S9nUR                  S1S2[        S3S4S.9  UR                  5       nUR                  S5S6SSS7S9  UR                  S8S9SSS:S9  UR                  S;S<SSS=S9  U $ )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v)r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-P z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-or   zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)	r   __doc__add_argumentr(   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )r{   procedure_parserparse_paramsoutput_paramscodec_parsers        r"   create_parserr     s   $?F
.    !5!5 67	   '   ::<!!+ "  !!%	 "  ,,- - L ?   	   @	   B   7   ;   --4 . M    !==?L4   8   1   Mr%   argvc                    [        5       nUR                  U S9nUR                  (       a2  [        R                  " 5       R                  [        R                  5        UR                  (       a5  UR                  R                  S5       Vs/ s H  n[        U5      PM     snO/ nUR                  (       a  UR                   Vs1 s H  o3S-
  iM	     nnOSUR                  (       a8  UR                  R                  S5       Vs1 s H  n[        U5      S-
  iM     nnO
[        5       nUR                  nUR                  (       a  SnO*UR                  (       a  SnOUR                   (       a  SnOS nUR"                  S:X  a  [$        R&                  O[)        UR"                  S5       nUR*                   H|  n	UR,                  (       a  [/        UU	UUUUR0                  US S	9  M0  UR2                  (       a  [5        XUR2                  S
9  MW  [7        UU	UUUUR0                  US UR8                  S9	  M~     S S S 5        g s  snf s  snf s  snf ! , (       d  f       g = f)N)args,rh   r3   r4   r5   r   w)rc   rd   r/   re   )rc   re   )rc   rd   r/   re   rE   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGobjectssplitr   page_numbersrb   rW   rc   
raw_streambinary_streamtext_streamoutfilesysstdoutrq   r   extract_tocr   rO   extract_embeddedr   r   rE   )
r   r{   r   xra   rb   rc   r/   r_   r`   s
             r"   mainr     s   _F$'Dzz$$W]]3:>,,dll00565c!f56BF"&"3"34"3Qq5"34	'+||'9'9#'>?'>!3q6A:'>?%}}H!						 ||s*T\\30G	G5ZZE% HH#	 &&9N9N % HH#'+'>'>
#   
H	G+ 7 5?  
H	Gs   H>II(BI
I__main__rH   )F)NF)r   FNN)r   FNNF)?r   r   os.pathr   rer   argparser   collections.abcr   r   typingr   r   r   r   pdfminer.pdfdocumentr	   r
   r   pdfminer.pdfexceptionsr   r   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rP   compiler)   r(   r'   r,   ru   r:   boolrT   r^   r   r   r   r   r   r   r   r;   r    r%   r"   <module>r      s   )   	 
 # / $ $  L L  % ( J J 7 7 #    			8	$
**>
?>cEk >c >
> >f >S4Z >4 >H  %		  
	. $			 : 	
 
> !888 8 s^	8
 8 8 :8 d
8 
8v z? >* $)3 $)# $)3 $)4 $)X !$    SM  s^	 
     :  d
    
 Fx~ xv8tCy4 84 8v zF r%   