
    )i8                        S r SSKrSSKrSSKrSSKrSSKJrJrJ	r	J
r
JrJrJrJrJr  SSKJr  SSKrSSKJrJrJr  SSKJr  SSKJr  SSKJrJr  SS	KJrJrJ r J!r!  SS
K"J#r#J$r$J%r%  SSK&J'r'  \RP                  " 5         \RR                  " \*5      r+\RX                  " S5      r-S\\.\/4   S\.4S jr0S(S\S\1S\\.   SS4S jjr2 S)S\S\S\3SS4S jjr4  S*S\S\S\\.   S\3SS4
S jjr5    S+S\S\.S\S\\6   S\.S\3S\\.   S\\.   SS4S jjr7\%" S 5      r8\%" S!5      r9S\.S\.S\.SS4S" jr:     S,S\S\.S\
\6   S\\6   S\.S\3S\\.   S\\.   S\3SS4S# jjr;S\4S$ jr<S(S%\\\.      SS4S& jjr=\*S':X  a  \=" 5         gg)-z#Extract pdf structure in XML format    N)	Any	ContainerDictIterableListOptionalTextIOUnioncast)ArgumentParser)PDFDocumentPDFNoOutlinesPDFXRefFallback)PDFPage)	PDFParser)PDFObjectNotFoundPDFValueError)	PDFStream	PDFObjRefresolve1stream_value)	PSKeyword	PSLiteralLIT)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                 x    [        U [        5      (       a  [        U S5      nOU n[        R	                  S U5      $ )Nzlatin-1c                 <    S[        U R                  S5      5      -  $ )Nz&#%d;r   )ordgroup)ms    T/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/../../../bin/dumppdf.py<lambda>escape.<locals>.<lambda>   s    3qwwqz?!:    )
isinstancebytesstrESC_PATsub)r   uss     r#   escaper-      s2    !UI;;:B??r&   outobjcodecc                 *   Uc  U R                  S5        g [        U[        5      (       a  U R                  S[        U5      -  5        UR	                  5        HF  u  p4U R                  SU-  5        U R                  S5        [        X5        U R                  S5        MH     U R                  S5        g [        U[        5      (       aT  U R                  S[        U5      -  5        U H  n[        X5        U R                  S5        M!     U R                  S	5        g [        U[        [        45      (       a)  U R                  S
[        U5      [        U5      4-  5        g [        U[        5      (       a  US:X  a   U R                  UR                  5       5        g US:X  a   U R                  UR                  5       5        g U R                  S5        [        XR                  5        U R                  S5        US:X  a8  UR                  5       nU R                  S[        U5      [        U5      4-  5        U R                  S5        g [        U[        5      (       a  U R                  SUR                  -  5        g [        U[         5      (       a  U R                  SUR"                  -  5        g [        U[$        5      (       a  U R                  SUR"                  -  5        g ['        U5      (       a  U R                  SU-  5        g [)        U5      e)Nz<null />z<dict size="%d">
z<key>%s</key>
z<value>z	</value>
z</dict>z<list size="%d">

z</list>z<string size="%d">%s</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="%d">%s</data>
z	</stream>z<ref id="%d" />z<keyword>%s</keyword>z<literal>%s</literal>z<number>%s</number>)writer'   dictlenitemsdumpxmllistr)   r(   r-   r   get_rawdataget_dataattrsr   objidr   namer   r   	TypeError)r.   r/   r0   kvdatas         r#   r:   r:   !   s^   
{		*#t		&S12iikFQII'!+,IIi COIIl#	 "
 			)#t		&S12ACOIIdO  			)#U|$$		1SXvc{4KKL#y!!E>IIcoo'( 	 hIIclln% 	 II+,C#IIn%||~		73t9fTl:SSTIIk"#y!!		#cii/0#y!!		)CHH45#y!!		)CHH45}}		'#-.
C.r&   docshow_fallback_xrefc                 ^   UR                    H\  n[        U[        5      (       a	  U(       d  M!  U R                  S5        [	        XR                  5       5        U R                  S5        M^     [        S UR                    5       5      nU(       a  U(       d  Sn[        R                  U5        g )Nz
<trailer>
z
</trailer>

c              3   B   #    U  H  n[        U[        5      v   M     g 7fN)r'   r   ).0xrefs     r#   	<genexpr>dumptrailers.<locals>.<genexpr>j   s     K:dO44s   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr'   r   r6   r:   get_trailerallloggerwarning)r.   rE   rF   rK   no_xrefsmsgs         r#   dumptrailersrU   b   s     		$004F4FIIm$C))+,II()	 
 KKKH*$ 	
 	s
r&   c                    [        5       nU R                  S5        UR                   Hx  nUR                  5        Ha  nXd;   a  M
  UR	                  U5         UR                  U5      nUc  M2  U R                  SU-  5        [        XUS9  U R                  S5        Mc     Mz     [        XU5        U R                  S5        g ! [         a  n[        SU-  5         S nAM  S nAff = f)Nz<pdf>z<object id="%d">
r0   z
</object>

znot found: %rz</pdf>)
setr6   rN   
get_objidsaddgetobjr:   r   printrU   )	r.   rE   r0   rF   visitedrK   r?   r/   es	            r#   dumpallobjsr_   u   s     eGIIg		__&EKK+jj';		.67.		+, '  -.IIh
	 % +o)**+s   C-/C
C%C  C%outfpfnameobjidspagenospassworddumpall
extractdirc                 &  ^ [        US5      n[        U5      n	[        X5      m[        [        R
                  " T5      S5       V
Vs0 s H  u  pUR                  U
_M     nn
nS[        S[        4U4S jjn TR                  5       nU R                  S5        U GH+  u  nnnnnS n
U(       a  U" U5      nUUS   R                     n
OxU(       aq  Un[        U[        5      (       aZ  UR                  S5      nU(       aB  [        U5      S	:X  a3  UR                  S
5      (       a  U" US
   5      nUUS   R                     n
[!        U5      nU R                  SR#                  UU5      5        Ub.  U R                  S5        [%        U U5        U R                  S5        U
b  U R                  SU
-  5        U R                  S5        GM.     U R                  S5        U	R)                  5         UR)                  5         g s  snn
f ! [&         a     N3f = f)Nrb   destr   c                 d  > [        U [        [        45      (       a  [        TR	                  U 5      5      n O9[        U [
        5      (       a$  [        TR	                  U R                  5      5      n [        U [        5      (       a  U S   n [        U [        5      (       a  U R                  5       n U $ )ND)
r'   r)   r(   r   get_destr   r@   r7   r   resolve)rj   rE   s    r#   resolve_dest!dumpoutline.<locals>.resolve_dest   s~    dS%L))CLL./Di((CLL34DdD!!9DdI&&<<>Dr&   z<outlines>
r   Sz/'GoTo'rl   z"<outline level="{!r}" title="{}">
z<dest>z</dest>
z<pageno>%r</pageno>
z</outline>
z</outlines>
)openr   r   	enumerater   create_pagespageidobjectr   get_outlinesr6   r?   r'   r7   getreprr-   formatr:   r   close)r`   ra   rb   rc   rd   re   r0   rf   fpparserpagenopagepagesro   outlinesleveltitlerj   aseactionsubtyper   rE   s                          @r#   dumpoutliner      s    
eT	Br]F
f
'C ((<(<S(A1EENV 	VE 
 
	6 	c 	##%N#+3'UE4BF#D)tAw}}-fd++$jjoG4=I#=&**S//+F3K8!&tAw}}!5uAKK=DDUANOH%t$K(!3f<=KK') ,4* 	O$ LLNHHJ
[R  s   G=8E$H 
HHFilespecEmbeddedFilec                   ^^
 S[         S[        [        [        4   SS 4U
U4S jjn[	        U S5       n[        U5      n[        XQ5      m
[        5       nT
R                   H|  nUR                  5        He  nT
R                  U5      n	X;  d  M  [        U	[        5      (       d  M2  U	R                  S5      [        L d  ML  UR                  U5        U" X5        Mg     M~     S S S 5        g ! , (       d  f       g = f)Nr?   r/   r   c                   > [         R                  R                  UR                  S5      =(       d-    [	        [
        UR                  S5      5      R                  5       5      nUS   R                  S5      =(       d    US   R                  S5      nTR                  UR                  5      n[        U[        5      (       d  SU-  n[        U5      eUR                  S5      [        La  [        SU-  5      e[         R                  R                  T	SX4-  5      n[         R                  R                  U5      (       a  [        SU-  5      e[!        S	U-  5        [         R"                  " [         R                  R%                  U5      S
S9  ['        US5      nUR)                  UR+                  5       5        UR-                  5         g )NUFFEFz:unable to process PDF: reference for %r is not a PDFStreamTypez>unable to process PDF: reference for %r is not an EmbeddedFilez%.6d-%szfile exists: %rzextracting: %rT)exist_okwb)ospathbasenamerx   r   r(   decoder[   r?   r'   r   r   LITERAL_EMBEDDEDFILEjoinexistsIOErrorr\   makedirsdirnamerr   r6   r=   r{   )
r?   r/   filenamefilereffileobj	error_msgr   r.   rE   rf   s
           r#   extract1!extractembedded.<locals>.extract1   sb   77##CGGDM$WT%5N5U5U5WXd)--%;Ts);**W]]+'9--&'   	**;;v&::),46  ww||J	U4E(EF77>>$+d233%&
BGGOOD)D94		'""$%		r&   rh   r   )intr   r)   r   rr   r   r   rX   rN   rY   r[   r'   r7   rx   LITERAL_FILESPECrZ   )ra   rd   rf   r   r|   r}   extracted_objidsrK   r?   r/   rE   s     `       @r#   extractembeddedr      s     $sCx. T  2 
eT	b2&+5IID*jj'1"3--+;;$((/U( + 	 
  
	 s   AC)C)%C)?!C))
C7c	                 L   [        US5      n	[        U	5      n
[        X5      nU(       a$  U H  nUR                  U5      n[	        XUS9  M      U(       au  [        [        R                  " U5      5       HR  u  pX;   d  M  U(       a*  UR                   H  n[        U5      n[	        XUS9  M     M=  [	        XR                  5        MT     U(       a  [        XXh5        U(       d  U(       d  U(       d  [        XU5        U	R                  5         US;  a  U R                  S5        g )Nrh   rW   )r3   r4   r2   )rr   r   r   r[   r:   rs   r   rt   contentsr   r>   r_   rU   r{   r6   )r`   ra   rb   rc   rd   re   r0   rf   rF   r|   r}   rE   r?   r/   r~   r   s                   r#   dumppdfr      s     
eT	Br]F
f
'CE**U#CEe,  '(<(<S(ABNV #}}*3/%8  - E::. C E:WwU!34HHJ%%D
r&   c                  B   [        [        SS9n U R                  S[        S SSS9  U R                  SSS	S
R	                  [
        R                  5      S9  U R                  SSSSSS9  U R                  5       nUR                  SSSSSS9  UR                  SS[        SS9  U R                  SSS9nUR                  S[        S SSS9  UR                  SS[        S S9  UR                  S!S"[        S#S9  UR                  S$S%SSS&S9  UR                  S'SS(S)9  UR                  S*S+[        S,S-S.9  U R                  S/S0S9nUR                  S1S2[        S3S4S.9  UR                  5       nUR                  S5S6SSS7S9  UR                  S8S9SSS:S9  UR                  S;S<SSS=S9  U $ )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v{})r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-P z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-o-zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)
r   __doc__add_argumentr)   rz   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )r}   procedure_parserparse_paramsoutput_paramscodec_parsers        r#   create_parserr   !  s   $?F
.   "))(*>*>?	   '   ::<!!+ "  !!d3K "  ,,7 - L ?   	   @	   B   7   ;   --> . M    !==?L4   8   1   Mr&   argvc                    [        5       nUR                  U S9nUR                  (       a2  [        R                  " 5       R                  [        R                  5        UR                  S:X  a  [        R                  nO[        UR                  S5      nUR                  (       a5  UR                  R                  S5       Vs/ s H  n[        U5      PM     nnO/ nUR                  (       a  UR                   Vs1 s H  oDS-
  iM	     nnOSUR                  (       a8  UR                  R                  S5       Vs1 s H  n[        U5      S-
  iM     nnO
[!        5       nUR"                  nUR$                  (       a  SnO*UR&                  (       a  SnOUR(                  (       a  SnOS nUR*                   H|  n	UR,                  (       a  [/        UU	UUUUR0                  US S	9  M0  UR2                  (       a  [5        XUR2                  S
9  MW  [7        UU	UUUUR0                  US UR8                  S9	  M~     UR;                  5         g s  snf s  snf s  snf )N)argsr   w,ri   r3   r4   r5   )rd   re   r0   rf   )rd   rf   )rd   re   r0   rf   rF   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGoutfilesysstdoutrr   objectssplitr   page_numbersrc   rX   rd   
raw_streambinary_streamtext_streamr   extract_tocr   rP   extract_embeddedr   r   rF   r{   )
r   r}   r   r`   xrb   rc   rd   r0   ra   s
             r#   mainr     s   _F$'Dzz$$W]]3||s

T\\3'||"&,,"4"4S"9:"9Q#a&"9:"&"3"34"3Qq5"34	'+||'9'9#'>?'>!3q6A:'>?%}}H$						!	 ""EAVAVW!#'#:#:
 6 
KKMc ;
 5?s   I;I:I__main__rI   )F)NF)r   FNN)r   FNNF)>r   r   os.pathr   rer   typingr   r   r   r   r   r   r	   r
   r   argparser   r   pdfminer.pdfdocumentr   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rQ   compiler*   r)   r(   r-   rv   r:   boolrU   r_   r   r   r   r   r   r   r   r    r&   r#   <module>r      s   )   	 
 V V V #  L L $ ( > J J 7 7 #    			8	$
**>
?@eCJ @C @> >f >Xc] >d >D ?D	!7;	,  $			 C= 	
 
@  $::: : s^	:
 : : C=: : 
:z z? >* (3 (# (3 (4 (`  $$""" SM" s^	"
 " " C=" " " 
"Js~ sl>xS	" >d >B zF r&   