
    )i                        S SK r S SKrS SKrS SKJrJrJrJrJrJ	r	J
r
JrJrJrJr  S SKJr  SSKJr  SSKJr  SSKJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJ r   SSKJ!r!  SSKJ"r"  SSKJ#r#  SSKJ$r$  SSKJ%r%  SSKJ&r&  SSKJ'r'  SSKJ(r(  SSK)J*r*  SSK+J,r,  SSK+J-r-  SSK.J/r/J0r0  SSK1J2r2  SSK3J4r4  SSKJ5r5J6r6J7r7J8r8J9r9J:r:  SSKJ;r;  SS KJ<r<  SS!KJ=r=  SS"KJ>r>  \R~                  " \@5      rA " S# S$\*5      rB " S% S&\B5      rC\" S'\
\\55      rD " S( S)\B\\D   5      rE " S* S+\E\5   5      rF " S, S-\E\5   5      rG " S. S/\E\5   5      rH " S0 S1\E\5   5      rIg)2    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)PDFColorSpace   )utils)ImageWriter)LAParamsLTComponentTextGroupElement)LTAnno)LTChar)LTContainer)LTCurve)LTFigure)LTImageLTItem)LTLayoutContainer)LTLine)LTPage)LTRect)LTText)	LTTextBox)LTTextBoxVertical)LTTextGroup)
LTTextLine)PDFTextDevice)PDFFont)PDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOPointMatrixRectPathSegmentmake_compat_str)apply_matrix_pt)bbox2str)enc)mult_matrixc                   :   \ rS rSr% \\S'   \\S'     S'S\S\S\	\
   SS4S	 jjrS
\S\SS4S jrS
\SS4S jrS\S\S\SS4S jrS\SS4S jrS\S\SS4S jrS\S\S\S\S\\   SS4S jrS\S\S\S\S\S\S \S!\S\4S" jrS\S\S\4S# jrS$\SS4S% jr S&r!g)(PDFLayoutAnalyzer5   cur_itemctmNrsrcmgrpagenolaparamsreturnc                 V    [         R                  " X5        X l        X0l        / U l        g N)r&   __init__r=   r>   _stackselfr<   r=   r>   s       R/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/pdfminer/converter.pyrB   PDFLayoutAnalyzer.__init__9   s#     	t- /1    pagec                     UR                   u  p4pV[        X#U45      u  p4[        X%U45      u  pVSS[        X5-
  5      [        XF-
  5      4n[        U R                  U5      U l        g )Nr   )mediaboxr3   absr   r=   r:   )rE   rI   r;   x0y0x1y1rK   s           rF   
begin_pagePDFLayoutAnalyzer.begin_pageD   s\    =="3R1"3R1q#bg,BG5t{{H5rH   c                    U R                   (       a#   [        [        U R                   5      5      5       e[        U R                  [
        5      (       d#   [        [        U R                  5      5      5       eU R                  b%  U R                  R                  U R                  5        U =R                  S-  sl	        U R                  U R                  5        g )Nr   )rC   strlen
isinstancer:   r   typer>   analyzer=   receive_layout)rE   rI   s     rF   end_pagePDFLayoutAnalyzer.end_pageK   s    ;;5C$4 55$--00J#d4==6I2JJ0==$MM!!$--0qDMM*rH   namebboxmatrixc                     U R                   R                  U R                  5        [        X[	        X0R
                  5      5      U l        g rA   )rC   appendr:   r   r6   r;   )rE   r\   r]   r^   s       rF   begin_figurePDFLayoutAnalyzer.begin_figureS   s/    4==) [-JKrH   _c                    U R                   n[        U R                   [        5      (       d#   [        [	        U R                   5      5      5       eU R
                  R                  5       U l         U R                   R                  U5        g rA   )r:   rV   r   rT   rW   rC   popadd)rE   rc   figs      rF   
end_figurePDFLayoutAnalyzer.end_figureW   sY    mm$--22LCT]]8K4LL2)#rH   streamc                    [        U R                  [        5      (       d#   [        [	        U R                  5      5      5       e[        UUU R                  R                  U R                  R                  U R                  R                  U R                  R                  45      nU R                  R                  U5        g rA   )rV   r:   r   rT   rW   r   rM   rN   rO   rP   rf   )rE   r\   rj   items       rF   render_imagePDFLayoutAnalyzer.render_image]   s    $--22LCT]]8K4LL2]]t}}//1A1A4==CSCST

 	$rH   gstatestrokefillevenoddpathc           
         SR                  S U 5       5      nUSS S:w  a  gUR                  S5      S:  aT  [        R                  " SU5       H8  nXWR	                  S5      UR                  S5       nU R                  XX4U5        M:     gU V	s/ s H'  n	[        [        U	S   S:w  a  U	S	S OUS   S	S 5      PM)     n
n	U
 Vs/ s H  n[        U R                  U5      PM     nnUS
;   aR  [        UR                  US   US   UUUUR                  UR                  5      nU R                  R!                  U5        gUS;   Ga&  Uu  u  pu  nnu  nnu  nnnUS   US   :H  nUU:H  =(       a    UU:H  =(       a    UU:H  =(       a    UU:H  =(       d)    UU:H  =(       a    UU:H  =(       a    UU:H  =(       a    UU:H  nU(       a]  U(       aV  [#        UR                  / US   QUS   Q7UUUUR                  UR                  5      nU R                  R!                  U5        g[%        UR                  UUUUUR                  UR                  5      nU R                  R!                  U5        g[%        UR                  UUUUUR                  UR                  5      nU R                  R!                  U5        gs  sn	f s  snf )z@Paint paths described in section 4.4 of the PDF reference manual c              3   *   #    U  H	  oS    v   M     g7f)r   N ).0xs     rF   	<genexpr>/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>o   s     +d!ds   Nr   mzm[^m]+r   h>   mlmlh>   mlllhmllll      )joincountrefinditerstartend
paint_pathr   r.   r3   r;   r   	linewidthscolorncolorr:   rf   r    r   )rE   ro   rp   rq   rr   rs   shaper|   subpathpraw_ptsptptslinerM   rN   rO   rP   x2y2x3y3rc   is_closed_loophas_square_coordinatesrectcurves                              rF   r   PDFLayoutAnalyzer.paint_pathf   s    +d++!9 [[![[E2wwqzAEE!H5wG 3 OSNRUadckAbcFtAwrs|Dd   <CC7R?488R07CC%
 $$FFMMMM	 !!$',,<?9(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' "&<!((*#a&*3q6*D MM%%d+#((E MM%%e,  $$MMMM !!%(} Ds   .J6 J;fontfontsizescalingrisecidncsgraphicstatec	                     UR                  U5      n	[        U	[        5      (       d   [        [        U	5      5      5       e UR                  U5      n
UR                  U5      n[        UUUUUU	U
UUU5
      nU R                  R                  U5        UR                  $ ! [         a    U R                  X&5      n	 Nzf = frA   )	to_unichrrV   rT   rW   r(   handle_undefined_char
char_width	char_dispr   r:   rf   adv)rE   r^   r   r   r   r   r   r   r   text	textwidthtextdisprl   s                rF   render_charPDFLayoutAnalyzer.render_char   s    	9>>#&DdC((9#d4j/9( OOC(	>>#&
 	$xx# $ 	9--d8D	9s   ?B B=<B=c                 8    [         R                  SX5        SU-  $ )Nzundefined: %r, %rz(cid:%d))logdebug)rE   r   r   s      rF   r   'PDFLayoutAnalyzer.handle_undefined_char   s    		%t1CrH   ltpagec                     g rA   rw   rE   r   s     rF   rY    PDFLayoutAnalyzer.receive_layout   s    rH   )rC   r:   r>   r=   r   N)"__name__
__module____qualname____firstlineno__r   __annotations__r/   r*   intr   r   rB   r+   rQ   rZ   rT   r0   ra   rh   r,   rm   r)   boolr   r1   r   r'   floatr   r   r   r   rY   __static_attributes__rw   rH   rF   r8   r8   5   s   	K
 '+		2#	2 	2 8$		2
 
	26w 6V 6 6+W + +L LD L& LT LC D    i  D  `)`) `) 	`)
 `) {#`) 
`)D  	
     & 
B '      V  rH   r8   c            	       \    \ rS rSr  SS\S\S\\   SS4S jjrS\	SS4S	 jr
S\	4S
 jrSrg)PDFPageAggregator   Nr<   r=   r>   r?   c                 :    [         R                  XX#S9  S U l        g N)r=   r>   )r8   rB   resultrD   s       rF   rB   PDFPageAggregator.__init__   s     	""4"S(,rH   r   c                     Xl         g rA   r   r   s     rF   rY    PDFPageAggregator.receive_layout   s    rH   c                 8    U R                   c   eU R                   $ rA   r   rE   s    rF   
get_resultPDFPageAggregator.get_result   s    {{&&&{{rH   r   r   )r   r   r   r   r*   r   r   r   rB   r   rY   r   r   rw   rH   rF   r   r      sY     '+	-#- - 8$	-
 
-V  F rH   r   IOTypec                   b    \ rS rSr   SS\S\S\S\S\\	   SS4S	 jjr
\S\S\4S
 j5       rSrg)PDFConverteri  Nr<   outfpcodecr=   r>   r?   c                     [         R                  XXES9  X l        X0l        U R	                  U R                  5      U l        g r   )r8   rB   r   r   _is_binary_streamoutfp_binary)rE   r<   r   r   r=   r>   s         rF   rB   PDFConverter.__init__  s8     	""4"S"

 224::>rH   c                    S[        U SS5      ;   a  g[        U S5      (       a  g[        U [        R                  5      (       a  g[        U [        R
                  5      (       a  g[        U [        R                  5      (       a  gg)z"Test if an stream is binary or notbmoderu   TF)getattrhasattrrV   ioBytesIOStringIO
TextIOBase)r   s    rF   r   PDFConverter._is_binary_stream  sg     '%,,UF##rzz**r{{++r}}--rH   )r   r   r   )utf-8r   N)r   r   r   r   r*   r   rT   r   r   r   rB   staticmethodr-   r   r   r   rw   rH   rF   r   r     ss    
 '+?#? ? 	?
 ? 8$? 
?  4  rH   r   c                      ^  \ rS rSr     SS\S\S\S\S\\	   S\
S	\\   S
S4U 4S jjjrS\S
S4S jrS\S
S4S jrS\S\S
S4S jrS\S\
S\
S\
S\\   S
S4S jrSrU =r$ )TextConverteri'  Nr<   r   r   r=   r>   
showpagenoimagewriterr?   c                 <   > [         TU ]  XX4US9  X`l        Xpl        g )Nr   r=   r>   )superrB   r   r   )	rE   r<   r   r   r=   r>   r   r   	__class__s	           rF   rB   TextConverter.__init__(  s$     	uhW$&rH   r   c                 *   [         R                  " XR                  S5      nU R                  (       a8  [	        [
        U R                  5      R                  UR                  5       5        g [	        [        U R                  5      R                  U5        g )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder	   rE   r   s     rF   
write_textTextConverter.write_text6  sY    --dJJI4::&,,T[[];$**40rH   r   c                    ^ ^ S[         SS 4UU 4S jjmT R                  (       a  T R                  SUR                  -  5        T" U5        T R                  S5        g )Nrl   r?   c                   > [        U [        5      (       a  U  H  nT" U5        M     O4[        U [        5      (       a  TR                  U R	                  5       5        [        U [
        5      (       a  TR                  S5        g [        U [        5      (       a*  TR                  b  TR                  R                  U 5        g g g )N
)	rV   r   r!   r   get_textr"   r   r   export_image)rl   childrenderrE   s     rF   r   ,TextConverter.receive_layout.<locals>.render>  s    $,,!E5M "D&))0$	**%D'**##/$$11$7 0 +rH   zPage %s
)r   r   r   pageidrE   r   r   s   ` @rF   rY   TextConverter.receive_layout=  sL    
	8 
	8D 
	8 
	8 ??OOK&--78vrH   r\   rj   c                 L    U R                   c  g [        R                  XU5        g rA   )r   r   rm   )rE   r\   rj   s      rF   rm   TextConverter.render_imageR  s$    #!!$f5rH   ro   rp   rq   rr   rs   c                     g rA   rw   )rE   ro   rp   rq   rr   rs   s         rF   r   TextConverter.paint_pathX  s     	rH   )r   r   )r   r   NFN)r   r   r   r   r*   r-   rT   r   r   r   r   r   rB   r   r   rY   r,   rm   r)   r   r1   r   r   __classcell__)r   s   @rF   r   r   '  s    
 '+ -1'#' ' 	'
 ' 8$' ' k*' 
' '1s 1t 1V  * i D   	
  {# 
 rH   r   c                        \ rS rSrSSSSSSS.rS	SS
.r            S8S\S\S\S\	S\
\   S\S\S\S\S\	S\
\   S\	S\
\\\4      S\
\\\4      SS4S jjrS\SS4S jrS9S jrS9S jrS\SS4S  jrS!\S"\	S#\S$\S%\S&\SS4S' jrS!\S"\	S(\SS4S) jrS(\S"\	S#\S$\S%\S&\SS4S* jrS!\S\S#\S$\S+\SS4S, jr S:S!\S"\	S#\S$\S%\S&\S-\SS4S. jjrS!\SS4S/ jrS\S0\S1\SS4S2 jrS9S3 jrS4\SS4S5 jr S9S6 jr!S7r"g);HTMLConverteric  yellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rI   blue)r  charNr<   r   r   r=   r>   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr?   c           	          [         R                  XX#XES9  U R                  U R                  (       + :X  a  [	        S5      eUc  SS0nUc  SSS.nX`l        Xpl        Xl        Xl        Xl	        Xl
        Xl        Xl        U(       aJ  U R                  R                  U R                  5        U R                  R                  U R                  5        U R                  U l        S U l        / U l        U R'                  5         g )Nr   )Codec is required for a binary I/O outputr  r  r  )r   rI   )r   rB   r   r   
ValueErrorr  r  r  r   r  r   r  r  updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rE   r<   r   r   r=   r>   r  r  r  r   r  r   r   r  r  s                  rF   rB   HTMLConverter.__init__r  s    " 	5f 	 	

 TZZ0HII!7+K$+V<K
"$$$&&&##D$4$45##D$4$45#26
=?rH   r   c                     U R                   (       aC  [        [        U R                  5      R	                  UR                  U R                   5      5        g [        [        U R                  5      R	                  U5        g rA   r   r   r   r   r   r   r	   r   s     rF   r   HTMLConverter.write  R    ::4::&,,T[[-DE 	 $**40rH   c                     U R                  S5        U R                  (       a  SU R                  -  nOSnU R                  U5        U R                  S5        g )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rE   ss     rF   r%  HTMLConverter.write_header  sO    

#$::!#'::. 
 IA

1

$%rH   c                     [        SU R                  5       Vs/ s H  nSR                  X5      PM     nnSSR                  U5      -  nU R	                  U5        U R	                  S5        g s  snf )Nr   z<a href="#{}">{}</a>z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger=   formatr   r   )rE   i
page_linksr,  s       rF   write_footerHTMLConverter.write_footer  st    9>q$++9N
9NA"))!/9N 	 
 H$))K
 
 	

1

%&
s   A-c                 8    U R                  [        U5      5        g rA   )r   r5   r   s     rF   r   HTMLConverter.write_text  s    

3t9rH   colorborderwidthry   ywr}   c                     U R                   R                  U5      nUbZ  SUUX0R                  -  U R                  U-
  U R                  -  XPR                  -  X`R                  -  4-  nU R	                  U5        g )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r  getr  r"  r   )	rE   r7  r8  ry   r9  r:  r}   color2r,  s	            rF   
place_rectHTMLConverter.place_rect  s}     !!%%e,K 

N]]Q&$**4

N

N	  JJqMrH   rl   c                 ~    U R                  XUR                  UR                  UR                  UR                  5        g rA   )r>  rM   rP   widthheight)rE   r7  r8  rl   s       rF   place_borderHTMLConverter.place_border  s)    DGGTWWdjj$++VrH   c                    U R                   b~  U R                   R                  U5      nS[        U5      UX0R                  -  U R                  U-
  U R                  -  XPR                  -  X`R                  -  4-  nU R                  U5        g )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r5   r  r"  r   )	rE   rl   r8  ry   r9  r:  r}   r\   r,  s	            rF   place_imageHTMLConverter.place_image  s     '##006DD I

N]]Q&$**4

N

N	  JJqMrH   sizec                 6   U R                   R                  U5      nUb{  SUX0R                  -  U R                  U-
  U R                  -  XPR                  -  U R                  -  4-  nU R                  U5        U R                  U5        U R                  S5        g )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r  r<  r  r"  r  r   r   )rE   r7  r   ry   r9  rH  r=  r,  s           rF   
place_textHTMLConverter.place_text  s     !!%%e,. 

N]]Q&$**4::%6	  JJqMOOD!JJ{#rH   writing_modec           	         U R                   R                  U R                  5        S U l        SUUUX0R                  -  U R                  U-
  U R                  -  XPR                  -  X`R                  -  4-  nU R                  U5        g )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r$  r`   r#  r  r"  r   )	rE   r7  r8  ry   r9  r:  r}   rM  r,  s	            rF   	begin_divHTMLConverter.begin_div  s     	tzz*
 JJ"djj0JJJJ 	
 	

1rH   c                     U R                   b  U R                  S5        U R                  R                  5       U l         U R                  S5        g )N</span>z</div>)r#  r   r$  re   )rE   r7  s     rF   end_divHTMLConverter.end_div!  s;    ::!JJy!__((*


8rH   fontnamer   c                    X#4nX@R                   :w  ag  U R                   b  U R                  S5        UR                  S5      S   nU R                  SXSU R                  -  U R                  -  4-  5        X@l         U R                  U5        g )NrR  +z.<span style="font-family: %s; font-size:%dpx">)r#  r   splitr  r  r   )rE   r   rU  r   r   fontname_without_subset_tags         rF   put_textHTMLConverter.put_text(  s    #::zz%

9%*2..*=b*A'JJ@.4::0E0VWX JrH   c                 &    U R                  S5        g )Nz<br>r   r   s    rF   put_newlineHTMLConverter.put_newline7  s    

6rH   r   c                    ^ ^^ S[         [        [        4   SS 4U U4S jjmS[        SS 4UU U4S jjmT" U5        T =R                  T R
                  -  sl        g )Nrl   r?   c                 x   > [        U [        5      (       a$  TR                  SSU 5        U  H  nT" U5        M     g )Nr  r   )rV   r$   rC  rl   r   rE   
show_groups     rF   rd  0HTMLConverter.receive_layout.<locals>.show_group<  s7    $,,!!+q$7!Eu% "rH   c           
        > [        U [        5      (       a  T=R                  U R                  -  sl        TR	                  SSU 5        TR
                  (       am  TR                  STR                  U R                  -
  TR                  -  -  5        TR                  SR                  U R                  U R                  5      5        U  H  nT" U5        M     U R                  b  U R                   H  nT" U5        M     g [        U [        5      (       a  TR	                  SSU 5        g [        U [        5      (       aa  TR                  SSU R                  U R                  U R                  U R                   5        U  H  nT" U5        M     TR#                  S5        g [        U [$        5      (       a?  TR'                  U SU R                  U R                  U R                  U R                   5        g TR(                  S:X  Ga  [        U [*        5      (       a%  TR	                  SSU 5        U  H  nT" U5        M     g [        U [,        5      (       ad  TR	                  S	SU 5        TR/                  S	[1        U R2                  S-   5      U R                  U R                  S
5        U  H  nT" U5        M     g [        U [4        5      (       aT  TR	                  SSU 5        TR/                  SU R7                  5       U R                  U R                  U R8                  5        g [        U [*        5      (       a2  U  H  nT" U5        M     TR(                  S:w  a  TR;                  5         g [        U [,        5      (       ap  TR                  S	SU R                  U R                  U R                  U R                   U R=                  5       5        U  H  nT" U5        M     TR#                  S	5        g [        U [4        5      (       a@  [?        U R@                  5      nTRC                  U R7                  5       X0R8                  5        g [        U [D        5      (       a  TRG                  U R7                  5       5        g )NrI   r   z*<div style="position:absolute; top:%dpx;">z<a name="{}">Page {}</a></div>
r   r  exactr  r     r  loose)$rV   r   r"  rP   rC  r   r   r  r0  r   groupsr   r   rO  rM   rA  rB  rS  r   rF  r  r%   r"   rK  rT   indexr   r   rH  r_  get_writing_moder2   rU  r[  r!   r   )rl   r   grouprU  r   rE   rd  s       rF   r   ,HTMLConverter.receive_layout.<locals>.renderC  s   $''(!!&!T2??JJD MMDGG3tzzAC JJ:AA KK
 "E5M ";;*!%"5) "-j g D'**!!'1d3d c D(++xDGGTWWdjj$++V!E5M "X&Z Y D'**  q$''477DJJTV S ??g-!$
33))*a>%)E"5M &*L I $D)44)))Q=%s4::>':DGGTWWb &*E"5M &*> ; $D&11))&!T:"DMMOTWWdggtyy6 / "$
33%)E"5M &*??g5 ,,.& % $D)44% GG GG JJ KK 113 &*E"5M &*Y/  $D&11#24==#AdmmoxK  $D&118rH   )r   r$   r   r   r"  r  rE   r   r   rd  s   ` @@rF   rY   HTMLConverter.receive_layout;  sa    	U;0@#@A 	d 	 	G	 G	D G	 G	R 	v(rH   c                 $    U R                  5         g rA   r3  r   s    rF   closeHTMLConverter.close      rH   )r#  r$  r"  r  r   r  r  r  r  r   r  )r   r   Nr   g      ?normalT2   Nr   NNr?   N)False)#r   r   r   r   r   r!  r*   r-   rT   r   r   r   r   r   r   r   rB   r   r%  r3  r   r>  r   rC  r   rF  rK  rO  rS  r[  r_  r   rY   rs  r   rw   rH   rF   r	  r	  c  s   K K '+"-10404-#- - 	-
 - 8$- - - - - - k*- - d38n-- d38n--  
!-^# $ 	s t '*/49>CHMR	(# C { t *-27<AFKPU	( #(-27?D	8 $  	
     
:S T S C 5 T SV S SjrH   r	  c                       \ rS rSr\R
                  " S5      r     SS\S\S\	S\
S\\   S	\\   S
\SS4S jjrS\	SS4S jrSS jrSS jrS\	SS4S jrS\SS4S jrSS jrSrg)XMLConverteri  z[ ---]Nr<   r   r   r=   r>   r   stripcontrolr?   c           	          [         R                  XX#XES9  U R                  U R                  (       + :X  a  [	        S5      eX`l        Xpl        U R                  5         g )Nr   r  )r   rB   r   r   r  r   r|  r%  )rE   r<   r   r   r=   r>   r   r|  s           rF   rB   XMLConverter.__init__  s[     	5f 	 	

 TZZ0HII&(rH   r   c                     U R                   (       aC  [        [        U R                  5      R	                  UR                  U R                   5      5        g [        [        U R                  5      R	                  U5        g rA   r(  r   s     rF   r   XMLConverter.write  r*  rH   c                     U R                   (       a  U R                  SU R                   -  5        OU R                  S5        U R                  S5        g )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rF   r%  XMLConverter.write_header  s;    ::JJ?$**LMJJ12

;rH   c                 &    U R                  S5        g )Nz	</pages>
r^  r   s    rF   r3  XMLConverter.write_footer  s    

< rH   c                     U R                   (       a  U R                  R                  SU5      nU R                  [	        U5      5        g Nru   )r|  CONTROLsubr   r5   r   s     rF   r   XMLConverter.write_text  s4    <<##B-D

3t9rH   r   c                 `   ^ ^^ S[         SS 4U U4S jjmS[         SS 4UU U4S jjmT" U5        g )Nrl   r?   c                 T  > [        U [        5      (       a4  TR                  SU R                  [	        U R
                  5      4-  5        g [        U [        5      (       aI  TR                  S[	        U R
                  5      -  5        U  H  nT" U5        M     TR                  S5        g )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rV   r"   r   rk  r4   r]   r$   rc  s     rF   rd  /XMLConverter.receive_layout.<locals>.show_group  s    $	**

5zz8DII#678  D+..

4x		7JJK!Eu% "

+,rH   c                 X	  > [        U [        5      (       a  SU R                  [        U R                  5      U R
                  4-  nTR                  U5        U  H  nT" U5        M     U R                  b=  TR                  S5        U R                   H  nT" U5        M     TR                  S5        TR                  S5        g [        U [        5      (       a6  SU R                  [        U R                  5      4-  nTR                  U5        g [        U [        5      (       a6  SU R                  [        U R                  5      4-  nTR                  U5        g [        U [        5      (       aE  SU R                  [        U R                  5      U R                  5       4-  nTR                  U5        g [        U [        5      (       a\  SU R                  < S	[        U R                  5      < S
3nTR                  U5        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       aJ  TR                  S[        U R                  5      -  5        U  H  nT" U5        M     TR                  S5        g [        U [         5      (       ar  Sn[        U ["        5      (       a  SnSU R$                  [        U R                  5      U4-  nTR                  U5        U  H  nT" U5        M     TR                  S5        g [        U [&        5      (       a  S[)        U R*                  5      [        U R                  5      U R,                  R                  U R.                  R0                  U R2                  4-  nTR                  U5        TR5                  U R7                  5       5        TR                  S5        g [        U [8        5      (       a#  TR                  SU R7                  5       -  5        g [        U [:        5      (       a  TR<                  bP  TR<                  R?                  U 5      nTR                  S[)        U5      U R@                  U RB                  4-  5        g TR                  SU R@                  U RB                  4-  5         g  [E        SU 45      5       e)Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
ru   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
	Unhandled)#rV   r   r   r4   r]   rotater   rj  r   r   r    r   get_ptsr   r\   r%   r"   r#   rk  r   r5   rU  r   r   r   rH  r   r   r!   r   r   r   rA  rB  rT   )	rl   r,  r   rm  wmoder\   r   rE   rd  s	         rF   r   +XMLConverter.receive_layout.<locals>.render  s   $''<KKTYY'KK@ 
 

1!E5M ";;*JJ|,!%"5) "-JJ}-

;'T S D&))9NNTYY'=  

1H G D&))9NNTYY'=  

1| { D'**BNNTYY'LLNF 
 

1n m D(++8<		8DIICVW

1!E5M "

=)b a D*--

3htyy6IIJ!E5M "

?+X W D),,d$566/E5JJTYY'9 
 

1!E5M "

>*> = D&))0 DMM* +))00		  

10

;'   D&))

.@A  D'**##/++88>DJJEt9djj$++>?  JJ<

DKK?XX
  7c;"566urH   r   ro  s   ` @@rF   rY   XMLConverter.receive_layout  sD    	V 	 	 	Z	 Z	D Z	 Z	x 	vrH   c                 $    U R                  5         g rA   rr  r   s    rF   rs  XMLConverter.close6  ru  rH   )r   r|  )r   r   NNFrx  )r   r   r   r   r   compiler  r*   r-   rT   r   r   r   r   r   rB   r   r%  r3  r   r   rY   rs  r   rw   rH   rF   r{  r{    s    jj89G '+-1"#  	
  8$ k*  
.# $ s t kV k kZrH   r{  c                       \ rS rSrSr\R                  " S5      r    SS\S\	S\
S\S	\\   S
\4S jjrS\S\
4S jrS\
SS4S jrSS jrSS jrS\
SS4S jrSS jrS\SS4S jrSS jrSrg)HOCRConverteri;  zKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]Nr<   r   r   r=   r>   r|  c           	      h    [         R                  XX#XES9  X`l        SU l        U R	                  5         g )Nr   F)r   rB   r|  within_charsr%  )rE   r<   r   r   r=   r>   r|  s          rF   rB   HOCRConverter.__init__N  s:     	5f 	 	
 )!rH   r]   r?   c                     Uu  p#pE[        U5      n[        U R                  S   U-
  5      n[        U5      n[        U R                  S   U-
  5      n	SU SU SU SU	 3$ )N   zbbox  )r   	page_bbox)
rE   r]   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rF   	bbox_reprHOCRConverter.bbox_repr^  sm    '+$uUT^^A&./UT^^A&./vhaxq&::rH   r   c                    U R                   (       aE  UR                  U R                   5      n[        [        U R                  5      R                  U5        g [        [        U R                  5      R                  U5        g rA   )r   r   r   r   r   r   r	   )rE   r   encoded_texts      rF   r   HOCRConverter.writeg  sM    ::;;tzz2L4::&,,\:$**40rH   c                 t   U R                   (       a  U R                  SU R                   -  5        OU R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S	5        g )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
r  r   s    rF   r%  HOCRConverter.write_headern  s    ::JJ:<@JJG
 JJ- 	

:

&'

W	
 	

S	
 	

C	
 	

;

:rH   c                 H    U R                  S5        U R                  S5        g )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
r^  r   s    rF   r3  HOCRConverter.write_footer  s    

FG

V	
rH   c                     U R                   (       a  U R                  R                  SU5      nU R                  U5        g r  )r|  r  r  r   r   s     rF   r   HOCRConverter.write_text  s-    <<##B-D

4rH   c                 ~   [        U R                  5      S:  a  SnSU R                  ;   a  SnSU R                  ;   a  US-  nU R                  SU R                  U R                  UU R                  U R                  5      U R                  U R                  U R                  R                  5       4-  5        SU l        g )	Nr   ru   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rU   working_textworking_fontr   working_sizer  working_bboxstripr  )rE   bold_and_italic_styless     rF   
write_wordHOCRConverter.write_word  s    t  !A%%'"4,,,)?&***&*??&JJ(
 )))).t'8'89))))))//1	  "rH   r   c                 :   ^ ^ S[         SS 4UU 4S jjmT" U5        g )Nrl   r?   c                   > TR                   (       a%  [        U [        5      (       a  TR                  5         [        U [        5      (       aq  U R
                  Tl        TR                  SU R                  < STR                  U R
                  5      < S35        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       aP  TR                  STR                  U R
                  5      -  5        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       a\  TR                  SU R                  TR                  U R
                  5      4-  5        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       Ga  TR                   (       dP  STl         U R                  5       Tl        U R
                  Tl        U R"                  Tl        U R&                  Tl        g [+        U R                  5       R-                  5       5      S	:X  a0  TR                  5         TR                  U R                  5       5        g TR                   S
   U R
                  S
   :w  d4  TR$                  U R"                  :w  d  TR(                  U R&                  :w  aC  TR                  5         U R
                  Tl        U R"                  Tl        U R&                  Tl        T=R                  U R                  5       -  sl        TR                   S	   TR                   S
   U R
                  S   TR                   S   4Tl        g g )Nz<div class='ocr_page' id='z	' title='z'>
z</div>
z"<span class='ocr_line' title='%s'>rJ  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r   r   r  )r  rV   r   r  r   r]   r  r   r   r  r%   r"   rk  r   r   r  r  rU  r  rH  r  rU   r  )rl   r   
child_liner   rE   s      rF   r   ,HOCRConverter.receive_layout.<locals>.render  sy     Zf%=%=!$''!%

{{DNN499$=? "E5M "

:&D*--

8T^^DII=VX #'J:& #'

;'D),,

Bzz4>>$))#<=> "E5M "

:&D&))(((,D%(,D%(,		D%(,D%(,		D%4==?0023q8)

4==?3 !--a0DIIaL@#00DMMA#00DII= OO-04		D-04D-04		D-))T]]_<) --a0 --a0 IIaL --a0	-)- *rH   r   r  s   ` @rF   rY   HOCRConverter.receive_layout  s%    6	 6	D 6	 6	p 	vrH   c                 $    U R                  5         g rA   rr  r   s    rF   rs  HOCRConverter.close  s    rH   )r  r|  r  r  r  r  r  )utf8r   NFrx  )r   r   r   r   __doc__r   r  r  r*   r-   rT   r   r   r   r   rB   r0   r  r   r%  r3  r   r  r   rY   rs  r   rw   rH   rF   r  r  ;  s    U  jj9:G '+"#  	
  8$  ;d ;s ;1# 1$ 14
s t 
"29V 9 9vrH   r  )Jr   loggingr   typingr   r   r   r   r   r   r	   r
   r   r   r   pdfminer.pdfcolorr   ru   r   imager   layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   	pdfdevicer&   pdffontr'   r(   	pdfinterpr)   r*   pdfpager+   pdftypesr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   	getLoggerr   r   r8   r   r   r   r   r	  r{  r  rw   rH   rF   <module>r     s&   	  	    ,   ; ;        %      %   $  ) :   K K "   !y yx) & 
68U	3$gfo @9L' 9xoL' od	c<& cLmL' mrH   