
    %oi                     8   S SK r S SKrS SKrS SKJr  S SKJrJrJrJ	r	J
r
Jr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#  S SK$J%r%  S SK&J'r'  S S	K(J)r)  S S
K*J+r+J,r,  S SK-J.r.J/r/  S SK0J1r1  S SK2J3r3  S SK4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=J>r>J?r?  \R                  " \A5      rB " S S\'5      rC " S S\C5      rD\
" S\	\\55      rE " S S\C\\E   5      rF " S S\F\5   5      rG " S S\F\5   5      rH " S S\F\5   5      rI " S S\F\5   5      rJg)    N)Sequence)BinaryIOClassVarGenericTextIOTypeVarcast)utils)ImageWriter)LAParamsLTAnnoLTCharLTComponentLTContainerLTCurveLTFigureLTImageLTItemLTLayoutContainerLTLineLTPageLTRectLTText	LTTextBoxLTTextBoxVerticalLTTextGroup
LTTextLineTextGroupElement)PDFColorSpace)PDFTextDevice)PDFValueError)PDFFontPDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOMatrixPathSegmentPointRectapply_matrix_ptapply_matrix_rectbbox2strencmake_compat_strmult_matrixc                   :   \ rS rSr% \\S'   \\S'     S'S\S\S\	S-  SS4S	 jjr
S
\S\SS4S jrS
\SS4S jrS\S\S\SS4S jrS\SS4S jrS\S\SS4S jrS\S\S\S\S\\   SS4S jrS\S\S\S\S\S\S \S!\S\4S" jrS\S\S\4S# jrS$\SS4S% jrS&r g)(PDFLayoutAnalyzer=   cur_itemctmNrsrcmgrpagenolaparamsreturnc                 V    [         R                  " X5        X l        X0l        / U l        g N)r    __init__r9   r:   _stackselfr8   r9   r:   s       N/var/www/html/land-ocr/venv/lib/python3.13/site-packages/pdfminer/converter.pyr>   PDFLayoutAnalyzer.__init__A   s#     	t- /1    pagec                     [        X!R                  5      u  p4pVSS[        X5-
  5      [        XF-
  5      4n[        U R                  U5      U l        g )Nr   )r.   mediaboxabsr   r9   r6   )rA   rE   r7   x0y0x1y1rG   s           rB   
begin_pagePDFLayoutAnalyzer.begin_pageL   sA    ,S--@q#bg,BG5t{{H5rD   c                    U R                   (       a#   [        [        U R                   5      5      5       e[        U R                  [
        5      (       d#   [        [        U R                  5      5      5       eU R                  b%  U R                  R                  U R                  5        U =R                  S-  sl	        U R                  U R                  5        g )N   )r?   strlen
isinstancer6   r   typer:   analyzer9   receive_layout)rA   rE   s     rB   end_pagePDFLayoutAnalyzer.end_pageQ   s    ;;5C$4 55$--00J#d4==6I2JJ0==$MM!!$--0qDMM*rD   namebboxmatrixc                     U R                   R                  U R                  5        [        X[	        X0R
                  5      5      U l        g r=   )r?   appendr6   r   r2   r7   )rA   rY   rZ   r[   s       rB   begin_figurePDFLayoutAnalyzer.begin_figureY   s/    4==) [-JKrD   _c                    U R                   n[        U R                   [        5      (       d#   [        [	        U R                   5      5      5       eU R
                  R                  5       U l         U R                   R                  U5        g r=   )r6   rS   r   rQ   rT   r?   popadd)rA   r`   figs      rB   
end_figurePDFLayoutAnalyzer.end_figure]   sY    mm$--22LCT]]8K4LL2)#rD   streamc                    [        U R                  [        5      (       d#   [        [	        U R                  5      5      5       e[        UUU R                  R                  U R                  R                  U R                  R                  U R                  R                  45      nU R                  R                  U5        g r=   )rS   r6   r   rQ   rT   r   rI   rJ   rK   rL   rc   )rA   rY   rg   items       rB   render_imagePDFLayoutAnalyzer.render_imagec   s    $--22LCT]]8K4LL2]]t}}//1A1A4==CSCST

 	$rD   gstatestrokefillevenoddpathc                    SR                  S U 5       5      nUSS S:w  a  gUR                  S5      S:  aT  [        R                  " SU5       H8  nXWR	                  S5      UR                  S5       nU R                  XX4U5        M:     gU V	s/ s H'  n	[        [        U	S   S:w  a  U	S	S OUS   S	S 5      PM)     n
n	U
 Vs/ s H  n[        U R                  U5      PM     nnU Vs/ s H  n[        US   5      PM     nnU VVVs/ s HX  n[        USSS
2   US
SS
2   SS9 VVs/ s H0  u  nn[        U R                  [        U5      [        U5      45      PM2     snnPMZ     nnnn[        UUSS9 VV	s/ s H  u  nn	[        [        U/U	Q75      PM     nnn	[        U5      S:  a-  US	S S:X  a$  US	   US   :X  a  USS	 S-   nUR!                  5         US;   a\  [#        UR$                  US   US   UUUUR&                  UR(                  UUR*                  S9
nU R,                  R/                  U5        gUS;   Ga?  Uu  u  nnu  nnu  nnu  nnnUS   US   :H  nUU:H  =(       a    UU:H  =(       a    UU:H  =(       a    UU:H  =(       d)    UU:H  =(       a    UU:H  =(       a    UU:H  =(       a    UU:H  nU(       ai  U(       ab  [1        UR$                  / US   QUS
   Q7UUUUR&                  UR(                  UUR*                  5	      n U R,                  R/                  U 5        g[3        UR$                  UUUUUR&                  UR(                  UUR*                  5	      n!U R,                  R/                  U!5        g[3        UR$                  UUUUUR&                  UR(                  UUR*                  5	      n!U R,                  R/                  U!5        gs  sn	f s  snf s  snf s  snnf s  snnnf s  sn	nf )z@Paint paths described in section 4.4 of the PDF reference manual c              3   *   #    U  H	  oS    v   M     g7f)r   N ).0xs     rB   	<genexpr>/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>u   s     +d!ds   NrP   mzm[^m]+r   h   F)strict   lh>   mlmlh)original_pathdashing_style>   mlllhmllll   )joincountrefinditerstartend
paint_pathr	   r+   r-   r7   rQ   zipfloatr*   rR   rb   r   	linewidthscolorncolordashr6   rc   r   r   )"rA   rl   rm   rn   ro   rp   shapery   subpathpraw_ptsptpts	operation	operatorsoperand1operand2transformed_pointsotransformed_pathlinerI   rJ   rK   rL   x2y2x3y3r`   is_closed_loophas_square_coordinatesrectcurves"                                     rB   r   PDFLayoutAnalyzer.paint_pathl   s    +d++!9 [[![[E2wwqzAEE!H5wG 3 OSNRUadckAbcFtAwrs|Dd   <CC7R?488R07CC<@ADyYq\*DIA "&" "&I	 /2!!$Q$14a4//*( $DHHuXh.PQ/ "&  "  	+=eL LDAq [1'q'*L    5zA~%*"4RCF9Jcr
S(	%
 $$FFMMMM"2"(++ !!$',,<?9R(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' "&<!((*#a&*3q6*(
D MM%%d+#(((
E MM%%e,$$MMMM$KK
 !!%(s DA" s0   .O O*O$
 O/*7O)!O/;!O6)O/fontfontsizescalingrisecidncsgraphicstatec	                     UR                  U5      n	[        U	[        5      (       d   [        [        U	5      5      5       e UR                  U5      n
UR                  U5      n[        UUUUUU	U
UUU5
      nU R                  R                  U5        UR                  $ ! [         a    U R                  X&5      n	 Nzf = fr=   )	to_unichrrS   rQ   rT   r#   handle_undefined_char
char_width	char_dispr   r6   rc   adv)rA   r[   r   r   r   r   r   r   r   text	textwidthtextdispri   s                rB   render_charPDFLayoutAnalyzer.render_char   s    	9>>#&DdC((9#d4j/9( OOC(	>>#&
 	$xx# $ 	9--d8D	9s   ?B B=<B=c                 H    [         R                  SU< SU< 35        SU S3$ )Nzundefined: , z(cid:))logdebug)rA   r   r   s      rB   r   'PDFLayoutAnalyzer.handle_undefined_char
  s)    		Kxr#12se1~rD   ltpagec                     g r=   rt   rA   r   s     rB   rV    PDFLayoutAnalyzer.receive_layout  s    rD   )r?   r6   r:   r9   rP   N)!__name__
__module____qualname____firstlineno__r   __annotations__r)   r%   intr   r>   r&   rM   rW   rQ   r,   r^   re   r'   rj   r$   boolr   r*   r   r"   r   r   r   r   r   rV   __static_attributes__rt   rD   rB   r4   r4   =   s   	K
 $(		2#	2 	2 T/		2
 
	26w 6V 6 6
+W + +L LD L& LT LC D    i  D  {){) {) 	{)
 {) {#{) 
{)z  	
     & 
B'   V  rD   r4   c            	       \    \ rS rSr  SS\S\S\S-  SS4S jjrS\SS4S	 jr	S\4S
 jr
Srg)PDFPageAggregatori  Nr8   r9   r:   r;   c                 :    [         R                  XX#S9  S U l        g N)r9   r:   )r4   r>   resultr@   s       rB   r>   PDFPageAggregator.__init__  s     	""4"S%)rD   r   c                     Xl         g r=   r   r   s     rB   rV    PDFPageAggregator.receive_layout  s    rD   c                 8    U R                   c   eU R                   $ r=   r   rA   s    rB   
get_resultPDFPageAggregator.get_result  s    {{&&&{{rD   r   r   )r   r   r   r   r%   r   r   r>   r   rV   r   r   rt   rD   rB   r   r     sX     $(	*#* * T/	*
 
*V  F rD   r   IOTypec                   b    \ rS rSr   SS\S\S\S\S\S-  SS4S	 jjr	\
S\S\4S
 j5       rSrg)PDFConverteri(  Nr8   outfpcodecr9   r:   r;   c                     [         R                  XXES9  X l        X0l        U R	                  U R                  5      U l        g r   )r4   r>   r   r   _is_binary_streamoutfp_binary)rA   r8   r   r   r9   r:   s         rB   r>   PDFConverter.__init__)  s8     	""4"S"

 224::>rD   c                     S[        U SS5      ;   a  g[        U S5      (       a  g[        U [        R                  5      (       a  g[        U [        R
                  [        R                  45      (       a  gg)z"Test if an stream is binary or notbmoderr   TF)getattrhasattrrS   ioBytesIOStringIO
TextIOBase)r   s    rB   r   PDFConverter._is_binary_stream6  s\     '%,,UF##rzz**R]];<<rD   )r   r   r   )utf-8rP   N)r   r   r   r   r%   r   rQ   r   r   r>   staticmethodr(   r   r   r   rt   rD   rB   r   r   (  sr    
 $(?#? ? 	?
 ? T/? 
?  4  rD   r   c                      ^  \ rS rSr     SS\S\S\S\S\S-  S\	S	\
S-  S
S4U 4S jjjrS\S
S4S jrS\S
S4S jrS\S\S
S4S jrS\S\	S\	S\	S\\   S
S4S jrSrU =r$ )TextConverteriF  Nr8   r   r   r9   r:   
showpagenoimagewriterr;   c                 <   > [         TU ]  XX4US9  X`l        Xpl        g )Nr   r9   r:   )superr>   r   r   )	rA   r8   r   r   r9   r:   r   r   	__class__s	           rB   r>   TextConverter.__init__G  s$     	uhW$&rD   r   c                 *   [         R                  " XR                  S5      nU R                  (       a8  [	        [
        U R                  5      R                  UR                  5       5        g [	        [        U R                  5      R                  U5        g )Nignore)
r
   compatible_encode_methodr   r   r	   r   r   writeencoder   rA   r   s     rB   
write_textTextConverter.write_textU  sY    --dJJI4::&,,T[[];$**40rD   r   c                    ^ ^ S[         SS 4UU 4S jjmT R                  (       a  T R                  SUR                   S35        T" U5        T R                  S5        g )Nri   r;   c                   > [        U [        5      (       a  U  H  nT" U5        M     O4[        U [        5      (       a  TR                  U R	                  5       5        [        U [
        5      (       a  TR                  S5        g [        U [        5      (       a*  TR                  b  TR                  R                  U 5        g g g )N
)	rS   r   r   r  get_textr   r   r   export_image)ri   childrenderrA   s     rB   r	  ,TextConverter.receive_layout.<locals>.render]  s    $,,!E5M "D&))0$	**%D'**t/?/?/K  --d3 0L*rD   zPage r  )r   r   r  pageidrA   r   r	  s   ` @rB   rV   TextConverter.receive_layout\  sP    		4 		4D 		4 		4 ??OOeFMM?"56vrD   rY   rg   c                 L    U R                   b  [        R                  XU5        g g r=   )r   r   rj   )rA   rY   rg   s      rB   rj   TextConverter.render_imagep  s#    '%%d&9 (rD   rl   rm   rn   ro   rp   c                     g r=   rt   )rA   rl   rm   rn   ro   rp   s         rB   r   TextConverter.paint_patht  s     	rD   )r   r   )r   rP   NFN)r   r   r   r   r%   r(   rQ   r   r   r   r   r>   r  r   rV   r'   rj   r$   r   r*   r   r   __classcell__)r   s   @rB   r   r   F  s    
 $( *.'#' ' 	'
 ' T/' ' !4'' 
' '1s 1t 1V  (: :i :D :  	
  {# 
 rD   r   c                   6   \ rS rSr% SSSSSSS.r\\\\4      \S	'   S
SS.r	\\\\4      \S'               S:S\
S\S\S\S\S-  S\S\S\S\S\S\S-  S\S\\\4   S-  S\\\4   S-  SS4S jjrS\SS4S jrS;S  jrS;S! jrS\SS4S" jrS#\S$\S%\S&\S'\S(\SS4S) jrS#\S$\S*\SS4S+ jrS*\S$\S%\S&\S'\S(\SS4S, jrS#\S\S%\S&\S-\SS4S. jr S<S#\S$\S%\S&\S'\S(\S/\SS4S0 jjrS#\SS4S1 jrS\S2\S3\SS4S4 jrS;S5 jrS6\ SS4S7 jr!S;S8 jr"S9r#g)=HTMLConverteri  yellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rE   RECT_COLORSblue)r  charTEXT_COLORSNr8   r   r   r9   r:   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr;   c           	      f   [         R                  U UUUUUS9  U R                  (       a  U R                  (       d  [	        S5      eU R                  (       d  U R                  (       a  [	        S5      eUc  SS0nUc  SSS.nX`l        Xpl        Xl        Xl        Xl	        Xl
        Xl        Xl        U(       aJ  U R                  R                  U R                  5        U R                  R                  U R                  5        U R                  U l        S U l        / U l        U R'                  5         g )Nr   )Codec is required for a binary I/O outputz1Codec must not be specified for a text I/O outputr"  r  r  )r   rE   )r   r>   r   r   r!   r$  r%  r&  r   r'  r   r(  r)  updater   r#  _yoffset_font
_fontstackwrite_header)rA   r8   r   r   r9   r:   r$  r%  r&  r   r'  r   r   r(  r)  s                  rB   r>   HTMLConverter.__init__  s   " 	 	 	
 TZZ KLL  TZZ STT!7+K$+V<K
"$$$&&&##D$4$45##D$4$45#/3
:<rD   r   c                     U R                   (       aC  [        [        U R                  5      R	                  UR                  U R                   5      5        g [        [        U R                  5      R	                  U5        g r=   r   r	   r   r   r   r   r   r   s     rB   r   HTMLConverter.write  H    ::4::&,,T[[-DE$**40rD   c                     U R                  S5        U R                  (       a  SU R                   S3nOSnU R                  U5        U R                  S5        g )Nz<html><head>
z<<meta http-equiv="Content-Type" content="text/html; charset=">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rA   ss     rB   r0  HTMLConverter.write_header  sP    

#$::::,d, 
 IA

1

$%rD   c                     [        SU R                  5       Vs/ s H  nSU SU S3PM     nnSSR                  U5       S3nU R                  U5        U R                  S5        g s  snf )	NrP   z
<a href="#">z</a>z/<div style="position:absolute; top:0px;">Page: r   </div>
z</body></html>
)ranger9   r   r   )rA   i
page_linksr8  s       rB   write_footerHTMLConverter.write_footer  sr    9>q$++9NO9NA
1#Rs$/9N
OYYz*+85 	
 	

1

%& Ps   A&c                 8    U R                  [        U5      5        g r=   )r   r0   r   s     rB   r  HTMLConverter.write_text  s    

3t9rD   colorborderwidthrv   ywrz   c                 
   U R                   R                  U5      nUbe  SU SU SX0R                  -   SU R                  U-
  U R                  -   SXPR                  -   SX`R                  -   S3nU R	                  U5        g g )Nz(<span style="position:absolute; border:  zpx solid; left:px; top:
px; width:px; height:zpx;"></span>
)r(  getr$  r-  r   )	rA   rD  rE  rv   rF  rG  rz   color2r8  s	            rB   
place_rectHTMLConverter.place_rect  s     !!%%e,!(!K= 1JJ' ()TZZ78 9ZZ( )jj.)9  JJqM rD   ri   c                 ~    U R                  XUR                  UR                  UR                  UR                  5        g r=   )rO  rI   rL   widthheight)rA   rD  rE  ri   s       rB   place_borderHTMLConverter.place_border  s&    DGGTWWdjj$++VrD   c                 0   U R                   b  U R                   R                  U5      nS[        U5       SU SX0R                  -   SU R                  U-
  U R                  -   SXPR                  -   SX`R                  -   S3nU R                  U5        g g )Nz
<img src="z
" border="z!" style="position:absolute; left:rJ  zpx;" width="
" height="" />
)r   r  r0   r$  r-  r   )	rA   ri   rE  rv   rF  rG  rz   rY   r8  s	            rB   place_imageHTMLConverter.place_image  s     '##006DSYKz+ ?JJ' ()TZZ78 9jj.) *zz>*&2  JJqM (rD   sizec           	      D   U R                   R                  U5      nUb  SU SX0R                  -   SU R                  U-
  U R                  -   SXPR                  -  U R                  -   S3	nU R                  U5        U R                  U5        U R                  S5        g g )Nz&<span style="position:absolute; color:; left:rJ  zpx; font-size:px;"></span>
)r)  rM  r$  r-  r%  r   r  )rA   rD  r   rv   rF  r[  rN  r8  s           rB   
place_textHTMLConverter.place_text  s     !!%%e, !JJ' ()TZZ78 9!JJ.?@	G  JJqMOOD!JJ{# rD   writing_modec                 *   U R                   R                  U R                  5        S U l        SU SU SU SX0R                  -   SU R                  U-
  U R                  -   SXPR                  -   SX`R                  -   S3nU R                  U5        g )	Nz'<div style="position:absolute; border: rI  zpx solid; writing-mode:r]  rJ  rK  rL  r^  )r/  r]   r.  r$  r-  r   )	rA   rD  rE  rv   rF  rG  rz   rb  r8  s	            rB   	begin_divHTMLConverter.begin_div#  s     	tzz*
gQ{m ,(> *

N# $MMA%34 5^$ %**n%U, 	
 	

1rD   c                     U R                   b  U R                  S5        U R                  R                  5       U l         U R                  S5        g )N</span>z</div>)r.  r   r/  rb   )rA   rD  s     rB   end_divHTMLConverter.end_div:  s8    ::!JJy!__((*


8rD   fontnamer   c                    X#4nX@R                   :w  ai  U R                   b  U R                  S5        UR                  S5      S   nU R                  SU SX0R                  -  U R                  -   S35        X@l         U R                  U5        g )Nrg  +z<span style="font-family: z; font-size:zpx">)r.  r   splitr$  r%  r  )rA   r   rj  r   r   fontname_without_subset_tags         rB   put_textHTMLConverter.put_text@  s    #::zz%

9%*2..*=b*A'JJ  ;< =%

2T^^CDDJ
 JrD   c                 &    U R                  S5        g )Nz<br>r   r   s    rB   put_newlineHTMLConverter.put_newlineO  s    

6rD   r   c                    ^ ^^ S[         [        -  SS 4U U4S jjmS[        SS 4UU U4S jjmT" U5        T =R                  T R                  -  sl        g )Nri   r;   c                 z   > [        U [        5      (       a%  TR                  SSU 5        U  H  nT" U5        M     g g )Nr  rP   )rS   r   rT  ri   r  rA   
show_groups     rB   ry  0HTMLConverter.receive_layout.<locals>.show_groupS  s9    $,,!!+q$7!Eu% " -rD   c           
        > [        U [        5      (       a  T=R                  U R                  -  sl        TR	                  SSU 5        TR
                  (       ad  TR                  STR                  U R                  -
  TR                  -   35        TR                  SU R                   SU R                   S35        U  H  nT" U5        M     U R                  b  U R                   H  nT" U5        M     g g [        U [        5      (       a  TR	                  SSU 5        g [        U [        5      (       aa  TR                  SSU R                  U R                  U R                  U R                  5        U  H  nT" U5        M     TR!                  S5        g [        U ["        5      (       a?  TR%                  U SU R                  U R                  U R                  U R                  5        g TR&                  S	:X  Ga  [        U [(        5      (       a%  TR	                  S
SU 5        U  H  nT" U5        M     g [        U [*        5      (       ad  TR	                  SSU 5        TR-                  S[/        U R0                  S-   5      U R                  U R                  S5        U  H  nT" U5        M     g [        U [2        5      (       aU  TR	                  SSU 5        TR-                  SU R5                  5       U R                  U R                  U R6                  5        g g [        U [(        5      (       a3  U  H  nT" U5        M     TR&                  S:w  a  TR9                  5         g g [        U [*        5      (       ap  TR                  SSU R                  U R                  U R                  U R                  U R;                  5       5        U  H  nT" U5        M     TR!                  S5        g [        U [2        5      (       a@  [=        U R>                  5      nTRA                  U R5                  5       X0R6                  5        g [        U [B        5      (       a   TRE                  U R5                  5       5        g g )NrE   rP   z*<div style="position:absolute; top:%dpx;">z	<a name="z">Page z</a></div>
r   r  exactr  r     r"  loose)#rS   r   r-  rL   rT  r   r   r$  r  groupsr   r   rd  rI   rR  rS  rh  r   rY  r&  r   r   r`  rQ   indexr   r  r[  rt  get_writing_moder1   rj  rp  r   r  )ri   r  grouprj  r	  rA   ry  s       rB   r	  ,HTMLConverter.receive_layout.<locals>.renderY  s[   $''(!!&!T2??JJD MMDGG3tzzABD JJ#DKK=}LQ "E5M ";;*!%"5) "- + D'**!!'1d3D(++xDGGTWWdjj$++V!E5M "X&D'**  q$''477DJJTG+dJ//%%j!T:!%u "&i00%%iD9OO!DJJN+ "&u "&f--%%fa6OO		 . D*--!E5M "??g-$$& .D),,GGGGJJKK))+ "E5M "Y'D&))*4==9dmmoxCD&))0 *rD   )r   r   r   r-  r'  rA   r   r	  ry  s   ` @@rB   rV   HTMLConverter.receive_layoutR  sY    	&[+;; 	& 	& 	&J	1 J	1D J	1 J	1X 	v(rD   c                 $    U R                  5         g r=   r@  r   s    rB   closeHTMLConverter.close      rD   )r.  r/  r-  r%  r   r&  r'  r(  r$  r   r)  )r   rP   NrP   g      ?normalT2   Nr   NNr;   N)False)$r   r   r   r   r   r   dictrQ   r   r#  r%   r(   r   r   r   r   r   r>   r   r0  r@  r  rO  r   rT  r   rY  r`  rd  rh  rp  rt  r   rV   r  r   rt   rD   rB   r  r    s	   -K$sCx.)  -K$sCx.)  $("*.-1-13#3 3 	3
 3 T/3 3 3 3 3 3 !4'3 3 #s(^d*3 #s(^d*3  
!3j1# 1$ 1
&'s t   	
    
*W# WC W{ Wt W  	
    
*$$ $ 	$
 $ $ 
$: $  	
     
.S T S C 5 T T)V T) T)lrD   r  c                       \ rS rSr\R
                  " S5      r     SS\S\S\	S\
S\S-  S	\S-  S
\SS4S jjrS\	SS4S jrSS jrSS jrS\	SS4S jrS\SS4S jrSS jrSrg)XMLConverteri  z[ ---]Nr8   r   r   r9   r:   r   stripcontrolr;   c           	          [         R                  U UUUUUS9  U R                  U R                  (       + :X  a  [	        S5      eX`l        Xpl        U R                  5         g )Nr   r+  )r   r>   r   r   r!   r   r  r0  )rA   r8   r   r   r9   r:   r   r  s           rB   r>   XMLConverter.__init__  sc     	 	 	
 TZZ0 KLL&(rD   r   c                     U R                   (       aC  [        [        U R                  5      R	                  UR                  U R                   5      5        g [        [        U R                  5      R	                  U5        g r=   r3  r   s     rB   r   XMLConverter.write  r5  rD   c                     U R                   (       a   U R                  SU R                    S35        OU R                  S5        U R                  S5        g )Nz<?xml version="1.0" encoding="z" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rB   r0  XMLConverter.write_header  s<    ::JJ7

|6JKJJ12

;rD   c                 &    U R                  S5        g )Nz	</pages>
rs  r   s    rB   r@  XMLConverter.write_footer  s    

< rD   c                     U R                   (       a  U R                  R                  SU5      nU R                  [	        U5      5        g Nrr   )r  CONTROLsubr   r0   r   s     rB   r  XMLConverter.write_text  s1    <<##B-D

3t9rD   r   c                 `   ^ ^^ S[         SS 4U U4S jjmS[         SS 4UU U4S jjmT" U5        g )Nri   r;   c                 \  > [        U [        5      (       a6  TR                  SU R                   S[	        U R
                  5       S35        g [        U [        5      (       aK  TR                  S[	        U R
                  5       S35        U  H  nT" U5        M     TR                  S5        g g )N<textbox id="" bbox="rX  z<textgroup bbox="r7  z</textgroup>
)rS   r   r   r  r/   rZ   r   rx  s     rB   ry  /XMLConverter.receive_layout.<locals>.show_group  s    $	**

#DJJ<x8K7LFS D+..

.x		/B.C4HI!Eu% "

+,	 /rD   c                 	  > [        U [        5      (       a  SU R                   S[        U R                  5       SU R
                   S3nTR                  U5        U  H  nT" U5        M     U R                  b=  TR                  S5        U R                   H  nT" U5        M     TR                  S5        TR                  S5        g [        U [        5      (       a8  SU R                   S[        U R                  5       S	3nTR                  U5        g [        U [        5      (       a8  S
U R                   S[        U R                  5       S	3nTR                  U5        g [        U [        5      (       aI  SU R                   S[        U R                  5       SU R                  5        S3nTR                  U5        g [        U [        5      (       aZ  SU R                   S[        U R                  5       S3nTR                  U5        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       aK  TR                  S[        U R                  5       S35        U  H  nT" U5        M     TR                  S5        g [        U [         5      (       av  Sn[        U ["        5      (       a  SnSU R$                   S[        U R                  5       SU S3nTR                  U5        U  H  nT" U5        M     TR                  S5        g [        U [&        5      (       a  S[)        U R*                  5       S[        U R                  5       SU R,                  R                   SU R.                  R0                   SU R2                  S S3nTR                  U5        TR5                  U R7                  5       5        TR                  S5        g [        U [8        5      (       a$  TR                  SU R7                  5        S35        g [        U [:        5      (       a  TR<                  bT  TR<                  R?                  U 5      nTR                  S [)        U5       S!U R@                   S"U RB                   S	35        g TR                  S#U R@                   S"U RB                   S	35        g [E        [G        S$U 45      5      e)%Nz
<page id="r  z
" rotate="r7  z	<layout>
z
</layout>
z</page>
z<line linewidth="rX  z<rect linewidth="z<curve linewidth="z" pts="z"/>
z<figure name="z
</figure>
z<textline bbox="z</textline>
rr   z wmode="vertical"r  "z>
z</textbox>
z<text font="z" colourspace="z" ncolour="z" size="z.3fr;  z</text>
z<text>z<image src="z	" width="rW  z<image width="	Unhandled)$rS   r   r  r/   rZ   rotater   r  r   r   r   r   get_ptsr   rY   r   r   r   r  r   r0   rj  r   r   r   r[  r  r  r   r   r   r  rR  rS  AssertionErrorrQ   )	ri   r8  r  r  wmoderY   r	  rA   ry  s	         rB   r	  +XMLConverter.receive_layout.<locals>.render  s'   $''  .%dii01 2#{{m41 
 

1!E5M ";;*JJ|,!%"5) "-JJ}-

;'D&))""&..!1 2%dii019 
 

1D&))""&..!1 2%dii019 
 

1D'**""&..!1 2%dii01 2 LLN+52  

1D(++$TYYKx8K7LDQ

1!E5M "

=)D*--

-htyy.A-B$GH!E5M "

?+D),,d$566/E#DJJ<x8K7LAeWTWX

1!E5M "

>*D&)) /0 1%dii01 2$$(HHMM? 3  $ 1 1 8 89 :!YYsO2/  

10

;'D&))

VDMMO#4I>?D'**##/++88>DJJ  #D	{ +""&** .##';;-v7 JJ(Jt{{m6R %S+t)<%=>>rD   r   r  s   ` @@rB   rV   XMLConverter.receive_layout  sA    		-V 		- 		- 		-W	? W	?D W	? W	?r 	vrD   c                 $    U R                  5         g r=   r  r   s    rB   r  XMLConverter.closeF  r  rD   )r   r  )r   rP   NNFr  )r   r   r   r   r   compiler  r%   r(   rQ   r   r   r   r   r>   r   r0  r@  r  r   rV   r  r   rt   rD   rB   r  r    s    jj89G $(*."#  	
  T/ !4'  
61# 1$ 1 !s t 
eV e eNrD   r  c                       \ rS rSrSr\R                  " S5      r    SS\S\	S\
S\S	\S-  S
\4S jjrS\S\
4S jrS\
SS4S jrSS jrSS jrS\
SS4S jrSS jrS\SS4S jrSS jrSrg)HOCRConverteriJ  zKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]Nr8   r   r   r9   r:   r  c           	      n    [         R                  U UUUUUS9  X`l        SU l        U R	                  5         g )Nr   F)r   r>   r  within_charsr0  )rA   r8   r   r   r9   r:   r  s          rB   r>   HOCRConverter.__init__]  sE     	 	 	
 )!rD   rZ   r;   c                     Uu  p#pE[        U5      n[        U R                  S   U-
  5      n[        U5      n[        U R                  S   U-
  5      n	SU SU SU SU	 3$ )Nr~   zbbox rI  )r   	page_bbox)
rA   rZ   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rB   	bbox_reprHOCRConverter.bbox_reprr  sm    '+$uUT^^A&./UT^^A&./vhaxq&::rD   r   c                    U R                   (       aE  UR                  U R                   5      n[        [        U R                  5      R                  U5        g [        [        U R                  5      R                  U5        g r=   )r   r   r	   r   r   r   r   )rA   r   encoded_texts      rB   r   HOCRConverter.write{  sM    ::;;tzz2L4::&,,\:$**40rD   c                 v   U R                   (       a   U R                  SU R                    S35        OU R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S5        U R                  S	5        U R                  S
5        g )NzL<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset=''>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
r  r   s    rB   r0  HOCRConverter.write_header  s    ::JJ448JJ<tE
 JJW 	

:

&'

T	
 	

P	
 	

C	
 	

;

:rD   c                 H    U R                  S5        U R                  S5        g )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
rs  r   s    rB   r@  HOCRConverter.write_footer  s    

FG

S	
rD   c                     U R                   (       a  U R                  R                  SU5      nU R                  U5        g r  )r  r  r  r   r   s     rB   r  HOCRConverter.write_text  s-    <<##B-D

4rD   c                    [        U R                  5      S:  a  SnSU R                  ;   a  SnSU R                  ;   a  US-  nU R                  SU R                   SU R                   S	U S
U R                  U R                  5       SU R                   SU R                   SU R                  R                  5        S35        SU l        g )Nr   rr   Italiczfont-style: italic; Boldzfont-weight: bold; z<span style='font:"z"; font-size:z; z' class='ocrx_word' title='z	; x_font z
; x_fsize '>rg  F)	rR   working_textworking_fontr   working_sizer  working_bboxstripr  )rA   bold_and_italic_styless     rB   
write_wordHOCRConverter.write_word  s    t  !A%%'"4,,,)?&***&*??&JJ&t'8'8&9 :!../r)* +..):):;< =++, -,,-R$$**,-W6	 "rD   r   c                 :   ^ ^ S[         SS 4UU 4S jjmT" U5        g )Nri   r;   c                   > TR                   (       a%  [        U [        5      (       a  TR                  5         [        U [        5      (       ao  U R
                  Tl        TR                  SU R                   STR                  U R
                  5       S35        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       aQ  TR                  STR                  U R
                  5       S35        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       a^  TR                  SU R                   STR                  U R
                  5       S35        U  H  nT" U5        M     TR                  S5        g [        U [        5      (       Ga  TR                   (       dP  S	Tl         U R                  5       Tl        U R
                  Tl        U R"                  Tl        U R&                  Tl        g [+        U R                  5       R-                  5       5      S
:X  a0  TR                  5         TR                  U R                  5       5        g TR                   S   U R
                  S   :w  d4  TR$                  U R"                  :w  d  TR(                  U R&                  :w  aC  TR                  5         U R
                  Tl        U R"                  Tl        U R&                  Tl        T=R                  U R                  5       -  sl        TR                   S
   TR                   S   U R
                  S   TR                   S   4Tl        g g )Nz<div class='ocr_page' id='z	' title='r  r<  z<span class='ocr_line' title='r  r_  z<div class='ocr_block' id='Tr   rP   r|   r~   )r  rS   r   r  r   rZ   r  r   r  r  r   r   r  r   r  r  r  rj  r  r[  r  rR   r  )ri   r  
child_liner	  rA   s      rB   r	  ,HOCRConverter.receive_layout.<locals>.render  s     Zf%=%=!$''!%

;;- ("nnTYY78> "E5M "

:&D*--

4T^^DII5N4OrR #'J:& #'

;'D),,

::, '"nnTYY78> "E5M "

:&D&))(((,D%(,D%(,		D%(,D%(,		D%..01Q6OO%JJt}}/ ))!,		!<,,=,,		9),0II),0MM),0II)%%8%))!,))!,		!))!,	)D%+ *rD   r  r  s   ` @rB   rV   HOCRConverter.receive_layout  s%    9	 9	D 9	 9	v 	vrD   c                 $    U R                  5         g r=   r  r   s    rB   r  HOCRConverter.close  r  rD   )r  r  r  r  r  r  r  )utf8rP   NFr  )r   r   r   r   __doc__r   r  r  r%   r(   rQ   r   r   r   r>   r,   r  r   r0  r@  r  r  r   rV   r  r   rt   rD   rB   r  r  J  s    U  jj9:G $("#  	
  T/ *;d ;s ;1# 1$ 12
s t 
"&<V < <|rD   r  )Kr   loggingr   collections.abcr   typingr   r   r   r   r   r	   pdfminerr
   pdfminer.imager   pdfminer.layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   pdfminer.pdfcolorr   pdfminer.pdfdevicer    pdfminer.pdfexceptionsr!   pdfminer.pdffontr"   r#   pdfminer.pdfinterpr$   r%   pdfminer.pdfpager&   pdfminer.pdftypesr'   pdfminer.utilsr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   	getLoggerr   r   r4   r   r   r   r   r  r  r  rt   rD   rB   <module>r     s   	  	 $   &     * , , 0 : B $ '    !R Rj) & 
68U	3$gfo <6L' 6rjL' jZ	[<& [|nL' nrD   