
    )iS                        S SK r S SKrS SKJrJrJrJrJrJrJ	r	J
r
JrJrJrJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJ r   SSKJ!r!  SSKJ"r"  SSKJ#r#  SSKJ$r$  \RJ                  " \&5      r' " S S5      r( " S S5      r) " S S5      r* " S S5      r+ " S S\*5      r, " S S \,5      r- " S! S"\-5      r. " S# S$\-5      r/ " S% S&\,5      r0 " S' S(\*\+5      r1 " S) S*\,\+5      r2\" S+\*S,9r3 " S- S.\,\\3   5      r4 " S/ S0\4\3   5      r5 " S1 S2\5\3   \+5      r6\\2\14   r7 " S3 S4\6\7   5      r8 " S5 S6\85      r9 " S7 S8\85      r: " S9 S:\6\8   5      r; " S; S<\;5      r< " S= S>\;5      r=\\;S?4   r> " S@ S?\6\>   5      r? " SA SB\?5      r@ " SC SD\?5      rA " SE SF\4\,   5      rB " SG SH\B5      rC " SI SJ\B5      rDg)K    N)DictGenericIterableIteratorListOptionalSequenceSetTupleTypeVarUnioncast   )PDFColorSpace)PDFFont)Color)PDFGraphicState)	PDFStream)INF)LTComponentT)Matrix)Plane)Point)Rectapply_matrix_pt)bbox2str)fsplit)	get_bound)
matrix2str)uniqc                   4    \ rS rSrSS\SS4S jjrS	S jrSrg)
IndexAssigner'   indexreturnNc                     Xl         g Nr%   )selfr%   s     O/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/pdfminer/layout.py__init__IndexAssigner.__init__(   s    
    c                     [        U[        5      (       a'  U R                  Ul        U =R                  S-  sl        g [        U[        5      (       a  U H  nU R	                  U5        M     g g Nr   )
isinstance	LTTextBoxr%   LTTextGrouprun)r*   objxs      r+   r4   IndexAssigner.run+   sN    c9%%

CIJJ!OJ[))  *r.   r)   r   )r5   LTItemr&   N)__name__
__module____qualname____firstlineno__intr,   r4   __static_attributes__ r.   r+   r#   r#   '   s    c $ r.   r#   c                   r    \ rS rSrSr       SS\S\S\S\S\\   S\S	\S
S4S jjrSS jr	S
\
4S jrSrg)LAParams4   a  Parameters for layout analysis

:param line_overlap: If two characters have more overlap than this they
    are considered to be on the same line. The overlap is specified
    relative to the minimum height of both characters.
:param char_margin: If two characters are closer together than this
    margin they are considered part of the same line. The margin is
    specified relative to the width of the character.
:param word_margin: If two characters on the same line are further apart
    than this margin then they are considered to be two separate words, and
    an intermediate space will be added for readability. The margin is
    specified relative to the width of the character.
:param line_margin: If two lines are are close together they are
    considered to be part of the same paragraph. The margin is
    specified relative to the height of a line.
:param boxes_flow: Specifies how much a horizontal and vertical position
    of a text matters when determining the order of text boxes. The value
    should be within the range of -1.0 (only horizontal position
    matters) to +1.0 (only vertical position matters). You can also pass
    `None` to disable advanced layout analysis, and instead return text
    based on the position of the bottom left corner of the text box.
:param detect_vertical: If vertical text should be considered during
    layout analysis
:param all_texts: If layout analysis should be performed on text in
    figures.
line_overlapchar_marginline_marginword_margin
boxes_flowdetect_vertical	all_textsr&   Nc                 x    Xl         X l        X0l        X@l        XPl        X`l        Xpl        U R                  5         g r(   )rD   rE   rF   rG   rH   rI   rJ   	_validate)r*   rD   rE   rF   rG   rH   rI   rJ   s           r+   r,   LAParams.__init__P   s6     )&&&$."r.   c                     U R                   bp  Sn[        U R                   [        5      (       d*  [        U R                   [        5      (       d  [	        U5      eSU R                   s=::  a  S::  d  O  [        U5      eg g )Nz@LAParam boxes_flow should be None, or a number between -1 and +1r   )rH   r1   r>   float	TypeError
ValueError)r*   boxes_flow_err_msgs     r+   rL   LAParams._validated   sk    ??&U  4??C00JtPU4V4V 233-A- !344 . 'r.   c                 d    SU R                   U R                  U R                  U R                  4-  $ )NzM<LAParams: char_margin=%.1f, line_margin=%.1f, word_margin=%.1f all_texts=%r>)rE   rF   rG   rJ   r*   s    r+   __repr__LAParams.__repr__p   s4    -!1!143C3CT^^TU	
r.   )rJ   rH   rE   rI   rF   rD   rG   )      ?g       @rY   g?rY   FFr&   N)r:   r;   r<   r=   __doc__rP   r   boolr,   rL   strrW   r?   r@   r.   r+   rB   rB   4   s    : "   &) %  	
  UO   
(
5
# 
r.   rB   c                   *    \ rS rSrSrS\SS4S jrSrg)r9   x   z)Interface for things that can be analyzedlaparamsr&   Nc                     g)zPerform the layout analysis.Nr@   r*   r`   s     r+   analyzeLTItem.analyze{   s    r.   r@   )r:   r;   r<   r=   r[   rB   rc   r?   r@   r.   r+   r9   r9   x   s    3 T r.   r9   c                   4    \ rS rSrSrS\4S jrS\4S jrSrg)LTText   z#Interface for things that have textr&   c                 \    SU R                   R                  < SU R                  5       < S3$ N< >)	__class__r:   get_textrV   s    r+   rW   LTText.__repr__   s     NN33T]]_EEr.   c                     [         e)zText contained in this objectNotImplementedErrorrV   s    r+   rn   LTText.get_text   s    !!r.   r@   N)	r:   r;   r<   r=   r[   r]   rW   rn   r?   r@   r.   r+   rf   rf      s    -F# F"# "r.   rf   c                      \ rS rSrSrS\SS4S jrS\4S jrS\	S\
4S	 jrS\	S\
4S
 jrS\	S\
4S jrS\	S\
4S jrS\SS4S jrS\
4S jrSS S\
4S jrSS S\4S jrSS S\4S jrSS S\
4S jrSS S\4S jrSS S\4S jrSrg)LTComponent   zObject with a bounding boxbboxr&   Nc                 P    [         R                  U 5        U R                  U5        g r(   )r9   r,   set_bboxr*   rw   s     r+   r,   LTComponent.__init__   s    dr.   c                 f    SU R                   R                  < S[        U R                  5      < S3$ ri   )rm   r:   r   rw   rV   s    r+   rW   LTComponent.__repr__   s      NN33Xdii5HIIr.   _c                     [         er(   rR   r*   r~   s     r+   __lt__LTComponent.__lt__       r.   c                     [         er(   r   r   s     r+   __le__LTComponent.__le__   r   r.   c                     [         er(   r   r   s     r+   __gt__LTComponent.__gt__   r   r.   c                     [         er(   r   r   s     r+   __ge__LTComponent.__ge__   r   r.   c                 n    Uu  p#pEX l         X0l        X@l        XPl        XB-
  U l        XS-
  U l        Xl        g r(   )x0y0x1y1widthheightrw   )r*   rw   r   r   r   r   s         r+   ry   LTComponent.set_bbox   s7    W
g	r.   c                 L    U R                   S:*  =(       d    U R                  S:*  $ Nr   )r   r   rV   s    r+   is_emptyLTComponent.is_empty   s    zzQ2$++"22r.   r5   c                     [        U[        5      (       d   [        [        U5      5      5       eUR                  U R
                  :*  =(       a    U R                  UR
                  :*  $ r(   )r1   ru   r]   typer   r   r*   r5   s     r+   is_hoverlapLTComponent.is_hoverlap   G    #{++;Sc^;+vv 6TWW%66r.   c                 $   [        U[        5      (       d   [        [        U5      5      5       eU R	                  U5      (       a  g[        [        U R                  UR                  -
  5      [        U R                  UR                  -
  5      5      $ r   	r1   ru   r]   r   r   minabsr   r   r   s     r+   	hdistanceLTComponent.hdistance   g    #{++;Sc^;+C  s477SVV+,c$''CFF2B.CDDr.   c                 $   [        U[        5      (       d   [        [        U5      5      5       eU R	                  U5      (       aL  [        [        U R                  UR                  -
  5      [        U R                  UR                  -
  5      5      $ gr   r   r   s     r+   hoverlapLTComponent.hoverlap   g    #{++;Sc^;+C  s477SVV+,c$''CFF2B.CDDr.   c                     [        U[        5      (       d   [        [        U5      5      5       eUR                  U R
                  :*  =(       a    U R                  UR
                  :*  $ r(   )r1   ru   r]   r   r   r   r   s     r+   is_voverlapLTComponent.is_voverlap   r   r.   c                 $   [        U[        5      (       d   [        [        U5      5      5       eU R	                  U5      (       a  g[        [        U R                  UR                  -
  5      [        U R                  UR                  -
  5      5      $ r   	r1   ru   r]   r   r   r   r   r   r   r   s     r+   	vdistanceLTComponent.vdistance   r   r.   c                 $   [        U[        5      (       d   [        [        U5      5      5       eU R	                  U5      (       aL  [        [        U R                  UR                  -
  5      [        U R                  UR                  -
  5      5      $ gr   r   r   s     r+   voverlapLTComponent.voverlap   r   r.   )rw   r   r   r   r   r   r   )r:   r;   r<   r=   r[   r   r,   r]   rW   objectr\   r   r   r   r   ry   r   r   rP   r   r   r   r   r   r?   r@   r.   r+   ru   ru      s	   $T d J# J 4  4  4  4 T d 3$ 37} 7 7E] Eu EM e 7} 7 7E] Eu EM e r.   ru   c                   p    \ rS rSrSr     SS\S\\   S\S\S\S	\	\
   S
\	\
   SS4S jjrS\4S jrSrg)LTCurve   zA generic Bezier curveN	linewidthptsstrokefillevenoddstroking_colornon_stroking_colorr&   c                     [         R                  U [        U5      5        X l        Xl        X0l        X@l        XPl        X`l        Xpl	        g r(   )
ru   r,   r   r   r   r   r   r   r   r   )r*   r   r   r   r   r   r   r   s           r+   r,   LTCurve.__init__   s=     	T9S>2"	,"4r.   c                 F    SR                  S U R                   5       5      $ )N,c              3   ,   #    U  H
  nS U-  v   M     g7f)z	%.3f,%.3fNr@   ).0ps     r+   	<genexpr>"LTCurve.get_pts.<locals>.<genexpr>   s     :Aas   )joinr   rV   s    r+   get_ptsLTCurve.get_pts   s    xx::::r.   )r   r   r   r   r   r   r   FFFNN)r:   r;   r<   r=   r[   rP   r   r   r\   r   r   r,   r]   r   r?   r@   r.   r+   r   r      s      *..255 %[5 	5
 5 5 !5 %UO5 
5&; ;r.   r   c                   `    \ rS rSrSr     SS\S\S\S\S\S	\S
\\	   S\\	   SS4S jjr
Srg)LTLine   zGA single straight line.

Could be used for separating text or figures.
Nr   p0p1r   r   r   r   r   r&   c	           
      >    [         R                  U UX#/UUUUU5        g r(   r   r,   )	r*   r   r   r   r   r   r   r   r   s	            r+   r,   LTLine.__init__   s,     	H		
r.   r@   r   )r:   r;   r<   r=   r[   rP   r   r\   r   r   r,   r?   r@   r.   r+   r   r      s     *..2

 
 	

 
 
 
 !
 %UO
 

 
r.   r   c                   \    \ rS rSrSr     SS\S\S\S\S\S	\\	   S
\\	   SS4S jjr
Srg)LTRecti  zEA rectangle.

Could be used for framing another pictures or figures.
Nr   rw   r   r   r   r   r   r&   c           
      V    Uu  pp[         R                  U UX4X4X4X4/UUUUU5        g r(   r   )r*   r   rw   r   r   r   r   r   r   r   r   r   s               r+   r,   LTRect.__init__  sE      Xx"B84		
r.   r@   r   )r:   r;   r<   r=   r[   rP   r   r\   r   r   r,   r?   r@   r.   r+   r   r     sx     *..2

 
 	

 
 
 !
 %UO
 

 
r.   r   c                   @    \ rS rSrSrS\S\S\SS4S jrS\4S	 jr	S
r
g)LTImagei(  zCAn image object.

Embedded images can be in JPEG, Bitmap or JBIG2.
namestreamrw   r&   Nc                 ~   [         R                  X5        Xl        X l        UR	                  S5      UR	                  S5      4U l        UR	                  S5      U l        UR	                  SS5      U l        UR	                  S5      U l        [        U R                  [        5      (       d  U R                  /U l        g g )N)WWidth)HHeight)IM	ImageMask)BPCBitsPerComponentr   )CS
ColorSpace)ru   r,   r   r   get_anysrcsize	imagemaskbits
colorspacer1   list)r*   r   r   rw   s       r+   r,   LTImage.__init__.  s    T(	~68WX(;<NN#>B	 ..)=>$//400#/DO 1r.   c           	          SU R                   R                  < SU R                  < S[        U R                  5      < SU R
                  < S3	$ Nrj   () rk   rl   )rm   r:   r   r   rw   r   rV   s    r+   rW   LTImage.__repr__9  s3    NN##IITYYLL	
 	
r.   )r   r   r   r   r   r   )r:   r;   r<   r=   r[   r]   r   r   r,   rW   r?   r@   r.   r+   r   r   (  s4    
	0S 	0) 	04 	0D 	0
# 
r.   r   c                   8    \ rS rSrSrS\SS4S jrS\4S jrSrg)	LTAnnoiB  a  Actual letter in the text as a Unicode string.

Note that, while a LTChar object has actual boundaries, LTAnno objects does
not, as these are "virtual" characters, inserted by a layout analyzer
according to the relationship between two characters (e.g. a space).
textr&   Nc                     Xl         g r(   _text)r*   r   s     r+   r,   LTAnno.__init__J  s
    
r.   c                     U R                   $ r(   r   rV   s    r+   rn   LTAnno.get_textN      zzr.   r   )	r:   r;   r<   r=   r[   r]   r,   rn   r?   r@   r.   r+   r   r   B  s&    S T # r.   r   c                       \ rS rSrSrS\S\S\S\S\S\S	\S
\	\\
\\   \4   4   S\S\SS4S jrS\4S jrS\4S jrS\S\4S jrSrg)LTChariR  z.Actual letter in the text as a Unicode string.matrixfontfontsizescalingriser   	textwidthtextdispncsgraphicstater&   Nc                 X   [         R                  U 5        X`l        Xl        UR                  U l        Xl        Xl        Xs-  U-  U l        UR                  5       (       aU  [        U[        5      (       d   eUu  pUc  US-  nOX-  S-  nSU-
  U-  S-  nU* X-   U R                  -   4nU* U-   X-   4nO,UR                  5       U-  nSX-   4nU R                  X-   U-   4nU R                  u  nnnnnnSUU-  U-  :  =(       a    UU-  S:*  U l        [        U R                  U5      u  nn[        U R                  U5      u  nnUU:  a  UUnnUU:  a  UUnn[        R                  U UUUU45        UR                  5       (       a  U R                  U l        g U R"                  U l        g )NrY   gMbP?i  r   )rf   r,   r   r   fontnamer  r  advis_verticalr1   tupleget_descentuprightr   ru   r   sizer   )r*   r   r  r  r  r  r   r  r  r  r  vxvybbox_lower_leftbbox_upper_rightdescentabcdefr   r   r   r   s                             r+   r,   LTChar.__init__U  s    	
(''1h....HRz^]U*)x'%/B "sBI$89O!#h	: &&(83G '.1O $'.8*CD![[Aq!Q1q57?*9q1uz"4;;@R"4;;0@AR7BR7BRTBB#34

DI 	 DIr.   c                     SU R                   R                  < S[        U R                  5      < S[	        U R
                  5      < SU R                  < SU R                  < SU R                  5       < S3$ )Nrj   rk    matrix=z font=z adv=z text=rl   )	rm   r:   r   rw   r    r   r
  r  rn   rV   s    r+   rW   LTChar.__repr__  sH    NN##TYYt{{#MMHHMMO
 	
r.   c                     U R                   $ r(   r   rV   s    r+   rn   LTChar.get_text  r   r.   r5   c                     g)z<Returns True if two characters can coexist in the same line.Tr@   r   s     r+   is_compatibleLTChar.is_compatible  s    r.   )r   r  r
  r  r   r  r  r  )r:   r;   r<   r=   r[   r   r   rP   r]   r   r   r   r   r   r,   rW   rn   r   r\   r#  r?   r@   r.   r+   r   r   R  s    822 2 	2
 2 2 2 2 uXe_e%;<<=2 2 &2 
2h
# 
#  D r.   r   LTItemT)boundc                       \ rS rSrSrS\SS4S jrS\\   4S jr	S\
4S jrS	\SS4S
 jrS\\   SS4S jrS\SS4S jrSrg)LTContaineri  z(Object that can be extended and analyzedrw   r&   Nc                 <    [         R                  X5        / U l        g r(   )ru   r,   _objsrz   s     r+   r,   LTContainer.__init__  s    T($&
r.   c                 ,    [        U R                  5      $ r(   )iterr*  rV   s    r+   __iter__LTContainer.__iter__  s    DJJr.   c                 ,    [        U R                  5      $ r(   )lenr*  rV   s    r+   __len__LTContainer.__len__  s    4::r.   r5   c                 :    U R                   R                  U5        g r(   )r*  appendr   s     r+   addLTContainer.add  s    

#r.   objsc                 8    U H  nU R                  U5        M     g r(   )r6  )r*   r8  r5   s      r+   extendLTContainer.extend  s    CHHSM r.   r`   c                 L    U R                    H  nUR                  U5        M     g r(   )r*  rc   r*   r`   r5   s      r+   rc   LTContainer.analyze  s    ::CKK! r.   )r*  )r:   r;   r<   r=   r[   r   r,   r   r%  r.  r>   r2  r6  r   r:  rB   rc   r?   r@   r.   r+   r(  r(    ss    2T d 
 (7+   w 4 8G,  
 T r.   r(  c                   0    \ rS rSrSS jrS\SS4S jrSrg)	LTExpandableContaineri  r&   Nc                 `    [         R                  U [        7[        7[        * [        * 45        g r(   )r(  r,   r   rV   s    r+   r,   LTExpandableContainer.__init__  s&    TSD3$sd#;<r.   r5   c           	      f   [         R                  U [        [        U5      5        U R	                  [        U R                  UR                  5      [        U R                  UR                  5      [        U R                  UR                  5      [        U R                  UR                  5      45        g r(   )r(  r6  r   r%  ry   r   r   r   maxr   r   r   s     r+   r6  LTExpandableContainer.add  su    d7C01DGGSVV$DGGSVV$DGGSVV$DGGSVV$		
 	r.   r@   rZ   )r:   r;   r<   r=   r,   ru   r6  r?   r@   r.   r+   r@  r@    s    
{ 
t 
r.   r@  c                   ,    \ rS rSrSS jrS\4S jrSrg)LTTextContaineri  r&   Nc                 X    [         R                  U 5        [        R                  U 5        g r(   )rf   r,   r@  rV   s    r+   r,   LTTextContainer.__init__  s    &&t,r.   c                 2    SR                  S U  5       5      $ )N c              3      #    U  H:  n[        U[        5      (       d  M  [        [        U5      R                  5       v   M<     g 7fr(   )r1   rf   r   rn   )r   r5   s     r+   r   +LTTextContainer.get_text.<locals>.<genexpr>  s2      
48SJsF<S(D&&((Ds
   A%A)r   rV   s    r+   rn   LTTextContainer.get_text  s     ww 
48
 
 	
r.   r@   rZ   )r:   r;   r<   r=   r,   r]   rn   r?   r@   r.   r+   rG  rG    s    

# 
r.   rG  c                      ^  \ rS rSrSrS\SS4U 4S jjrS\4S jrS\	SS4S	 jr
S
\\   S\S\S    4S jrS\4U 4S jjrSrU =r$ )
LTTextLinei  zContains a list of LTChar objects that represent a single text line.

The characters are aligned either horizontally or vertically, depending on
the text's writing mode.
rG   r&   Nc                 .   > [         TU ]  5         Xl        g r(   )superr,   rG   )r*   rG   rm   s     r+   r,   LTTextLine.__init__  s    &r.   c                     SU R                   R                  < S[        U R                  5      < SU R	                  5       < S3$ ri   )rm   r:   r   rw   rn   rV   s    r+   rW   LTTextLine.__repr__  s.    NN##TYYMMO
 	
r.   r`   c                     U R                    H  nUR                  U5        M     [        R                  U [	        S5      5        g )N
)r*  rc   r(  r6  r   r=  s      r+   rc   LTTextLine.analyze  s1    ::CKK! fTl+r.   planeratioc                     [         er(   rq   )r*   rY  rZ  s      r+   find_neighborsLTTextLine.find_neighbors  s
     "!r.   c                 j   > [         TU ]  5       =(       d    U R                  5       R                  5       $ r(   )rR  r   rn   isspace)r*   rm   s    r+   r   LTTextLine.is_empty  s%    w!>T]]_%<%<%>>r.   )rG   )r:   r;   r<   r=   r[   rP   r,   r]   rW   rB   rc   r   r   r   r\  r\   r   r?   __classcell__rm   s   @r+   rP  rP    sr    E d 

# 
 T "<("16"	l	"
?$ ? ?r.   rP  c                      ^  \ rS rSrS\SS4S jrS\SS4U 4S jjrS\\	   S	\S\
\   4S
 jrSS\S\S\4S jjrSS\S\S\4S jjr SS\S\S\4S jjrSS\S\S\4S jjrSrU =r$ )LTTextLineHorizontali   rG   r&   Nc                 F    [         R                  X5        [        7U l        g r(   )rP  r,   r   _x1r*   rG   s     r+   r,   LTTextLineHorizontal.__init__      D.$r.   r5   c                 d  > [        U[        5      (       az  U R                  (       ai  U R                  [        UR                  UR
                  5      -  nU R                  UR                  U-
  :  a  [        R                  U [        S5      5        UR                  U l        [        TU ]%  U5        g Nrk   )r1   r   rG   rD  r   r   rf  r   r(  r6  r   r   rR  r*   r5   marginrm   s      r+   r6  LTTextLineHorizontal.add  sw    c6""t'7'7%%CIIszz(BBFxx#&&6/)fSk266Cr.   rY  rZ  c                    X R                   -  nUR                  U R                  U R                  U-
  U R                  U R
                  U-   45      nU Vs/ s Hp  n[        U[        5      (       d  M  U R                  XSS9(       d  M0  U R                  XSS9(       d*  U R                  XSS9(       d  U R                  XSS9(       d  Mn  UPMr     sn$ s  snf )a  
Finds neighboring LTTextLineHorizontals in the plane.

Returns a list of other LTTestLineHorizontals in the plane which are
close to self. "Close" can be controlled by ratio. The returned objects
will be the same height as self, and also either left-, right-, or
centrally-aligned.
	tolerance)r   findr   r   r   r   r1   rd  _is_same_height_as_is_left_aligned_with_is_right_aligned_with_is_centrally_aligned_withr*   rY  rZ  r  r8  r5   s         r+   r\  #LTTextLineHorizontal.find_neighbors  s     KKzz477DGGaK$''A+FG 
3 45  ++C+=	  ..s.@2232D66s6H 
 	
 
   C3C	:CCotherrq  c                 L    [        UR                  U R                  -
  5      U:*  $ )z>
Whether the left-hand edge of `other` is within `tolerance`.
)r   r   r*   rz  rq  s      r+   rt  *LTTextLineHorizontal._is_left_aligned_with,  !     588dgg%&)33r.   c                 L    [        UR                  U R                  -
  5      U:*  $ )z?
Whether the right-hand edge of `other` is within `tolerance`.
)r   r   r|  s      r+   ru  +LTTextLineHorizontal._is_right_aligned_with2  r~  r.   c                     [        UR                  UR                  -   S-  U R                  U R                  -   S-  -
  5      U:*  $ )zA
Whether the horizontal center of `other` is within `tolerance`.
   )r   r   r   r|  s      r+   rv  /LTTextLineHorizontal._is_centrally_aligned_with8  =     EHHuxx'1,$''0AQ/FFG9TTr.   c                 L    [        UR                  U R                  -
  5      U:*  $ r(   )r   r   r|  s      r+   rs  'LTTextLineHorizontal._is_same_height_as@  s    5<<$++-.);;r.   )rf  r8   )r:   r;   r<   r=   rP   r,   ru   r6  r   r   r   rP  r\  r\   rt  ru  rv  rs  r?   ra  rb  s   @r+   rd  rd     s    E d { t 
<(
16
	j	
64; 45 4QU 44K 4E 4RV 4 67U U-2U	U< < <d < <r.   rd  c                      ^  \ rS rSrS\SS4S jrS\SS4U 4S jjrS\\	   S	\S\
\   4S
 jrSS\S\S\4S jjrSS\S\S\4S jjr SS\S\S\4S jjrS\S\S\4S jrSrU =r$ )LTTextLineVerticaliD  rG   r&   Nc                 F    [         R                  X5        [        * U l        g r(   )rP  r,   r   _y0rg  s     r+   r,   LTTextLineVertical.__init__E  ri  r.   r5   c                 d  > [        U[        5      (       az  U R                  (       ai  U R                  [        UR                  UR
                  5      -  nUR                  U-   U R                  :  a  [        R                  U [        S5      5        UR                  U l        [        TU ]%  U5        g rk  )r1   r   rG   rD  r   r   r   r  r(  r6  r   r   rR  rl  s      r+   r6  LTTextLineVertical.addL  sw    c6""t'7'7%%CIIszz(BBFvv)fSk266Cr.   rY  rZ  c                    X R                   -  nUR                  U R                  U-
  U R                  U R                  U-   U R
                  45      nU Vs/ s Hp  n[        U[        5      (       d  M  U R                  XSS9(       d  M0  U R                  XSS9(       d*  U R                  XSS9(       d  U R                  XSS9(       d  Mn  UPMr     sn$ s  snf )a  
Finds neighboring LTTextLineVerticals in the plane.

Returns a list of other LTTextLineVerticals in the plane which are
close to self. "Close" can be controlled by ratio. The returned objects
will be the same width as self, and also either upper-, lower-, or
centrally-aligned.
rp  )r   rr  r   r   r   r   r1   r  _is_same_width_as_is_lower_aligned_with_is_upper_aligned_withrv  rw  s         r+   r\  !LTTextLineVertical.find_neighborsU  s     JJzz477Q;1dggFG 
3 23  **3*<	  ///A2232D66s6H 
 	
 
ry  rz  rq  c                 L    [        UR                  U R                  -
  5      U:*  $ )z:
Whether the lower edge of `other` is within `tolerance`.
)r   r   r|  s      r+   r  )LTTextLineVertical._is_lower_aligned_withp  r~  r.   c                 L    [        UR                  U R                  -
  5      U:*  $ )z:
Whether the upper edge of `other` is within `tolerance`.
)r   r   r|  s      r+   r  )LTTextLineVertical._is_upper_aligned_withv  r~  r.   c                     [        UR                  UR                  -   S-  U R                  U R                  -   S-  -
  5      U:*  $ )z?
Whether the vertical center of `other` is within `tolerance`.
r  )r   r   r   r|  s      r+   rv  -LTTextLineVertical._is_centrally_aligned_with|  r  r.   c                 L    [        UR                  U R                  -
  5      U:*  $ r(   )r   r   r|  s      r+   r  $LTTextLineVertical._is_same_width_as  s    5;;+,	99r.   )r  r8   )r:   r;   r<   r=   rP   r,   ru   r6  r   r   r   rP  r\  r\   r  r  rv  r  r?   ra  rb  s   @r+   r  r  D  s    E d { t 
<(
16
	j	
64K 4E 4RV 44K 4E 4RV 4 67U U-2U	U:{ :u : : :r.   r  c                   >    \ rS rSrSrS	S jrS\4S jrS\4S jrSr	g)
r2   i  zRepresents a group of text chunks in a rectangular area.

Note that this box is created by geometric analysis and does not
necessarily represents a logical boundary of the text. It contains a list
of LTTextLine objects.
r&   Nc                 <    [         R                  U 5        SU l        g )NrO   )rG  r,   r%   rV   s    r+   r,   LTTextBox.__init__  s      &
r.   c           	          SU R                   R                  < SU R                  < S[        U R                  5      < SU R                  5       < S3	$ r   )rm   r:   r%   r   rw   rn   rV   s    r+   rW   LTTextBox.__repr__  s5    NN##JJTYYMMO	
 	
r.   c                     [         er(   rq   rV   s    r+   get_writing_modeLTTextBox.get_writing_mode  s    !!r.   r)   rZ   )
r:   r;   r<   r=   r[   r,   r]   rW   r  r?   r@   r.   r+   r2   r2     s$    

# 
"# "r.   r2   c                   B   ^  \ rS rSrS\SS4U 4S jjrS\4S jrSrU =r	$ )LTTextBoxHorizontali  r`   r&   Nc                 X   > [         TU ]  U5        U R                  R                  S S9  g )Nc                     U R                   * $ r(   )r   r5   s    r+   <lambda>-LTTextBoxHorizontal.analyze.<locals>.<lambda>  
    r.   keyrR  rc   r*  sortr*   r`   rm   s     r+   rc   LTTextBoxHorizontal.analyze  %    !

/0r.   c                     g)Nzlr-tbr@   rV   s    r+   r  $LTTextBoxHorizontal.get_writing_mode      r.   r@   
r:   r;   r<   r=   rB   rc   r]   r  r?   ra  rb  s   @r+   r  r    '     T 
#  r.   r  c                   B   ^  \ rS rSrS\SS4U 4S jjrS\4S jrSrU =r	$ )LTTextBoxVerticali  r`   r&   Nc                 X   > [         TU ]  U5        U R                  R                  S S9  g )Nc                     U R                   * $ r(   )r   r  s    r+   r  +LTTextBoxVertical.analyze.<locals>.<lambda>  r  r.   r  r  r  s     r+   rc   LTTextBoxVertical.analyze  r  r.   c                     g)Nztb-rlr@   rV   s    r+   r  "LTTextBoxVertical.get_writing_mode  r  r.   r@   r  rb  s   @r+   r  r    r  r.   r  r3   c                   :   ^  \ rS rSrS\\   SS4U 4S jjrSrU =r$ )r3   i  r8  r&   Nc                 D   > [         TU ]  5         U R                  U5        g r(   )rR  r,   r:  )r*   r8  rm   s     r+   r,   LTTextGroup.__init__  s    Dr.   r@   )	r:   r;   r<   r=   r   TextGroupElementr,   r?   ra  rb  s   @r+   r3   r3     s!    X&67 D  r.   c                   4   ^  \ rS rSrS\SS4U 4S jjrSrU =r$ )LTTextGroupLRTBi  r`   r&   Nc                    >^ [         TU ]  U5        UR                  c   eUR                  mU R                  R	                  U4S jS9  g )Nc                 h   > ST-
  U R                   -  ST-   U R                  U R                  -   -  -
  $ r0   )r   r   r   r5   rH   s    r+   r  )LTTextGroupLRTB.analyze.<locals>.<lambda>  s/    Q^svv5:~#&&366/23r.   r  rR  rc   rH   r*  r  r*   r`   rH   rm   s     @r+   rc   LTTextGroupLRTB.analyze  sK    !""...((


3 	 	
 	r.   r@   r:   r;   r<   r=   rB   rc   r?   ra  rb  s   @r+   r  r        	 	T 	 	r.   r  c                   4   ^  \ rS rSrS\SS4U 4S jjrSrU =r$ )LTTextGroupTBRLi  r`   r&   Nc                    >^ [         TU ]  U5        UR                  c   eUR                  mU R                  R	                  U4S jS9  g )Nc                 j   > ST-   * U R                   U R                  -   -  ST-
  U R                  -  -
  $ r0   )r   r   r   r  s    r+   r  )LTTextGroupTBRL.analyze.<locals>.<lambda>  s2    a*n-#&&A:~'(r.   r  r  r  s     @r+   rc   LTTextGroupTBRL.analyze  sK    !""...((


( 	 	
 	r.   r@   r  rb  s   @r+   r  r    r  r.   r  c                       \ rS rSrS\SS4S jrS\S\\   S\	\
   4S jrS\S	\\
   S\	\   4S
 jrS\S\\   S\\   4S jrS\SS4S jrSrg)LTLayoutContaineri  rw   r&   Nc                 <    [         R                  X5        S U l        g r(   )r(  r,   groupsrz   s     r+   r,   LTLayoutContainer.__init__  s    T(37r.   r`   r8  c              #     #    S nS nU GH  nUGb  UR                  U5      =(       a    UR                  U5      =(       a    [        UR                  UR                  5      UR                  -  UR                  U5      :  =(       a?    UR                  U5      [        UR                  UR                  5      UR                  -  :  nUR                  =(       a    UR                  U5      =(       a    UR                  U5      =(       a    [        UR                  UR                  5      UR                  -  UR                  U5      :  =(       a?    UR                  U5      [        UR                  UR                  5      UR                  -  :  nU(       a  [        U[        5      (       d  U(       a'  [        U[         5      (       a  UR#                  U5        OUb  Uv   S nOU(       a?  U(       d8  [!        UR$                  5      nUR#                  U5        UR#                  U5        OrU(       a?  U(       d8  [        UR$                  5      nUR#                  U5        UR#                  U5        O,[        UR$                  5      nUR#                  U5        Uv   S nUnGM     Uc+  [        UR$                  5      nUc   eUR#                  U5        Uv   g 7fr(   )r#  r   r   r   rD   r   r   rD  r   rE   rI   r   r   r   r1   rd  r  r6  rG   )r*   r`   r8  obj0lineobj1halignvaligns           r+   group_objectsLTLayoutContainer.group_objects  sQ     D &&t, I((.IDKK58M8MMmmD)*I t,$**djj1H4H4HHI 0 ,, K**40K((.K DJJ

3h6K6KKmmD)*K
 t,$++t{{3h6J6JJK  z$0DEEz$0BCC HHTN%JDf1(2F2FG3H4H4HI3H4H4HI"
#DG H <'(<(<=D###HHTN
s   KKlinesc              #   p  #    [        U R                  5      nUR                  U5        0 nU H  nUR                  X1R                  5      nU/nU H;  nUR                  U5        X;   d  M  UR                  UR                  U5      5        M=     [        U[        5      (       a  [        5       n	O
[        5       n	[        U5       H  n
U	R                  U
5        XU
'   M     M     [        5       nU HA  nXT;  a  M
  XE   n	X;   a  M  UR                  U	5        U	R                  5       (       a  M=  U	v   MC     g7f)z$Group neighboring lines to textboxesN)r   rw   r:  r\  rF   r5  popr1   rd  r  r  r!   r6  setr   )r*   r`   r  rY  boxesr  	neighborsmembersr  boxr5   dones               r+   group_textlines!LTLayoutContainer.group_textlines0  s     $)#3U-/D++E3G3GHIfG!t$=NN599T?3 " $ 455!4!6')G} c
 %  uD +C{HHSM<<>>	  	s   A%D6+B>D6-	D6r  c           
        ^ [         [        [        4   n[        U R                  5      mS[
        S[
        S[        4S jnSUSUS[        U   4U4S jjn/ n[        [        U5      5       HX  nX'   n[        US-   [        U5      5       H5  n	X)   n
UR                  SU" X5      [        U5      [        U
5      X45        M7     MZ     [        R                  " U5        TR                  U5        [        5       n[        U5      S:  GaA  [        R                   " U5      u  ppnnX;  Ga  X;  Ga  U(       d,  U" UU5      (       a  [        R"                  " US	XUUU45        Mj  [%        U[&        [(        45      (       d  [%        U[&        [(        45      (       a  [)        UU/5      nO[+        UU/5      nTR-                  U5        TR-                  U5        UR/                  X/5        T H9  n[        R"                  " USU" UU5      [        U5      [        U5      UU45        M;     TR1                  U5        [        U5      S:  a  GMA  [3        S
 T 5       5      $ )a  Group textboxes hierarchically.

Get pair-wise distances, via dist func defined below, and then merge
from the closest textbox pair. Once obj1 and obj2 are merged /
grouped, the resulting group is considered as a new object, and its
distances to other objects & groups are added to the process queue.

For performance reason, pair-wise distances and object pair info are
maintained in a heap of (idx, dist, id(obj1), id(obj2), obj1, obj2)
tuples. It ensures quick access to the smallest element. Note that
since comparison operators, e.g., __lt__, are disabled for
LTComponent, id(obj) has to appear before obj in element tuples.

:param laparams: LAParams object.
:param boxes: All textbox objects to be grouped.
:return: a list that has only one element, the final top level group.
r  obj2r&   c                 |   [        U R                  UR                  5      n[        U R                  UR                  5      n[        U R                  UR                  5      n[        U R
                  UR
                  5      nXB-
  XS-
  -  U R                  U R                  -  -
  UR                  UR                  -  -
  $ )aX  A distance function between two TextBoxes.

Consider the bounding rectangle for obj1 and obj2.
Return its area less the areas of obj1 and obj2,
shown as 'www' below. This value may be negative.
        +------+..........+ (x1, y1)
        | obj1 |wwwwwwwwww:
        +------+www+------+
        :wwwwwwwwww| obj2 |
(x0, y0) +..........+------+
)r   r   r   rD  r   r   r   r   )r  r  r   r   r   r   s         r+   dist/LTLayoutContainer.group_textboxes.<locals>.disti  s     TWWdgg&BTWWdgg&BTWWdgg&BTWWdgg&BRW%**t{{*+**t{{*+r.   c                 `  > [        U R                  UR                  5      n[        U R                  UR                  5      n[        U R                  UR                  5      n[        U R
                  UR
                  5      n[        TR                  X#XE45      5      nUR                  X45      $ )z8Check if there's any other object between obj1 and obj2.)	r   r   r   rD  r   r   r  rr  
difference)r  r  r   r   r   r   r8  rY  s          r+   isany0LTLayoutContainer.group_textboxes.<locals>.isany  s|    TWWdgg&BTWWdgg&BTWWdgg&BTWWdgg&Buzz22"234D??D<00r.   r   Fr   Tc              3   B   #    U  H  n[        [        U5      v   M     g 7fr(   )r   r3   )r   gs     r+   r   4LTLayoutContainer.group_textboxes.<locals>.<genexpr>  s     8%QDa((%s   )r   r2   r3   r   rw   ru   rP   r
   ranger1  r5  idheapqheapifyr:  r  heappopheappushr1   r  r  r  removeupdater6  r   )r*   r`   r  ElementTr  r  distsibox1jbox2r  
skip_isanyr  id1id2r  r  grouprz  rY  s                       @r+   group_textboxes!LTLayoutContainer.group_textboxesQ  s   * K/0!&tyy!1	{ 	+ 	% 	,	1 	1 	1S] 	1 IKs5z"A8D1q5#e*-xeT$%5r$xD4VW . #
 	eUu%j1n49MM%4H1ZCdDco!eD$&7&7NN54dD*IJd%6$HIIZ,o>N N *9$)FE+T4L9ET"T"SJ'"ENNUE 2BuIr%y%QVW #
 		% - %j1n0 8%888r.   c                    [        S U 5      u  p#U H  nUR                  U5        M     U(       d  g [        U R                  X5      5      n[        S U5      u  peU H  nUR                  U5        M     [        U R	                  X5      5      nUR
                  cM  U H  nUR                  U5        M     S[        S[        [        [        [        4   4S jn	UR                  U	S9  OeU R                  X5      U l        [        5       n
U R                   H%  nUR                  U5        U
R                  U5        M'     UR                  S S9  [        [         ["           U5      U-   [        [         ["           U5      -   U l        g )Nc                 "    [        U [        5      $ r(   )r1   r   r  s    r+   r  +LTLayoutContainer.analyze.<locals>.<lambda>  s    :c63Jr.   c                 "    U R                  5       $ r(   )r   r  s    r+   r  r    s
    #,,.r.   r  r&   c                     [        U [        5      (       a  SU R                  * U R                  * 4$ SU R                  * U R                  4$ )Nr   r   )r1   r  r   r   r   r  s    r+   getkey)LTLayoutContainer.analyze.<locals>.getkey  s@    c#455w00w//r.   r  c                     U R                   $ r(   r)   r  s    r+   r  r    s    399r.   )r   rc   r   r  r  rH   r2   r   r>   rP   r  r
  r  r#   r4   r   r   ru   r*  )r*   r`   textobjs	otherobjsr5   	textlinesempties	textboxestextboxr  assignerr	  s               r+   rc   LTLayoutContainer.analyze  sX    !''JD QCKK! ++H?@	%&@)LCKK! --hBC	&$) %0I 0%UE0A*B 0 NNvN&..xCDK$Hh'U# % NN4N5k"I.4$g./ 	

 	r.   )r*  r  )r:   r;   r<   r=   r   r,   rB   r   ru   r   rP  r  r2   r  r	   r   r3   r
  rc   r?   r@   r.   r+   r  r    s    T d N N(0(=N	*	N` )1*)=	)	BY9 Y9)1))<Y9	k	Y9v$ $T $r.   r  c                   R    \ rS rSrSrS\S\S\SS4S jrS\4S	 jr	S
\
SS4S jrSrg)LTFigurei  zRepresents an area used by PDF Form objects.

PDF Forms can be used to present figures or pictures by embedding yet
another PDF document within a page. Note that LTFigure objects can appear
recursively.
r   rw   r   r&   Nc                    ^ Xl         TU l        Uu  pEpgXE4XF-   U4XEU-   4XF-   XW-   44n[        U4S jU 5       5      n[        R	                  X5        g )Nc              3   B   >#    U  H  u  p[        TX45      v   M     g 7fr(   r   )r   r   qr   s      r+   r   $LTFigure.__init__.<locals>.<genexpr>  s     NvVa!88vs   )r   r   r   r  r,   )	r*   r   rw   r   r6   ywhboundss	      `     r+   r,   LTFigure.__init__  s]    	q&15!*qa%j15!%.ANvNN""4.r.   c           
          SU R                   R                  < SU R                  < S[        U R                  5      < S[        U R                  5      < S3	$ )Nrj   r   r   r  rl   )rm   r:   r   r   rw   r    r   rV   s    r+   rW   LTFigure.__repr__  s8    NN##IITYYt{{#	
 	
r.   r`   c                 R    UR                   (       d  g [        R                  X5        g r(   )rJ   r  rc   rb   s     r+   rc   LTFigure.analyze  s    !!!!$1r.   )r   r   )r:   r;   r<   r=   r[   r]   r   r   r,   rW   rB   rc   r?   r@   r.   r+   r  r    sG    S  f  
# 
 T r.   r  c            	       D    \ rS rSrSrSS\S\S\SS4S jjrS\	4S	 jr
S
rg)LTPagei  zRepresents an entire page.

Like any other LTLayoutContainer, an LTPage can be iterated to obtain child
objects like LTTextBox, LTFigure, LTImage, LTRect, LTCurve and LTLine.
pageidrw   rotater&   Nc                 F    [         R                  X5        Xl        X0l        g r(   )r  r,   r.  r/  )r*   r.  rw   r/  s       r+   r,   LTPage.__init__  s    ""4.r.   c           	          SU R                   R                  < SU R                  < S[        U R                  5      < SU R
                  < S3	$ )Nrj   r   r   z rotate=rl   )rm   r:   r.  r   rw   r/  rV   s    r+   rW   LTPage.__repr__   s3    NN##KKTYYKK	
 	
r.   )r.  r/  r8   )r:   r;   r<   r=   r[   r>   r   rP   r,   r]   rW   r?   r@   r.   r+   r-  r-    s4    s $  d 
# 
r.   r-  )Er  loggingtypingr   r   r   r   r   r   r	   r
   r   r   r   r   pdfcolorr   pdffontr   	pdfinterpr   r   pdftypesr   utilsr   r   r   r   r   r   r   r   r   r   r    r!   	getLoggerr:   loggerr#   rB   r9   rf   ru   r   r   r   r   r   r   r%  r(  r@  rG  TextLineElementrP  rd  r  r2   r  r  r  r3   r  r  r  r  r-  r@   r.   r+   <module>r>     s        $   &        "     			8	$
 
A
 A
H " "F& FR;k ;4
W 
:
W 
:
k 
4VV  F[& FR )6
*+ww/ :K0 (	
+G4f 	
 '?1 ?DA<: A<HA: A:H"
+ "2) 	  M12 /"23 
k 

k 
wK0 wt  @
 
r.   