
    )iH9                         S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SK J!r!  S SK J"r"  S SK J#r#  S SK$J%r%  Sr&\" S5      SLr'\'(       a  S SK(J)r)  \" S5      SLr*\*(       a  S SK+r,Sr-\ R\                  " S5      r/Sr01 Skr1S\24S\24S \24S!\34S"\44S#\34S$.r5\#" S%5      r6\#" S&5      r7 " S' S(5      r8 " S) S*\95      r: " S+ S,\;5      r< " S- S.\95      r= " S/ S0\95      r> " S1 S2\95      r?S3 r@\SMS4 j5       rAS5 rBS6 rCS7 rDS8 rE\S9 5       rFSNS: jrG   SOS< jrH      SPS= jrIS> rJS? rKS@ rL\BSQSA j5       rM\BSB 5       rNSS;S \8R                  S 4SC jrP     SRSD jrQ    SSSE jrRSS;S \8R                  S 4SF jrSSMSG jrTSS;S \8R                  S S4SH jrUSIS;S \8R                  S 4SJ jrVSK rW\XSL:X  a  \Y" \W" 5       5        gg)T    N)contextmanager)
QUOTE_NONE)ENOENT)wraps)iglob)BytesIO)environ)extsep)linesep)remove)normcase)normpath)realpath)find_loader)NamedTemporaryFile)sleep)InvalidVersion)parse)Version)Image	tesseractnumpy)ndarraypandaszutf-8z	^[a-z_]+$RGB>
   BMPGIFPBMPGMPNGPPMJPEGTIFFWEBPJPEG2000page_numorientationrotateorientation_confscriptscript_conf)zPage numberzOrientation in degreesRotatezOrientation confidenceScriptzScript confidencez3.05z4.1.0c                   $    \ rS rSrSrSrSrSrSrg)OutputE   bytesz
data.framedictstring N)	__name__
__module____qualname____firstlineno__BYTES	DATAFRAMEDICTSTRING__static_attributes__r4       W/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/pytesseract/pytesseract.pyr/   r/   E   s    EIDFr>   r/   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )PandasNotSupportedL   c                 $   > [         TU ]  S5        g )NzMissing pandas packagesuper__init__self	__class__s    r?   rF   PandasNotSupported.__init__M   s    12r>   r4   r5   r6   r7   r8   rF   r=   __classcell__rI   s   @r?   rA   rA   L   s    3 3r>   rA   c                       \ rS rSrS rSrg)TesseractErrorQ   c                 ,    Xl         X l        X4U l        g N)statusmessageargs)rH   rS   rT   s      r?   rF   TesseractError.__init__R   s    %	r>   )rU   rT   rS   N)r5   r6   r7   r8   rF   r=   r4   r>   r?   rO   rO   Q   s    &r>   rO   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )TesseractNotFoundErrorX   c                 2   > [         TU ]  [         S35        g )NzQ is not installed or it's not in your PATH. See README file for more information.)rE   rF   tesseract_cmdrG   s    r?   rF   TesseractNotFoundError.__init__Y   s    o 5 6	
r>   r4   rK   rM   s   @r?   rX   rX   X   s    
 
r>   rX   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )TSVNotSupported`   c                 $   > [         TU ]  S5        g )Nz4TSV output not supported. Tesseract >= 3.05 requiredrD   rG   s    r?   rF   TSVNotSupported.__init__a   s    B	
r>   r4   rK   rM   s   @r?   r^   r^   `       
 
r>   r^   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )ALTONotSupportedg   c                 $   > [         TU ]  S5        g )Nz6ALTO output not supported. Tesseract >= 4.1.0 requiredrD   rG   s    r?   rF   ALTONotSupported.__init__h   s    D	
r>   r4   rK   rM   s   @r?   rd   rd   g   rb   r>   rd   c                     U R                  5          U R                  S5        U R                  5         Xl        g ! [         a    [        S5         N.[         a     N9f = f! U R                  5         Xl        f = f)N   )	terminatewait	TypeErrorr   	Exceptionkill
returncode)processcodes     r?   rn   rn   n   sd    "Q 	!  a  	!s-   : AA  	AA  AA   A8c              #     #     U(       de  U R                  5       S   v    U R                  R                  5         U R                  R                  5         U R                  R                  5         g  U R                  US9u  p#Uv    U R                  R                  5         U R                  R                  5         U R                  R                  5         g ! [
        R                   a    [        U S5        [        S5      ef = f! U R                  R                  5         U R                  R                  5         U R                  R                  5         f = f7f)Nri   )timeoutzTesseract process timeout)	communicatestdinclosestdoutstderr
subprocessTimeoutExpiredrn   RuntimeError)procseconds_error_strings       r?   timeout_managerr   {   s    ""$Q'' 	

	<"..w.?OA
 	

 (( 	<rN:;;	< 	

s6   ED AE1C AE,DD AEEc                 B   ^ ^ [        T 5      U U4S j5       mTTl        T$ )Nc                  T   > TR                   TL a  T" U 0 UD6Tl         TR                   $ rR   )_result)rU   kwargsfuncwrappers     r?   r   run_once.<locals>.wrapper   s*    ??g%"D3F3GOr>   )r   r   )r   r   s   `@r?   run_oncer      s'    
4[ 
 GONr>   c                     SR                  S U R                  [        5      R                  5        5       5      R	                  5       $ )N c              3   $   #    U  H  ov   M     g 7frR   r4   .0lines     r?   	<genexpr>get_errors.<locals>.<genexpr>   s      KKs   )joindecodeDEFAULT_ENCODING
splitlinesstrip)r   s    r?   
get_errorsr      s<    88 %,,-=>IIK egr>   c                     [        U (       a  U  S3OU 5       H  n [        U5        M     g! [         a   nUR                  [        :w  a  e  SnAM9  SnAff = f)z5Tries to remove temp files by filename wildcard path.*N)r   r   OSErrorerrnor   )	temp_namefilenamees      r?   cleanupr      sO    YYKq/IF	8 G  	ww&  !	s   ,
AAAc                    [         (       a+  [        U [        5      (       a  [        R                  " U 5      n [        U [        R                  5      (       d  [        S5      eU R                  (       d  SOU R                  nU[        ;  a  [        S5      eSU R                  5       ;   aJ  [        R                  " [        U R                  S5      nUR                  U SU R                  S5      5        Un Xl        X4$ )NzUnsupported image objectr    zUnsupported image format/typeA)   r   r   )r   r   )numpy_installed
isinstancer   r   	fromarrayrl   formatSUPPORTED_FORMATSgetbandsnewRGB_MODEsizepaste
getchannel)image	extension
backgrounds      r?   preparer      s    :eW55&eU[[))233"\\u||I))788
ennYYx_E
(8(8(=>Lr>   c           	   #     #     [        SSS9 n[        U [        5      (       aJ  UR                  [	        [        [        U 5      5      5      4v    S S S 5        [        UR                  5        g [        U 5      u  pUR                   S[         U 3nU R                  X0R                  S9  UR                  U4v   S S S 5        [        WR                  5        g ! , (       d  f       N$= f! [        WR                  5        f = f7f)Ntess_F)prefixdelete_input)r   )r   r   strnamer   r   r   r   r   r
   saver   )r   fr   input_file_names       r?   r   r      s     
wu=%%%ffhx'@AAA > 	  'u~E!"vhykBOJJ||J<&&/)) > 	 >= 	sG   D
C* ACC* D.AC;C* D
C'#C* *DDc                    [         R                  [         R                  S [        S.n[        [         S5      (       aV  [         R                  " 5       US'   US   =R
                  [         R                  -  sl        [         R                  US   l        U (       a  [         R                  US'   U$ [         R                  US'   U$ )N)rv   ry   startupinfoenvSTARTUPINFOr   rx   )
rz   PIPEr	   hasattrr   dwFlagsSTARTF_USESHOWWINDOWSW_HIDEwShowWindowDEVNULL)include_stdoutr   s     r?   subprocess_argsr      s    
 //	F z=)) * 6 6 8}}%%)H)HH%,6,>,>})%??x M &--xMr>    c                 r   / n[         R                  R                  S5      (       d  US:w  a  USS[        U5      4-  nU[        X4-  nUb  USU4-  nU(       a  U[
        R                  " U5      -  nU(       a  US;  a  UR                  U5         [        R                  " U40 [        5       D6n[        X5       n
UR                   (       a  [#        UR                   [%        U
5      5      e S S S 5        g ! [         a$  n	U	R                  [        :w  a  e [        5       eS n	A	ff = f! , (       d  f       g = f)Nwin32r   nicez-n-l>   boxosdtsvxml)sysplatform
startswithr   r[   shlexsplitappendrz   Popenr   r   r   r   rX   r   ro   rO   r   )input_filenameoutput_filename_baser   langconfigr   rs   cmd_argsr}   r   r   s              r?   run_tesseractr      s    H<<""7++	VT3t9--EEHT4L EKK''Y&BB	"+>O,=> 
	'<?? *\2JKK  
(	'  +77f(**	+ 
(	's$   C7 <1D(7
D%D  D%(
D6c           	         [        U 5       u  pxUUUUUUUS.n	[        S0 U	D6  U	S    [         U 3n
[        U
S5       nU(       a"  UR	                  5       sS S S 5        sS S S 5        $ UR	                  5       R                  [        5      sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        g ! , (       d  f       g = f)N)r   r   r   r   r   r   rs   r   rbr4   )r   r   r
   openreadr   r   )r   r   r   r   r   rs   return_bytesr   r   r   r   output_files               r?   run_and_get_outputr     s     
e3,$-"
 	345fXi[I(D!["'') "! 
  ##%,,-=> "! 
 "!! 
s.   3B9B	B9)"B	B9
B,	(B99
Cc                 L   0 nU R                  5       R                  S5       Vs/ s H  oDR                  U5      PM     nn[        U5      S:  a  U$ UR                  S5      n[        U5      n[        US   5      U:  a  US   R	                  S5        US:  a  X'-  n[        U5       H_  u  p[        5       X9'   U HH  n[        U5      U::  a  M  X:w  a   [        [        XH   5      5      n
OXH   n
X9   R	                  U
5        MJ     Ma     U$ s  snf ! [         a    XH   n
 N2f = f)N
   r   rt   r   )
r   r   lenpopr   	enumeratelistintfloat
ValueError)r   cell_delimiterstr_col_idxresultrowrowsheaderlengthiheadvals              r?   file_to_dictr   (  s   F141B1B41HI1H#IIn%1HDI
4y1}XXa[F[F
48}v 	RQV$vC3x1}!eCFm,C fL$  %  M= J. " !&C!s   DDD#"D#c                     U[         L a  U R                  5       $ U[        L a   [        U 5        gg! [         a     gf = f)NTF)r   isdigitr   r   )r   _types     r?   is_validr   K  sD    |{{}~	#J   		s   1 
>>c           	         S U R                  S5       5        Vs0 s H]  n[        U5      S:X  d  M  [        US   [        US      S   5      (       d  M7  [        US      S   [        US      S   " US   5      _M_     sn$ s  snf )Nc              3   B   #    U  H  oR                  S 5      v   M     g7f): N)r   r   s     r?   r   osd_to_dict.<locals>.<genexpr>\  s     @::d##s   r   r   ri   r   )r   r   r   OSD_KEYS)r   kvs     r?   osd_to_dictr  Y  s     A		$@@Br7a< 	6$RUHRUOA,>? 	6AHRUOA.r!u55@  s   BB*Bc                 2   [         S/nU (       a  U[        R                  " U 5      -  n [        R                  " U[        R
                  [        R                  S9nUR                  S;  a
  [        5       e/ nUR                  (       av  UR                  R                  [        5      R                  [        5       H@  nUR                  5       n[        R!                  U5      (       d  M/  UR#                  U5        MB     U$ ! [         a    [        5       ef = f)Nz--list-langs)rx   ry   )r   ri   )r[   r   r   rz   runr   STDOUTr   rX   ro   rx   r   r   r   r   LANG_PATTERNmatchr   )r   r   r   	languagesr   r   s         r?   get_languagesr  a  s    ~.HEKK'''??$$
 &$&&I}}MM(()9:@@ID::<D!!$''  & J
   '$&&'s   2D Dc                      [         R                  " [        S/[         R                  [        [         R
                  S9n U R                  [        5      nUR                  [        R                  SS 5      R                  S5      tp#UR                  S5      tp# [        U5      nU[        :  d   e U$ ! [         a    [        5       ef = f! [         ["        4 a    [%        SU S35      ef = f)	z1
Returns Version object of the Tesseract version
z	--version)ry   r   rv   
   Nr   -zInvalid tesseract version: "")rz   check_outputr[   r  r	   r   r   rX   r   r   lstripr3   	printable	partitionr   TESSERACT_MIN_VERSIONAssertionErrorr   
SystemExit)outputraw_versionstr_versionr   versions        r?   get_tesseract_versionr  ~  s    
'((K($$$$	
 -- 01K!(()9)9"#)>?II#NOK!++C0OKH$//// N  '$&&' N+ H7}AFGGHs   =B3 C 3C C+c                    ^ U SXX5/m[         R                  U4S j[         R                  U4S j[         R                  U4S j0U   " 5       $ )zK
Returns the result of a Tesseract OCR run on the provided image to string
txtc                     > [        T S/-   6 $ NTr   rU   s   r?   <lambda>!image_to_string.<locals>.<lambda>      044&=Br>   c                     > S[        T 6 0$ )Ntextr!  r"  s   r?   r#  r$    s    f&8$&?@r>   c                     > [        T 6 $ rR   r!  r"  s   r?   r#  r$        148r>   )r/   r9   r;   r<   r   r   r   r   output_typers   rU   s         @r?   image_to_stringr,    sL     5$6D 	B@8 	  r>   c                 F    US;  a  [        SU 35      eXXX5S/n[        U6 $ )zM
Returns the result of a Tesseract OCR run on the provided image to pdf/hocr
>   pdfhocrzUnsupported extension: T)r   r   )r   r   r   r   r   rs   rU   s          r?   image_to_pdf_or_hocrr0    s8     '29+>??dD4@Dt$$r>   c                 ~    [        5       [        :  a
  [        5       eSUR                  5        3nU SXX4S/n[	        U6 $ )zM
Returns the result of a Tesseract OCR run on the provided image to ALTO XML
z-c tessedit_create_alto=1 r   T)r  TESSERACT_ALTO_VERSIONrd   r   r   )r   r   r   r   rs   rU   s         r?   image_to_alto_xmlr3    sG     !77  )&,,.)9:F5$t<Dt$$r>   c                    ^ UR                  5        S3nU SXX5/m[        R                  U4S j[        R                  U4S j[        R                  U4S j0U   " 5       $ )zJ
Returns string containing recognized characters and their box boundaries
z batch.nochop makeboxr   c                     > [        T S/-   6 $ r   r!  r"  s   r?   r#   image_to_boxes.<locals>.<lambda>  r%  r>   c                  0   > [        S[        T 6  3SS5      $ )Nz char left bottom right top page
r   r   r   r   r"  s   r?   r#  r6    s!    \/0BD0I/JK
r>   c                     > [        T 6 $ rR   r!  r"  s   r?   r#  r6    r)  r>   r   r/   r9   r;   r<   r*  s         @r?   image_to_boxesr;    sd     45F5$6D 	B 

 	8   r>   c                     [         (       d
  [        5       e[        SS.n UR                  U5        [        R                  " [        [        U 6 5      40 UD6$ ! [        [
        4 a     N8f = f)N	)quotingsep)
pandas_installedrA   r   updaterl   r   pdread_csvr   r   )rU   r   r   s      r?   get_pandas_outputrD    sf     ""#D1Ff ;;w1489DVDD z" s   A A*)A*c           	      &  ^^ [        5       [        :  a
  [        5       eSUR                  5        3nU SXX5/m[        R
                  U4S j[        R                  UU4S j[        R                  U4S j[        R                  U4S j0U   " 5       $ )zh
Returns string containing box boundaries, confidences,
and other information. Requires Tesseract 3.05+
z-c tessedit_create_tsv=1 r   c                     > [        T S/-   6 $ r   r!  r"  s   r?   r#  image_to_data.<locals>.<lambda>  r%  r>   c                  $   > [        T S/-   T5      $ r   )rD  )rU   pandas_configs   r?   r#  rG    s    "3D6M#
r>   c                  *   > [        [        T 6 SS5      $ )Nr=  rt   r8  r"  s   r?   r#  rG    s    \*<d*CT2Nr>   c                     > [        T 6 $ rR   r!  r"  s   r?   r#  rG    r)  r>   )	r  r  r^   r   r/   r9   r:   r;   r<   )r   r   r   r   r+  rs   rI  rU   s         `@r?   image_to_datarL    s     !66((89F5$6D 	B 
 	N8   r>   r   c                    ^ SUR                  5        3nU SXX5/m[        R                  U4S j[        R                  U4S j[        R                  U4S j0U   " 5       $ )zF
Returns string containing the orientation and script detection (OSD)
z--psm 0 r   c                     > [        T S/-   6 $ r   r!  r"  s   r?   r#  image_to_osd.<locals>.<lambda>)  r%  r>   c                  &   > [        [        T 6 5      $ rR   )r  r   r"  s   r?   r#  rO  *  s    [);T)BCr>   c                     > [        T 6 $ rR   r!  r"  s   r?   r#  rO  +  r)  r>   r:  r*  s         @r?   image_to_osdrR    s_     '(F5$6D 	BC8 	  r>   c                     [        [        R                  5      S:X  a  [        R                  S   S pOs[        [        R                  5      S:X  a=  [        R                  S   S:X  a&  [        R                  S   [        R                  S   pO[        S[        R                  S9  g [
        R                  " U 5       n[        [        X!S95        S S S 5        g ! , (       d  f       g = f! [         a.  n[        [        U5       S	3[        R                  S9   S nAgS nAf[         a:  n[        [        U5      R                   S
U 3[        R                  S9   S nAgS nAff = f)Nr   ri      r      z(Usage: pytesseract [-l lang] input_file
)file)r   r   r  )r   r   argvprintry   r   r   r,  rX   r   r   typer5   )r   r   imgr   s       r?   mainr[  /  s    
388}!d$	SXX!	t 3!chhqk$9

KZZ!S/#12 "!!! Qm#**- a!!""QC(szz:sB   'C* =CC* 
C'#C* 'C* *
E$4$DE$*0EE$__main__rR   )T)r   r   r   )r   Nr   r   r   F)r   )Nr   r   r.  r   )Nr   r   r   )Zrer   r3   rz   r   
contextlibr   csvr   r   r   	functoolsr   globr   ior   osr	   r
   r   r   os.pathr   r   r   pkgutilr   tempfiler   timer   packaging.versionr   r   r   PILr   r[   r   r   r   r@  r   rB  r   compiler	  r   r   r   r   r   r  r  r2  r/   EnvironmentErrorrA   r|   rO   rX   r^   rd   rn   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r<   r,  r0  r3  r;  rD  rL  rR  r[  r5   exitr4   r>   r?   <module>rm     s   	    
 %              '  , # %  g&d2x(4  zz+&  $,c2o159o'/    )  3) 3
&\ &
- 

& 

' 

"  $*  : 	
#LP 		
?: F 
 
8 
 
: 
	
, 
	
%* 
	
%, 
	
2
E 
	
B 
	
*( zL r>   