
    )i	>                        S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJr  SSKJr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSK J!r!  SSK J"r"  \RF                  " \$5      r% " S S\&5      r' " S S5      r( " S S\(5      r) " S S\(5      r* " S S\*5      r+ " S S\(5      r, " S S\,5      r- " S S\)5      r. " S S \,5      r/ " S! S"\)5      r0 " S# S$\,5      r1 " S% S&5      r2 " S' S(\\   5      r3S)\\4   S*S4S+ jr5\$S,:X  a  \5" \Rl                  5        gg)-a  Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on the Adobe website:

  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalTextIOTupleUnioncastSet   )name2unicode)KWD)PSEOF)	PSKeyword)	PSLiteral)PSStackParser)PSSyntaxError)literal_name)choplist)nunpackc                       \ rS rSrSrg)	CMapError1    N__name__
__module____qualname____firstlineno____static_attributes__r       O/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/pdfminer/cmapdb.pyr   r   1   s    r%   r   c                       \ rS rSrSrS\SS4S jrS\4S jrS\	S	\SS4S
 jr
S\	S\SS4S jrS\S\\\\4   SS4S jrSS jrS\S\\   4S jrSrg)CMapBase5   r   kwargsreturnNc                 .    UR                  5       U l        g N)copyattrsselfr*   s     r&   __init__CMapBase.__init__9   s    28++-
r%   c                 @    U R                   R                  SS5      S:g  $ )NWModer   r/   getr1   s    r&   is_verticalCMapBase.is_vertical<   s    zz~~gq)Q..r%   kvc                      X R                   U'   g r-   r/   )r1   r;   r<   s      r&   set_attrCMapBase.set_attr?   s    

1r%   codecidc                     g r-   r   )r1   rA   rB   s      r&   add_code2cidCMapBase.add_code2cidB       r%   c                     g r-   r   r1   rB   rA   s      r&   add_cid2unichrCMapBase.add_cid2unichrE   rF   r%   c                     g r-   r   )r1   cmaps     r&   use_cmapCMapBase.use_cmapH   rF   r%   c                     [         er-   )NotImplementedError)r1   rA   s     r&   decodeCMapBase.decodeK   s    !!r%   r>   )rL   r(   r+   N)r    r!   r"   r#   debugobjectr2   boolr9   strr?   intrD   r   r   bytesrI   rM   r   rQ   r$   r   r%   r&   r(   r(   5   s    E@ @D @/T /# & T  3 4 # U9eS3H-I d "5 "Xc] "r%   r(   c            	           \ rS rSrS\\\4   SS4S jrS\4S jrS\	SS4S jr
S	\S\\   4S
 jr\R                  SS4S\S\\\\4      S	\\S4   SS4S jjrSrg)CMapO   r*   r+   Nc                 @    [         R                  " U 40 UD6  0 U l        g r-   )r(   r2   code2cidr0   s     r&   r2   CMap.__init__P   s    $)&)+-r%   c                 >    SU R                   R                  S5      -  $ )Nz
<CMap: %s>CMapNamer6   r8   s    r&   __repr__CMap.__repr__T   s    djjnnZ888r%   rL   c                    ^ [        U[        5      (       d   [        [        U5      5      5       eS[        [
        [        4   S[        [
        [        4   SS 4U4S jjmT" U R                  UR                  5        g )Ndstsrcr+   c                    > UR                  5        H.  u  p#[        U[        5      (       a  0 nX@U'   T" XC5        M*  X0U'   M0     g r-   )items
isinstancedict)rd   re   r;   r<   dr.   s        r&   r.   CMap.use_cmap.<locals>.copyZ   s<    ))+a&&+-AFJF &r%   )rh   rZ   rV   typer   rW   rT   r]   )r1   rL   r.   s     @r&   rM   CMap.use_cmapW   sc    $%%6s4:6%	d3;' 	d3;.? 	D 	 	T]]DMM*r%   rA   c              #   *  #    [         R                  SX5        U R                  n[        U5       H^  nX2;   aJ  X#   n[	        U[
        5      (       a  Uv   U R                  nM3  [        [        [
        [        4   U5      nMR  U R                  nM`     g 7f)Nzdecode: %r, %r)	logrS   r]   iterrh   rW   r   r   rT   )r1   rA   rj   ixs        r&   rQ   CMap.decodee   sp     		"D/MMdAvDa%%GAT#v+.2AMM s   BBr   outr]   .c           	      $   Uc  U R                   nSn[        UR                  5       5       Hb  u  pEX44-   n[        U[        5      (       a  UR                  SXe4-  5        M6  U R                  U[        [        [        [        4   U5      US9  Md     g )Nr   zcode %r = cid %d
)rt   r]   rA   )
r]   sortedrg   rh   rW   writedumpr   r   rT   )r1   rt   r]   rA   r;   r<   cs          r&   rx   	CMap.dumps   s|     }}HDX^^-.FQtA!S!!		.!78		cDc6k1BA,FQ	O /r%   r]   )r    r!   r"   r#   r   rV   rW   r2   ra   r(   rM   rX   r   rQ   sysstdoutr   r
   r   rT   r   rx   r$   r   r%   r&   rZ   rZ   O   s    .sCx .T .9# 9+X +$ +"5 "Xc] "  jj04 "	PP 4V,-P CHo	P
 
P Pr%   rZ   c                   0    \ rS rSrS\S\\S4   4S jrSrg)IdentityCMap   rA   r+   .c                 b    [        U5      S-  nU(       a  [        R                  " SU-  U5      $ g)N   z>%dHr   lenstructunpackr1   rA   ns      r&   rQ   IdentityCMap.decode   s*    IN==!T22r%   r   N	r    r!   r"   r#   rX   r   rW   rQ   r$   r   r%   r&   r   r          5 U38_ r%   r   c                   0    \ rS rSrS\S\\S4   4S jrSrg)IdentityCMapByte   rA   r+   .c                 \    [        U5      nU(       a  [        R                  " SU-  U5      $ g)Nz>%dBr   r   r   s      r&   rQ   IdentityCMapByte.decode   s&    I==!T22r%   r   Nr   r   r%   r&   r   r      r   r%   r   c                   |    \ rS rSrS\\\4   SS4S jrS\4S jrS\S\4S jr	\
R                  4S	\SS4S
 jjrSrg)
UnicodeMap   r*   r+   Nc                 @    [         R                  " U 40 UD6  0 U l        g r-   )r(   r2   
cid2unichrr0   s     r&   r2   UnicodeMap.__init__   s    $)&)*,r%   c                 >    SU R                   R                  S5      -  $ )Nz<UnicodeMap: %s>r`   r6   r8   s    r&   ra   UnicodeMap.__repr__   s    !DJJNN:$>>>r%   rB   c                 L    [         R                  SX5        U R                  U   $ )Nget_unichr: %r, %r)ro   rS   r   r1   rB   s     r&   
get_unichrUnicodeMap.get_unichr   s     		&2s##r%   rt   c                     [        U R                  R                  5       5       H  u  p#UR                  SX#4-  5        M     g )Nzcid %d = unicode %r
)rv   r   rg   rw   )r1   rt   r;   r<   s       r&   rx   UnicodeMap.dump   s4    T__2245FQII-67 6r%   r   )r    r!   r"   r#   r   rV   rW   r2   ra   r   r|   r}   r   rx   r$   r   r%   r&   r   r      s]    -sCx -T -?# ?$c $c $ "% 8 8 8 8r%   r   c                   &    \ rS rSrS\S\4S jrSrg)IdentityUnicodeMap   rB   r+   c                 D    [         R                  SX5        [        U5      $ )z+Interpret character id as unicode codepointr   )ro   rS   chrr   s     r&   r   IdentityUnicodeMap.get_unichr   s    		&23xr%   r   N)r    r!   r"   r#   rW   rV   r   r$   r   r%   r&   r   r      s    c c r%   r   c                   *    \ rS rSrS\S\SS4S jrSrg)FileCMap   rA   rB   r+   Nc                 f   [        U[        5      (       a  [        U[        5      (       d$   [        [        U5      [        U5      45      5       eU R                  nUS S  H<  n[        U5      nXS;   a!  [        [        [        [        4   X5   5      nM4  0 nXcU'   UnM>     [        US   5      nX#U'   g )N)	rh   rV   rW   rl   r]   ordr   r   rT   )r1   rA   rB   rj   ry   cits          r&   rD   FileCMap.add_code2cid   s    $$$C)=)= 	
s$Zc#@
 	
= MMcrAQBwc6k*AE2')"  b]"r%   r   )r    r!   r"   r#   rV   rW   rD   r$   r   r%   r&   r   r      s     3 4 r%   r   c                   6    \ rS rSrS\S\\\\4   SS4S jrSr	g)FileUnicodeMap   rB   rA   r+   Nc                    [        U[        5      (       d   [        [        U5      5      5       e[        U[        5      (       aD  [        UR
                  [        5      (       d   e[        UR
                  5      U R                  U'   g [        U[        5      (       a   UR                  SS5      U R                  U'   g [        U[        5      (       a  [        U5      U R                  U'   g [        U5      e)NzUTF-16BEignore)rh   rW   rV   rl   r   namer   r   rX   rQ   r   	TypeErrorrH   s      r&   rI   FileUnicodeMap.add_cid2unichr   s    #s##3Sc^3#dI&&dii----#/		#:DOOC e$$#';;z8#DDOOC c""#&t9DOOC D/!r%   r   )
r    r!   r"   r#   rW   r   r   rX   rI   r$   r   r%   r&   r   r      s(    "# "U9eS3H-I "d "r%   r   c                   8   ^  \ rS rSrS\S\SS4U 4S jjrSrU =r$ )PyCMap   r   moduler+   Nc                    > [         TU ]  US9  UR                  U l        UR                  (       a  SU R
                  S'   g g N)r`   r   r5   )superr2   CODE2CIDr]   IS_VERTICALr/   )r1   r   r   	__class__s      r&   r2   PyCMap.__init__   s:    $'"#DJJw r%   r{   )	r    r!   r"   r#   rV   r   r2   r$   __classcell__r   s   @r&   r   r      s"    $S $# $$ $ $r%   r   c                   <   ^  \ rS rSrS\S\S\SS4U 4S jjrSrU =r	$ )	PyUnicodeMap   r   r   verticalr+   Nc                    > [         TU ]  US9  U(       a!  UR                  U l        SU R                  S'   g UR
                  U l        g r   )r   r2   CID2UNICHR_Vr   r/   CID2UNICHR_H)r1   r   r   r   r   s       r&   r2   PyUnicodeMap.__init__   s>    $'$11DO"#DJJw$11DOr%   r   )
r    r!   r"   r#   rV   r   rU   r2   r$   r   r   s   @r&   r   r      s)    2S 2# 2 2$ 2 2r%   r   c                       \ rS rSr% 0 r\\\4   \S'   0 r	\\\
\   4   \S'    " S S\5      r\S\S\4S j5       r\S\S\4S	 j5       r\SS\S
\S\4S jj5       rSrg)CMapDB   _cmap_cache_umap_cachec                       \ rS rSrSrg)CMapDB.CMapNotFound   r   Nr   r   r%   r&   CMapNotFoundr      s    r%   r   r   r+   c           	         UR                  SS5      nSU-  n[        R                  SU5        [        R                  R                  SS5      [        R                  R                  [        R                  R                  [        5      S5      4nU H  n[        R                  R                  XB5      n[        R                  R                  U5      (       d  MH  [        R                  " U5      n [        [        U5      S[        R                   " UR#                  5       5      5      UR%                  5         s  $    [&        R)                  U5      e! UR%                  5         f = f)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/rL   r   )replacero   rS   osenvironr7   pathjoindirname__file__existsgzipopenrl   rV   pickleloadsreadcloser   r   )clsr   filename
cmap_paths	directoryr   gzfiles          r&   
_load_dataCMapDB._load_data   s    ||D"%!D(		-&JJNN;(>?GGLL2F;

 $I77<<	4Dww~~d##4#D	2v||FKKM/JKLLN $ %%d++ LLNs   07EE#c                    US:X  a	  [        SS9$ US:X  a	  [        SS9$ US:X  a	  [        SS9$ US:X  a	  [        SS9$  U R                  U   $ ! [         a     Of = fU R	                  U5      n[        X5      =U R                  U'   nU$ )Nz
Identity-Hr   )r5   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)r   r   r   KeyErrorr   r   )r   r   datarL   s       r&   get_cmapCMapDB.get_cmap   s    <a((\!a((''#!,,''#!,,	??4(( 		~~d#'-d'99s   A 
AAr   c                      U R                   U   U   $ ! [         a     Of = fU R                  SU-  5      nS Vs/ s H  n[        XU5      PM     Os  snf snU R                   U'   U R                   U   U   $ )Nzto-unicode-%s)FT)r   r   r   r   )r   r   r   r   r<   s        r&   get_unicode_mapCMapDB.get_unicode_map  sx    	??4(22 		~~o45FS Tmd!!<m Tt$X..s    
!!Ar   N)F)r    r!   r"   r#   r   r   rV   r   __annotations__r   r   r   r   r   classmethodr   r   r(   r   rU   r   r   r$   r   r%   r&   r   r      s    %'Kc6k"'13Kc4--.3y  ,c ,c , ,& C H  " /3 /$ /: / /r%   r   c                   \   \ rS rSrS\S\SS4S jrSS jr\" S5      r	\" S	5      r
\" S
5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      r\" S5      rS\S\SS4S jrS\SS4S jrSrg)
CMapParseri  rL   fpr+   Nc                 h    [         R                  " X5        Xl        SU l        [	        5       U l        g )NT)r   r2   rL   _in_cmapset	_warnings)r1   rL   r   s      r&   r2   CMapParser.__init__  s(    t(	#&5r%   c                 F     U R                  5         g ! [         a     g f = fr-   )
nextobjectr   r8   s    r&   runCMapParser.run$  s-    	OO 	  		s    
  s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                    X R                   L a  SU l        U R                  5         gX R                  L a  SU l        gU R                  (       d  gX R                  L a?   U R                  S5      u  u  p4u  p5U R                  R                  [        U5      U5        gX R                  L aN   U R                  S5      u  u  p6U R                  R                  [        R                  [        U5      5      5        gX R                  L a  U R                  5         gX R                   L a  U R                  5         gX R"                  L a  U R                  5         gX R$                  L Ga  U R                  5        VVs/ s H  u  pxUPM	     n	nn['        SU	5       GHO  u  pn[)        U
[*        5      (       d  U R-                  S5        M/  [)        U[*        5      (       d  U R-                  S5        MW  [)        U[.        5      (       d  U R-                  S	5        M  [1        U
5      [1        U5      :w  a  U R-                  S
5        M  U
SS nUSS nX:w  a  U R-                  S5        M  U
SS nUSS n[3        U5      n[3        U5      n[1        U5      n[5        UU-
  S-   5       HC  nU[6        R8                  " SUU-   5      U* S -   nU R                  R;                  UU-   U5        ME     GMR     gX R<                  L a  U R                  5         gX R>                  L a  U R                  5        VVs/ s H  u  pxUPM	     n	nn['        SU	5       HP  u  nn[)        U[*        5      (       d  M  [)        U[.        5      (       d  M4  U R                  R;                  UU5        MR     gX R@                  L a  U R                  5         gX RB                  L Ga  U R                  5        VVs/ s H  u  pxUPM	     n	nn['        SU	5       GH  u  pn[)        U
[*        5      (       d  U R-                  S5        M/  [)        U[*        5      (       d  U R-                  S5        MW  [1        U
5      [1        U5      :w  a  U R-                  S5        M  [3        U
5      n[3        U5      n[)        U[D        5      (       ah  [1        U5      UU-
  S-   :w  a  U R-                  S5        [G        [5        UUS-   5      U5       H"  u  nnU R                  R;                  UU5        M$     GM  [)        U[*        5      (       d   eUSS n[3        U5      nUSS n[1        U5      n[5        UU-
  S-   5       HC  nU[6        R8                  " SUU-   5      U* S -   nU R                  R;                  UU-   U5        ME     GM     gX RH                  L a  U R                  5         gX RJ                  L a  U R                  5        VVs/ s H  u  pxUPM	     n	nn['        SU	5       HY  u  nn[)        U[*        5      (       d  M  [)        U[*        5      (       d  M4  U R                  R;                  [3        U5      U5        M[     gX RL                  L a  U R                  5         gX RN                  L a  U R                  5         gU RQ                  X45        g! [         a     gf = f! [         a     g[        R                   a     gf = fs  snnf s  snnf s  snnf s  snnf )zKToUnicode CMaps

See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
TNFr   r      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>LzThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpoprL   r?   r   r   KEYWORD_USECMAPrM   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rh   rX   
_warn_oncerW   r   r   ranger   packrI   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r1   r  r  _r;   r<   cmapname__objobjs
start_byteend_byterB   start_prefix
end_prefixsvarevarstartendvlenrq   rr   rA   unicode_valuevarbaseprefixs                              r&   
do_keywordCMapParser.do_keyword<  s   
 *** DMKKM***!DM}}$$$#'88A; !!		""<?A6 (((#'88A; !		""6??<3I#JK
 444KKM222KKM...KKM,,,)-7IRCD7/74/@+s!*e44OO$VW!(E22OO$TU!#s++OO$TUz?c(m3OO- )#2%cr]
-OO: !"#}dm4ysU{Q/A$v{{4'CTEF'KKAII,,S1Wa8 0; 0A@ ---KKM+++)-7IRCD7'40ddE**z#s/C/CII,,S$7  1 ---KKM+++)-7IRCD708D0A,t!*e44OO$EF!(E22OO$CDz?c(m3OO$TU
+h'dD))4yC%K!O3F /2%sQw2G.N*]		00mD /O &dE2222rs)C"3<D!#2YFs8D"3;?3"V[[tax%@$%HH		00A> 45 1B: ,,,KKM***)-7IRCD7'40dc5))ju.E.EII,,WS\4@  1 111KKM///KKM		3,A !  !   &&   8P 8 8J 8sC   =Z/ +AZ? 7[$)[*[0#[6/
Z<;Z<?
[![! [!msgc                     XR                   ;  a5  U R                   R                  U5        Sn[        R                  X!-   5        gg)z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r  addro   warning)r1   r8  base_msgs      r&   r  CMapParser._warn_once  s=    nn$NNs#/ 
 KK' %r%   )r   r  rL   )r+   N)r    r!   r"   r#   r(   r   r2   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  rW   r   r6  rV   r  r$   r   r%   r&   r   r     s   X 8   L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23U c U ) U  U n	(c 	(d 	(r%   r   argvr+   c                     SSK Jn  U" S[        5        U SS  nU HR  n[        US5      n[	        5       n[        XT5      R                  5         UR                  5         UR                  5         MT     g )Nr   )warnzThe function main() from cmapdb.py will be removed in 2023. It was probably introduced for testing purposes a long time ago, and no longer relevant. Feel free to create a GitHub issue if you disagree.r   rb)	warningsr@  DeprecationWarningr   r   r   r  r   rx   )r>  r@  argsfnamer   rL   s         r&   mainrF    sj    	> 		 8D%4  "

		  r%   __main__)7__doc__r   loggingr   os.pathr   r   r|   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   
encodingdbr   psparserr   r   r   r   r   r   r   utilsr   r   	getLoggerr    ro   	Exceptionr   r(   rZ   r   r   r   r   r   r   r   r   r   r   rV   rF  r>  r   r%   r&   <module>rQ     s:  	   	    
     %     # # "  !		 	" "42P8 2Pj8 | 8 8" t $"Z " $T $2: 26/ 6/rA(y) A(HtCy T ( zN r%   