
    k*i                        S SK Jr  S SKJr  S SKJrJr  S rS rS r	S r
SSS	.S
 jrSSS	.S jrSSS	.S jrSSS	.S jrg)    )annotations)conv_sequences)is_nonesetupPandasc                J    US-  nSnXBU -  -  nXBU-  -  nXBU-
  U-  -  nUS-  $ )N           g      @ )pattern_lentext_lencommon_charstranspositionssims        Z/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/rapidfuzz/distance/Jaro_py.py_jaro_calculate_similarityr   	   sG    qN
C+%%C(""C>)\99C9    c                T    U (       a  U(       d  g[        X[        X5      S5      nX2:  $ )z;
filter matches below score_cutoff based on string lengths
Fr   )r   min)r   r   score_cutoffr   s       r   _jaro_length_filterr      s)     h
$[C<VXY
ZCr   c                4    U(       d  g[        XUS5      nXC:  $ )zQ
filter matches below score_cutoff based on string lengths and common characters
Fr   )r   )r   r   r   r   r   s        r   _jaro_common_char_filterr      s!     
$[L!
LCr   c                    [        U 5      n[        U5      nSnX2:  a  US-  S-
  nX2U-   :  a  USX$-    nOUS-  S-
  nX#U-   :  a  U SX4-    n XU4$ )z:
find bounds and skip out of bound parts of the sequences
r   r      N)len)s1s2r   r   bounds        r   _jaro_boundsr   (   s     b'K2wH EA!E)))k)*Bq 1$E))&h&'B5=r   N	processorr   c               H   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nU (       d  U(       d  gUc  Sn[        X5      u  p[        U 5      n[        U5      n[	        XEU5      (       d  gUS:X  a  US:X  a  [        U S   US   :H  5      $ [        X5      u  pnS/U-  nS/U-  nSn	[        U 5       H\  u  p[        SX-
  5      n[        X-   US-
  5      n[        XS-   5       H&  nX   (       a  M  X   U:X  d  M  S=Xz'   X'   U	S-  n	  MZ     M^     [        XEX5      (       d  gS=nn[        U5       HC  u  n
nU(       d  M  [        X5       H  nX   (       d  M  US-   n  O   X
   UW   :w  d  M>  US-  nME     [        XEU	U5      $ )a  
Calculates the jaro similarity

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For ratio < score_cutoff 0 is returned instead. Default is None,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 1.0
r	         ?r   r   FT)r   r   r   r   r   floatr   	enumeratemaxr   ranger   r   )r   r   r!   r   r   r   r   s1_flagss2_flagsr   is1_chlowhijktrans_counts1_fs                     r   
similarityr2   =   s   : Mr{{gbkkr]r]bB#FBb'K2wH {lCCaHMRUbe^$$ (MBEw$Hw!H LbM!QYHqL)sF#A;;25E>,00hk!	 $ " $K<VV AX&441';;AA ( u1~q  ' &k\;WWr   c                   [        XX#S9$ )a  
Calculates the normalized jaro similarity

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For ratio < score_cutoff 0 is returned instead. Default is None,
    which deactivates this behaviour.

Returns
-------
normalized similarity : float
    normalized similarity between s1 and s2 as a float between 0 and 1.0
r    )r2   r   r   r!   r   s       r   normalized_similarityr5      s    : b	MMr   c                   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nUb  US:  a  SOSU-
  n[        XUS9nSU-
  nUb  Xc::  a  U$ S$ )a}  
Calculates the jaro distance

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For ratio < score_cutoff 0 is returned instead. Default is None,
    which deactivates this behaviour.

Returns
-------
distance : float
    distance between s1 and s2 as a float between 1.0 and 0.0
r#   N)r   )r   r   r2   )r   r   r!   r   cutoff_distancer   dists          r   distancer9      sy    : Mr{{gbkkr]r]+3|c7IdPSVbPbO
R/
:C9D (D,@4JsJr   c                   [        XX#S9$ )a  
Calculates the normalized jaro distance

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For ratio < score_cutoff 0 is returned instead. Default is None,
    which deactivates this behaviour.

Returns
-------
normalized distance : float
    normalized distance between s1 and s2 as a float between 1.0 and 0.0
r    )r9   r4   s       r   normalized_distancer;      s    : BiKKr   )
__future__r   rapidfuzz._common_pyr   rapidfuzz._utilsr   r   r   r   r   r   r2   r5   r9   r;   r
   r   r   <module>r?      s`    # / 12 WX| NH (K^ Lr   