
    k*iB                        S SK Jr  S SKJrJr  S SKJrJr  S SKJ	r
  S SKJrJr  S rS rS rS	S
S
S
S.S jrS	S
S
S
S.S jrS	S
S
S
S.S jrS	S
S
S
S.S jrS rS
S
S.S jrS
S
S.S jrg
)    )annotations)common_affixconv_sequences)is_nonesetupPandas)Indel_py)EditopEditopsc                    [        U 5      n[        U5      nUu  pVnX6-  XE-  -   nX4:  a  [        XU-  X4-
  U-  -   5      nU$ [        XU-  XC-
  U-  -   5      nU$ )N)lenmin)	s1s2weightslen1len2insertdeletereplacemax_dists	            a/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/rapidfuzz/distance/Levenshtein_py.py_levenshtein_maximumr      ss    r7Dr7D%FG}t},H|x4;&2H!HI O x4;&2H!HIO    c                &   [        U 5      nUu  pEn[        [        SUS-   U-  U5      5      nU H[  nUS   n	US==   U-  ss'   [        U5       H7  n
U	nX
   U:w  a  [        Xz   U-   XzS-      U-   X-   5      nXzS-      n	XU
S-   '   M9     M]     US   $ )Nr      )r   listranger   )r   r   r   r   r   r   r   cachech2tempixs               r   _uniform_genericr$      s    r7D%FGq4!8v-v67EQxaFtAAu|6)5Q<&+@$.QQ<D!a%L   9r   c                   U (       d  [        U5      $ S[        U 5      -  S-
  nSn[        U 5      nS[        U 5      S-
  -  n0 nUR                  nSnU  H  n	U" U	S5      U-  Xi'   US-  nM     U HW  n
U" U
S5      nUnX-  U-   U-  U-  U-  nX=U-  ) -  nX-  nXNU-  S:g  -  nXOU-  S:g  -  nUS-  S-  nUS-  nXU-  ) -  nX-  nMY     U$ Nr   r   )r   get)r   r   VPVNcurrDistmaskblock	block_getr#   ch1r    PM_jXD0HPHNs                   r   _uniform_distancer4   ,   s   2w
s2w,!	B	
B2wHR1DE		I	AsA&*
	a  a "}"a'",G*_W$Y1$$$Y1$$Ag]1WG*_W " Or   r   r   r   N)r   	processorscore_cutoff
score_hintc                   UnUb  U" U 5      n U" U5      n[        X5      u  pUb  US:X  a  [        X5      nO)US:X  a  [        R                  " X5      nO[	        XU5      nUb  Xt::  a  U$ US-   $ )a  
Calculates the minimum number of insertions, deletions, and substitutions
required to change one sequence into the other according to Levenshtein with custom
costs for insertion, deletion and substitution

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
weights : tuple[int, int, int] or None, optional
    The weights for the three operations in the form
    (insertion, deletion, substitution). Default is (1, 1, 1),
    which gives all three operations a weight of 1.
processor : callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : int, optional
    Maximum distance between s1 and s2, that is
    considered as a result. If the distance is bigger than score_cutoff,
    score_cutoff + 1 is returned instead. Default is None, which deactivates
    this behaviour.
score_hint : int, optional
    Expected distance between s1 and s2. This is used to select a
    faster implementation. Default is None, which deactivates this behaviour.

Returns
-------
distance : int
    distance between s1 and s2

Raises
------
ValueError
    If unsupported weights are provided a ValueError is thrown

Examples
--------
Find the Levenshtein distance between two strings:

>>> from rapidfuzz.distance import Levenshtein
>>> Levenshtein.distance("lewenstein", "levenshtein")
2

Setting a maximum distance allows the implementation to select
a more efficient implementation:

>>> Levenshtein.distance("lewenstein", "levenshtein", score_cutoff=1)
2

It is possible to select different weights by passing a `weight`
tuple.

>>> Levenshtein.distance("lewenstein", "levenshtein", weights=(1,1,2))
3
r5   )r   r      r   )r   r4   Indeldistancer$   )r   r   r   r6   r7   r8   _dists           r   r<   r<   P   s    D 	Ar]r]B#FB'Y. (	I	~~b%0 (D,@4W|VWGWWr   c                   UnUb  U" U 5      n U" U5      n[        X5      u  pU=(       d    Sn[        XU5      n[        XUS9nXx-
  n	Ub  X:  a  U	$ S$ )aO  
Calculates the levenshtein similarity in the range [max, 0] using custom
costs for insertion, deletion and substitution.

This is calculated as ``max - distance``, where max is the maximal possible
Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
weights : tuple[int, int, int] or None, optional
    The weights for the three operations in the form
    (insertion, deletion, substitution). Default is (1, 1, 1),
    which gives all three operations a weight of 1.
processor : callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : int, optional
    Maximum distance between s1 and s2, that is
    considered as a result. If the similarity is smaller than score_cutoff,
    0 is returned instead. Default is None, which deactivates
    this behaviour.
score_hint : int, optional
    Expected similarity between s1 and s2. This is used to select a
    faster implementation. Default is None, which deactivates this behaviour.

Returns
-------
similarity : int
    similarity between s1 and s2

Raises
------
ValueError
    If unsupported weights are provided a ValueError is thrown
r5   r   r   )r   r   r<   )
r   r   r   r6   r7   r8   r=   maximumr>   sims
             r   
similarityrC      sl    ` 	Ar]r]B#FB"G"273GBG,D
.C'3+>3FQFr   c                  Un[        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      n[        X5      u  pU=(       d    Sn[        XU5      n[	        XUS9nU(       a  X-  OSn	Ub  X::  a  U	$ S$ )aw  
Calculates a normalized levenshtein distance in the range [1, 0] using custom
costs for insertion, deletion and substitution.

This is calculated as ``distance / max``, where max is the maximal possible
Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
weights : tuple[int, int, int] or None, optional
    The weights for the three operations in the form
    (insertion, deletion, substitution). Default is (1, 1, 1),
    which gives all three operations a weight of 1.
processor : callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For norm_dist > score_cutoff 1.0 is returned instead. Default is None,
    which deactivates this behaviour.
score_hint : float, optional
    Expected normalized distance between s1 and s2. This is used to select a
    faster implementation. Default is None, which deactivates this behaviour.

Returns
-------
norm_dist : float
    normalized distance between s1 and s2 as a float between 1.0 and 0.0

Raises
------
ValueError
    If unsupported weights are provided a ValueError is thrown
      ?r5   r@   r   r   )r   r   r   r   r<   )
r   r   r   r6   r7   r8   r=   rA   r>   	norm_dists
             r   normalized_distancerG      s    ^ 	AMr{{gbkkr]r]B#FB"G"273GBG,D")qI%-1J9RQRRr   c                   Un[        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      n[        X5      u  pU=(       d    Sn[        XUS9nSU-
  nUb  X:  a  U$ S$ )a  
Calculates a normalized levenshtein similarity in the range [0, 1] using custom
costs for insertion, deletion and substitution.

This is calculated as ``1 - normalized_distance``

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
weights : tuple[int, int, int] or None, optional
    The weights for the three operations in the form
    (insertion, deletion, substitution). Default is (1, 1, 1),
    which gives all three operations a weight of 1.
processor : callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For norm_sim < score_cutoff 0 is returned instead. Default is None,
    which deactivates this behaviour.
score_hint : int, optional
    Expected normalized similarity between s1 and s2. This is used to select a
    faster implementation. Default is None, which deactivates this behaviour.

Returns
-------
norm_sim : float
    normalized similarity between s1 and s2 as a float between 0 and 1.0

Raises
------
ValueError
    If unsupported weights are provided a ValueError is thrown

Examples
--------
Find the normalized Levenshtein similarity between two strings:

>>> from rapidfuzz.distance import Levenshtein
>>> Levenshtein.normalized_similarity("lewenstein", "levenshtein")
0.81818181818181

Setting a score_cutoff allows the implementation to select
a more efficient implementation:

>>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.85)
0.0

It is possible to select different weights by passing a `weight`
tuple.

>>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", weights=(1,1,2))
0.85714285714285

When a different processor is used s1 and s2 do not have to be strings

>>> Levenshtein.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
0.81818181818181
g        r5   r@   rE   r   )r   r   r   rG   )	r   r   r   r6   r7   r8   r=   rF   norm_sims	            r   normalized_similarityrJ     s{    N 	AMr{{gbkkr]r]B#FB"G#BG<IYH$,0H8PqPr   c                   U (       d  [        U5      / / 4$ S[        U 5      -  S-
  nSn[        U 5      nS[        U 5      S-
  -  n0 nUR                  nSnU  H  n	U" U	S5      U-  Xi'   US-  nM     / n
/ nU H}  nU" US5      nUnX-  U-   U-  U-  U-  nX?U-  ) -  nX-  nUUU-  S:g  -  nUUU-  S:g  -  nUS-  S-  nUS-  nUUU-  ) -  nUU-  nU
R                  U5        UR                  U5        M     XJU4$ r&   )r   r'   append)r   r   r(   r)   r*   r+   r,   r-   r#   r.   	matrix_VP	matrix_VNr    r/   r0   r1   r2   r3   s                     r   _matrixrO   v  sS   BR  
s2w,!	B	
B2wHR1DE		I	AsA&*
	a  IIa "}"a'",G*_WR$Y1$$R$Y1$$Ag]1WBG*_"W% ( ++r   r6   r8   c                  UnUb  U" U 5      n U" U5      n[        X5      u  p[        X5      u  pVX[        U 5      U-
   n X[        U5      U-
   n[        X5      u  pxn	[	        / SS5      n
[        U 5      U-   U-   U
l        [        U5      U-   U-   U
l        US:X  a  U
$ S/U-  n[        U 5      n[        U5      nUS:w  a  US:w  a  XS-
     SUS-
  -  -  (       a  US-  nUS-  n[        SX-   X-   5      X'   OaUS-  nU(       a.  XS-
     SUS-
  -  -  (       a  US-  n[        SX-   X-   5      X'   O'US-  nX   X   :w  a  US-  n[        SX-   X-   5      X'   US:w  a  US:w  a  M  US:w  a%  US-  nUS-  n[        SX-   X-   5      X'   US:w  a  M%  US:w  a%  US-  nUS-  n[        SX-   X-   5      X'   US:w  a  M%  Xl        U
$ )u  
Return Editops describing how to turn s1 into s2.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor : callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_hint : int, optional
    Expected distance between s1 and s2. This is used to select a
    faster implementation. Default is None, which deactivates this behaviour.

Returns
-------
editops : Editops
    edit operations required to turn s1 into s2

Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
described [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

References
----------
.. [8] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
       Stringology (2004).

Examples
--------
>>> from rapidfuzz.distance import Levenshtein
>>> for tag, src_pos, dest_pos in Levenshtein.editops("qabxcd", "abycdf"):
...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
 delete s1[1] s2[0]
replace s1[3] s2[2]
 insert s1[6] s2[5]
Nr   r   r   r   r   )	r   r   r   rO   r
   _src_len	_dest_lenr	   _editops)r   r   r6   r8   r=   
prefix_len
suffix_lenr>   r(   r)   editopseditop_listcolrows                 r   rW   rW     s)   ^ 	Ar]r]B#FB)"1J	R:-	.B	R:-	.B2?LDbb!QG2w+j8GB*,z9Gqy&4-K
b'C
b'C
(saxAg;!a.)AID1HC &x1A3CS TK1HC 7qS1W~6	$*8S5EsGW$X!q 7bg%AID(.y#:JCL\(]K%' (sax* (	q"8S-=s?OP (
 (	q"8S-=s?OP (
 #Nr   c               2    [        XX#S9R                  5       $ )u.  
Return Opcodes describing how to turn s1 into s2.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor : callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_hint : int, optional
    Expected distance between s1 and s2. This is used to select a
    faster implementation. Default is None, which deactivates this behaviour.

Returns
-------
opcodes : Opcodes
    edit operations required to turn s1 into s2

Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
described [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

References
----------
.. [9] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
       Stringology (2004).

Examples
--------
>>> from rapidfuzz.distance import Levenshtein

>>> a = "qabxcd"
>>> b = "abycdf"
>>> for tag, i1, i2, j1, j2 in Levenshtein.opcodes("qabxcd", "abycdf"):
...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
 delete a[0:1] (q) b[0:0] ()
  equal a[1:3] (ab) b[0:2] (ab)
replace a[3:4] (x) b[2:3] (y)
  equal a[4:6] (cd) b[3:5] (cd)
 insert a[6:6] () b[5:6] (f)
rP   )rW   
as_opcodes)r   r   r6   r8   s       r   opcodesr]     s    j 2YFQQSSr   )
__future__r   rapidfuzz._common_pyr   r   rapidfuzz._utilsr   r   rapidfuzz.distancer   r;   !rapidfuzz.distance._initialize_pyr	   r
   r   r$   r4   r<   rC   rG   rJ   rO   rW   r]    r   r   <module>rd      s    # = 1 0 =$!P OXl :GB =SH TQn&,Z dV 5Tr   