
    k*i,                        S SK Jr  S SKJrJr  S SKJrJr  S SKJ	r	J
r
  SSS.S jr SS jrSSS.S	 jrSSS.S
 jrSSS.S jrS rSS.S jrSS.S jrg)    )annotations)common_affixconv_sequences)is_nonesetupPandas)EditopEditopsN)	processorscore_cutoffc               p   Ub  U" U 5      n U" U5      nU (       d  g[        X5      u  pS[        U 5      -  S-
  n0 nUR                  nSnU  H  nU" US5      U-  XX'   US-  nM     U H  n	U" U	S5      n
XJ-  nXK-   XK-
  -  nM     [        U5      [        U 5      * S R	                  S5      nUb  X:  a  U$ S$ )a  
Calculates the length of the longest common subsequence

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : int, optional
    Maximum distance between s1 and s2, that is
    considered as a result. If the similarity is smaller than score_cutoff,
    0 is returned instead. Default is None, which deactivates
    this behaviour.

Returns
-------
similarity : int
    similarity between s1 and s2
Nr      0)r   lengetbincount)s1s2r
   r   Sblock	block_getxch1ch2Matchesuress                \/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/rapidfuzz/distance/LCSseq_py.py
similarityr   
   s    < r]r]B#FB	
c"gAE		I	AsA&*
	a  C#KUqu  a&#b'

"
"3
'C'3+>3FQF    c                    U(       d  gS[        U5      -  S-
  nU R                  nU H  nU" US5      nXG-  nXH-   XH-
  -  nM     [        U5      [        U5      * S  R                  S5      n	Ub  X:  a  U	$ S$ Nr   r   r   )r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   s
             r   _block_similarityr#   B   s     	
c"gA		IC#KUqu  a&#b'

"
"3
'C'3+>3FQFr    c                   Ub  U" U 5      n U" U5      n[        X5      u  p[        [        U 5      [        U5      5      n[        X5      nXE-
  nUb  Xc::  a  U$ US-   $ )a  
Calculates the LCS distance in the range [0, max].

This is calculated as ``max(len1, len2) - similarity``.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : int, optional
    Maximum distance between s1 and s2, that is
    considered as a result. If the distance is bigger than score_cutoff,
    score_cutoff + 1 is returned instead. Default is None, which deactivates
    this behaviour.

Returns
-------
distance : int
    distance between s1 and s2

Examples
--------
Find the LCS distance between two strings:

>>> from rapidfuzz.distance import LCSseq
>>> LCSseq.distance("lewenstein", "levenshtein")
2

Setting a maximum distance allows the implementation to select
a more efficient implementation:

>>> LCSseq.distance("lewenstein", "levenshtein", score_cutoff=1)
2

r   )r   maxr   r   )r   r   r
   r   maximumsimdists          r   distancer)   X   si    ^ r]r]B#FB#b'3r7#G
R
C=D (D,@4W|VWGWWr    c               &   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nU (       a  U(       d  g[        X5      u  p[        [	        U 5      [	        U5      5      n[        X5      U-  nUb  XS::  a  U$ S$ )a  
Calculates a normalized LCS similarity in the range [1, 0].

This is calculated as ``distance / max(len1, len2)``.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
    which deactivates this behaviour.

Returns
-------
norm_dist : float
    normalized distance between s1 and s2 as a float between 0 and 1.0
      ?r   r   )r   r   r   r%   r   r)   )r   r   r
   r   r&   norm_sims         r   normalized_distancer-      s    > Mr{{gbkkr]r]RB#FB#b'3r7#G')H$,0H8PqPr    c                   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nS[        X5      -
  nUb  XC:  a  U$ S$ )a	  
Calculates a normalized LCS similarity in the range [0, 1].

This is calculated as ``1 - normalized_distance``

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 1.0.
    For norm_sim < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
norm_sim : float
    normalized similarity between s1 and s2 as a float between 0 and 1.0

Examples
--------
Find the normalized LCS similarity between two strings:

>>> from rapidfuzz.distance import LCSseq
>>> LCSseq.normalized_similarity("lewenstein", "levenshtein")
0.8181818181818181

Setting a score_cutoff allows the implementation to select
a more efficient implementation:

>>> LCSseq.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.9)
0.0

When a different processor is used s1 and s2 do not have to be strings

>>> LCSseq.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
0.81818181818181
g        r+   r   )r   r   r-   )r   r   r
   r   r,   s        r   normalized_similarityr/      s[    d Mr{{gbkkr]r](00H$,0H8PqPr    c                J   U (       d  S/ 4$ S[        U 5      -  S-
  n0 nUR                  nSnU  H  nU" US5      U-  X6'   US-  nM     / nU H*  nU" US5      n	X)-  n
X*-   X*-
  -  nUR                  U5        M,     [        U5      [        U 5      * S  R	                  S5      nX4$ r"   )r   r   appendr   r   )r   r   r   r   r   r   r   matrixr   r   r   r'   s               r   _matrixr3      s    2w	
c"gAE		I	AsA&*
	a  FC#KUqua	  a&#b'

"
"3
'C=r    r
   c               ~   Ub  U" U 5      n U" U5      n[        X5      u  p[        X5      u  p4X[        U 5      U-
   n X[        U5      U-
   n[        X5      u  pV[	        / SS5      n[        U 5      U-   U-   Ul        [        U5      U-   U-   Ul        [        U 5      [        U5      -   SU-  -
  nUS:X  a  U$ S/U-  n	[        U 5      n
[        U5      nUS:w  a  U
S:w  a  XkS-
     SU
S-
  -  -  (       a  US-  nU
S-  n
[        SX-   X-   5      X'   O?US-  nU(       a.  XkS-
     SU
S-
  -  -  (       d  US-  n[        SX-   X-   5      X'   OU
S-  n
US:w  a  U
S:w  a  M  U
S:w  a%  US-  nU
S-  n
[        SX-   X-   5      X'   U
S:w  a  M%  US:w  a%  US-  nUS-  n[        SX-   X-   5      X'   US:w  a  M%  Xl        U$ )u  
Return Editops describing how to turn s1 into s2.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.

Returns
-------
editops : Editops
    edit operations required to turn s1 into s2

Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
described in [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

References
----------
.. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
       Stringology (2004).

Examples
--------
>>> from rapidfuzz.distance import LCSseq
>>> for tag, src_pos, dest_pos in LCSseq.editops("qabxcd", "abycdf"):
...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
 delete s1[0] s2[0]
 delete s1[3] s2[2]
 insert s1[4] s2[2]
 insert s1[6] s2[5]
Nr      r   deleteinsert)	r   r   r   r3   r	   _src_len	_dest_lenr   _editops)r   r   r
   
prefix_len
suffix_lenr'   r2   editopsr(   editop_listcolrows               r   r>   r>     s   X r]r]B#FB)"1J	R:-	.B	R:-	.B"/KCb!QG2w+j8GB*,z9Gr7SWq3w&Dqy&4-K
b'C
b'C
(sax'?aC!Gn-AID1HC &x1A3CS TK1HC F7OqS1W~>	$*8S5EsGW$X! q (sax" (	q"8S-=s?OP (
 (	q"8S-=s?OP (
 #Nr    c               2    [        XUS9R                  5       $ )u  
Return Opcodes describing how to turn s1 into s2.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.

Returns
-------
opcodes : Opcodes
    edit operations required to turn s1 into s2

Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
described in [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

References
----------
.. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
       Stringology (2004).

Examples
--------
>>> from rapidfuzz.distance import LCSseq

>>> a = "qabxcd"
>>> b = "abycdf"
>>> for tag, i1, i2, j1, j2 in LCSseq.opcodes(a, b):
...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
 delete a[0:1] (q) b[0:0] ()
  equal a[1:3] (ab) b[0:2] (ab)
 delete a[3:4] (x) b[2:2] ()
 insert a[4:4] () b[2:3] (y)
  equal a[4:6] (cd) b[3:5] (cd)
 insert a[6:6] () b[5:6] (f)
r4   )r>   
as_opcodes)r   r   r
   s      r   opcodesrD   x  s    d 2Y/::<<r    )N)
__future__r   rapidfuzz._common_pyr   r   rapidfuzz._utilsr   r   !rapidfuzz.distance._initialize_pyr   r	   r   r#   r)   r-   r/   r3   r>   rD    r    r   <module>rJ      su    # = 1 = 5Gx 	G4 7X| -Qh ;Q|8 	]H 	2=r    