
    k*id                       S SK Jr  S SKJr  S SKJr  S SKJrJrJ	r	J
r
  S SKJr  S SKJrJrJr  S rS\0rS	 rS
 rS rSSS.S jrS rSSS.S jrSSS.S jrSSS.S jrSSS.S jrSSS.S jrSSS.S jr SSS.S jr!SSS.S jr"SSS.S jr#SSS.S jr$\" \\5        \" \\5        \" \\5        \" \\5        \" \\5        \" \ \5        \" \!\5        \" \"\5        \" \#\5        \" \$\5        g)    )annotations)ceil)conv_sequences)
ScorerFlagadd_scorer_attrsis_nonesetupPandas)ScoreAlignment)_block_normalized_similaritydistancenormalized_similarityc                 L    SS[         R                  [         R                  -  S.$ )Nd   r   )optimal_scoreworst_scoreflags)r   
RESULT_F64	SYMMETRIC)_kwargss    Q/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/rapidfuzz/fuzz_py.pyget_scorer_flags_fuzzr      s&    &&)=)==     get_scorer_flagsc                <    U(       a  SSU -  U-  -
  OSnX2:  a  U$ S$ )Nr   r    )distlensumscore_cutoffscores       r   _norm_distancer       s)    +1S3:&&sE)50q0r   c                ~   [        U [        [        45      (       a  U R                  5       $ / /nU  Ha  n[        U[        5      (       a  UO
[	        U5      nUR                  5       (       a  UR                  / 5        MM  US   R                  U5        Mc     U Vs/ s H  o"(       d  M  [        U5      PM     sn$ s  snf )N)
isinstancestrbytessplitchrisspaceappendtuple)seqsplitted_seqxchs       r   _split_sequencer/   !   s    #U|$$yy{4LQ$$Q#a&::<<###A&  +0laHE!Hl000s   
B:(B:c                8   U (       d  g[        [        [        U 5      5      [        5      (       a  SR	                  U 5      $ [        [        [        U 5      5      [
        5      (       a  SR	                  U 5      $ / nU  H  nX-  nU[        S5      /-  nM     US S $ )N      r"   )r#   nextiterr$   joinr%   ord)seq_listjoinedr+   s      r   _join_splitted_sequencer:   0   s    $tH~&,,xx!!$tH~&..yy""F3s8*  #2;r   N	processorr   c                   [        5         [        U 5      (       d  [        U5      (       a  gUb  US-  n[        XX#S9nUS-  $ )aj  
Calculates the normalized Indel similarity.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

See Also
--------
rapidfuzz.distance.Indel.normalized_similarity : Normalized Indel similarity

Notes
-----
.. image:: img/ratio.svg

Examples
--------
>>> fuzz.ratio("this is a test", "this is a test!")
96.55171966552734
r   r   r;   )r	   r   indel_normalized_similarity)s1s2r<   r   r   s        r   ratiorA   ?   sD    T Mr{{gbkk')_E3;r   c           	        [        U 5      n[        U 5      n[        U5      n[        SSUSU5      n0 nUR                  nSn	U  H  n
U" U
S5      U	-  Xz'   U	S-  n	M     [	        SU5       Ha  nXS-
     nX;  a  M  [        XpUSU US9nXR                  :  d  M0  U=Ul        nSUl        Xl        UR                  S:X  d  MX  SUl        Us  $    [	        XT-
  5       Hg  nXU-   S-
     nX;  a  M  [        XpXX-    US9nXR                  :  d  M4  U=Ul        nXl        X-   Ul        UR                  S:X  d  M^  SUl        Us  $    [	        XT-
  U5       H\  nX   nX;  a  M  [        XpXS US9nXR                  :  d  M,  U=Ul        nXl        XVl        UR                  S:X  d  MS  SUl        Us  $    U=R                  S-  sl        U$ )zC
implementation of partial_ratio. This assumes len(s1) <= len(s2).
r      Nr   r   )	setlenr
   getrange!indel_block_normalized_similarityr   
dest_startdest_end)r?   r@   r   s1_char_setlen1len2resblock	block_getr-   ch1isubstr_lastls_ratiosubstr_firsts                  r   _partial_ratio_implrW   t   s    b'Kr7Dr7D
AtQ
-CE		I	AsA&*
	a  1d^Qi) 5U2AUabii'//CICNLyyA~	
  4;TA&) 5Uqx@P_klii'//CIN8CLyyA~	
   4;%u* 5U2Uabii'//CINLyyA~	
 & IIIJr   c               6    [        XX#S9nUc  gUR                  $ )u%	  
Searches for the optimal alignment of the shorter string in the
longer string and returns the fuzz.ratio for this alignment.

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
Depending on the length of the needle (shorter string) different
implementations are used to improve the performance.

short needle (length ≤ 64):
    When using a short needle length the fuzz.ratio is calculated for all
    alignments that could result in an optimal alignment. It is
    guaranteed to find the optimal alignment. For short needles this is very
    fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst
    case performance of ``O(NM)``.

.. image:: img/partial_ratio_short_needle.svg

long needle (length > 64):
    For long needles a similar implementation to FuzzyWuzzy is used.
    This implementation only considers alignments which start at one
    of the longest common substrings. This results in a worst case performance
    of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.
    The following Python code shows the concept:

    .. code-block:: python

        blocks = SequenceMatcher(None, needle, longer, False).get_matching_blocks()
        score = 0
        for block in blocks:
            long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0
            long_end = long_start + len(shorter)
            long_substr = longer[long_start:long_end]
            score = max(score, fuzz.ratio(needle, long_substr))

    This is a lot faster than checking all possible alignments. However it
    only finds one of the best alignments and not necessarily the optimal one.

.. image:: img/partial_ratio_long_needle.svg

Examples
--------
>>> fuzz.partial_ratio("this is a test", "this is a test!")
100.0
r;   r   )partial_ratio_alignmentr   )r?   r@   r<   r   	alignments        r   partial_ratior[      s$    N ()_I??r   c                  [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nUc  SnU (       d  U(       d  [        SSSSS5      $ [        X5      u  p[	        U 5      n[	        U5      nXE::  a  U nUnOUnU n[        XgUS-  5      nUR                  S:w  a  XE:X  a  [        X8R                  5      n[        XvUS-  5      n	U	R                  UR                  :  aA  [        U	R                  U	R                  U	R                  U	R                  U	R                  5      nUR                  U:  a  gXE::  a  U$ [        UR                  UR                  UR                  UR                  UR                  5      $ )a  
Searches for the optimal alignment of the shorter string in the
longer string and returns the fuzz.ratio and the corresponding
alignment.

Parameters
----------
s1 : str | bytes
    First string to compare.
s2 : str | bytes
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff None is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
alignment : ScoreAlignment, optional
    alignment between s1 and s2 with the score as a float between 0 and 100

Examples
--------
>>> s1 = "a certain string"
>>> s2 = "cetain"
>>> res = fuzz.partial_ratio_alignment(s1, s2)
>>> res
ScoreAlignment(score=83.33333333333334, src_start=2, src_end=8, dest_start=0, dest_end=6)

Using the alignment information it is possible to calculate the same fuzz.ratio

>>> fuzz.ratio(s1[res.src_start:res.src_end], s2[res.dest_start:res.dest_end])
83.33333333333334
Nr   g      Y@r   )r	   r   r
   r   rF   rW   r   maxrJ   rK   	src_startsrc_end)
r?   r@   r<   r   rM   rN   shorterlongerrO   res2s
             r   rY   rY     sL   X Mr{{gbkkr]r]beQ1a00B#FBr7Dr7D|
g|c/A
BC
yyCDL<3"6L34FG::		! T__dmmT^^]a]i]ijC
yy<|
#))S^^S\\3==RUR]R]^^r   c                   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      n[        X5      u  p[        [	        [        U 5      5      5      n[        [	        [        U5      5      5      n[        XEUS9$ )a#  
Sorts the words in the strings and calculates the fuzz.ratio between them

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/token_sort_ratio.svg

Examples
--------
>>> fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")
100.0
r   rD   )r	   r   r   r:   sortedr/   rA   r?   r@   r<   r   	sorted_s1	sorted_s2s         r   token_sort_ratiorh   W  sw    L Mr{{gbkkr]r]B#FB'r/B(CDI'r/B(CDILAAr   c               d   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nUc  Sn[        X5      u  p[        [	        U 5      5      n[        [	        U5      5      nU(       a  U(       d  gUR                  U5      nUR                  U5      nUR                  U5      nU(       a  U(       a  U(       d  g[        [        U5      5      n	[        [        U5      5      n
[        U	5      n[        U
5      n[        [        U5      5      nXS:g  -   U-   nXS:g  -   U-   nSn[        X-   SUS-  -
  -  5      n[        XUS9nUU::  a  [        UX-   U5      nU(       d  U$ US:g  U-   n[        UX-   U5      nUS:g  U-   n[        UX-   U5      n[        UUU5      $ )a  
Compares the words in the strings based on unique and common words between them
using fuzz.ratio

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/token_set_ratio.svg

Examples
--------
>>> fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
83.8709716796875
>>> fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
100.0
# Returns 100.0 if one string is a subset of the other, regardless of extra content in the longer string
>>> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear")
100.0
# Score is reduced only when there is explicit disagreement in the two strings
>>> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear but not a cat")
92.3076923076923
r   r   g        rC   rD   )r	   r   r   rE   r/   intersection
differencer:   rd   rF   r   indel_distancer    r]   )r?   r@   r<   r   tokens_atokens_b	intersectdiff_abdiff_badiff_ab_joineddiff_ba_joinedab_lenba_lensect_lensect_ab_lensect_ba_lenresultcutoff_distancer   sect_ab_distsect_ab_ratiosect_ba_distsect_ba_ratios                          r   token_set_ratior     s   ^ Mr{{gbkkr]r]B#FB?2&'H?2&'H 8%%h/I!!(+G!!(+G ',VG_=N,VG_=N F F*956H !m,v5K!m,v5KFK5!lS>P:PQRO.WDk&?N 
 MV+L"<1GVMMV+L"<1GVMv}m44r   c          
         [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      n[        [        XSUS9[	        XSUS95      $ )a  
Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio
(faster than manually executing the two functions)

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/token_ratio.svg
r   Nr;   )r	   r   r]   r   rh   r?   r@   r<   r   s       r   token_ratior     s]    D Mr{{gbkkr]r] $\J4lK r   c                   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      n[        X5      u  p[        [	        [        U 5      5      5      n[        [	        [        U5      5      5      n[        XEUS9$ )a  
sorts the words in the strings and calculates the fuzz.partial_ratio between them

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/partial_token_sort_ratio.svg
r   rD   )r	   r   r   r:   rd   r/   r[   re   s         r   partial_token_sort_ratior   +  sw    B Mr{{gbkkr]r]B#FB'r/B(CDI'r/B(CDILIIr   c                  [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      n[        X5      u  p[        [	        U 5      5      n[        [	        U5      5      nU(       a  U(       d  gUR                  U5      (       a  g[        [        UR                  U5      5      5      n[        [        UR                  U5      5      5      n[        XgUS9$ )a  
Compares the words in the strings based on unique and common words between them
using fuzz.partial_ratio

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/partial_token_set_ratio.svg
r   r   rD   )
r	   r   r   rE   r/   rj   r:   rd   rk   r[   )r?   r@   r<   r   rm   rn   rp   rq   s           r   partial_token_set_ratior   Z  s    D Mr{{gbkkr]r]B#FB?2&'H?2&'H 8 X&&%fX-@-@-J&KLG%fX-@-@-J&KLGEEr   c                  [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nUc  Sn[        X5      u  p[        U 5      n[        U5      n[	        U5      n[	        U5      nUR                  U5      (       a  gUR                  U5      nUR                  U5      n	[        [        [        U5      5      [        [        U5      5      US9n
[        U5      [        U5      :X  a  [        U5      [        U	5      :X  a  U
$ [        X:5      n[        U
[        [        [        U5      5      [        [        U	5      5      US95      $ )a  
Helper method that returns the maximum of fuzz.partial_token_set_ratio and
fuzz.partial_token_sort_ratio (faster than manually executing the two functions)

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/partial_token_ratio.svg
r   r   rD   )r	   r   r   r/   rE   rj   rk   r[   r:   rd   rF   r]   )r?   r@   r<   r   tokens_split_atokens_split_brm   rn   rp   rq   ry   s              r   partial_token_ratior     s7   D Mr{{gbkkr]r]B#FB$R(N$R(N>"H>"H X&&!!(+G!!(+G~ 67~ 67!F >c'l*s>/Bc'l/R|,L#F7O4#F7O4%	
 r   c          
        [        5         [        U 5      (       d  [        U5      (       a  gSnUb  U" U 5      n U" U5      nU (       a  U(       d  gUc  Sn[        U 5      n[        U5      nXV:  a  XV-  OXe-  n[        XUS9nUS:  a&  [	        X85      U-  n[	        U[        XUSS9U-  5      $ US::  a  SOS	n	[	        X85      U	-  n[	        U[        XUS9U	-  5      n[	        X85      U-  n[	        U[        XUSS9U-  U	-  5      $ )
a  
Calculates a weighted ratio based on the other ratio algorithms

Parameters
----------
s1 : str
    First string to compare.
s2 : str
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Notes
-----
.. image:: img/WRatio.svg
r   gffffff?NrD   g      ?)r   r<   g       @g?g333333?)r	   r   rF   rA   r]   r   r[   r   )
r?   r@   r<   r   UNBASE_SCALErM   rN   	len_ratio	end_ratioPARTIAL_SCALEs
             r   WRatior     s#   B Mr{{gbkkLr]r] Rr7Dr7D#{Ib<8I3<3lB\TJ\Y
 	

 %+CM|/-?LI}R,OR__`I|/,>LBNQ]]`mm r   c                   [        5         [        U 5      (       d  [        U5      (       a  gUb  U" U 5      n U" U5      nU (       a  U(       d  g[        XUS9$ )ao  
Calculates a quick ratio between two strings using fuzz.ratio.

Since v3.0 this behaves similar to fuzz.ratio with the exception that this
returns 0 when comparing two empty strings

Parameters
----------
s1 : Sequence[Hashable]
    First string to compare.
s2 : Sequence[Hashable]
    Second string to compare.
processor: callable, optional
    Optional callable that is used to preprocess the strings before
    comparing them. Default is None, which deactivates this behaviour.
score_cutoff : float, optional
    Optional argument for a score threshold as a float between 0 and 100.
    For ratio < score_cutoff 0 is returned instead. Default is 0,
    which deactivates this behaviour.

Returns
-------
similarity : float
    similarity between s1 and s2 as a float between 0 and 100

Examples
--------
>>> fuzz.QRatio("this is a test", "this is a test!")
96.55171966552734
r   rD   )r	   r   rA   r   s       r   QRatior   0  sM    J Mr{{gbkkr]r] Rl33r   )%
__future__r   mathr   rapidfuzz._common_pyr   rapidfuzz._utilsr   r   r   r	   rapidfuzz.distancer
   rapidfuzz.distance.Indel_pyr   rI   r   rl   r   r>   r   fuzz_attributer    r/   r:   rA   rW   r[   rY   rh   r   r   r   r   r   r   r   r   r   r   <module>r      sW   #  / O O -  %&;<1
1& 2j?L Kd P_n 1Bp l5f .j ,Jf 9F@ Mh G\ 14h  '  / !> 2 . 1 n - )> : (. 9 $n 5  (  (r   