
    _i                     8   S SK r S SKrS SKrS SKJrJrJr  S SKrS SKJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S	S
KJrJr  S	SKJr  \R6                  " \R8                  \R:                  S9 " S S\5      5       r " S S\	R>                  5      r g)    N)DictOptionalTuple)Models)
TorchModel)Tensor)MODELS)update_conf)Tasks   )
GlobalCMVNload_kaldi_cmvn)FSMN)module_namec                      ^  \ rS rSrSr     SS\S\S\S\S\S\\	   4U 4S	 jjjr
S
 rS\\\4   4S jrS rSrU =r$ )FSMNDecorator   z>A decorator of FSMN for integrating into modelscope framework 	model_dir	cmvn_filebackbone	input_dim
output_dimtrainingc                    > [         T	U ]  " U/UQ70 UD6  SU l        SU l        U(       a  U R	                  X#UU5      U l        gU[
        R                  R                  US5      S.U l        g)aB  initialize the fsmn model from the `model_dir` path.

Args:
    model_dir (str): the model path.
    cmvn_file (str): cmvn file
    backbone (dict): params related to backbone
    input_dim (int): input dimension of network
    output_dim (int): output dimension of network
    training (bool): training or inference mode
Nzconfig.yaml)model_workspaceconfig_path)super__init__model	model_cfg
init_modelospathjoin)
selfr   r   r   r   r   r   argskwargs	__class__s
            g/var/www/html/land-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/kws/nearfield/model.pyr   FSMNDecorator.__init__   sd    & 	4T4V4
i)35DJ $-!ww||I}EDN    c                 \    [        U S5      (       a  U R                  R                  5         g g )Ntmp_dir)hasattrr-   cleanupr%   s    r)   __del__FSMNDecorator.__del__<   s$    4##LL  " $r+   returnc                 p    U R                   b  Ub  U R                   R                  U5      $ U R                  $ )z8
Args:
    input (torch.Tensor): Input tensor (B, T, D)
)r   forwardr    )r%   inputs     r)   r5   FSMNDecorator.forward@   s2    
 ::!e&7::%%e,,>>!r+   c                 x   Ub^  [        U5      u  pV[        [        R                  " U5      R	                  5       [        R                  " U5      R	                  5       5      nOS nSnS n	US   n
US   nUS   nUS   nUS   nUS   nUS   nUS	   nUS
   n[        X:XXUUUUU5      nS nS n[        X4XXUU5      nU$ )N   input_affine_dim
num_layers
linear_dimproj_dim
left_orderright_orderleft_strideright_strideoutput_affine_dim)r   r   torch
from_numpyfloatr   KWSModel)r%   r   r   r   r   meanistdglobal_cmvn
hidden_dimpreprocessingr:   r;   r<   r=   r>   r?   r@   rA   rB   
classifier
activation	kws_models                         r)   r!   FSMNDecorator.init_modelJ   s    (3JD$  &,,.  &,,.K
 K
#$67l+
l+
J'l+
}-}-/$%89	Z k;$&7E 

YJ*j*N	r+   )r   r    )NNi  i'
  F)__name__
__module____qualname____firstlineno____doc__strdictintr   boolr   r1   r   r   r5   r!   __static_attributes____classcell__r(   s   @r)   r   r      s     K #'"&"%#',1    	
 ! $D> B#"S&[ 1 " r+   r   c                   ~  ^  \ rS rSrSrS\S\S\S\\R                     S\\R                     S\R                  S	\R                  S
\R                  4U 4S jjr	S r
S r\R                  " SSS\R                  S94S\R                  S\R                  S\\R                  \R                  4   4S jjrS rSrU =r$ )rF   l   ar  Our model consists of four parts:
1. global_cmvn: Optional, (idim, idim)
2. preprocessing: feature dimension projection, (idim, hdim)
3. backbone: backbone or feature extractor of the whole network, (hdim, hdim)
4. classifier: output layer or classifier of KWS model, (hdim, odim)
5. activation:
    nn.Sigmoid for wakeup word
    nn.Identity for speech command dataset
idimodimhdimrI   rK   r   rL   rM   c	                    > [         T	U ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl	        g)a  
Args:
    idim (int): input dimension of network
    odim (int): output dimension of network
    hdim (int): hidden dimension of network
    global_cmvn (nn.Module): cmvn for input feature, (idim, idim)
    preprocessing (nn.Module): feature dimension projection, (idim, hdim)
    backbone (nn.Module): backbone or feature extractor of the whole network, (hdim, hdim)
    classifier (nn.Module): output layer or classifier of KWS model, (hdim, odim)
    activation (nn.Module): nn.Identity for training, nn.Sigmoid for inference
N)
r   r   r^   r_   r`   rI   rK   r   rL   rM   )
r%   r^   r_   r`   rI   rK   r   rL   rM   r(   s
            r)   r   KWSModel.__init__w   s:    , 				&* $$r+   c                 6    U R                   R                  5       $ N)r   to_kaldi_netr0   s    r)   re   KWSModel.to_kaldi_net   s    }}))++r+   c                 8    U R                   R                  U5      $ rd   )r   to_pytorch_net)r%   
kaldi_files     r)   rh   KWSModel.to_pytorch_net   s    }}++J77r+   r   )dtypexin_cacher3   c                    U R                   b  U R                  U5      nU R                  b  U R                  U5      nU R                  X5      u  pU R                  b  U R                  U5      nU R                  b  U R	                  U5      nX4$ rd   )rI   rK   r   rL   rM   )r%   rl   rm   	out_caches       r)   r5   KWSModel.forward   s    
 '  #A)""1%A}}Q1??&"A??&"A|r+   c                     U R                   b  U R                   R                  5         U R                  R                  5         g rd   )rK   fuse_modulesr   r0   s    r)   rr   KWSModel.fuse_modules   s0    )++-""$r+   )rM   r   rL   rI   r`   r^   r_   rK   )rP   rQ   rR   rS   rT   rW   r   nnModuler   re   rh   rC   zerosrE   r   r   r5   rr   rY   rZ   r[   s   @r)   rF   rF   l   s    %% % 	%
 bii(%  		*% ))% II% II%@,8 "'Q1EKK!H<< ,, 
u||U\\)	*	$% %r+   rF   )!r"   systempfiletypingr   r   r   rC   torch.nnrt   modelscope.metainfor   modelscope.modelsr   modelscope.models.baser   modelscope.models.builderr	   "modelscope.utils.audio.audio_utilsr
   modelscope.utils.constantr   cmvnr   r   fsmnr   register_modulekeyword_spotting"speech_kws_fsmn_char_ctc_nearfieldr   ru   rF    r+   r)   <module>r      s    
 
  ( (   & ( ) , : + -  	99;QJ Q;QhF%ryy F%r+   