model: FunASRNano
model_conf:
  lsm_weight: 0.1
  length_normalized_loss: true
audio_encoder: SenseVoiceEncoderSmall
audio_encoder_conf:
  output_size: 512
  attention_heads: 4
  linear_units: 2048
  num_blocks: 50
  tp_blocks: 20
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: pe
  pos_enc_class: SinusoidalPositionEncoder
  normalize_before: true
  kernel_size: 11
  sanm_shfit: 0
  selfattention_layer_type: sanm
  freeze: true
  freeze_layer_num: -1
  feat_permute: true
llm: Qwen3-0.6b
llm_conf:
  hub: hf
  freeze: true
  llm_dtype: bf16
  init_param_path: Qwen3-0.6B
  use_lora: false
  lora_conf:
    freeze_lora: true
    task_type: CAUSAL_LM
    r: 16
    lora_alpha: 32
    lora_dropout: 0.05
    bias: none
    target_modules:
      - q_proj
      - v_proj
    init_param_path: ""
audio_adaptor: Transformer
audio_adaptor_conf:
  downsample_rate: 1
  ffn_dim: 2048
  llm_dim: 1024
  encoder_dim: 512
  n_layer: 2
  freeze: true
ctc_decoder: Transformer
detach_ctc_decoder: true
ctc_decoder_conf:
  downsample_rate: 1
  ffn_dim: 2048
  llm_dim: 512
  encoder_dim: 512
  n_layer: 5
  freeze: false
ctc_weight: 1.0
ctc_conf:
  dropout_rate: 0.0
  ctc_type: builtin
  reduce: true
  ignore_nan_grad: true
frontend: WavFrontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  lfr_m: 7
  lfr_n: 6
  cmvn_file: null
train_conf:
  use_lora: ${llm_conf.use_lora}
  accum_grad: 1
  grad_clip: 5
  max_epoch: 2
  keep_nbest_models: 200
  log_interval: 100
  effective_save_name_excludes:
    - llm.
  resume: true
  validate_interval: 2000
  save_checkpoint_interval: 2000
  avg_nbest_model: 100
  use_bf16: false
  use_deepspeed: true
  deepspeed_config: null
  save_init_model: false
optim: adamw
optim_conf:
  lr: 5.0e-06
  weight_decay: 0.0
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 2500
dataset: FunASRNano
dataset_conf:
  index_ds: FunASRNano
  batch_sampler: BatchSampler
  batch_type: token
  batch_size: 6000
  max_token_length: 3500
  shuffle: true
  sort_size: 1024
  batch_size_scale_ratio_max: 2
  num_workers: 4
  audio_adaptor_downsample_rate: ${audio_adaptor_conf.downsample_rate}
  audio_encoder_downsample_rate: 6
  data_split_num: 256
  batch_size_sample_max: 10
  retry: 2000
  batch_size_token_max: 6000
  max_source_length: 12000
  max_target_length: 2048
  preprocessor_text: TextPreprocessHasRepeatedWords
  preprocessor_text_conf:
    max_ngram_length: 15
    max_occurrences: 10
  prompt_classes: MultiContextPrompt
  prompt_conf:
    max_neg_hotwords_num: 0
    min_neg_hotwords_num: 0
    use_hist: false
    use_one_pass_result: true
    use_hotwords: true
    use_asr_hotwords: true
    chinese_hotwords_list: null
    english_hotwords_list: null
  ctc_tokenizer: SenseVoiceTokenizer
  ctc_target_normalize: true
  ctc_tokenizer_conf:
    vocab_path: null
    is_multilingual: true
    num_languages: 8749
  min_source_length: 10
  batch_size_scale_threshold: 3000
  use_dynamic_output_ratio: 0.0
tokenizer: HuggingfaceTokenizer
tokenizer_conf:
  init_param_path: ${llm_conf.init_param_path}
enable_tf32: true
debug: false
train_data_set_list: null
valid_data_set_list: null
init_param: null
output_dir: null