# FunAudioLLM/Fun-ASR-Nano-2512
#
# Model card Files Files and versions
#
# Fun-ASR-Nano-2512 / config.yaml
#
# pengzhendong's picture
#
# Update config.yaml
#
# a7088d6 verified about 1 hour ago
#
# history blame contribute delete
#
# 3.14 kB

	model: FunASRNano
	model_conf:
	lsm_weight: 0.1
	length_normalized_loss: true
	audio_encoder: SenseVoiceEncoderSmall
	audio_encoder_conf:
	output_size: 512
	attention_heads: 4
	linear_units: 2048
	num_blocks: 50
	tp_blocks: 20
	dropout_rate: 0.1
	positional_dropout_rate: 0.1
	attention_dropout_rate: 0.1
	input_layer: pe
	pos_enc_class: SinusoidalPositionEncoder
	normalize_before: true
	kernel_size: 11
	sanm_shfit: 0
	selfattention_layer_type: sanm
	freeze: true
	freeze_layer_num: -1
	feat_permute: true
	llm: Qwen3-0.6b
	llm_conf:
	hub: hf
	freeze: true
	llm_dtype: bf16
	init_param_path: Qwen3-0.6B
	use_lora: false
	lora_conf:
	freeze_lora: true
	task_type: CAUSAL_LM
	r: 16
	lora_alpha: 32
	lora_dropout: 0.05
	bias: none
	target_modules:
	- q_proj
	- v_proj
	init_param_path: ""
	audio_adaptor: Transformer
	audio_adaptor_conf:
	downsample_rate: 1
	use_low_frame_rate: true
	ffn_dim: 2048
	llm_dim: 1024
	encoder_dim: 512
	n_layer: 2
	freeze: true
	ctc_decoder: Transformer
	detach_ctc_decoder: true
	ctc_decoder_conf:
	downsample_rate: 1
	ffn_dim: 2048
	llm_dim: 512
	encoder_dim: 512
	n_layer: 5
	freeze: false
	ctc_weight: 1.0
	ctc_conf:
	dropout_rate: 0.0
	ctc_type: builtin
	reduce: true
	ignore_nan_grad: true
	frontend: WavFrontend
	frontend_conf:
	fs: 16000
	window: hamming
	n_mels: 80
	frame_length: 25
	frame_shift: 10
	lfr_m: 7
	lfr_n: 6
	cmvn_file: null
	train_conf:
	use_lora: ${llm_conf.use_lora}
	accum_grad: 1
	grad_clip: 5
	max_epoch: 2
	keep_nbest_models: 200
	log_interval: 100
	effective_save_name_excludes:
	- llm.
	resume: true
	validate_interval: 2000
	save_checkpoint_interval: 2000
	avg_nbest_model: 100
	use_bf16: false
	use_deepspeed: true
	deepspeed_config: null
	save_init_model: false
	optim: adamw
	optim_conf:
	lr: 5.0e-06
	weight_decay: 0.0
	scheduler: warmuplr
	scheduler_conf:
	warmup_steps: 2500
	dataset: FunASR
	dataset_conf:
	index_ds: FunASR
	batch_sampler: BatchSampler
	batch_type: token
	batch_size: 6000
	max_token_length: 3500
	shuffle: true
	sort_size: 1024
	batch_size_scale_ratio_max: 2
	num_workers: 4
	audio_adaptor_downsample_rate: ${audio_adaptor_conf.downsample_rate}
	audio_encoder_downsample_rate: 6
	data_split_num: 256
	batch_size_sample_max: 10
	retry: 2000
	batch_size_token_max: 6000
	max_source_length: 12000
	max_target_length: 2048
	prompt_classes: MultiContextPrompt
	prompt_conf:
	max_neg_hotwords_num: 0
	min_neg_hotwords_num: 0
	use_hist: false
	use_one_pass_result: true
	use_hotwords: true
	use_asr_hotwords: true
	chinese_hotwords_list: null
	english_hotwords_list: null
	ctc_tokenizer: SenseVoiceTokenizer
	ctc_target_normalize: true
	ctc_tokenizer_conf:
	vocab_path: null
	is_multilingual: true
	num_languages: 8749
	min_source_length: 10
	batch_size_scale_threshold: 3000
	use_dynamic_output_ratio: 0.0
	tokenizer: HuggingfaceTokenizer
	tokenizer_conf:
	init_param_path: ${llm_conf.init_param_path}
	enable_tf32: true
	debug: false
	train_data_set_list: null
	valid_data_set_list: null
	init_param: null
	output_dir: null