| conformer_args: |
| dropout_p: 0.2 |
| encoder: |
| - mhsa_pro |
| - conv |
| - ffn |
| encoder_dim: 2048 |
| kernel_size: 3 |
| norm: postnorm |
| num_heads: 8 |
| num_layers: 8 |
| timeshift: false |
| data_args: |
| batch_size: 64 |
| continuum_norm: true |
| create_umap: false |
| data_dir: /data/lamost/data |
| dataset: SpectraDataset |
| exp_num: 4 |
| lc_freq: 0.0208 |
| log_dir: /data/lightSpec/logs |
| max_days_lc: 720 |
| max_len_spectra: 4096 |
| model_name: MultiTaskRegressor |
| num_epochs: 1000 |
| test_run: false |
| model_args: |
| activation: silu |
| avg_output: true |
| beta: 1 |
| checkpoint_num: 1 |
| checkpoint_path: /data/lightSpec/logs/spec_decode2_2025-02-15/MultiTaskRegressor_spectra_decode_3.pth |
| dropout_p: 0.2 |
| encoder_dims: |
| - 64 |
| - 128 |
| - 256 |
| - 1024 |
| - 2048 |
| in_channels: 1 |
| kernel_size: 3 |
| load_checkpoint: true |
| num_layers: 5 |
| num_quantiles: 5 |
| output_dim: 3 |
| stride: 1 |
| transformer_layers: 4 |
| model_name: MultiTaskRegressor |
| model_structure: "DistributedDataParallel(\n (module): MultiTaskRegressor(\n (encoder):\ |
| \ MultiEncoder(\n (backbone): CNNEncoder(\n (activation): SiLU()\n \ |
| \ (embedding): Sequential(\n (0): Conv1d(1, 64, kernel_size=(3,),\ |
| \ stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05,\ |
| \ momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n\ |
| \ )\n (layers): ModuleList(\n (0): ConvBlock(\n \ |
| \ (activation): SiLU()\n (layers): Sequential(\n (0):\ |
| \ Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n \ |
| \ (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n )\n (1): ConvBlock(\n\ |
| \ (activation): SiLU()\n (layers): Sequential(\n \ |
| \ (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n\ |
| \ (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n )\n (2): ConvBlock(\n\ |
| \ (activation): SiLU()\n (layers): Sequential(\n \ |
| \ (0): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n\ |
| \ (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n )\n (3): ConvBlock(\n\ |
| \ (activation): SiLU()\n (layers): Sequential(\n \ |
| \ (0): Conv1d(1024, 2048, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n\ |
| \ (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n )\n (4): ConvBlock(\n\ |
| \ (activation): SiLU()\n (layers): Sequential(\n \ |
| \ (0): Conv1d(2048, 2048, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n\ |
| \ (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n )\n )\n (pool):\ |
| \ MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n \ |
| \ )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n \ |
| \ (blocks): ModuleList(\n (0-7): 8 x ConformerBlock(\n (modlist):\ |
| \ ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n\ |
| \ (query): Linear(in_features=2048, out_features=2048, bias=True)\n\ |
| \ (key): Linear(in_features=2048, out_features=2048, bias=True)\n\ |
| \ (value): Linear(in_features=2048, out_features=2048, bias=True)\n\ |
| \ (rotary_emb): RotaryEmbedding()\n (output):\ |
| \ Linear(in_features=2048, out_features=2048, bias=True)\n )\n \ |
| \ (norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n\ |
| \ )\n (1): PostNorm(\n (module): ConvBlock(\n\ |
| \ (layers): Sequential(\n (0): Conv1d(2048,\ |
| \ 2048, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n \ |
| \ (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n )\n \ |
| \ (norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)\n \ |
| \ )\n (2): PostNorm(\n (module): FeedForwardModule(\n\ |
| \ (sequential): Sequential(\n (0): LayerNorm((2048,),\ |
| \ eps=1e-05, elementwise_affine=True)\n (1): Linear(\n \ |
| \ (linear): Linear(in_features=2048, out_features=8192, bias=True)\n\ |
| \ )\n (2): SiLU()\n (3):\ |
| \ Dropout(p=0.2, inplace=False)\n (4): Linear(\n \ |
| \ (linear): Linear(in_features=8192, out_features=2048, bias=True)\n \ |
| \ )\n (5): Dropout(p=0.2, inplace=False)\n \ |
| \ )\n )\n (norm): LayerNorm((2048,),\ |
| \ eps=1e-05, elementwise_affine=True)\n )\n )\n \ |
| \ )\n )\n )\n )\n (decoder): CNNDecoder(\n (activation):\ |
| \ SiLU()\n (initial_expand): Linear(in_features=2048, out_features=8192, bias=True)\n\ |
| \ (layers): ModuleList(\n (0): Sequential(\n (0): ConvTranspose1d(2048,\ |
| \ 1024, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)\n (1):\ |
| \ BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n (1): Sequential(\n (0): ConvTranspose1d(1024,\ |
| \ 256, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)\n (1):\ |
| \ BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n (2): Sequential(\n (0): ConvTranspose1d(256,\ |
| \ 128, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)\n (1):\ |
| \ BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n )\n (3): Sequential(\n (0): ConvTranspose1d(128,\ |
| \ 64, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)\n (1): BatchNorm1d(64,\ |
| \ eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2):\ |
| \ SiLU()\n )\n (4): Sequential(\n (0): ConvTranspose1d(64,\ |
| \ 64, kernel_size=(4,), stride=(2,), padding=(1,), bias=False)\n (1): BatchNorm1d(64,\ |
| \ eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2):\ |
| \ SiLU()\n )\n )\n (final_conv): ConvTranspose1d(64, 1, kernel_size=(3,),\ |
| \ stride=(1,), padding=(1,))\n )\n (activation): SiLU()\n (regressor):\ |
| \ Sequential(\n (0): Linear(in_features=2048, out_features=1024, bias=True)\n\ |
| \ (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n\ |
| \ (2): SiLU()\n (3): Dropout(p=0.2, inplace=False)\n (4): Linear(in_features=1024,\ |
| \ out_features=15, bias=True)\n )\n )\n)" |
| num_params: 551944464 |
| optim_args: |
| max_lr: 2.0e-5 |
| quantiles: |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 0.75 |
| - 0.9 |
| steps_per_epoch: 3500 |
| warmup_pct: 0.3 |
| weight_decay: 5.0e-6 |
| transforms: "Compose(\n LAMOSTSpectrumPreprocessor(blue_range=(3841, 5800), red_range=(5800,\ |
| \ 8798), resample_step=0.0001)\n ToTensor\n)" |
|
|